8. File Contents

Introduction

#-----------------------------
# datafile is an open File (or any other IO object).
# Print the length of each line, measured after its line terminator
# has been stripped.
datafile.readlines.each do |text|
    text.chomp!
    puts text.length
end
#-----------------------------
# Compact form: print the length of each line without its terminator.
# The non-destructive chomp is required when chaining: chomp! returns nil
# when there is nothing to strip (typically an unterminated last line),
# and nil does not respond to length.
datafile.readlines.each { |line|
    puts line.chomp.length
}
#-----------------------------
# Read every remaining line of datafile into an array, one element per line.
lines = datafile.readlines
#-----------------------------
# Read the rest of file into a single string.
whole_file = file.read
#-----------------------------
# -0<octal> sets the input record separator; 040 is a space, so gets
# reads up to and including the first space.
# ruby -040 -e 'word = gets; puts "First word is #{word}"'
#-----------------------------
# Records are terminated by "%%\n". chomp! is used (rather than chomp, whose
# result would simply be discarded) so the terminator is really stripped
# from $_ before the record is matched and printed.
# ruby -ne 'BEGIN { $/="%%\n" }; $_.chomp!; puts $_ if( $_=~/Unix/i)' fortune.dat
#-----------------------------
# print writes its arguments back to back; it adds no newline of its own.
handle.print "one", "two", "three" # "onetwothree"
puts "Baa baa black sheep."        # sent to $stdout
#-----------------------------
# Read up to 4096 bytes from handle. IO#read with a length returns nil
# (not an empty string) at end of file, so guard before measuring it.
buffer = handle.read(4096)
rv     = buffer ? buffer.bytesize : 0  # number of bytes actually read
#-----------------------------
# Cut (or extend) an already-open, writable file to exactly length bytes.
handle.truncate(length)
# Truncate a file by name. File.truncate keeps the first length bytes
# intact; opening the file with mode 'w' would have emptied it before the
# truncate call ever ran. The pid file lives inside /tmp, hence the slash.
File.truncate("/tmp/#{$$}.pid", length)
#-----------------------------
# Remember the current byte offset (IO#tell and IO#pos are the same method).
pos = datafile.tell
$stdout.puts "I'm #{pos} bytes from the start of datafile"
#-----------------------------
# Move to the end of logfile.
logfile.seek(0, IO::SEEK_END)
# Return to the saved absolute offset (SEEK_SET is also the default whence).
datafile.seek(pos, IO::SEEK_SET)
# Step back 20 bytes relative to the current position.
out.seek(-20, IO::SEEK_CUR)
#-----------------------------
# Unbuffered write. syswrite returns the number of BYTES written, so it is
# compared with bytesize; length counts characters and differs for
# multibyte strings.
written = datafile.syswrite(mystring)
unless written == mystring.bytesize
    raise RuntimeError, "short write: #{written} of #{mystring.bytesize} bytes"
end
# Unbuffered read of at most 256 bytes; sysread raises EOFError at end of file.
block = infile.sysread(256)   # no equivalent to perl offset parameter in sysread
puts "only read #{block.length} bytes" if 256 != block.length
#-----------------------------
# Seeking 0 bytes from the current position just reports the offset.
pos = handle.sysseek(0, IO::SEEK_CUR)  # don't change position

Reading Lines with Continuation Characters

# Join physical lines that end in a backslash into one logical record.
while (line = fh.gets)
    line.chomp!
    continuation = nil
    # Strip a trailing backslash and, if there was one, fetch the next
    # physical line as a side effect of the substitution.
    line.gsub!(/\\$/) do |_slash|
        continuation = fh.gets
        ''
    end
    if continuation
        line += continuation
        # redo re-runs the loop body on the extended line without reading
        # a fresh line in the loop condition.
        redo
    end
    # process full record in line here
end
#-----------------------------
# DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
#         $(TEXINFOS) $(INFOS) $(MANS) $(DATA)
# DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
#         $(TEXINFOS) $(INFO_DEPS) $(MANS) $(DATA) \
#         $(EXTRA_DIST)
#-----------------------------
# Variant that also tolerates whitespace after the continuation backslash.
# No replacement string is passed: when gsub! receives both a replacement
# and a block, the block is ignored and the next line would never be read.
line.gsub!(/\\\s*$/) {
    # as before: fetch the continuation and replace the match with nothing
    nextline = fh.gets
    ''
}

Counting Lines (or Paragraphs or Records) in a File

#-----------------------------
require 'shellwords'

# Let wc(1) count the lines. The filename is shell-escaped so that names
# containing spaces or shell metacharacters are passed through literally.
count = `wc -l < #{Shellwords.escape(filename)}`
# $? is a Process::Status; success? is true only for a zero exit status.
fail "wc failed: #{$?}" unless $?.success?
count.chomp!
#-----------------------------
count = 0
File.open(file, 'r') do |fh|
    # gets returns nil once the end of the file is reached
    until fh.gets.nil?
        count += 1
    end
end
# count now holds the number of lines read
#-----------------------------
# Count newlines by reading the file in 64 KiB chunks.
count = 0
begin
    loop do
        count += file.sysread(2**16).count("\n")
    end
rescue EOFError
    # sysread signals end of file by raising; this is the normal exit.
    # Only EOFError is rescued here: a trailing "rescue EOFError" modifier
    # would silently swallow every StandardError instead.
end
#-----------------------------
# Start from zero: without this, count would be either nil (NoMethodError
# on +=) or a stale total left over from an earlier count.
count = 0
File.open(filename,'r') { |fh|
    count += 1 while fh.gets
}
# count now holds the number of lines read
#-----------------------------
# Ruby has no direct equivalent of perl's counting for statement, so use
# an enumerator instead. File.foreach yields one line at a time, which
# avoids holding the whole file in memory just to count its lines.
count = File.foreach(filename).count
#-----------------------------
# Read to the end, then ask the file object how many lines it has read.
# IO#lineno belongs to this file, whereas the global $. reflects whichever
# stream was read last and is stale when this file turns out to be empty.
1 while file.gets
count = file.lineno
#-----------------------------
# Count paragraphs. Passing '' as the separator puts gets into paragraph
# mode for this call only, so the global $/ is left untouched.
# para_count is initialised outside the block; a variable first assigned
# inside the block would vanish when the block ends.
para_count = 0
begin
    File.open(filename, 'r') { |fh|
        para_count += 1 while fh.gets('')
    }
rescue SystemCallError => e
    # Only operating-system errors (missing file, permissions, ...) are
    # reported here; the message is interpolated explicitly.
    fail("can't open #{filename}: #{e.message}")
end
#-----------------------------

Processing Every Word in a File

#-----------------------------
# Visit every whitespace-separated chunk of the input.
# The line is held in a local: the receiver-less Kernel#split that worked
# on $_ is only available when ruby runs with -n or -p.
while (line = gets)
    line.split.each { |chunk|
        # do something with chunk
    }
end
#-----------------------------
# Visit every word (letters/digits, optionally with inner ' or -).
# scan iterates over the matches without building a throw-away replacement
# string, and unlike the receiver-less Kernel#gsub it does not depend on
# ruby having been started with -n or -p.
while (line = gets)
    line.scan(/\w[\w'-]*/) { |word|
        # do something with word
    }
end
#-----------------------------
# Make a word frequency count
# normally hashes can be created using {} or just Hash.new
# but we want the default value of an entry to be 0 instead
# of nil. (nil can't be incremented)
seen = Hash.new(0)
while (line = gets)
    # scan yields each match; the receiver-less Kernel#gsub on $_ is only
    # defined when ruby runs with -n or -p.
    line.scan(/\w[\w'-]*/) { |word|
        seen[word.downcase] += 1
    }
end
# output in descending order of count; ties are broken alphabetically so
# the report is the same on every run
seen.sort_by { |word, n| [-n, word] }.each do |word, n|
    printf("%5d %s\n", n, word)
end

#-----------------------------
# Line frequency count
# Lines are chomped before being counted: gets keeps the line terminator,
# which would otherwise be printed in addition to the "\n" of the format
# string and would make an unterminated last line count separately from
# an identical terminated one.
seen = Hash.new(0)
while (line = gets)
    seen[line.chomp.downcase] += 1
end
# descending by count, ties broken alphabetically for a stable report
seen.sort_by { |text, n| [-n, text] }.each do |text, n|
    printf("%5d %s\n", n, text)
end
#-----------------------------

Reading a File Backwards by Line or Paragraph

#-----------------------------
# File.readlines takes the filename as a string, so no
# explicit file handle (perl's FILE) is needed
File.readlines(file).each do |line|
    # do something with line
end
#-----------------------------
# same array of lines, visited from the last line to the first
File.readlines(file).reverse_each do |line|
    # do something with line
end
#-----------------------------
# lines is assumed to hold the file's lines already,
# for example from
# lines = File.readlines(file)
#
# reverse_each on the array itself is the usual way to do
# this; the form below is for when a numeric index is
# really wanted while walking from the last line to the first
(0...lines.size).reverse_each do |i|
    line = lines[i]
end
#-----------------------------
# the second readlines argument is the record
# separator (perl's $/); an empty string splits
# the input into paragraphs
File.readlines(file, '').each do |para|
    # do something with paragraph
    puts "->Paragraph #{para}"
end
#-----------------------------

Trailing a Growing File

# This shows both reading the growing file and a fall back (exit) when the file is deleted.
# The block form of File.open closes the handle when the loop ends.
File.open('growing.txt') do |file| # Open the file, default mode is reading.
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    while File.exist?(file.path) # Check if the file exists, exit if not.
        puts file.gets until file.eof? # Print file contents until the end-of-file is reached.
        sleep(1) # Wait 1 second, so the file might grow in the meantime.
        # No need to seek  - eof will be reset automatically if the file grows.
    end
end

# Another option - reopen the file if the current position is greater than the length of the file.
filename = 'growing.txt'
file = File.open(filename)
begin
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    while File.exist?(filename)
        if File.size(filename) < file.pos
            puts "File truncated -  reopening."
            file.close # release the stale handle before replacing it
            file = File.open(filename)
        end
        puts file.gets until file.eof?
        sleep(1)
    end
ensure
    # Close whichever handle is current once the file has disappeared
    # or an error interrupts the loop.
    file.close unless file.closed?
end

Picking a Random Line from a File

# Pick one record at random in a single pass: the n-th record replaces the
# current choice with probability 1/n, which leaves every record equally
# likely once the end of the file is reached.
#
# adage is declared before the block; a variable first assigned inside the
# block would not exist at the puts below. The "%\n" separator is passed to
# File.foreach directly instead of changing the global $/, and foreach
# closes the file itself. No srand call is needed: Ruby seeds its random
# number generator at startup.
adage = nil
File.foreach('/usr/share/fortune/humorists', "%\n").with_index(1) do |line, n|
    adage = line if rand(n) < 1
end

puts adage

Randomizing All Lines

# Helper function from chapter 4.17: Randomizing an Array
#
# Shuffles the array a in place. Walking from the last index down to 1,
# each element is swapped with one chosen at random from the positions at
# or before it. The return value is that of Integer#downto, i.e. the last
# index (a.size - 1), not the array.
def fisher_yates_shuffle(a)
    last = a.size - 1
    last.downto(1) do |i|
        j = rand(i + 1)
        next if j == i  # swapping an element with itself is a no-op
        a[i], a[j] = a[j], a[i]
    end
end

# Read all lines into an array. File.readlines opens, reads and closes the
# file in one call; calling collect without a block on an open File returns
# an Enumerator rather than an array and leaves the handle open.
lines = File.readlines('to_randomize.txt')

# Shuffle them.
fisher_yates_shuffle(lines)

# Print the shuffled lines.
puts lines

Reading a Particular Line in a File

Processing Variable-Length Text Fields

Removing the Last Line of a File

# Remove the last line of file by truncating it at the offset where that
# line starts.
begin
    # The block form closes the handle even if truncate raises.
    File.open(file, "r+") do |fh|
        # Offset at which the last line begins. It starts at 0 so that an
        # empty or single-line file is truncated to nothing instead of
        # handing nil to truncate.
        addr = 0
        while fh.gets
            # After reading any line but the last, the position is the
            # start of the following line; keep the latest such offset.
            addr = fh.tell unless fh.eof?
        end
        fh.truncate(addr)
    end
rescue SystemCallError => e
    $stderr.puts e.message
end

Processing Binary Files

Using Random-Access I/O

Updating a Random-Access File

Reading a String from a Binary File

Reading Fixed-Length Records

Reading Configuration Files

Testing a File for Trustworthiness

Program: tailwtmp

Program: tctee

Program: laston