7. File Access

Introduction

# An IO object being Enumerable, we can use 'each' directly on it.
# The block form of File.open closes the file when the block exits,
# so the handle is not left open after the scan.
File.open("/usr/local/widgets/data") { |file|
    file.each { |line|
        puts line if line =~ /blue/
    }
}

logfile = File.new("/var/log/rubylog.txt", "w")
mysub($stdin, logfile)

# The method IO#readline is similar  to IO#gets
# but throws an exception when it reaches EOF
# Print every line of bla.txt that contains "blue".
# IO#readline raises EOFError at end of file, which ends the loop.
f = File.new("bla.txt")
begin
    while (line = f.readline)
        $stdout.print line if line =~ /blue/
    end
rescue EOFError
    # end of file reached; nothing left to read
ensure
    f.close   # close on every exit path, not only on EOF
end

# Echo each line of standard input, warning on lines without a digit.
while $stdin.gets                        # reads from STDIN into $_
    unless $_ =~ /\d/                    # match the line just read explicitly
        $stderr.puts "No digit found."   # writes to STDERR
    end
    puts "Read: #{$_}"                   # writes to STDOUT
end

logfile = File.new("/tmp/log", "w")

logfile.close

# $stdout (or its synonym '$>') is the destination of output
# for Kernel#print, Kernel#puts, and family functions.
# (The Ruby 1.8 name $defout no longer redirects anything.)
logfile = File.new("log.txt", "w")
old = $stdout
begin
    $stdout = logfile             # switch to logfile for output
    puts "Countdown initiated ..."
ensure
    $stdout = old                 # return to original output
    logfile.close                 # flush the buffered text to disk
end
puts "You have 30 seconds to reach minimum safety distance."

Opening a File

source = File.new(path, "r")  # open file "path" for reading only
sink   = File.new(path, "w")  # open file "path" for writing only

source = File.open(path, File::RDONLY)  # open file "path" for reading only
sink   = File.open(path, File::WRONLY)  # open file "path" for writing only

file   = File.open(path, "r+")  # open "path" for reading and writing
file   = File.open(path, flags) # open "path" with the flags "flags" (see examples below for flags)

# open file "path" read only
file   = File.open(path, "r")
file   = File.open(path, File::RDONLY)

# open file "path" write only, create it if it does not exist
# truncate it to zero length if it exists
file   = File.open(path, "w")
file   = File.open(path, File::WRONLY|File::TRUNC|File::CREAT) 
file   = File.open(path, File::WRONLY|File::TRUNC|File::CREAT, 0666)  # with permission 0666

# open file "path" write only, fails if file exists
file   = File.open(path, File::WRONLY|File::EXCL|File::CREAT) 
file   = File.open(path, File::WRONLY|File::EXCL|File::CREAT, 0666) 

# open file "path" for appending
file   = File.open(path, "a")
file   = File.open(path, File::WRONLY|File::APPEND|File::CREAT) 
file   = File.open(path, File::WRONLY|File::APPEND|File::CREAT, 0666) 

# open file "path" for appending only when file exists
file   = File.open(path, File::WRONLY|File::APPEND) 

# open file "path" for reading and writing
file   = File.open(path, "r+")
file   = File.open(path, File::RDWR)

# open file for reading and writing, create a new file if it does not exist
file   = File.open(path, File::RDWR|File::CREAT)
file   = File.open(path, File::RDWR|File::CREAT, 0600)

# open file "path" reading and writing, fails if file exists
file   = File.open(path, File::RDWR|File::EXCL|File::CREAT)
file   = File.open(path, File::RDWR|File::EXCL|File::CREAT, 0600)

Opening Files with Unusual Filenames

# No problem with Ruby since the filename doesn't contain characters with
# special meaning; like Perl's sysopen
File.open(filename, 'r')

Expanding Tildes in Filenames

File.expand_path('~root/tmp')
#=> "/root/tmp"
File.expand_path('~rpcuser')
#=> "/var/lib/nfs"

# To expand ~/.. it explicitly needs the environment variable HOME
File.expand_path('~/tmp')
#=> "/home/gc/tmp"

Making Perl Report Filenames in Errors

# The exception raised in Ruby reports the filename
File.open('afile')

Creating Temporary Files

# Standard Ruby distribution provides the following useful extension
require 'tempfile'
# With the Tempfile class, the file is automatically deleted on garbage
# collection, so you won't need to remove it, later on.
tf = Tempfile.new('tmp')   # a name is required to create the filename

# If you need to pass the filename to an external program you can use
# File#path, but don't forget to File#flush in order to flush anything
# living in some buffer somewhere.
tf.flush
system("/usr/bin/dowhatever #{tf.path}")

fh = Tempfile.new('tmp')
fh.sync = true                # autoflushes
10.times { |i| fh.puts i }
fh.rewind
puts 'Tmp file has: ', fh.readlines

Storing Files Inside Your Program Text

while (DATA.gets) do
    # process the line  
end
__END__
# your data goes here

# __DATA__ doesn't exist in Ruby

# get info about the script (size, date of last modification)
kilosize = DATA.stat.size / 1024
last_modif = DATA.stat.mtime
puts "<P>Script size is #{kilosize}"
puts "<P>Last script update: #{last_modif}"
__END__
# DO NOT REMOVE THE PRECEDING LINE.
# Everything else in this file will be ignored.

Writing a Filter

while line = gets do
    # do something with line.
end

#  or 
while gets do
    # do something with $_
end

# or more rubyish
$stdin.each do |line|
    # do stuff with line
end


# ARGF may make this easier
# this is skipped if ARGV.size==0
# Process every file named on the command line, line by line.
ARGV.each do |filename|
    begin
        # File.open rather than Kernel#open: Kernel#open has treated a name
        # starting with "|" as a command to run.
        # Closing is done by the block form.
        File.open(filename) do |fd|
            fd.each do |line|
                # do stuff with line
            end
        end
    rescue SystemCallError
        # only open/read failures (Errno::*) end the program; bugs in the
        # line processing are no longer masked as "can't open"
        abort("can't open %s" % filename)
    end
end

# globbing is done in the Dir module
# Replace the contents of ARGV in place: ARGF/gets keep reading from the
# original array object, so reassigning the constant would not affect them.
ARGV.replace(Dir["*.[Cch]"]) if ARGV.empty?

# note: optparse is the preferred way to handle this
# Count a leading '-c' switch and remove it from the argument list.
chop_first ||= 0          # the counter must be numeric before += can bump it
if ARGV[0] == '-c'
    chop_first += 1
    ARGV.shift
end


# processing numerical options
# Accept a leading "-<digits>" switch, store its value and drop it from ARGV.
if ARGV[0] =~ /\A-(\d+)\z/    # \A..\z anchor the whole argument, not a line
    columns = $1.to_i         # keep the option as a number, not a String
    ARGV.shift
end

# again, better to use optparse:
require 'optparse'

# Each switch may be repeated; the counters record how often it was given.
nostdout, append, unbuffer, ignore_ints = 0, 0, 0, 0

# ARGV.options yields the OptionParser bound to ARGV; the block's value
# (the result of parse!) is kept so a failed parse can be detected below.
parsed = ARGV.options do |parser|
    parser.on('-n') { nostdout += 1 }
    parser.on('-a') { append += 1 }
    parser.on('-u') { unbuffer += 1 }
    parser.on('-i') { ignore_ints += 1 }
    parser.parse!
end
abort("usage: " + __FILE__ + " [-ainu] [filenames]") unless parsed

# no need to do undef $/, we have File.read
str = File.read(ARGV[0])

# again we have File.read
str = File.read(ARGV[0])

# Print every input line prefixed with "filename:lineno:".
# ARGF.file.lineno is the 1-based line number within the file currently
# being read, so numbering restarts for each file.
ARGF.each_line do |line|
    print ARGF.filename, ":", ARGF.file.lineno, ":", line
end

# print all the lines in every file passed via command line that contains login
ARGF.each do |line|
    puts line if line =~ /login/
end
#
# even this would fit
#%ruby -ne "print if /f/" 2.log
#

# Non-bang downcase: downcase! returns nil when the line is already
# lowercase, which would print an empty line instead of the text.
ARGF.each { |l| puts l.downcase }

#------------------
#!/usr/bin/ruby -p
# just like perl's -p
$_.downcase!
#

# I don't know who should I trust. 
# perl's version splits on \w+ while python's on \w.

chunks = 0

File.read(ARGV[0]).split.each do |word|
    next if word =~ /^#/
    break if ["__DATA__", "__END__"].member? word
    chunks += 1 
end

print "Found ", chunks, " chunks\n"

Modifying a File in Place with Temporary File

old = File.open(old_file)
new = File.open(new_file, "w")
while old.gets do
    # change $_, then...
    new.print $_
end
old.close
new.close
File.rename(old_file, "old.orig")
File.rename(new_file, old_file)

while old.gets do
    if $. == 20 then # we are at the 20th line
        new.puts "Extra line 1"
        new.puts "Extra line 2"
    end
    new.print $_
end

while old.gets do
    next if 20..30 # skip the 20th line to the 30th
                   # Ruby (and Perl) permit to write if 20..30 
                   # instead of if (20 <= $.) and ($. <= 30)
    new.print $_
end

Modifying a File in Place with -i Switch

#% ruby -i.orig -pe 'FILTER COMMAND' file1 file2 file3 ...
#
#-----------------------------
##!/usr/bin/ruby -i.orig -p
# filter commands go here
#-----------------------------

#% ruby -pi.orig -e 'gsub!(/DATE/){Time.now}'

# effectively becomes:
# Hand-written equivalent of the -i.orig -p one-liner: each input file is
# renamed to <name>.orig and a fresh file of the original name receives
# the filtered output.
ARGV << 'I'   # NOTE(review): appends a file literally named "I" to the inputs - confirm this is intended
oldfile = ""
rewritten = nil   # output file currently standing in for $stdout
while gets
    if ARGF.filename != oldfile
        newfile = ARGF.filename
        File.rename(newfile, newfile + ".orig")
        rewritten.close if rewritten          # finish the previous file first
        rewritten = File.open(newfile,'w')
        $stdout = rewritten
        oldfile = newfile
    end
    # Kernel#gsub! is gone since Ruby 1.9; edit the current line explicitly.
    $_.gsub!(/DATE/) { Time.now.to_s }
    print
end
$stdout = STDOUT
rewritten.close if rewritten                  # flush the last rewritten file
#-----------------------------
#% ruby -i.old -pe 'gsub!(%r{\bhisvar\b}, 'hervar')' *.[Cchy]

#-----------------------------
# set up to iterate over the *.c files in the current directory,
# editing in place and saving the old file with a .orig extension
$-i = '.orig'                       # set up -i mode
ARGV.replace(Dir['*.[Cchy]'])
while gets
    if $. == 1
        print "This line should appear at the top of each file\n"
    end
    # Kernel#gsub! is gone since Ruby 1.9; operate on the current line ($_).
    $_.gsub!(/\b(p)earl\b/i, '\1erl')    # Correct typos, preserving case
    print
    ARGF.close if ARGF.eof?         # move on to the next file once this one ends
end

Modifying a File in Place Without a Temporary File

# Replace every "foo" with "QQQ" in 'itest', rewriting the file in place.
File.open('itest', 'r+') do |f|   # open file for update
    lines = f.readlines           # read into array of lines
    lines.each do |line|          # modify lines
        line.gsub!(/foo/, 'QQQ')
    end
    f.pos = 0                     # back to start
    # Write the joined text: printing the Array itself would emit its
    # inspect form (brackets and quotes) on Ruby 1.9+.
    f.write(lines.join)
    f.truncate(f.pos)             # truncate to new length
end                               # file is automatically closed
#-----------------------------
File.open('itest', 'r+') do |f|   
    out = ""
    f.each do |line|
        out << line.gsub(/DATE/) {Time.now}
    end
    f.pos = 0                     
    f.print out
    f.truncate(f.pos)             
end

Locking a File

File.open('infile', 'r+') do |f|
    f.flock File::LOCK_EX
    # update file
end
#-----------------------------
File::LOCK_SH     # shared lock (for reading)
File::LOCK_EX     # exclusive lock (for writing)
File::LOCK_NB     # non-blocking request
File::LOCK_UN     # free lock
#-----------------------------
unless f.flock File::LOCK_EX | File::LOCK_NB
    warn "can't get immediate lock: blocking ..."
    f.flock File::LOCK_EX 
end
#-----------------------------
# Increment the counter stored in 'numfile' while holding an exclusive lock.
File.open('numfile', File::RDWR|File::CREAT) do |f|
    f.flock(File::LOCK_EX)
    num = f.gets.to_i             # nil.to_i is 0, so an empty file starts at 0
    f.pos = 0
    f.truncate 0
    f.puts num + 1
end                               # closing the file releases the lock

Flushing Output

output_handle.sync = true
# Please note that like in Perl, $stderr is already unbuffered
#-----------------------------
#!/usr/bin/ruby -w
# seeme - demo stdio output buffering
$stdout.sync = ARGV.size > 0
print "Now you don't see it..."
sleep 2
puts "now you do"
#-----------------------------
$stderr.sync = true
afile.sync = false
#-----------------------------
# assume 'remote_con' is an interactive socket handle,
# but 'disk_file' is a handle to a regular file.
remote_con.sync = true       # unbuffer for clarity
disk_file.sync = false       # buffered for speed
#-----------------------------
require 'socket'
# Fetch a page over a raw TCP socket and print the whole response.
sock = TCPSocket.new('www.ruby-lang.org', 80)
begin
    sock.sync = true
    # HTTP lines end in CRLF, and the request line takes no trailing space.
    sock.print "GET /en/ HTTP/1.0\r\n\r\n"
    resp = sock.read
ensure
    sock.close                    # release the connection even if the read fails
end
print "DOC IS: #{resp}\n"

Reading from Many Filehandles Without Blocking

#-----------------------------
# assumes fh1, fh2, fh3 are open IO objects
nfound = select([$stdin, fh1, fh2, fh3], nil, nil, 0)
# select returns nil when the timeout expires with nothing ready,
# so fall back to an empty list instead of indexing nil
ready = nfound ? nfound[0] : []
ready.each do |file|
    case file
        when fh1
            # do something with fh1
        when fh2
            # do something with fh2
        when fh3
            # do something with fh3
    end
end
#-----------------------------
input_files = []
# repeat next line for all in-files to poll
input_files << fh1
if nfound = select(input_files, nil, nil, 0)
    # input ready on files in nfound[0]
end

Doing Non-Blocking I/O

# It throws exception on EOF, instead of sysread, you can use read_nonblock(), too.
begin
   File.open fname, (File::RDONLY | File::NONBLOCK) do |io|
     puts io.sysread(4096) # throws exception
   end
rescue EOFError
rescue IOError => e
   puts e.exception
rescue Errno::ENOENT
   puts "no such file #{fname}"
end

# return nil on EOF
begin
   File.open fname, (File::RDONLY | File::NONBLOCK) do |io|
     puts io.read(4096) # returns nil
   end
rescue Errno::ENOENT
   puts "no such file #{fname}"
end

Determining the Number of Bytes to Read

Storing Filehandles in Variables

# filehandles are normal variables, so they behave properly
# Writes a fixed greeting to the given handle.
# fh: any object that responds to #print (a File, $stdout, a StringIO, ...)
def subroutine(fh)
  fh.print "Hello, file"
end

variable = fh
subroutine(variable)

Caching Open Output Filehandles

Printing to Many Filehandles Simultaneously

#----------------------------
filehandles.each do |filehandle|
  filehandle.print stuff_to_print
end
#----------------------------
# NOTE: this is unix specific
IO.popen("tee file1 file2 file3 >/dev/null", "w") do |many|
  many.puts "data"
end
#----------------------------
# (really a Perl issue here, no problem in ruby)
# Array elements must be comma-separated
[fh1, fh2, fh3].each { |fh| fh.puts "whatever" }
#----------------------------
# redirect to stdout to use print/puts directly
$stdout = IO.popen("tee file1 file2 file3", "w")
puts "whatever"
$stdout.close
$stdout = STDOUT   # get things back to the way they were
#----------------------------
# create a class/object to encapsulate the behavior in ruby
class MultiDispatcher < BasicObject # inherit from BasicObject in 1.9.x only
  # targets: the collection of objects that every call is forwarded to
  def initialize(targets)
    @targets = targets
  end

  # Any method not defined here is replayed, with the same arguments and
  # block, on each target in order. The value of the #each call is returned.
  def method_missing(*call, &blk)
    @targets.each do |receiver|
      receiver.send(*call, &blk)
    end
  end
end

md = MultiDispatcher.new [$stdout, $stderr]
4.times {|i| md.printf "%3d\n", i}
md.close

Opening and Closing File Descriptors by Number

Copying Filehandles

Program: netlock

Program: lockarea