7. File Access

Introduction

for line in open("/usr/local/widgets/data"):
    if blue in line:
        print line[:-1]
#---------
import re
import sys

# Echo standard input to standard output with a "Read: " prefix, and
# complain on standard error about every line that holds no digit.
pattern = re.compile(r"\d")
for line in sys.stdin:
    found = pattern.search(line)
    if found is None:
        sys.stderr.write("No digit found.\n")
    sys.stdout.write("Read: " + line)
sys.stdout.close()
#---------
# Open (and truncate) the log file for writing.
logfile = open("/tmp/log", "w")
#---------
# Close it again when done.
logfile.close()
#---------
# "print>>f" sends a single print statement to the file object f;
# a plain print still goes to sys.stdout.
print>>logfile, "Countdown initiated ..."
print "You have 30 seconds to reach minimum safety distance."

# DON'T DO THIS
# Rebinding sys.stdout redirects every plain print until it is restored.
import sys
old_output, sys.stdout = sys.stdout, logfile
print "Countdown initiated ..."
sys.stdout = old_output
print "You have 30 seconds to reach minimum safety distance."
#---------

Opening a File

# Python's open() function somewhat covers both perl's open() and
# sysopen() as it has optional arguments for mode and buffering.
source = open(path)
sink = open(path, "w")
#---------
# NOTE: almost no one uses the low-level os.open and os.fdopen
# commands, so their inclusion here is just silly.  If
# os.fdopen(os.open(...)) were needed often, it would be turned
# into its own function.  Instead, I'll use 'fd' to hint that
# os.open returns a file descriptor
import os
source_fd = os.open(path, os.O_RDONLY)
source = os.fdopen(source_fd)
sink_fd = os.open(path, os.O_WRONLY)
# os.fdopen() defaults to mode "r"; the stream mode has to match the
# write-only descriptor it wraps.
sink = os.fdopen(sink_fd, "w")
#---------
myfile = open(filename, "w")
fd = os.open(filename, os.O_WRONLY | os.O_CREAT)
myfile = open(filename, "r+")
#---------
fd = os.open(name, flags)
fd = os.open(name, flags, mode)
#---------
myfile = open(path)
fd = os.open(path, os.O_RDONLY)
#-----------------------------
myfile = open(path, "w")
fd = os.open(path, os.O_WRONLY|os.O_TRUNC|os.O_CREAT)
fd = os.open(path, os.O_WRONLY|os.O_TRUNC|os.O_CREAT, 0600)
#-----------------------------
fd = os.open(path, os.O_WRONLY|os.O_EXCL|os.O_CREAT)
fd = os.open(path, os.O_WRONLY|os.O_EXCL|os.O_CREAT, 0600)
#-----------------------------
myfile = open(path, "a")
fd = os.open(path, os.O_WRONLY|os.O_APPEND|os.O_CREAT)
fd = os.open(path, os.O_WRONLY|os.O_APPEND|s.O_CREAT, 0600)
#-----------------------------
fd = os.open(path, os.O_WRONLY|os.O_APPEND)
#-----------------------------
myfile = open(path, "rw")
fd = os.open(path, os.O_RDWR)
#-----------------------------
fd = os.open(path, os.O_RDWR|os.O_CREAT)
fd = os.open(path, os.O_RDWR|os.O_CREAT, 0600)
#-----------------------------
fd = os.open(path, os.O_RDWR|os.O_EXCL|os.O_CREAT)
fd = os.open(path, os.O_RDWR|os.O_EXCL|os.O_CREAT, 0600)
#-----------------------------

Opening Files with Unusual Filenames

# Nothing different needs to be done with Python

Expanding Tildes in Filenames

import os
filename = os.path.expanduser(filename)

Making Perl Report Filenames in Errors

myfile = open(filename)   # raise an exception on error

try:
    myfile = open(filename)
except IOError, err:
    raise AssertionError("Couldn't open %s for reading : %s" %
                         (filename, err.strerror))

Creating Temporary Files

import tempfile

myfile = tempfile.TemporaryFile()

#-----------------------------
# NOTE: The TemporaryFile() call is much more appropriate
# I would not suggest using this code for real work.
import os, stat, tempfile

# Keep asking for a fresh name until we are the ones who create the file:
# O_EXCL makes os.open() fail if the name already exists.
while True:
    name = os.tmpnam()
    try:
        # Owner-only permissions: a temporary file should not be
        # readable or writable by other users.
        fd = os.open(name, os.O_RDWR|os.O_CREAT|os.O_EXCL,
                     stat.S_IRUSR|stat.S_IWUSR)
        break
    except os.error:
        pass  # could not create that name; try another one
# The tempfile module has no public TemporaryFileWrapper; the class that
# removes the file on close() is tempfile._TemporaryFileWrapper.  Open the
# stream "r+" so it is writable like the O_RDWR descriptor beneath it.
myfile = tempfile._TemporaryFileWrapper(os.fdopen(fd, "r+"), name)

# now go on to use the file ...
#-----------------------------
import os
# Loop until os.open() manages to create a brand-new file.  os.open()
# reports failure by raising OSError, never by returning a false value
# (and 0 is a legitimate descriptor), so retry on the exception.
while True:
    tmpname = os.tmpnam()
    try:
        fd = os.open(tmpname, os.O_RDWR | os.O_CREAT | os.O_EXCL)
    except OSError:
        continue  # could not create that name; pick another
    tmpfile = os.fdopen(fd, "r+")  # read/write stream, matching O_RDWR
    break

# Unlink right away: the file stays usable through tmpfile and
# disappears once tmpfile is closed.
os.remove(tmpname)

#-----------------------------
import tempfile

myfile = tempfile.TemporaryFile(bufsize = 0)
for i in range(10):
    print>>myfile, i
myfile.seek(0)
print "Tmp file has:", myfile.read()
#-----------------------------

Storing Files Inside Your Program Text

# Data embedded in the program text; the backslash after the opening
# quotes keeps the first line from starting with an empty line.
DATA = """\
your data goes here
"""
# splitlines() yields exactly the lines of the text; split("\n") would
# add a spurious empty string after the final newline.
for line in DATA.splitlines():
    pass # process the line

Writing a Filter


# Simplest filter: read standard input line by line.
for line in sys.stdin:
    pass # do something with the line

# processing a list of files from commandline
import fileinput
for line in fileinput.input():
    pass # do something with the line

#-----------------------------
import sys

def do_with(myfile):
    """Copy every line of *myfile* to standard output.

    *myfile* is any iterable of text lines (an open file, sys.stdin, ...).
    Each line is written with exactly one trailing newline, so a final
    line that lacks a newline is completed rather than truncated.
    """
    for line in myfile:
        # Strip only the terminator; line[:-1] would drop a real character
        # from a last line that does not end in "\n".
        sys.stdout.write(line.rstrip("\n") + "\n")

filenames = sys.argv[1:]
if filenames:
    for filename in filenames:
        try:
            do_with(open(filename))
        except IOError, err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err.strerror))
            continue
else:
    do_with(sys.stdin)

#-----------------------------
import sys, glob
ARGV = sys.argv[1:] or glob.glob("*.[Cch]")
#-----------------------------
# NOTE: the getopt module is the preferred mechanism for reading
# command line arguments
import sys

# arg demo 1: count an optional leading "-c" flag and drop it from args
args = sys.argv[1:]
chop_first = 0

if args[:1] == ["-c"]:
    chop_first = chop_first + 1
    del args[0]

# arg demo 2: Process optional -NUMBER flag

# NOTE: You just wouldn't process things this way for Python,
# but I'm trying to preserve the same semantics.

import re
import sys

# A leading "-NUMBER" argument, if present, selects the column count
# and is removed from the argument list.
digit_pattern = re.compile(r"-(\d+)$")

args = sys.argv[1:]
if args:
    match = digit_pattern.match(args[0])
    if match is not None:
        columns = int(match.group(1))
        del args[0]

# NOTE: here's the more idiomatic way, which also checks
# for the "--" or a non "-" argument to stop processing

args = sys.argv[1:]
for arg in args:
    # Stop at the explicit end-of-options marker or the first non-option.
    if arg == "--" or not arg.startswith("-"):
        break
    # "-NUMBER" sets the column count; any other flag is ignored here.
    if arg[1:].isdigit():
        columns = int(arg[1:])



# arg demo 3: Process clustering -a, -i, -n, or -u flags
import sys, getopt
try:
    args, filenames = getopt.getopt(sys.argv[1:], "ainu")
except getopt.error:
    raise SystemExit("usage: %s [-ainu] [filenames] ..." % sys.argv[0])

# Count how many times each of the four flags was given; anything else
# coming back from getopt is treated as a programming error.
flag_counts = {"-a": 0, "-i": 0, "-n": 0, "-u": 0}
for k, v in args:
    if k not in flag_counts:
        raise AssertionError("Unexpected argument: %s" % k)
    flag_counts[k] += 1

append = flag_counts["-a"]
ignore_ints = flag_counts["-i"]
nostdout = flag_counts["-n"]
unbuffer = flag_counts["-u"]

#-----------------------------
# Note: Idiomatic Perl gets translated to idiomatic Python
import fileinput
for line in fileinput.input():
    sys.stdout.write("%s:%s:%s" %
                     (fileinput.filename(), fileinput.filelineno(), line))
#-----------------------------
#!/usr/bin/env python
# findlogin1 - print all lines containing the string "login"
# Copy to standard output every input line that contains "login".
for line in fileinput.input(): # loop over files on command line
    if "login" in line:
        sys.stdout.write(line)

#-----------------------------
#!/usr/bin/env python
# lowercase - turn all lines into lowercase
### NOTE: I don't know how to do locales in Python
for line in fileinput.input(): # loop over files on command line
    sys.stdout.write(line.lower())

#-----------------------------
#!/usr/bin/env python
# NOTE: The Perl code appears buggy, in that "Q__END__W" is considered
#       to be a __END__ and words after the __END__ on the same line
#       are included in the count!!!
# countchunks - count how many words are used.
# skip comments, and bail on file if __END__
# or __DATA__ seen.
chunks = 0
for line in fileinput.input():
    for word in line.split():
        if word.startswith("#"):
            continue
        if word in ("__DATA__", "__END__"):
            fileinput.close()
            break
        chunks += 1
print "Found", chunks, "chunks"

Modifying a File in Place with Temporary File

import shutil

old = open("old")
new = open("new","w")

# Copy line by line; this loop is where the per-line changes would go.
for line in old:
    new.write(line)   # file objects have write()/writelines(), not writeline()
new.close()
old.close()

# Keep a backup of the original, then put the new contents in its place.
shutil.copyfile("old", "old.orig")
shutil.copyfile("new", "old")

# insert lines at line 20:
for i, line in enumerate(old):
    if i + 1 == 20:   # enumerate() counts from 0, line numbers from 1
        new.write("Extra line 1\n")
        new.write("Extra line 2\n")
    # Each line already carries its newline; print would add a second one.
    new.write(line)


# or delete lines 20 through 30:
for i, line in enumerate(old):
    if 20 <= i + 1 <= 30:   # enumerate() counts from 0, line numbers from 1
        continue
    # Each line already carries its newline; print would add a second one.
    new.write(line)

Modifying a File in Place with -i Switch

# modifying with "-i" commandline switch is a perl feature
# python has fileinput
import fileinput, sys, time
today = time.strftime("%Y-%m-%d",time.localtime())
for line in fileinput.input(inplace=1, backup=".orig"):
    sys.stdout.write(line.replace("DATE",today))

# set up to iterate over the *.c files in the current directory,
# editing in place and saving the old file with a .orig extension.
import glob, re
# "earl" directly after a "p" or "P" gets rewritten to "erl".
match = re.compile("(?<=[pP])earl")
files = fileinput.FileInput(glob.glob("*.c"), inplace=1, backup=".orig")
# While inplace editing is active, whatever goes to sys.stdout lands in
# the file currently being rewritten.
for line in files:
    sys.stderr.write(line)
    if files.isfirstline():
        sys.stdout.write("This line should appear at the top of each file\n")
    sys.stdout.write(match.sub("erl",line))

Modifying a File in Place Without a Temporary File

#-----------------------------
# Whole-file variant: read everything, modify it, write it back.
# "r+" opens the existing file for both reading and writing.
myfile = open(filename, "r+")
data = myfile.read()
# change data here
myfile.seek(0, 0)                  # back to the start before overwriting
myfile.write(data)
myfile.truncate(myfile.tell())     # cut off leftovers if the new data is shorter
myfile.close()
#-----------------------------
# Line-by-line variant: process() is applied to every line first.
myfile = open(filename, "r+")
data = [process(line) for line in myfile]
myfile.seek(0, 0)                  # back to the start before overwriting
myfile.writelines(data)
myfile.truncate(myfile.tell())     # cut off leftovers if the new data is shorter
myfile.close()
#-----------------------------

Locking a File

                                                                                                                                                                                                                                                               
import fcntl
myfile = open(somepath, 'r+')
fcntl.flock(myfile, fcntl.LOCK_EX)
# update file, then...
myfile.close()
#-----------------------------
fcntl.LOCK_SH
fcntl.LOCK_EX
fcntl.LOCK_NB
fcntl.LOCK_UN
#-----------------------------
import sys
import warnings
# Try for the lock without waiting; if someone else holds it, say so
# and then fall back to a blocking request.
try:
    fcntl.flock(myfile, fcntl.LOCK_EX|fcntl.LOCK_NB)
except IOError:
    # "$!" is Perl's error variable and is never interpolated by Python,
    # so put the text of the caught exception into the message instead.
    warnings.warn("can't immediately write-lock the file (%s), blocking ..."
                  % sys.exc_info()[1])
    fcntl.flock(myfile, fcntl.LOCK_EX)
#-----------------------------
fcntl.flock(myfile, fcntl.LOCK_UN)
#-----------------------------
# Opening with "r+" instead of "w+" keeps Python from truncating the file
# on open, while another process might still hold an advisory exclusive
# lock on it.  Truncate only after the lock has been acquired.
myfile = open(somepath, "r+")
fcntl.flock(myfile, fcntl.LOCK_EX)
myfile.seek(0, 0)
myfile.truncate(0)
# print>>myfile, "\n" would emit two newlines (the string plus print's own).
myfile.write("\n")
myfile.close()
#-----------------------------

Flushing Output

# Python doesn't have command buffering.  Files can have buffering set,
# when opened:
myfile = open(filename, "r", buffering=0)   #Unbuffered
myfile = open(filename, "r", buffering=1)   #Line buffered
myfile = open(filename, "r", buffering=100) #Use buffer of (approx) 100 bytes
myfile = open(filename, "r", buffering=-1)  #Use system default

myfile.flush()  # Flush the I/O buffer

# stdout is treated as a file.  If you ever need to flush it, do so:
import sys
sys.stdout.flush()

# DON'T DO THIS.  Use urllib, etc.
import socket
mysock = socket.socket()
mysock.connect(('www.perl.com', 80))
# mysock.setblocking(True)
mysock.send("GET /index.html http/1.1\n\n")
f = mysock.makefile()
print "Doc is:"
for line in f:
    print line[:-1]

Reading from Many Filehandles Without Blocking

import select
# Ask select() over and over which of the three files are ready to read.
# The timeout of 0 makes each call return immediately instead of waiting.
while True:
    rlist, wlist, xlist = select.select([file1, file2, file3], [], [], 0)
    for r in rlist:
        pass # Do something with the file handle

Doing Non-Blocking I/O

# Use select.poll() on Unix systems.
# @@INCOMPLETE@@
# @@INCOMPLETE@@

Determining the Number of Bytes to Read

# @@INCOMPLETE@@
# @@INCOMPLETE@@

Storing Filehandles in Variables

# NOTE: this is all much easier in Python
def subroutine(myfile):
    """Write the greeting line "Hello, file" to the file object *myfile*."""
    myfile.write("Hello, file")
    myfile.write("\n")

variable = myfile
subroutine(variable)

Caching Open Output Filehandles

# @@INCOMPLETE@@
# @@INCOMPLETE@@

Printing to Many Filehandles Simultaneously

for myfile in files:
    print>>myfile, stuff_to_print

# NOTE: This is unix specific
import os
file = os.popen("tee file1 file2 file3 >/dev/null", "w")
print>>myfile, "whatever"

# NOTE: the "make STDOUT go to three files" is bad programming style
import os, sys
sys.stdout.file = os.popen("tee file1 file2 file3", "w")
print "whatever"
sys.stdout.close()

# You could use a utility object to redirect writes:
class FileDispatcher(object):
    """Write-only file-like object that repeats each call on several files."""

    def __init__(self, *files):
        # The targets are visited in the order they were passed in.
        self.files = files

    def _broadcast(self, method_name, *args):
        """Call the named method with *args* on each wrapped file in turn."""
        for target in self.files:
            getattr(target, method_name)(*args)

    def write(self, msg):
        """Write *msg* to every wrapped file."""
        self._broadcast("write", msg)

    def close(self):
        """Close every wrapped file."""
        self._broadcast("close")

x = open("C:/test1.txt", "w")
y = open("C:/test2.txt", "w")
z = open("C:/test3.txt", "w")

fd = FileDispatcher(x, y, z)
print>>fd, "Foo"     # equiv to fd.write("Foo"); fd.write("\n")
print>>fd, "Testing"  
fd.close()

Opening and Closing File Descriptors by Number

import os
myfile = os.fdopen(fdnum) # open the descriptor itself
myfile = os.fdopen(os.dup(fdnum)) # open to a copy of the descriptor

###
# Independent copies of the standard streams: the write-mode copy must
# duplicate stdout's descriptor, the read-mode copy stdin's.
outcopy = os.fdopen(os.dup(sys.stdout.fileno()), "w")
incopy = os.fdopen(os.dup(sys.stdin.fileno()), "r")

Copying Filehandles

original = open("C:/test.txt")
alias = original
alias.close()
print original.closed
#=>True

import copy

original = open("C:/test.txt")
dupe = copy.copy(original)
dupe.close()
print original.closed
#=>False

# DON'T DO THIS.
import sys
oldstderr = sys.stderr
oldstdout = sys.stdout

sys.stderr = open("C:/stderrfile.txt")
sys.stdout = open("C:/stdoutfile.txt")

print "Blah"  # Will be written to C:/stdoutfile.txt
sys.stdout.close()

sys.stdout = oldstdout
sys.stderr = oldstderr

Program: netlock

# @@INCOMPLETE@@
# @@INCOMPLETE@@

Program: lockarea

# On Windows:
# msvcrt.locking() works from the current file position, so seek to
# offset 5 first; the call then requests a non-blocking lock on 3 bytes.
import msvcrt
myfile.seek(5, 0)
msvcrt.locking(myfile.fileno(), msvcrt.LK_NBLCK, 3)

# On Unix:
# fcntl.lockf() takes the region explicitly: length 3, starting at offset 5.
# LOCK_NB makes the exclusive-lock request fail instead of waiting.
import fcntl
fcntl.lockf(myfile.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB, 3, 5)