8. File Contents

Introduction

#-----------------------------
while (defined ($line = <DATAFILE>)) {
    chomp $line;
    $size = length $line;
    print "$size\n";                # output size of line
}
#-----------------------------
while (<DATAFILE>) {
    chomp;
    print length, "\n";             # output size of line
}
#-----------------------------
@lines = <DATAFILE>;
#-----------------------------
undef $/;
$whole_file = <FILE>;               # 'slurp' mode
#-----------------------------
#% perl -040 -e '$word = <>; print "First word is $word\n";'
#-----------------------------
#% perl -ne 'BEGIN { $/="%%\n" } chomp; print if /Unix/i' fortune.dat
#-----------------------------
print HANDLE "One", "two", "three"; # "Onetwothree"
print "Baa baa black sheep.\n";     # Sent to default output handle
#-----------------------------
$rv = read(HANDLE, $buffer, 4096)
        or die "Couldn't read from HANDLE : $!\n";
# $rv is the number of bytes read,
# $buffer holds the data read
#-----------------------------
truncate(HANDLE, $length)
    or die "Couldn't truncate: $!\n";
truncate("/tmp/$$.pid", $length)
    or die "Couldn't truncate: $!\n";
#-----------------------------
$pos = tell(DATAFILE);
print "I'm $pos bytes from the start of DATAFILE.\n";
#-----------------------------
seek(LOGFILE, 0, 2)         or die "Couldn't seek to the end: $!\n";
seek(DATAFILE, $pos, 0)     or die "Couldn't seek to $pos: $!\n";
seek(OUT, -20, 1)           or die "Couldn't seek back 20 bytes: $!\n";
#-----------------------------
$written = syswrite(DATAFILE, $mystring, length($mystring));
die "syswrite failed: $!\n" unless $written == length($mystring);
$read = sysread(INFILE, $block, 256, 5);
warn "only read $read bytes, not 256" if 256 != $read;
#-----------------------------
$pos = sysseek(HANDLE, 0, 1);       # don't change position
die "Couldn't sysseek: $!\n" unless defined $pos;
#-----------------------------

Reading Lines with Continuation Characters

#-----------------------------
while (defined($line = <FH>) ) {
    chomp $line;
    if ($line =~ s/\\$//) {
        $line .= <FH>;
        redo unless eof(FH);
    }
    # process full record in $line here
}
#-----------------------------
# DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
#         $(TEXINFOS) $(INFOS) $(MANS) $(DATA)
# DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
#         $(TEXINFOS) $(INFO_DEPS) $(MANS) $(DATA) \
#         $(EXTRA_DIST)
#-----------------------------
if ($line =~ s/\\\s*$//) { 
    # as before
}
#-----------------------------

Counting Lines (or Paragraphs or Records) in a File

#-----------------------------
$count = `wc -l < $file`;
die "wc failed: $?" if $?;
chomp($count);
#-----------------------------
open(FILE, "< $file") or die "can't open $file: $!";
$count++ while <FILE>;
# $count now holds the number of lines read
#-----------------------------
$count += tr/\n/\n/ while sysread(FILE, $_, 2 ** 16);
#-----------------------------
open(FILE, "< $file") or die "can't open $file: $!";
$count++ while <FILE>;
# $count now holds the number of lines read
#-----------------------------
open(FILE, "< $file") or die "can't open $file: $!";
for ($count=0; <FILE>; $count++) { }
#-----------------------------
1 while <FILE>;
$count = $.;
#-----------------------------
$/ = '';            # enable paragraph mode for all reads
open(FILE, $file) or die "can't open $file: $!";
1 while <FILE>;
$para_count = $.;
#-----------------------------

Processing Every Word in a File

#-----------------------------
while (<>) {
    for $chunk (split) {
        # do something with $chunk
    }
}
#-----------------------------
while (<>) {
    while ( /(\w[\w'-]*)/g ) {  #'
        # do something with $1
    }
}
#-----------------------------
# Make a word frequency count
%seen = ();
while (<>) {
    while ( /(\w['\w-]*)/g ) {  #'
        $seen{lc $1}++;
    }
}

# output hash in a descending numeric sort of its values
foreach $word ( sort { $seen{$b} <=> $seen{$a} } keys %seen) {
    printf "%5d %s\n", $seen{$word}, $word;
}
#-----------------------------
# Line frequency count
%seen = ();
while (<>) {
    $seen{lc $_}++;
}
foreach $line ( sort { $seen{$b} <=> $seen{$a} } keys %seen ) {
    printf "%5d %s", $seen{$line}, $line;
}
#-----------------------------

Reading a File Backwards by Line or Paragraph

#-----------------------------
@lines = <FILE>;
while ($line = pop @lines) {
    # do something with $line
}
#-----------------------------
@lines = reverse <FILE>;
foreach $line (@lines) {
    # do something with $line
}
#-----------------------------
for ($i = $#lines; $i != -1; $i--) {
    $line = $lines[$i];
}
#-----------------------------
# this enclosing block keeps local $/ temporary
{           
    local $/ = '';
    @paragraphs = reverse <FILE>;
}

foreach $paragraph (@paragraphs) {
    # do something
}
#-----------------------------

Trailing a Growing File

#-----------------------------
for (;;) {
    while (<FH>) { .... }
    sleep $SOMETIME;
    seek(FH, 0, 1);
}
#-----------------------------
use IO::Seekable;

for (;;) {
    while (<FH>) { .... }
    sleep $SOMETIME;
    FH->clearerr();
}
#-----------------------------
$naptime = 1;

use IO::Handle;
open (LOGFILE, "/tmp/logfile") or die "can't open /tmp/logfile: $!";
for (;;) {
    while (<LOGFILE>) { print }     # or appropriate processing
    sleep $naptime;
    LOGFILE->clearerr();            # clear stdio error flag
}
#-----------------------------
for (;;) {
    for ($curpos = tell(LOGFILE); <LOGFILE>; $curpos = tell(LOGFILE)) {
        # process $_ here
    }
    sleep $naptime;
    seek(LOGFILE, $curpos, 0);  # seek to where we had been
}
#-----------------------------
exit if (stat(LOGFILE))[3] == 0
#-----------------------------
use File::stat;
exit if stat(*LOGFILE)->nlink == 0;
#-----------------------------

Picking a Random Line from a File

#-----------------------------
srand;
rand($.) < 1 && ($line = $_) while <>;
# $line is the random line
#-----------------------------
$/ = "%%\n";
@ARGV = qw( /usr/share/games/fortunes );
srand;
rand($.) < 1 && ($adage = $_) while <>;
print $adage;
#-----------------------------

Randomizing All Lines

#-----------------------------
# assumes the &shuffle sub from Chapter 4
while (<INPUT>) {
    push(@lines, $_);
}
@reordered = shuffle(@lines);
foreach (@reordered) {
    print OUTPUT $_;
}
#-----------------------------

Reading a Particular Line in a File

#-----------------------------
# looking for line number $DESIRED_LINE_NUMBER
$. = 0;
do { $LINE = <HANDLE> } until $. == $DESIRED_LINE_NUMBER || eof;
#-----------------------------
@lines = <HANDLE>;
$LINE = $lines[$DESIRED_LINE_NUMBER];
#-----------------------------
# usage: build_index(*DATA_HANDLE, *INDEX_HANDLE)
sub build_index {
    my $data_file  = shift;
    my $index_file = shift;
    my $offset     = 0;

    while (<$data_file>) {
        print $index_file pack("N", $offset);
        $offset = tell($data_file);
    }
}

# usage: line_with_index(*DATA_HANDLE, *INDEX_HANDLE, $LINE_NUMBER)
# returns line or undef if LINE_NUMBER was out of range
sub line_with_index {
    my $data_file   = shift;
    my $index_file  = shift;
    my $line_number = shift;

    my $size;               # size of an index entry
    my $i_offset;           # offset into the index of the entry
    my $entry;              # index entry
    my $d_offset;           # offset into the data file

    $size = length(pack("N", 0));
    $i_offset = $size * ($line_number-1);
    seek($index_file, $i_offset, 0) or return;
    read($index_file, $entry, $size);
    $d_offset = unpack("N", $entry);
    seek($data_file, $d_offset, 0);
    return scalar(<$data_file>);
}

# usage:
open(FILE, "< $file")         or die "Can't open $file for reading: $!\n";
open(INDEX, "+>$file.idx")
        or die "Can't open $file.idx for read/write: $!\n";
build_index(*FILE, *INDEX);
$line = line_with_index(*FILE, *INDEX, $seeking);
#-----------------------------
use DB_File;
use Fcntl;

$tie = tie(@lines, $FILE, "DB_File", O_RDWR, 0666, $DB_RECNO) or die 
    "Cannot open file $FILE: $!\n";
# extract it
$line = $lines[$sought - 1];
#-----------------------------
# download the following standalone program
#!/usr/bin/perl -w
# print_line-v1 - linear style

@ARGV == 2 or die "usage: print_line FILENAME LINE_NUMBER\n";

($filename, $line_number) = @ARGV;
open(INFILE, "< $filename") or die "Can't open $filename for reading: $!\n";
while (<INFILE>) {
    $line = $_;
    last if $. == $line_number;
}
if ($. != $line_number) {
    die "Didn't find line $line_number in $filename\n";
}
print;

#-----------------------------
# download the following standalone program
#!/usr/bin/perl -w
# print_line-v2 - index style
# build_index and line_with_index from above
@ARGV == 2 or
    die "usage: print_line FILENAME LINE_NUMBER";

($filename, $line_number) = @ARGV;
open(ORIG, "< $filename") 
        or die "Can't open $filename for reading: $!";

# open the index and build it if necessary
# there's a race condition here: two copies of this
# program can notice there's no index for the file and
# try to build one.  This would be easily solved with
# locking
$indexname = "$filename.index";
sysopen(IDX, $indexname, O_CREAT|O_RDWR)
         or die "Can't open $indexname for read/write: $!";
build_index(*ORIG, *IDX) if -z $indexname;  # XXX: race unless lock

$line = line_with_index(*ORIG, *IDX, $line_number);
die "Didn't find line $line_number in $filename" unless defined $line;
print $line;

#-----------------------------
# download the following standalone program
#!/usr/bin/perl -w
# print_line-v3 - DB_File style
use DB_File;
use Fcntl;

@ARGV == 2 or
    die "usage: print_line FILENAME LINE_NUMBER\n";

($filename, $line_number) = @ARGV;
$tie = tie(@lines, "DB_File", $filename, O_RDWR, 0666, $DB_RECNO)
        or die "Cannot open file $filename: $!\n";

unless ($line_number < $tie->length) {
    die "Didn't find line $line_number in $filename\n"
}

print $lines[$line_number-1];                        # easy, eh?

#-----------------------------

Processing Variable-Length Text Fields

#-----------------------------
# given $RECORD with field separated by PATTERN,
# extract @FIELDS.
@FIELDS = split(/PATTERN/, $RECORD);
#-----------------------------
split(/([+-])/, "3+5-2");
#-----------------------------
(3, '+', 5, '-', 2)
#-----------------------------
@fields = split(/:/, $RECORD);
#-----------------------------
@fields = split(/\s+/, $RECORD);
#-----------------------------
@fields = split(" ", $RECORD);
#-----------------------------

Removing the Last Line of a File

#-----------------------------
open (FH, "+< $file")               or die "can't update $file: $!";
while ( <FH> ) {
    $addr = tell(FH) unless eof(FH);
}
truncate(FH, $addr)                 or die "can't truncate $file: $!";
#-----------------------------

Processing Binary Files

#-----------------------------
binmode(HANDLE);
#-----------------------------
$gifname = "picture.gif";
open(GIF, $gifname)         or die "can't open $gifname: $!";

binmode(GIF);               # now DOS won't mangle binary input from GIF
binmode(STDOUT);            # now DOS won't mangle binary output to STDOUT

while (read(GIF, $buff, 8 * 2**10)) {
    print STDOUT $buff;
}
#-----------------------------

Using Random-Access I/O

#-----------------------------
$ADDRESS = $RECSIZE * $RECNO;
seek(FH, $ADDRESS, 0) or die "seek:$!";
read(FH, $BUFFER, $RECSIZE);
#-----------------------------
$ADDRESS = $RECSIZE * ($RECNO-1);
#-----------------------------

Updating a Random-Access File

#-----------------------------
use Fcntl;                          # for SEEK_SET and SEEK_CUR

$ADDRESS = $RECSIZE * $RECNO;
seek(FH, $ADDRESS, SEEK_SET)        or die "Seeking: $!";
read(FH, $BUFFER, $RECSIZE) == $RECSIZE
                                    or die "Reading: $!";
@FIELDS = unpack($FORMAT, $BUFFER);
# update fields, then
$BUFFER = pack($FORMAT, @FIELDS);
seek(FH, -$RECSIZE, SEEK_CUR)       or die "Seeking: $!";
print FH $BUFFER;
close FH                            or die "Closing: $!";
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# weekearly -- set someone's login date back a week
use User::pwent;
use IO::Seekable;

$typedef = 'L A12 A16';         # linux fmt; sunos is "L A8 A16"
$sizeof  = length(pack($typedef, ()));
$user    = shift(@ARGV) || $ENV{USER} || $ENV{LOGNAME};

$address = getpwnam($user)->uid * $sizeof;

open (LASTLOG, "+</var/log/lastlog")
    or die "can't update /usr/adm/lastlog: $!";
seek(LASTLOG, $address, SEEK_SET)
    or die "seek failed: $!";
read(LASTLOG, $buffer, $sizeof) == $sizeof
    or die "read failed: $!";

($time, $line, $host) = unpack($typedef, $buffer);
$time  -= 24 * 7 * 60 * 60;         # back-date a week
$buffer = pack($typedef, $time, $line, $time);

seek(LASTLOG, -$sizeof, SEEK_CUR)   # backup one record
    or die "seek failed: $!";
print LASTLOG $record;
    
close(LASTLOG)
    or die "close failed: $!";

#-----------------------------

Reading a String from a Binary File

#-----------------------------
$old_rs = $/;                       # save old $/
$/ = "\0";                          # NULL
seek(FH, $addr, SEEK_SET)           or die "Seek error: $!\n";
$string = <FH>;                     # read string
chomp $string;                      # remove NULL
$/ = $old_rs;                       # restore old $/
#-----------------------------
{
    local $/ = "\0";
    # ...
}                           # $/ is automatically restored
#-----------------------------
# download the following standalone program
#!/usr/bin/perl 
# bgets - get a string from an address in a binary file
use IO::Seekable;
($file, @addrs) = @ARGV             or die "usage: $0 addr ...";
open(FH, $file)                     or die "cannot open $file: $!";
$/ = "\000";

foreach $addr (@addrs) {
    $addr = oct $addr if $addr =~ /^0/;
    seek(FH, $addr, SEEK_SET)
        or die "can't seek to $addr in $file: $!";
    printf qq{%#x %#o %d "%s"\n}, $addr, $addr, $addr, scalar <>;
}

#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# strings - pull strings out of a binary file
$/ = "\0";
while (<>) {
    while (/([\040-\176\s]{4,})/g) {
        print $1, "\n";
    }
}

#-----------------------------

Reading Fixed-Length Records

#-----------------------------
# $RECORDSIZE is the length of a record, in bytes.
# $TEMPLATE is the unpack template for the record
# FILE is the file to read from
# @FIELDS is an array, one element per field

until ( eof(FILE) ) {
    read(FILE, $record, $RECORDSIZE) == $RECORDSIZE
        or die "short read\n";
    @FIELDS = unpack($TEMPLATE, $record);
}
#-----------------------------
#define UT_LINESIZE           12
#define UT_NAMESIZE           8
#define UT_HOSTSIZE           16

struct utmp {                       /* here are the pack template codes */
    short ut_type;                  /* s for short, must be padded      */
    pid_t ut_pid;                   /* i for integer                    */
    char ut_line[UT_LINESIZE];      /* A12 for 12-char string           */
    char ut_id[2];                  /* A2, but need x2 for alignment    */
    time_t ut_time;                 /* l for long                       */
    char ut_user[UT_NAMESIZE];      /* A8 for 8-char string             */
    char ut_host[UT_HOSTSIZE];      /* A16 for 16-char string           */
    long ut_addr;                   /* l for long                       */
};
#-----------------------------

Reading Configuration Files

#-----------------------------
while (<CONFIG>) {
    chomp;                  # no newline
    s/#.*//;                # no comments
    s/^\s+//;               # no leading white
    s/\s+$//;               # no trailing white
    next unless length;     # anything left?
    my ($var, $value) = split(/\s*=\s*/, $_, 2);
    $User_Preferences{$var} = $value;
} 
#-----------------------------
do "$ENV{HOME}/.progrc";
#-----------------------------
# set class C net
NETMASK = 255.255.255.0
MTU     = 296
    
DEVICE  = cua1
RATE    = 115200
MODE    = adaptive
#-----------------------------
no strict 'refs';
$$var = $value;
#-----------------------------
# set class C net
$NETMASK = '255.255.255.0';
$MTU     = 0x128;
# Brent, please turn on the modem
$DEVICE  = 'cua1';
$RATE    = 115_200;
$MODE    = 'adaptive';
#-----------------------------
if ($DEVICE =~ /1$/) {
    $RATE =  28_800;
} else {
    $RATE = 115_200;
} 
#-----------------------------
$APPDFLT = "/usr/local/share/myprog";

do "$APPDFLT/sysconfig.pl";
do "$ENV{HOME}/.myprogrc";
#-----------------------------
do "$ENV{HOME}/.myprogrc";
    or
do "$APPDFLT/sysconfig.pl"
#-----------------------------
{ package Settings; do "$ENV{HOME}/.myprogrc" }
#-----------------------------
eval `cat $ENV{HOME}/.myprogrc`;
#-----------------------------
$file = "someprog.pl";
unless ($return = do $file) {
    warn "couldn't parse $file: $@"         if $@;
    warn "couldn't do $file: $!"            unless defined $return;
    warn "couldn't run $file"               unless $return;
}
#-----------------------------

Testing a File for Trustworthiness

#-----------------------------
( $dev, $ino, $mode, $nlink, 
  $uid, $gid, $rdev, $size, 
  $atime, $mtime, $ctime, 
  $blksize, $blocks )       = stat($filename)
        or die "no $filename: $!";

$mode &= 07777;             # discard file type info
#-----------------------------
$info = stat($filename)     or die "no $filename: $!";
if ($info->uid == 0) {
    print "Superuser owns $filename\n";
} 
if ($info->atime > $info->mtime) {
    print "$filename has been read since it was written.\n";
} 
#-----------------------------
use File::stat;

sub is_safe {
    my $path = shift;
    my $info = stat($path);
    return unless $info;

    # owner neither superuser nor me 
    # the real uid is in stored in the $< variable
    if (($info->uid != 0) && ($info->uid != $<)) {
        return 0;
    }

    # check whether group or other can write file.
    # use 066 to detect either reading or writing
    if ($info->mode & 022) {   # someone else can write this
        return 0 unless -d _;  # non-directories aren't safe
            # but directories with the sticky bit (01000) are
        return 0 unless $info->mode & 01000;        
    }
    return 1;
}
#-----------------------------
use Cwd;
use POSIX qw(sysconf _PC_CHOWN_RESTRICTED);
sub is_verysafe {
    my $path = shift;
    return is_safe($path) if sysconf(_PC_CHOWN_RESTRICTED);
    $path = getcwd() . '/' . $path if $path !~ m{^/};
    do {
        return unless is_safe($path);
        $path =~ s#([^/]+|/)$##;               # dirname
        $path =~ s#/$## if length($path) > 1;  # last slash
    } while length $path;

    return 1;
}
#-----------------------------
$file = "$ENV{HOME}/.myprogrc";
readconfig($file) if is_safe($file);
#-----------------------------
$file = "$ENV{HOME}/.myprogrc";
if (open(FILE, "< $file")) { 
    readconfig(*FILE) if is_safe(*FILE);
}
#-----------------------------

Program: tailwtmp

#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# tailwtmp - watch for logins and logouts; 
# uses linux utmp structure, from utmp(5)
$typedef = 's x2 i A12 A4 l A8 A16 l';
$sizeof = length pack($typedef, () );
use IO::File;
open(WTMP, '/var/log/wtmp') or die "can't open /var/log/wtmp: $!";
seek(WTMP, 0, SEEK_END);
for (;;) { 
        while (read(WTMP, $buffer, $sizeof) == $sizeof) {
        ($type, $pid,  $line, $id, $time, $user, $host, $addr) 
            = unpack($typedef, $buffer);
        next unless $user && ord($user) && $time;
        printf "%1d %-8s %-12s %2s %-24s %-16s %5d %08x\n", 
            $type,$user,$line,$id,scalar(localtime($time)), 
            $host,$pid,$addr;
    } 
    for ($size = -s WTMP; $size == -s WTMP; sleep 1) {}
    WTMP->clearerr();
}

#-----------------------------

Program: tctee

#-----------------------------
#% someprog | tee /tmp/output | Mail -s 'check this' user@host.org
#-----------------------------
#% someprog | tctee f1 "|cat -n" f2 ">>f3"
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# tctee - clone that groks process tees 
# perl3 compatible, or better.

while ($ARGV[0] =~ /^-(.+)/ && (shift, ($_ = $1), 1)) {
    next if /^$/;
    s/i// && (++$ignore_ints, redo);
    s/a// && (++$append,      redo);
    s/u// && (++$unbuffer,    redo);
    s/n// && (++$nostdout,    redo);
    die "usage tee [-aiun] [filenames] ...\n";
}

if ($ignore_ints) {
    for $sig ('INT', 'TERM', 'HUP', 'QUIT') { $SIG{$sig} = 'IGNORE'; }

}
$SIG{'PIPE'} = 'PLUMBER';
$mode = $append ? '>>' : '>';
$fh = 'FH000';

unless ($nostdout) {
    %fh = ('STDOUT', 'standard output'); # always go to stdout
    }

$| = 1 if $unbuffer;

for (@ARGV) {
    if (!open($fh, (/^[^>|]/ && $mode) . $_)) {
        warn "$0: cannot open $_: $!\n"; # like sun's; i prefer die
        $status++;
        next;
    }
    select((select($fh), $| = 1)[0]) if $unbuffer;
    $fh{$fh++} = $_;
}

while (<STDIN>) {
    for $fh (keys %fh) {
        print $fh $_;
    }
}

for $fh (keys %fh) {
    next if close($fh) || !defined $fh{$fh};
    warn "$0: couldnt close $fh{$fh}: $!\n";
    $status++;
}

exit $status;

sub PLUMBER {
    warn "$0: pipe to \"$fh{$fh}\" broke!\n";
    $status++;
    delete $fh{$fh};
}

#-----------------------------

Program: laston

#-----------------------------
#% laston gnat
#gnat  UID 314 at Mon May 25 08:32:52 1998 on ttyp0 from below.perl.com
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# laston - find out when given user last logged on
use User::pwent;
use IO::Seekable qw(SEEK_SET);

open (LASTLOG, "/var/log/lastlog") or die "can't open /usr/adm/lastlog: $!";

$typedef = 'L A12 A16';  # linux fmt; sunos is "L A8 A16"
$sizeof  = length(pack($typedef, ()));

for $user (@ARGV) {
    $U = ($user =~ /^\d+$/) ? getpwuid($user) : getpwnam($user);
    unless ($U) { warn "no such uid $user\n"; next; }
    seek(LASTLOG, $U->uid * $sizeof, SEEK_SET) or die "seek failed: $!";
    read(LASTLOG, $buffer, $sizeof) == $sizeof  or next;
    ($time, $line, $host) = unpack($typedef, $buffer);
    printf "%-8s UID %5d %s%s%s\n", $U->name, $U->uid,
          $time ? ("at " . localtime($time)) : "never logged in",
          $line && " on $line", 
          $host && " from $host";
} 

#-----------------------------