9. Directories

Introduction

// Fetch the stat record for a few different paths; each result is stored
// in a Stdio.Stat variable
Stdio.Stat entry;

entry = file_stat("/bin/vi");
entry = file_stat("/usr/bin");
entry = file_stat(argv[1]);

// ------------


// Pull individual fields out of a stat record
Stdio.Stat entry = file_stat("/bin/vi");
int ctime = entry->ctime;
int size = entry->size;

// ------------


// No library routine for detecting whether a file is a 'text' file appears

// to exist, so the following [crude] functions have been implemented;

// they search the file for a LF / NEWLINE:


// Usable with any file

// Reports whether 'file' contains at least one NEWLINE, reading it one
// character at a time from its current position.
//
// Returns 1 as soon as a NEWLINE is read, otherwise 0 once no more data
// can be read.
int(0..1) containsText(Stdio.File file)
{
  string c;

  // read() yields an empty string at end of file [and 0 on error], so the
  // loop must stop on either; 'return' is a statement, hence the 'if'
  while ((c = file->read(1)) && sizeof(c))
  {
    if (c == NEWLINE) return 1;
  }

  return 0;
}

// Alternate version, expects a buffered file [usually containing text]

// Buffered-file variant: reports whether 'file' contains at least one LF
// character, reading from its current position.
//
// Returns 1 as soon as a LF is read, otherwise 0 when getchar() reports EOF.
int(0..1) containsText(Stdio.FILE file)
{
  int c;

  while ((c = file->getchar()) != EOF)
  {
    // 'return' is a statement, so it cannot be the right operand of '&&'
    if (c == LF) return 1;
  }

  return 0;
}

// Yet another alternative - this time we cheat and use the *NIX 'file'

// utility :) !


// Asks the *NIX 'file' utility to classify 'filename'.
//
// Returns 1 only when the utility's [brief] description is exactly
// "ASCII text", otherwise 0.
int(0..1) isTextFile(string filename)
{
  // The filename is handed over as its own argument, so names containing
  // spaces or shell metacharacters are never interpreted by a shell;
  // '--' stops a leading '-' from being taken as an option
  mapping result = Process.run(({ "file", "-bN", "--", filename }));

  // Strip the trailing newline emitted by the utility before comparing
  return String.trim_all_whites(result->stdout) == "ASCII text";
}

// ----


// Report the file named on the command line if the scan finds no newline
if (!containsText(Stdio.File(argv[1])))
  write("File %s doesn't have any text in it\n", argv[1]);

// Same report, this time based on the 'file' utility's verdict
if (!isTextFile(argv[1]))
  write("File %s doesn't have any text in it\n", argv[1]);

// ------------


// Walk everything below /usr/bin; each iteration supplies the containing
// directory and the name of one entry inside it
Filesystem.Traversion dirtree = Filesystem.Traversion("/usr/bin");

foreach(dirtree; string dir; string file)
{
  // The last character of 'dir' is dropped before printing
  string parent = chop(dir, 1);
  write("Inside %s is something called %s\n", parent, file);
}

Getting and Setting Timestamps

// Read a file's access and modification times, then write them back
string filename = "example.txt";

Stdio.Stat fs = file_stat(filename);
int readtime = fs->atime;
int writetime = fs->mtime;

System.utime(filename, readtime, writetime);

// ----------------------------


constant SECONDS_PER_DAY = 60 * 60 * 24;

// Shift both timestamps of a file one week into the past
string filename = "example.txt";

Stdio.Stat fs = file_stat(filename);
int atime = fs->atime - 7 * SECONDS_PER_DAY;
int mtime = fs->mtime - 7 * SECONDS_PER_DAY;

System.utime(filename, atime, mtime);

// ----------------------------


// Edit a file while leaving its access / modification times untouched:
// remember the timestamps, run the editor, then restore them
argc != 1 || die("usage: " + argv[0] + " filename");

Stdio.Stat fs = file_stat(argv[1]);
int atime = fs->atime, mtime = fs->mtime;

// '+' binds tighter than '||', so the editor choice must be parenthesised;
// otherwise the filename is only appended to the "vi" fallback and is
// dropped whenever EDITOR is set
Process.system((getenv("EDITOR") || "vi") + " " + argv[1]);

// utime reports failure by throwing, so trap it and report instead
mixed result = catch { System.utime(argv[1], atime, mtime); };
(result == OK) || write("Error updating timestamp on file, %s!\n", argv[1]);

Deleting a File

// Delete a single file, reporting when rm() indicates failure
string filename = "...";

if (!rm(filename))
  write("Can't delete, %s!\n", filename);

// ------------


// Deletes the files named in 'filelist' in order, giving up at the first
// one that cannot be removed.
//
// Returns the result of comparing the trapped value with OK.
int(0..1) rmAll(array(string) filelist)
{
  mixed failure = catch
  {
    foreach(filelist, string target)
    {
      // A failed rm() aborts the loop via throw
      if (!rm(target)) throw(PROBLEM);
    }
  };

  return failure == OK;
}

// ----


// Remove a fixed set of files as a group
array(string) filelist = ({"/tmp/x", "/tmp/y", "/tmp/z"});

if (!rmAll(filelist))
  write("Can't delete all files in array!\n");

// ----------------------------


// Writes 'msg' followed by NEWLINE via werror(), then terminates the
// program with exit code 'rc', or PROBLEM when 'rc' is omitted / zero.
void die(string msg, void|int(1..256) rc)
{
  werror(msg + NEWLINE);

  if (rc) exit(rc);
  exit(PROBLEM);
}

// ----


// Delete a single file; abort the program if that fails
string filename = "...";

if (!rm(filename))
  die("Can't delete " + filename);

// ----------------------------


// Try to delete every file in the list, counting the successes
array(string) filelist = ({"/tmp/x", "/tmp/y", "/tmp/z"});

int count = sizeof(filelist);
int deleted;

foreach(filelist, string filename)
{
  if (rm(filename)) ++deleted;
}

// Report a shortfall only
if (deleted != count)
  write("Could only delete %d of %d files\n", deleted, count);

Copying or Moving a File

// Copy via the library routine
string oldfile = "/tmp/old", newfile = "/tmp/new";

if (!Stdio.cp(oldfile, newfile))
  write("Error copying file\n");

// ----------------------------


// Copy by reading the whole source and writing it out again; anything
// thrown along the way is trapped and reported
string oldfile = "/tmp/old", newfile = "/tmp/new";

mixed result = catch
{
  Stdio.write_file(newfile, Stdio.read_file(oldfile));
};

if (result != OK)
  write("Problem copying file %s to file %s\n", oldfile, newfile);

// ----------------------------


// Note: This is a cross between, 'Process.system', which displays

// output on stdout, and, 'Process.popen', which does not display

// output [it returns it as a string] but does not return the status

// code

// Runs 'cmd' via Process.spawn with its stdout / stderr connected to
// pipes created here, waits for it to finish, and returns the value
// produced by wait().
//
// Nothing is read from the pipes, so the command's output is not shown.
int system(string cmd)
{
  // One pipe pair per output stream: 'pout' / 'perr' are handed to the
  // child, 'fout' / 'ferr' are the ends kept by this process
  Stdio.File fout = Stdio.File(), ferr = Stdio.File();
  Stdio.File pout = fout->pipe(Stdio.PROP_IPC),
             perr = ferr->pipe(Stdio.PROP_IPC);

  // NOTE(review): 'fout' / 'ferr' are never read before wait(); a command
  // producing a lot of output could presumably block on a full pipe -
  // confirm
  int rc = Process.spawn(cmd, 0, pout, perr)->wait();

  // Release all four pipe ends explicitly
  pout->close(); destruct(pout); fout->close(); destruct(fout);
  perr->close(); destruct(perr); ferr->close(); destruct(ferr);

  return rc;
}

// Copies 'oldfile' to 'newfile' using the *NIX 'cp' utility.
//
// Returns 1 when the command's status equals OK, otherwise 0.
int(0..1) unixFileCopy(string oldfile, string newfile)
{
  // '--force' is sufficient for an unattended overwrite; the former
  // '--reply=yes' option is rejected by current GNU cp, which made every
  // copy fail with an 'unrecognized option' error
  string cmd = "cp --force " + oldfile + " " + newfile;
  return system(cmd) == OK;
}

// Copies 'oldfile' to 'newfile' using a 'copy' command.
//
// Returns 1 when the command's status equals OK, otherwise 0.
int(0..1) vmsFileCopy(string oldfile, string newfile)
{
  int status = system("copy " + oldfile + " " + newfile);
  return status == OK;
}

// ----


// Copy using the external-command helper
string oldfile = "/tmp/old", newfile = "/tmp/new";

if (!unixFileCopy(oldfile, newfile))
  write("Problem copying file %s to file %s\n", oldfile, newfile);

// ----------------------------


// Move / rename using the built-in
string oldfile = "/tmp/old", newfile = "/tmp/new";

if (!mv(oldfile, newfile))
  write("Problem moving / renaming file %s to file %s\n", oldfile, newfile);

Recognizing Two Names for the Same File

// Number of times each distinct file has been encountered, keyed by a
// "device:inode" string.
//
// A string key is used because arrays are compared by identity when used
// as mapping indices: a freshly built ({ inode, dev }) array never matches
// one stored earlier, so every lookup would miss.
mapping(string:int) seen = ([]);

// ----


// Records one more sighting of the file that 'filename' refers to.
// Names that cannot be stat'ed are ignored.
void do_my_thing(string filename)
{
  Stdio.Stat fs = file_stat(filename);
  if (!fs) return;

  // The inode number is exposed by Stdio.Stat as 'ino'
  string key = sprintf("%d:%d", fs->dev, fs->ino);

  // Could do this [apply a lambda assigned to variable 'p']:
  //
  //    seen[key] || p(key);
  //
  //    function p = lambda(string key) { ... };
  //
  // to process a file that has not previously been seen

  // A missing entry reads as zero, so this both creates and increments
  seen[key] += 1;
}

// ----------------------------


// Names seen for each distinct file, joined with SEP and keyed by a
// "device/inode" string. Strings are used as keys because arrays are
// compared by identity when used as mapping indices, so a freshly built
// array would never match an earlier entry.
constant SEP = ":"; mapping(string:string) seen = ([]);

// ----


array(string) files = ({"f1.txt", "f2.txt", "f3.txt"});

foreach(files, string filename)
{
  Stdio.Stat fs = file_stat(filename);

  // Skip names that cannot be stat'ed
  if (!fs) continue;

  // The inode number is exposed by Stdio.Stat as 'ino'
  string key = sprintf("%d/%d", fs->dev, fs->ino);

  seen[key] = seen[key] ? (seen[key] + SEP + filename) : filename;
}

// ----


array(string) idxarr = indices(seen); sort(idxarr);

foreach(idxarr, string inodev)
{
  foreach(seen[inodev] / SEP, string filename)
  {
    // ... do stuff with each filename ...

    write("%s\n", filename);
  }
}

Processing All Files in a Directory

// Visit every entry of a directory, building its full path
string dirname = "...";
array(string) DIR = get_dir(dirname);

foreach(DIR, string filename)
{
  string path = dirname + "/" + filename;
  // ... do something with 'path' ...

}

// ----------------------------


// List the entries of a directory that 'isTextFile' accepts
string dirname = "/usr/local/bin";
int|array(string) DIR = get_dir(dirname);

if (!DIR) die("Can't open " + dirname);

write("Text files in %s are:\n", dirname);

foreach(DIR, string filename)
{
  string path = dirname + "/" + filename;

  // 'isTextFile' defined in an earlier section

  if (isTextFile(path)) write("%s\n", filename);
}

// ----------------------------


// '.' and '..' don't show up in a 'get_dir'-generated array


// ----------------------------


// Returns the sorted paths ['dirname' + "/" + name] of those entries of
// 'dirname' whose name does not start with "." and which 'isTextFile'
// accepts.
array(string) plain_files(string dirname)
{
  // Keep only the wanted names ['isTextFile' defined in an earlier section]
  array(string) wanted =
    filter(get_dir(dirname),
           lambda(string name)
           {
             return !has_prefix(name, ".") && isTextFile(dirname + "/" + name);
           });

  // Turn each surviving name into a path
  array(string) paths =
    map(wanted, lambda(string name) { return dirname + "/" + name; });

  sort(paths);

  return paths;
}

Globbing, or Getting a List of Filenames Matching a Pattern

// A 'glob' workalike that filters using regular expressions

//

// Note: Pike offers many non-regexp-based string pattern matching

// functions [e.g. 'has_prefix' and other 'has_...' functions,

// 'search', etc]. These are preferable in many situations as they are

// much faster than regexprs. However, code shown here mostly uses

// regexprs in order to better match the Perl examples

//

// Regexp-based 'glob' workalike.
//
// Given a string, returns the result of matching 'regexp' against it;
// given an array of strings, returns the elements that match; given
// anything else, returns 0.
int(0..1)|array(string) grep(string regexp, string|array(string) arr)
{
  // Single string: plain match test
  if (stringp(arr)) return Regexp.match(regexp, arr);

  // Neither string nor array
  if (!arrayp(arr)) return 0;

  // Array: keep the matching elements
  return filter(arr,
                lambda(string candidate)
                {
                  return Regexp.match(regexp, candidate);
                });
}

// ----------------------------


// Names ending in ".c", selected with the built-in 'glob'
string dirname = "...";
int|array(string) filenames = glob("*.c", get_dir(dirname));

// ------------


// Same selection using the regexp-based 'grep'. The backslash has to be
// doubled: inside a string literal "\." is just ".", which as a regexp
// matches ANY character [so e.g. "abc" would be accepted]
string dirname = "...";
int|array(string) filenames = grep("\\.c$", get_dir(dirname));

// ------------


// Names ending in ".c", ".h", ".C" or ".H"
string dirname = "...";
int|array(string) filenames = grep("\\.[CHch]$", get_dir(dirname));

// ----------------------------


// Collect the paths of text files whose names end in .c / .h / .C / .H
string dirname = "...";
int|array(string) dir = get_dir(dirname);

dir || die("Couldn't open " + dirname + " for reading");

//

// Note: the results are gathered in a mapping keyed by a running numeric

// index [an array extended with '+=' would serve equally well] :)

//

mapping(int:string) files = ([]); int idx = -1; string path;    

foreach(dir, string file)
{
  // The backslash is doubled so the regexp sees a literal dot; "\." in a
  // string literal is just ".", which would match any character
  if (!grep("\\.[CHch]$", file)) continue;
  path = dirname + "/" + file;
  isTextFile(path) && (files[++idx] = path);
}

//

// Note: Traverse a mapping-based, emulated array in index order:

//

//  foreach(sort(indices(files)), int i)

//  {

//    write("%d -> %s\n", i, files[i]); 

//  }

//

Processing All Files in a Directory Recursively

//

// Routine inspired by library function, 'Stdio.recursive_rm'. A little

// extra code helped make it more generally useful

//

// Applies 'op' to 'path' after first applying it, recursively, to
// everything contained in 'path' when that is a directory. Any
// 'extra_args' are passed through to every call of 'op'.
//
// Returns 0 when 'path' cannot be stat'ed, otherwise whatever 'op'
// returns for 'path' itself.
void|mixed process_directory(string path, function(string, mixed ... : void|mixed) op, mixed ... extra_args)
{
  Stdio.Stat info = file_stat(path, 1);
  if (!info) return 0;

  if (info->isdir)
  {
    array(string) children = get_dir(path);

    if (children)
    {
      // Contents are handled before the directory itself
      foreach(children, string child)
      {
        process_directory(path + "/" + child, op, @extra_args);
      }
    }
  }

  return op(path, @extra_args);
}

// ----------------------------


array(string) dirlist = ({ "/tmp/d1", "/tmp/d2", "/tmp/d3" });

// Do something with each directory in the list

foreach(dirlist, string dir)
{
  // Delete directory [if empty]     -> rm(dir); 

  // Make it the 'current directory' -> cd(dir);

  // Get list of files it contains   -> array(string) filelist = get_dir(dir);

  // Get directory metadata          -> Stdio.Stat ds = file_stat(dir);

}

// ------------


array(string) dirlist = ({ "/tmp/d1", "/tmp/d2", "/tmp/d3" });

// Action applied to every entry found
function pf =
  lambda(string path)
  {
    // ... do something to the file or directory ...

    write("%s\n", path);
  };

// For each directory in the list ...

foreach(dirlist, string dir)
{
  int|array(string) filelist = get_dir(dir);

  if (!filelist)
  {
    write("%s does not exist\n", dir);
    continue;
  }

  if (!sizeof(filelist))
  {
    write("%s is empty\n", dir);
    continue;
  }

  // For each file / directory in the directory ...

  foreach(filelist, string filename)
  {
    // Apply function to process the file / directory

    string entry = dir + "/" + filename;
    pf(entry);
  }
}

// ------------


// Special steps need to be taken in above routines to distinguish 

// between files and directories. Easiest to abstract out directory

// traversal into a single routine [so allowing for recursive traversal

// of entire tree], and have it apply a lambda to each file


array(string) dirlist = ({ "/tmp/d1", "/tmp/d2", "/tmp/d3" });

// Action applied to every path visited
function pf =
  lambda(string path)
  {
    // ... do something to the file or directory ...

    write("%s\n", path);
  };

// For each directory in the list, let 'process_directory' do the walking

foreach(dirlist, string dir)
  process_directory(dir, pf);

// ----------------------------


// Adds the size of 'path' to accum[0], but only when 'path' can be
// stat'ed and is a regular file.
void accum_filesize(string path, array(int) accum)
{
  int|Stdio.Stat info = file_stat(path);

  // Anything that is not a regular file contributes nothing
  if (!info || !info->isreg) return;

  accum[0] += info->size;
}

// ------------


// Verify arguments ...

if (argc != 2) die("usage: " + argv[0] + " dir");

string dir = argv[1];
Stdio.Stat fs = file_stat(dir);

if (!fs || !fs->isdir) die(dir + " does not exist / not a directory");

// Collect data [a one-element array acts as the running total]

array(int) dirsize = ({0});
process_directory(dir, accum_filesize, dirsize);

// Report results

write("%s contains %d bytes\n", dir, dirsize[0]);

// ----------------------------


// Replaces biggest[0] / biggest[1] with the path and size of 'path' when
// it is a regular file larger than the size currently held in biggest[1].
void biggest_file(string path, array(mixed) biggest)
{
  int|Stdio.Stat info = file_stat(path);

  if (!info || !info->isreg) return;

  if (biggest[1] < info->size)
  {
    biggest[0] = path;
    biggest[1] = info->size;
  }
}

// ------------


// Verify arguments ...

if (argc != 2) die("usage: " + argv[0] + " dir");

string dir = argv[1];
Stdio.Stat fs = file_stat(dir);

if (!fs || !fs->isdir) die(dir + " does not exist / not a directory");

// Collect data [a two-element array holds the best path and size so far]

array(mixed) biggest = ({"", 0});
process_directory(dir, biggest_file, biggest);

// Report results

write("Biggest file is %s containing %d bytes\n", biggest[0], biggest[1]);

// ----------------------------


// Replaces youngest[0] / youngest[1] with the path and ctime of 'path'
// when it is a regular file whose ctime is later than the one currently
// held in youngest[1].
void youngest_file(string path, array(mixed) youngest)
{
  int|Stdio.Stat fs = file_stat(path);

  // The youngest file carries the LARGEST timestamp; comparing the other
  // way round selects the oldest file instead
  if (fs && fs->isreg && youngest[1] < fs->ctime)
  {
    youngest[0] = path; youngest[1] = fs->ctime;
  }
}

// ------------


// Verify arguments ...

argc == 2 || die("usage: " + argv[0] + " dir");
Stdio.Stat fs; string dir = argv[1];
((fs = file_stat(dir)) && fs->isdir) || die(dir + " does not exist / not a directory");

// Collect data [use an array to store results]; start from the smallest
// possible timestamp so that the first regular file always replaces it

array(mixed) youngest = ({"", Int.NATIVE_MIN});
process_directory(dir, youngest_file, youngest); 

// Report results [the placeholder is still in place if nothing was found]

if (youngest[0] == "")
  write("No regular files found in %s\n", dir);
else
  write("Youngest file is %s dating %s\n", youngest[0], ctime(youngest[1]));

// ----------------------------


// Writes 'path' on a line of its own when it can be stat'ed and is a
// directory; otherwise does nothing.
void print_name_if_dir(string path)
{
  int|Stdio.Stat info = file_stat(path);

  if (!info || !info->isdir) return;

  write("%s\n", path);
}

// ------------


// Verify arguments ...

if (argc != 2) die("usage: " + argv[0] + " dir");

string dir = argv[1];
Stdio.Stat fs = file_stat(dir);

if (!fs || !fs->isdir) die(dir + " does not exist / not a directory");

// Print directory names

process_directory(dir, print_name_if_dir);

Removing a Directory and Its Contents

// Easy way - recommended

// Removes 'dirname' and everything below it by delegating to the library
// routine; returns that routine's result.
int(0..1) rmTree(string dirname)
{
  return Stdio.recursive_rm(dirname);
}

// ----


string dirtree = "/tmp/dirtree";

if (!rmTree(dirtree))
  write("Problem removing directory tree %s\n", dirtree);

// ----------------------------


// Another way, but unnecessary - probably for customised deletions only

// Removes 'dirname' and everything below it by applying 'rm' through
// 'process_directory'; returns the value produced for 'dirname' itself.
int(0..1) rmTree(string dirname)
{
  return process_directory(dirname, rm);
}

// ----


string dirtree = "/tmp/dirtree";

if (!rmTree(dirtree))
  write("Problem removing directory tree %s\n", dirtree);

Renaming Files

// A list of file names

array(string) names = ({"f1.txt", "f2.txt", "f3.txt"});

// The name-mapping rule lives in a variable, so it can be swapped for
// another function at any time

function rename =
  lambda(string name)
  {
    return replace(name, ".txt", ".text");
  };

// Process all files

foreach(names, string name)
{
  // Derive the new name from the existing one

  string newname = rename(name);

  // Rename the file itself, reporting any failure

  if (!mv(name, newname))
    write("Could not rename %s to %s\n", name, newname);
}

// ----------------------------


// Slightly different to the Perl example, though it does use regexp

// and intent is roughly the same.

//

// pike SCRIPTNAME '\.txt$' '.text' f1.txt f2.txt df3.txg

//

//    f1.txt  -> f1.text

//    f2.txt  -> f2.text

//    df3.txg -> df3.txg [no change]

//


if (argc <= 2) die("usage: " + argv[0] + " expr repl files...");

string expr = argv[1], repl = argv[2];

// Every remaining argument is a file to rename
foreach(argv[3..], string name)
{
  string newname = Regexp.replace(expr, name, repl);

  // Nothing to do when the expression left the name unchanged
  if (equal(name, newname)) continue;

  if (!mv(name, newname))
    write("Could not rename %s to %s\n", name, newname);
}

Splitting a Filename into Its Component Parts

// Returns the part of 'filename' following the last occurrence of
// 'separator' ["." when omitted], or an empty string when 'filename'
// does not contain the separator at all.
string file_extension(string filename, void|string separator)
{
  array(string) parts = filename / (separator || ".");

  // A single part means the separator never occurred; without this check
  // the whole name [e.g. "README"] would be reported as its own extension
  if (sizeof(parts) < 2) return "";

  return parts[-1];
}

// Splits 'path' into its parts, returned as a mapping with the keys
// "dirname", "basename" and "extension" [the latter obtained by applying
// 'file_extension' to the basename].
mapping(string:string) file_parse(string path)
{
  string base = basename(path);

  return ([
    "dirname" : dirname(path),
    "basename" : base,
    "extension" : file_extension(base)
  ]);
}

// ----------------------------


string path = "/tmp/dirtree/s/s1/s1.txt";

// ----


// Directory and file parts via the built-ins
string dir = dirname(path);
string base = basename(path);

// All three parts via 'file_parse', printed one per line
mapping(string:string) pm = file_parse(path);

foreach(({"dirname", "basename", "extension"}), string part)
  write("%s\n", pm[part]);

// ------------


string path = "/usr/lib/libc.a";

// ----


string dir = dirname(path);
string base = basename(path);

write("dir is %s, file is %s\n", dir, base);

// ------------


string path = "/usr/lib/libc.a";

// ----


mapping(string:string) pm = file_parse(path);

// The extension is shown with a leading dot
string ext = "." + pm["extension"];

write("dir is %s, name is %s, extension is %s\n",
  pm["dirname"], pm["basename"], ext);

// ----------------------------


// Handle as a general purpose parse task

string path = "Hard%20Drive:System%20Folder:README.txt";

// ----


// First split: "%20" becomes a space, then the path is cut at each ":"
mapping(string:string) pm =
  mkmapping(({"drive", "folder", "filename"}),
            replace(path, "%20", " ") / ":");

// Second split: the filename is cut at each "."
mapping(string:string) fm =
  mkmapping(({"name", "extension"}),
            pm["filename"] / ".");

write("dir is %s, name is %s, extension is %s\n",
  pm["drive"] + ":" + pm["folder"],
  fm["name"], "." + fm["extension"]);

// ----------------------------


// See implementation for 'file_extension' function above

Program: symirror

@@INCOMPLETE@@
@@INCOMPLETE@@

Program: lst

@@INCOMPLETE@@
@@INCOMPLETE@@