9. Directories

Introduction

// 'stat' accepts any kind of path - a regular file, a directory, or a name
// taken from the command line - and returns an array of metadata for it
foreach (array('/bin/vi', '/usr/bin', $argv[1]) as $target)
{
  $entry = stat($target);
}

// ------------

// Individual fields are read from the returned array by name
$entry = stat('/bin/vi');

list($ctime, $size) = array($entry['ctime'], $entry['size']);

// ----------------------------

// For the simple task of determining whether a file contains 'text', a simple
// function that searches for a newline could be implemented. Not exactly
// foolproof, but very simple, low overhead, no installation headaches ...
// Report whether $file contains at least one newline character.
//
// $file   - path of the file to examine
// returns - TRUE once a "\n" has been read; FALSE when none is found or the
//           file could not be opened
function containsText($file)
{
  $handle = fopen($file, 'r');

  if (!$handle) return FALSE;

  $found = FALSE;

  // Read one character at a time and stop at the first newline
  while (!$found && FALSE !== ($byte = fgetc($handle)))
  {
    $found = ($byte == "\n");
  }

  fclose($handle);

  return $found;
}

// PHP offers the [currently experimental] Fileinfo group of functions to
// determine file types based on their contents / 'magic numbers'. This
// is functionality similar to the *NIX, 'file' utility. Note that it must
// first be installed using the PEAR utility [see PHP documentation] 
// Classify $file by its contents using the Fileinfo functions.
//
// $file   - path of the file to examine
// returns - TRUE only when Fileinfo describes the file as exactly 'ASCII text'
function isTextFile($file)
{
  // Note: untested code, but I believe this is how it is supposed to work
  $magic = finfo_open(FILEINFO_NONE);
  $description = finfo_file($magic, $file);
  finfo_close($magic);

  return ($description == 'ASCII text');
}

// Alternatively, use the *NIX utility, 'file', directly
// Classify $file by asking the external 'file' utility to describe it.
//
// $file   - path of the file to examine [shell-escaped before use]
// returns - TRUE only when the last line printed by 'file' is 'ASCII text'
function isTextFile($file)
{
  $command = trim('file -bN ' . escapeshellarg($file));

  // 'exec' hands back the final line of the command's output
  $description = exec($command);

  return $description == 'ASCII text';
}

// ----

// Stop with a message unless the named file passes the newline-based check
if (!containsText($argv[1])) die("File {$argv[1]} doesn't have any text in it\n");

// Same guard, using the content-type based check instead
if (!isTextFile($argv[1])) die("File {$argv[1]} doesn't have any text in it\n");

// ----------------------------

$dirname = '/usr/bin/';

// Obtain a directory handle, giving up if the directory cannot be opened
$dirhdl = opendir($dirname);

if (!$dirhdl) die("Couldn't open {$dirname}\n");

// 'readdir' yields one entry name per call and FALSE when none remain; the
// strict comparison keeps a name such as '0' from ending the loop early
while (FALSE !== ($file = readdir($dirhdl)))
{
  printf("Inside %s is something called: %s\n", $dirname, $file);
}

closedir($dirhdl);

Getting and Setting Timestamps

$filename = 'example.txt';

// One 'stat' call reports both values: 'atime' is the time of last access
// and 'mtime' the time of last modification
$fs = stat($filename);

list($readtime, $writetime) = array($fs['atime'], $fs['mtime']);

// Alter $writetime, and $readtime ...

// Write the values back; note that 'touch' takes the modification time
// before the access time
touch($filename, $writetime, $readtime);

// ----------------------------

$filename = 'example.txt';

// Get the file's current access and modification time, respectively
$fs = stat($filename);

$atime = $fs['atime'];
$mtime = $fs['mtime'];

// Dedicated functions also exist to retrieve this information:
//
// $atime = fileatime($filename);
// $mtime = filemtime($filename);
//

// Perform date arithmetic. Traditional approach where arithmetic is performed
// directly with Epoch Seconds [i.e. the *NIX time stamp value] will work ...

define('SECONDS_PER_DAY', 60 * 60 * 24);

// Set file's access and modification times to 1 week ago
$atime -= 7 * SECONDS_PER_DAY;
$mtime -= 7 * SECONDS_PER_DAY;

// ... but care must be taken to account for daylight saving. Therefore, the
// recommended approach is to use library functions to perform such tasks.
// The calculation starts again from the values 'stat' reported, so that this
// alternative replaces the result above instead of subtracting a second week

$atime = strtotime('-7 days', $fs['atime']);
$mtime = strtotime('-7 days', $fs['mtime']);

// Update file timestamp ['touch' takes the modification time first]
touch($filename, $mtime, $atime);

// Good idea to clear the cache after such updates have occurred so fresh
// values will be retrieved on next access
clearstatcache();

// ----------------------------

// Edit a file while leaving its access and modification times untouched
$argc == 2 || die("usage: {$argv[0]} filename\n");

$filename = $argv[1];

// Remember the timestamps as they are before the editor runs
$fs = stat($filename);

$atime = $fs['atime'];
$mtime = $fs['mtime'];

// Use the editor named in the environment, falling back to 'vi' only when
// EDITOR is unset or empty [the two must not be joined into one command]
$editor = getenv('EDITOR');

if ($editor === FALSE || trim($editor) === '') $editor = 'vi';

// Careful here: since interactive, use, 'system', not 'exec', to launch [latter
// does not work under *NIX - at least, not for me :)]
system(trim($editor) . ' ' . escapeshellarg($filename), $retcode);

// Put the saved timestamps back
touch($filename, $mtime, $atime) || die("Error updating timestamp on file, {$filename}!\n");

Deleting a File

// The 'unlink' function is used to delete regular files, whilst the 'rmdir' function
// does the same for empty directories. AFAIK, no recursive-deletion facility
// exists, so one must be programmed manually

$filename = '...';

// '@' silences PHP's own warning so that only the message below is shown
if (!@unlink($filename)) die("Can't delete, {$filename}!\n");

// ------------

$files = glob('...');

// Attempt every deletion; carry on after a failure but remember that one occurred
$problem = FALSE;

foreach ($files as $filename)
{
  if (!@unlink($filename)) $problem = TRUE;
}

//
// Alternatively, an applicative approach could be used, one closer in spirit to
// largely-functional languages like Scheme
//
// function is_all_deleted($deleted, $filename) { return @unlink($filename) && $deleted; }
// $problem = !array_reduce($files, 'is_all_deleted', TRUE);
//

// On failure, list every candidate name on STDERR and exit with status 1
if ($problem)
{
  fwrite(STDERR, 'Could not delete all of:');

  foreach ($files as $filename)
  {
    fwrite(STDERR, ' ' . $filename);
  }

  fwrite(STDERR, "\n");
  exit(1);
}

// ------------

// Delete each file named in $files, ignoring individual failures.
//
// $files  - array of file paths
// returns - the number of files that were actually deleted
function rmAll($files)
{
  $deleted = 0;

  foreach ($files as $filename)
  {
    if (@unlink($filename)) $deleted++;
  }

  return $deleted;

// An applicative alternative using 'create_function', PHP's rough equivalent of 'lambda' ...
//
//  return array_reduce($files,
//    create_function('$count, $filename', 'return $count + (@unlink($filename) ? 1 : 0);'), 0);
}

// ----

$files = glob('...');

// Compare the number of candidates with the number 'rmAll' managed to delete
$toBeDeleted = count($files);
$count = rmAll($files);

if ($count != $toBeDeleted) die("Could only delete {$count} of {$toBeDeleted} files\n");

Copying or Moving a File

$oldfile = '/tmp/old';
$newfile = '/tmp/new';

// 'copy' does the whole job in one call and returns FALSE on failure
if (!copy($oldfile, $newfile)) die("Error copying file\n");

// ----------------------------

// All the following copy a file by copying its contents. Examples do so in a single
// operation, but it is also possible to copy arbitrary blocks, or, line-by-line in 
// the case of 'text' files
$oldfile = '/tmp/old';
$newfile = '/tmp/new';

// Refuse to continue unless the source is an existing regular file
if (!is_file($oldfile)) die("Problem copying file {$oldfile} to file {$newfile}\n");

// Read the whole source into memory and write it out again in one step
file_put_contents($newfile, file_get_contents($oldfile));

// ------------

$oldfile = '/tmp/old';
$newfile = '/tmp/new';

// Open the destination for writing and the source for reading [binary mode]
$nh = fopen($newfile, 'wb');
$oh = fopen($oldfile, 'rb');

// Transfer the entire contents in a single read and a single write; no
// error checking is done in this variant
$data = fread($oh, filesize($oldfile));
fwrite($nh, $data);

fclose($oh);
fclose($nh);

// ------------

// As above, but with some error checking / handling
$oldfile = '/tmp/old';
$newfile = '/tmp/new';

// Open both files, stopping with a message as soon as either open fails
$oh = fopen($oldfile, 'rb');
if (!$oh) die("Problem opening input file {$oldfile}\n");

$nh = fopen($newfile, 'wb');
if (!$nh) die("Problem opening output file {$newfile}\n");

// Nothing is transferred for an empty source file
$filesize = filesize($oldfile);

if ($filesize > 0)
{
  $data = fread($oh, $filesize);

  if (!fwrite($nh, $data)) die("Problem reading / writing file data\n");
}

fclose($oh);
fclose($nh);

// ----------------------------

// Should there be platform-specific problems copying 'very large' files, it is
// a simple matter to call a system command utility via 'exec'

// *NIX-specific example. Could check whether, 'exec', succeeded, but checking whether
// a file exists after the operation might be a better approach
$oldfile = '/tmp/old';
$newfile = '/tmp/new';

// Remove any existing destination so that its presence afterwards proves
// the copy took place
if (is_file($newfile)) unlink($newfile);

// Both names are shell-escaped before being placed on the command line
$command = 'cp --force ' . escapeshellarg($oldfile) . ' ' . escapeshellarg($newfile);
exec(trim($command));

if (!is_file($newfile)) die("Problem copying file {$oldfile} to file {$newfile}\n");

// For other operating systems just change:
// * filenames
// * command being 'exec'ed
// as the rest of the code is platform independent

Recognizing Two Names for the Same File

// Build a key that identifies the file behind $filename regardless of the
// name used to reach it.
//
// $filename - path to examine
// returns   - the string 'dev,ino' built from the values 'stat' reports, or
//             FALSE when the file cannot be stat'ed
function makeDevInodePair($filename)
{
  $fs = @stat($filename);

  if (!$fs) return FALSE;

  // The separator is essential: without it device 1 / inode 23 and
  // device 12 / inode 3 would both produce the key '123'
  return $fs['dev'] . ',' . $fs['ino'];
}

// ------------

// Count how many times the file behind $filename has been presented, keyed by
// the value 'makeDevInodePair' returns for it; the file is processed only the
// first time it is seen.
//
// $filename - path of the file to process
function do_my_thing($filename)
{
  // Using a global variable to mimic Perl example, but could easily have passed
  // '$seen' as an argument
  global $seen;

  $devino = makeDevInodePair($filename);

  // Seen before: just increment the count and stop
  if (isset($seen[$devino]))
  {
    $seen[$devino] += 1;
    return;
  }

  // ... process $filename ...

  // First sighting: set initial count
  $seen[$devino] = 1;
}

// ----

// Simple example
// Start with an empty tally, then present the same file several times
$seen = array();

foreach (array('/tmp/old', '/tmp/old', '/tmp/old', '/tmp/new') as $name)
{
  do_my_thing($name);
}

// Report each key together with the number of times it was presented
foreach ($seen as $devino => $count)
{
  echo "{$devino} -> {$count}\n";
}

// ------------

// A variation on the above avoiding use of global variables, and illustrating use of
// easily-implemented 'higher order' techniques

// Helper function loosely modelled on, 'array_reduce', but using an array as
// 'accumulator', which is returned on completion
// Apply $lambda to every element of $arr, handing it the accumulator array
// as first argument and the element as second.
//
// $arr    - array whose values are visited in order
// $lambda - callback; it must declare its first parameter by reference if
//           its changes to the accumulator are to be kept
// $updarr - initial accumulator array
// returns - the accumulator after all elements have been visited
function array_update($arr, $lambda, $updarr)
{
  foreach ($arr as $key)
  {
    // The accumulator is placed in the argument list by reference so that a
    // by-reference callback parameter can modify it
    call_user_func_array($lambda, array(&$updarr, $key));
  }

  return $updarr;
}

// Count sightings of the file behind $filename in the caller's array,
// processing the file only on its first sighting.
//
// $seen     - tally array, modified in place [passed by reference]
// $filename - path of the file to process
function do_my_thing(&$seen, $filename)
{
  $devino = makeDevInodePair($filename);

  // Already present: update $seen and stop
  if (array_key_exists($devino, $seen))
  {
    $seen[$devino] += 1;
    return;
  }

  // ... processing $filename ...

  // First sighting: update $seen
  $seen[$devino] = 1;
}

// ----

// Simple example
// Simple example
$files = array('/tmp/old', '/tmp/old', '/tmp/old', '/tmp/new');

// 'array_update' returns the accumulator, so the result is always taken from
// its return value. [Marking an argument with '&' at the call site is not
// used: call-time pass-by-reference is a fatal error from PHP 5.4 onwards]

// Could do this ...
$seen = array();
$seen = array_update($files, 'do_my_thing', $seen);

// or this:
$seen = array_update($files, 'do_my_thing', array());

// or a 'lambda' could be used; its first parameter is declared by reference
// so that its updates to the accumulator are kept:
$seen = array_update($files,
                     create_function('&$seen, $filename', '... code not shown ...'),
                     array());

foreach($seen as $devino => $count)
{
  echo "{$devino} -> {$count}\n";
}

// ----------------------------

// List every name in /tmp grouped by the file it refers to, so that multiple
// names for the same file appear on one line
$files = glob('/tmp/*');

// The constant name must be given as a string
define('SEP', ';');
$seen = array();

foreach($files as $filename)
{
  $devino = makeDevInodePair($filename);

  // Skip entries that cannot be stat'ed [e.g. a dangling symbolic link]
  if ($devino === FALSE) continue;

  if (!array_key_exists($devino, $seen))
    $seen[$devino] = $filename;
  else
    $seen[$devino] = $seen[$devino] . SEP . $filename;
}

// Report in key order
$devino = array_keys($seen);
sort($devino);

foreach($devino as $key)
{
  echo $key . ':';

  // SEP is a literal separator, so 'explode' is used rather than a regex split
  foreach(explode(SEP, $seen[$key]) as $filename) echo ' ' . $filename;
  echo "\n";
}

Processing All Files in a Directory

// Conventional POSIX-like approach to directory traversal
$dirname = '/usr/bin/';

// Obtain a directory handle, giving up if the directory cannot be opened
$dirhdl = opendir($dirname);

if (!$dirhdl) die("Couldn't open {$dirname}\n");

// Entry names arrive one per call until 'readdir' returns FALSE
while (FALSE !== ($file = readdir($dirhdl)))
{
  ; // ... do something with $dirname/$file
    // ...
}

closedir($dirhdl);

// ------------

// Newer [post PHP 4], 'applicative' approach - an array of filenames is
// generated that may be processed via external loop ...

$dirname = '/usr/bin/';

// 'scandir' returns all entry names of the directory as an array
foreach (scandir($dirname) as $file)
{
  ; // ... do something with $dirname/$file
    // ...
}

// .. or, via callback application, perhaps after massaging by one of the
// 'array' family of functions [also uses, 'array_update', from earlier section]

$reversed = array_reverse(scandir($dirname));
$callback = create_function('$filelist, $file',  ' ; ');

$newlist = array_update($reversed, $callback, array());

// And don't forget that the old standby, 'glob', that returns an array of
// paths filtered using the Bourne Shell-based wildcards, '?' and '*', is
// also available

$paths = glob($dirname . '*');

foreach ($paths as $path)
{
  ; // ... do something with $path
    // ...
}

// ----------------------------

// Uses, 'isTextFile', from an earlier section
$dirname = '/usr/bin/';

echo "Text files in {$dirname}:\n";

foreach (scandir($dirname) as $file)
{
  // Take care when constructing paths to ensure cross-platform operability
  $path = $dirname . $file;

  // Only regular files are handed to the content check
  if (!is_file($path)) continue;

  if (isTextFile($path)) echo $path . "\n";
}

// ----------------------------

// Return the regular files whose paths begin with $dirname.
//
// $dirname - directory path including its trailing separator; '*' is
//            appended to form the glob pattern
// returns  - array of matching paths that are regular files [keys are those
//            of the glob result]; empty when nothing matches
// Terminates the script only when the glob fails and $dirname is not a directory.
function plain_files($dirname)
{
  $dirlist = glob($dirname . '*');

  if ($dirlist === FALSE)
  {
    // Some platforms report "no matches" as FALSE too, so only treat it as
    // fatal when the directory itself is unusable
    is_dir($dirname) || die("Couldn't glob {$dirname}\n");
    $dirlist = array();
  }

  // Pass function name directly if only a single function performs filter test
  return array_filter($dirlist, 'is_file');

  // Use, 'create_function', if a multi-function test is needed
  //
  // return array_filter($dirlist, create_function('$path', 'return is_file($path);'));
  //
}

// ------------

// Print each regular file found under /tmp/ on a line of its own
$plain = plain_files('/tmp/');

foreach ($plain as $path)
{
  echo $path, "\n";
}

Globbing, or Getting a List of Filenames Matching a Pattern

$dirname = '/tmp/';

// Full paths
$pathlist = glob($dirname . '*.c');

// File names only - glob-based matching: every entry name is tested against
// the shell pattern by a small generated callback
$is_c_source = create_function('$file', 'return fnmatch("*.c", $file);');

$filelist = array_filter(scandir($dirname), $is_c_source);

// ----------------------------

$dirname = '/tmp/';

// File names only - regex-based matching [case-insensitive]. 'preg_match'
// with the 'i' modifier is used because the POSIX 'eregi' function is no
// longer available in current PHP versions
$filelist = array_filter(scandir($dirname),
                         create_function('$file', 'return preg_match(\'/\.[ch]$/i\', $file);'));

// ----------------------------

$dirname = '/tmp/';

// Directory names - all-digit names. 'preg_match' replaces the POSIX 'ereg'
// function, which is no longer available in current PHP versions; the 'D'
// modifier makes '$' match only at the very end of the name
$dirs = array_filter(glob($dirname . '*', GLOB_ONLYDIR),
                     create_function('$path', 'return preg_match(\'/^[0-9]+$/D\', basename($path));'));

Processing All Files in a Directory Recursively

// Recursive directory traversal function and helper: traverses a directory tree
// applying a function [and a variable number of accompanying arguments] to each
// file

// Minimal holder for a single value. Because it is an object, callbacks that
// receive it can update '$value' and the caller sees the change.
class Accumulator
{
  // Current accumulated value; read and written directly by users of the class
  public $value;

  // $start_value - the value the accumulator begins with
  public function __construct($start_value)
  {
    $this->value = $start_value;
  }
}

// ------------

// Recursive helper: walks the tree rooted at $func_args[0] and calls $op for
// every entry that is not a directory.
//
// $op        - callback to apply
// $func_args - argument list for $op; element 0 is the path currently being
//              visited, the remaining elements are passed along unchanged
function process_directory_($op, $func_args)
{
  $current = $func_args[0];

  // Anything that is not a directory is handed to the callback
  if (!is_dir($current))
  {
    call_user_func_array($op, $func_args);
    return;
  }

  // A directory: descend into every entry except the two special names
  foreach (scandir($current) as $entry)
  {
    if ($entry == '.' || $entry == '..') continue;

    $func_args[0] = $current . '/' . $entry;
    process_directory_($op, $func_args);
  }
}

// Apply $op to every non-directory entry beneath $dir.
//
// $op     - callback; it receives the entry's path followed by any extra
//           arguments given to this function after $dir
// $dir    - directory at which to start
// returns - FALSE when $dir is not a directory, TRUE once the walk is done
function process_directory($op, $dir)
{
  if (is_dir($dir))
  {
    // Everything after $op [the directory plus any extras] becomes the
    // callback's argument list
    $func_args = func_get_args();
    array_shift($func_args);

    process_directory_($op, $func_args);
    return TRUE;
  }

  return FALSE;
}

// ----------------------------

$dirlist = array('/tmp/d1', '/tmp/d2', '/tmp/d3');

// Do something with each directory in the list
$total = count($dirlist);

for ($i = 0; $i < $total; $i++)
{
  $dir = $dirlist[$i];

  ;
  // Delete directory [if empty]     -> rmdir($dir);
  // Make it the 'current directory' -> chdir($dir);
  // Get list of files it contains   -> $filelist = scandir($dir);
  // Get directory metadata          -> $ds = stat($dir);
}

// ------------

$dirlist = array('/tmp/d1', '/tmp/d2', '/tmp/d3');

// Stand-in for real work: prints the path it is given on a line of its own
function pf($path)
{
  // ... do something to the file or directory ...
  printf("%s\n", $path);
}

// Visit each listed directory in turn
foreach ($dirlist as $dir)
{
  // Not a directory: report it and move on
  if (!is_dir($dir))
  {
    printf("%s does not exist\n", $dir);
    continue;
  }

  $filelist = scandir($dir);

  // An 'empty' directory will contain at least two entries: '..' and '.'
  if (count($filelist) == 2)
  {
    printf("%s is empty\n", $dir);
    continue;
  }

  // Process every entry except the '.' and '..' entries
  foreach (array_diff($filelist, array('.', '..')) as $file)
  {
    pf($dir . '/' . $file);
  }
}

// ----------------------------

// Add the size of $file to the running total held in $accum.
//
// $file  - path to examine; anything that is not a regular file is ignored
// $accum - object whose 'value' property holds the total in bytes
function accum_filesize($file, $accum)
{
  if (is_file($file))
  {
    $accum->value += filesize($file);
  }
}

// ------------

// Exactly one argument is expected, and it must name a directory
if ($argc != 2) die("usage: {$argv[0]} dir\n");

$dir = $argv[1];

if (!is_dir($dir)) die("{$dir} does not exist / not a directory\n");

// The total is gathered in an object so the callback's updates are visible here
$dirsize = new Accumulator(0);
process_directory('accum_filesize', $dir, $dirsize);

// Report results
printf("%s contains %d bytes\n", $dir, $dirsize->value);

// ----------------------------

// Remember $file in $accum when it is larger than the largest file so far.
//
// $file  - path to examine; anything that is not a regular file is ignored
// $accum - object whose 'value' property is array(path, size in bytes)
function biggest_file($file, $accum)
{
  if (!is_file($file)) return;

  $bytes = filesize($file);

  // Only a strictly larger file replaces the current record
  if ($bytes > $accum->value[1])
  {
    $accum->value[0] = $file;
    $accum->value[1] = $bytes;
  }
}

// ------------

// Exactly one argument is expected, and it must name a directory
if ($argc != 2) die("usage: {$argv[0]} dir\n");

$dir = $argv[1];

if (!is_dir($dir)) die("{$dir} does not exist / not a directory\n");

// The record starts as 'no file, zero bytes' and is updated by the callback
$biggest = new Accumulator(array('', 0));
process_directory('biggest_file', $dir, $biggest);

// Report results
printf("Biggest file is %s containing %d bytes\n", $biggest->value[0], $biggest->value[1]);

// ----------------------------

// Remember $file in $accum when it is younger [has a later 'filectime'] than
// the youngest file recorded so far.
//
// $file  - path to examine; anything that is not a regular file is ignored
// $accum - object whose 'value' property is array(path, timestamp); an empty
//          path means that no file has been recorded yet
function youngest_file($file, $accum)
{
  if (is_file($file))
  {
    $fct = filectime($file);

    // The first file seen is always recorded, whatever timestamp the
    // accumulator was seeded with; after that only a LATER timestamp wins
    // [keeping the smaller value would select the oldest file instead]
    if ($accum->value[0] === '' || $accum->value[1] < $fct)
    {
      $accum->value[0] = $file;
      $accum->value[1] = $fct;
    }
  }
}

// ------------

// Exactly one argument is expected, and it must name a directory
if ($argc != 2) die("usage: {$argv[0]} dir\n");

$dir = $argv[1];

if (!is_dir($dir)) die("{$dir} does not exist / not a directory\n");

// The record is array(path, timestamp) and is updated by the callback
$youngest = new Accumulator(array('', 2147483647));
process_directory('youngest_file', $dir, $youngest);

// Report results, with the timestamp formatted as an ATOM date
$when = date(DATE_ATOM, $youngest->value[1]);
printf("Youngest file is %s dating %s\n", $youngest->value[0], $when);

Removing a Directory and Its Contents

// AFAICT, there is currently no library function that recursively removes a
// directory tree [i.e. a directory, its subdirectories, and any other files]
// with a single call. Such a function needs to be custom built. PHP tools
// with which to do this:
// * 'unlink', 'rmdir', 'is_dir', and 'is_file' functions, will all take care
//   of the file testing and deletion
// * Actual directory traversal requires obtaining directory / subdirectory
//   lists, and here there is much choice available, though care must be taken
//   as each has its own quirks
//   - 'opendir', 'readdir', 'closedir'
//   - 'scandir'
//   - 'glob'
//   - SPL 'directory iterator' classes [newish / experimental - not shown here]
//
// The PHP documentation for 'rmdir' contains several examples, each illustrating
// one of each approach; the example shown here is loosely based on one of these
// examples

// Recursor - recursively traverses directory tree
// Recursive helper: deletes everything inside $dir and then $dir itself.
// Failures of individual 'unlink' / 'rmdir' calls are silenced; the caller
// decides afterwards whether the removal worked.
//
// $dir - directory to remove [no trailing separator]
function rmtree_($dir)
{
  $dir = "$dir";

  if ($dh = opendir($dir))
  {
    while (FALSE !== ($item = readdir($dh)))
    {
      if ($item == '.' || $item == '..') continue;

      $path = $dir . '/' . "$item";

      // Only real subdirectories are descended into. A symbolic link to a
      // directory also satisfies 'is_dir', but following it would delete the
      // contents of whatever it points at, so the link itself is unlinked
      if (is_dir($path) && !is_link($path)) rmtree_($path);
      else @unlink($path);
    }

    closedir($dh);
    @rmdir($dir);
  }
}

// Launcher - performs validation then starts recursive routine
// Remove the directory $dir together with everything beneath it.
//
// $dir    - directory to remove; one trailing '/' is tolerated
// returns - TRUE when $dir no longer exists as a directory afterwards,
//           FALSE when it was not a directory to begin with or survived
function rmtree($dir)
{
  if (!is_dir($dir)) return FALSE;

  // Drop a single trailing separator before handing over to the recursor
  if (substr($dir, -1, 1) == '/') $dir = substr($dir, 0, -1);

  rmtree_($dir);

  return !is_dir($dir);
}

// ------------

// Exactly one argument is expected: the directory to remove
if ($argc != 2) die("usage: rmtree dir\n");

if (!rmtree($argv[1])) die("Could not remove directory {$argv[1]}\n");

Renaming Files

// Map of current name => new name
$filepairs = array('x.txt' => 'x2.txt', 'y.txt' => 'y.doc', 'zxc.txt' => 'cxz.txt');

foreach ($filepairs as $oldfile => $newfile)
{
  // Nothing more to do for this pair when the rename succeeds
  if (@rename($oldfile, $newfile)) continue;

  // Otherwise report the failure on STDERR and carry on with the next pair
  fwrite(STDERR, sprintf("Could not rename %s to %s\n", $oldfile, $newfile));
}

// ----------------------------

// Call a system command utility via, 'exec'. *NIX-specific example. Could check whether,
// 'exec', succeeded, but checking whether a renamed file exists after the operation might
// be a better approach

$oldfile = '/tmp/old'; $newfile = '/tmp/new';

// Remove any existing destination so that its presence afterwards proves
// the rename took place
is_file($newfile) && unlink($newfile);

// Both names are shell-escaped before being placed on the command line
exec(trim('mv --force ' . escapeshellarg($oldfile) . ' ' . escapeshellarg($newfile)));

// A successful rename leaves the new name in place and the old name gone
// [testing for the old name's existence would report success as failure]
(is_file($newfile) && !file_exists($oldfile)) || die("Problem renaming file {$oldfile} to file {$newfile}\n");

// For other operating systems just change:
// * filenames
// * command being 'exec'ed
// as the rest of the code is platform independent

// ----------------------------

// A modified implementation of Larry's Filename Fixer. Rather than passing
// a single expression, a 'from' regexp is passed; each match in the file
// name(s) is changed to the value of 'to'. It otherwise behaves the same
//

$argc > 2 || die("usage: rename from to [file ...]\n");

$from = $argv[1];
$to = $argv[2];

// File names come from the remaining arguments or, when there are none,
// from STDIN at one name per line
$argv = array_slice($argv, 3);

if (count($argv) < 1)
{
  // Loop on the result of 'fgets' itself: testing 'feof' first would add a
  // bogus entry at end of input, and removing a fixed one character would
  // damage a final line that has no newline
  while (($line = fgets(STDIN)) !== FALSE)
  {
    $line = rtrim($line, "\r\n");
    if ($line !== '') $argv[] = $line;
  }
}

// 'preg_replace' is used because the POSIX 'ereg_replace' function is no
// longer available in current PHP versions. A control character serves as
// the pattern delimiter so that '/' and other punctuation in 'from' need no
// escaping. NOTE(review): 'from' is now read as a PCRE pattern, not POSIX ERE
$pattern = "\x01" . $from . "\x01";

foreach($argv as $file)
{
  $was = $file;
  $file = preg_replace($pattern, $to, $file);

  // NULL signals an invalid pattern; nothing sensible can be renamed
  if ($file === NULL) die("Invalid pattern: {$from}\n");

  if (strcmp($was, $file) != 0)
    @rename($was, $file) || fwrite(STDERR, sprintf("Could not rename %s to %s\n", $was, $file));
}

Splitting a Filename into Its Component Parts

// The directory part and the final component can be obtained separately ...
$dir = dirname($path);
$base = basename($path);

// ... or together: 'pathinfo' is PHP's equivalent to Perl's 'fileparse' and
// returns the parts in an array indexed by name
$pathinfo = pathinfo($path);

list($base, $dir, $ext) = array($pathinfo['basename'], $pathinfo['dirname'], $pathinfo['extension']);

// ----------------------------

$path = '/usr/lib/libc.a';

printf("dir is %s, file is %s\n", dirname($path), basename($path));

// ------------

$path = '/usr/lib/libc.a';

$pathinfo = pathinfo($path);

// 'basename' still carries the extension, so it is stripped to obtain the
// bare name; the extension entry is absent when the path has none
$ext = isset($pathinfo['extension']) ? $pathinfo['extension'] : '';
$name = ($ext === '') ? $pathinfo['basename'] : basename($path, '.' . $ext);

printf("dir is %s, name is %s, extension is %s\n", $pathinfo['dirname'], $name, $ext);

// ----------------------------

// Handle Mac example as a simple parse task. However, AFAIK, 'pathinfo' is cross-platform,
// so should handle file path format differences transparently
$path = 'Hard%20Drive:System%20Folder:README.txt';

// Decode the escaped spaces, then cut the path at each ':' separator. The
// separators are literal characters, so 'explode' is used [the regex-based
// 'split' function is no longer available in current PHP versions]
$macp = array_combine(array('drive', 'folder', 'filename'), explode(':', str_replace('%20', ' ', $path)));

// The file name is in turn cut into name and extension at the '.'
$macf = array_combine(array('name', 'extension'), explode('.', $macp['filename']));

printf("dir is %s, name is %s, extension is %s\n", ($macp['drive'] . ':' . $macp['folder']), $macf['name'], ('.' . $macf['extension']));

// ----------------------------

// Not really necessary since we have, 'pathinfo', but better matches Perl example
// Return the part of $filename that follows the last $separator.
//
// $filename  - file name to examine
// $separator - literal separator string [default '.']
// returns    - the text after the final separator, or the whole of $filename
//              when it contains no separator
function file_extension($filename, $separator = '.')
{
  // The separator is a literal, so 'explode' is used rather than a regex
  // split. The parts are kept in a variable because 'end' takes its
  // argument by reference and cannot be given a function result
  $parts = explode($separator, $filename);

  return end($parts);
}

// ----

// Print the extension of a sample file name on a line of its own
echo file_extension('readme.txt'), "\n";

Program: symirror

// @@INCOMPLETE@@
// @@INCOMPLETE@@

Program: lst

// @@INCOMPLETE@@
// @@INCOMPLETE@@