9. Directories

Introduction

open Unix 

(* handle_unix_error generates a nice error message and exits *)
let entry = handle_unix_error stat "/usr/bin/vi"
let entry = handle_unix_error stat "/usr/bin/"
let entry = handle_unix_error fstat filedescr

(* without handle_unix_error an exception is raised for errors *)
let inode = stat "/usr/bin/vi"
let ctime = inode.st_ctime
let size = inode.st_size

(* don't know any equivalent in ocaml *)
(* maybe one could use file(1) (to know if it is an ASCII text file) *)
let dirhandle = handle_unix_error opendir "/usr/bin" in
begin
  try
    while true do
      let file = readdir dirhandle in
      Printf.printf "Inside /usr/bin is something called %s\n" file
    done
  with
    | End_of_file -> ()
end;
closedir dirhandle;;

Getting and Setting Timestamps

let (readtime, writetime) =
  let inode = stat filename in
  (inode.st_atime, inode.st_mtime);;

utimes filename newreadtime newwritetime;;

(*-----------------------------*)

let second_per_day = 60. *. 60. *. 24. in
let (atime, mtime) =
  let inode = stat filename in
  (inode.st_atime, inode.st_mtime) in
let newreadtime = atime -. 7. *. second_per_day
and newwritetime = mtime -. 7. *. second_per_day in
try 
  utimes filename newreadtime newwritetime 
with
  | Unix_error (er,_,_) ->
      Printf.eprintf 
        "couldn't backdate %s by a week w/ utime: %s\n"
        filename (error_message er);;

(*-----------------------------*)
let mtime = (stat file).st_mtime in
utimes file (time ()) mtime  ;;

(*-----------------------------*)

(* compile with ocamlc unix.cma uvi.ml -o uvi *)
open Unix

let main () =
  if (Array.length Sys.argv <> 2)
  then
    Printf.eprintf "Usage: uvi filename\n";
  let filename = Sys.argv.(1) in
  let atime,mtime = 
    let st = stat filename in
    (st.st_atime, st.st_mtime) in
  let editor =
    begin
      try
        Sys.getenv "editor"
      with
        | Not_found -> "vi"
    end in
  Sys.command (Printf.sprintf "%s %s" editor filename);
  utimes filename atime mtime in
main ();;
  
(*-----------------------------*)

Deleting a File

unlink filename;;                       (* use unix library *)
Sys.remove filename;;                   (* in the standard library *)

let error_flag = ref(None) in
let local_unlink filename =
  try
    unlink filename
  with
    | Unix_error (er,_,_) -> 
        error_flag := (Some er) in
List.iter local_unlink filenames;
match !error_flag with
  | Some er ->
      Printf.eprintf "Couldn't unlink all of";
      List.iter (Printf.eprintf " %s") filenames;
      Printf.eprintf ": %s\n" (error_message er)
  | None ();;


(*-----------------------------*)

let error_flag = ref(0) in
let local_unlink count filename =
  try
    unlink filename;
    count + 1
  with
    | Unix_error (er,_,_) -> 
        count in
let count = (List.fold_left local_unlink filenames 0) 
and len = List.length filenames in
if count <> len
then
  Printf.eprintf "Could only delete %i of %i file\n" count len;;

(*-----------------------------*)

Copying or Moving a File

(*-----------------------------*)

(* Note : this doesn't use the unix library, only the standard one *)

let copy oldfile newfile =
  let infile = open_in oldfile
  and outfile = open_out newfile
  and blksize = 16384 in
  let buf = String.create blksize in
  let rec real_copy () =
    let byte_read = input infile buf 0 blksize in
    if byte_read <> 0 then
      begin
        (* Handle partialle write : nothing to do *)
        output outfile buf 0 byte_read;
        real_copy ()
      end in
  real_copy ();
  close_in infile;
  close_out outfile;;

(*-----------------------------*)
Sys.command ("cp " ^ oldfile ^ " " ^ newfile)   (* Unix *)
Sys.command (String.concat " " ["copy";oldfile;newfile]) (* Dos *)

(*-----------------------------*)

Unix.copy "datafile.dat" "datafile.bak";;

Sys.rename "datafile.dat" "datafile.bak";;

(*-----------------------------*)

Recognizing Two Names for the Same File

#load "unix.cma";;

(* Count the number of times a (dev, ino) pair is seen. *)
let seen = Hashtbl.create 0
let do_my_thing filename =
  let {Unix.st_dev=dev; st_ino=ino} = Unix.stat filename in
  Hashtbl.replace seen (dev, ino)
    (try Hashtbl.find seen (dev, ino) + 1
     with Not_found -> 1);
  if Hashtbl.find seen (dev, ino) = 1
  then
    begin
      (* do something with filename because we haven't
         seen it before. *)
    end

(*-----------------------------*)

(* Maintain a list of files for each (dev, ino) pair. *)
let seen = Hashtbl.create 0
let () =
  List.iter
    (fun filename ->
       let {Unix.st_dev=dev; st_ino=ino} = Unix.stat filename in
       Hashtbl.replace seen (dev, ino)
         (try filename :: Hashtbl.find seen (dev, ino)
          with Not_found -> [filename]))
    files
let () =
  Hashtbl.iter
    (fun (dev, ino) filenames ->
       Printf.printf "(%d, %d) => [%s]\n"
         dev ino (String.concat ", " filenames))
    seen

Processing All Files in a Directory

(* Using Sys.readdir. *)
let () =
  Array.iter
    (fun file ->
       let path = Filename.concat dirname file in
       (* do something with path *)
       ())
    (Sys.readdir dirname)

(*-----------------------------*)

(* Using Unix.opendir, readdir, and closedir. Note that the "." and ".."
   directories are included in the result unlike with Sys.readdir. *)
#load "unix.cma";;

let () =
  let dir =
    try Unix.opendir dirname
    with Unix.Unix_error (e, _, _) ->
      Printf.eprintf "can't opendir %s: %s\n"
        dirname (Unix.error_message e);
      exit 255 in
  try
    while true do
      let file = Unix.readdir dir in
      let path = Filename.concat dirname file in
      (* do something with path *)
      ()
    done
  with End_of_file ->
    Unix.closedir dir

(*-----------------------------*)

(* Get a list of full paths to plain files. *)
let plainfiles dir =
  List.filter
    (fun path ->
       match Unix.lstat path with
         | {Unix.st_kind=Unix.S_REG} -> true
         | _ -> false)
    (List.map
       (Filename.concat dir)
       (Array.to_list (Sys.readdir dir)))

Globbing, or Getting a List of Filenames Matching a Pattern

(* See recipe 6.9 for a more powerful globber. *)
#load "str.cma";;

(* OCaml does not come with a globbing function. As a workaround, the
   following function builds a regular expression from a glob pattern.
   Only the '*' and '?' wildcards are recognized. *)
let regexp_of_glob pat =
  Str.regexp
    (Printf.sprintf "^%s$"
       (String.concat ""
          (List.map
             (function
                | Str.Text s -> Str.quote s
                | Str.Delim "*" -> ".*"
                | Str.Delim "?" -> "."
                | Str.Delim _ -> assert false)
             (Str.full_split (Str.regexp "[*?]") pat))))

(* Now we can build a very basic globber. Only the filename part will
   be used in the glob pattern, so directory wildcards will break in
   this simple example. *)
let glob pat =
  let basedir = Filename.dirname pat in
  let files = Sys.readdir basedir in
  let regexp = regexp_of_glob (Filename.basename pat) in
  List.map
    (Filename.concat basedir)
    (List.filter
       (fun file -> Str.string_match regexp file 0)
       (Array.to_list files))

(* Find all data files in the pleac directory. *)
let files = glob "pleac/*.data"

(*-----------------------------*)

(* Find and sort directories with numeric names. *)
let dirs =
  List.map snd                             (* extract pathnames *)
    (List.sort compare                     (* sort names numerically *)
       (List.filter                        (* path is a dir *)
          (fun (_, s) -> Sys.is_directory s)
          (List.map                        (* form (name, path) *)
             (fun s -> (int_of_string s, Filename.concat path s))
             (List.filter                  (* just numerics *)
                (fun s ->
                   try ignore (int_of_string s); true
                   with _ -> false)
                (Array.to_list
                   (Sys.readdir path)))))) (* all files *)

Processing All Files in a Directory Recursively

let rec find_files f error root =
  Array.iter
    (fun filename ->
       let path = Filename.concat root filename in
       let is_dir =
         try Some (Sys.is_directory path)
         with e -> error root e; None in
       match is_dir with
         | Some true -> if f path then find_files f error path
         | Some false -> ignore (f path)
         | None -> ())
    (try Sys.readdir root with e -> error root e; [| |])

let process_file fn =
  (* Print the name of each directory and file found. *)
  Printf.printf "%s: %s\n"
    (if Sys.is_directory fn then "directory" else "file") fn;

  (* Prune directories named ".svn". *)
  not (Sys.is_directory fn && Filename.basename fn = ".svn")

let handle_error fn exc =
  Printf.eprintf "Error reading %s: %s\n" fn (Printexc.to_string exc)

let () =
  List.iter (find_files process_file handle_error) dirlist

(*-----------------------------*)

(* Add a trailing slash to the names of directories. *)
let () =
  List.iter
    (find_files
       (fun fn ->
          print_endline
            (if Sys.is_directory fn then (fn ^ "/") else fn);
          true)
       (fun _ _ -> ()))
    (match List.tl (Array.to_list Sys.argv) with
       | [] -> ["."]
       | dirs -> dirs)

(*-----------------------------*)

(* Sum the file sizes of a directory tree. *)
#load "unix.cma";;
let sum = ref 0
let () =
  List.iter
    (find_files
       (fun fn ->
          sum := !sum + (match Unix.stat fn
                         with {Unix.st_size=size} -> size);
          true)
       (fun _ _ -> ()))
    (match List.tl (Array.to_list Sys.argv) with
       | [] -> ["."]
       | dirs -> dirs);
  Printf.printf "%s contains %d bytes\n"
    (String.concat " " (List.tl (Array.to_list Sys.argv))) !sum

(*-----------------------------*)

(* Find the largest file in a directory tree. *)
#load "unix.cma";;
let saved_size = ref 0
let saved_name = ref ""
let () =
  List.iter
    (find_files
       (fun fn ->
          (match Unix.stat fn with
             | {Unix.st_size=size} ->
                 if size > !saved_size
                 then (saved_size := size; saved_name := fn));
          true)
       (fun _ _ -> ()))
    (match List.tl (Array.to_list Sys.argv) with
       | [] -> ["."]
       | dirs -> dirs);
  Printf.printf "Biggest file %s in %s is %d bytes long.\n"
    !saved_name
    (String.concat " " (List.tl (Array.to_list Sys.argv)))
    !saved_size

(*-----------------------------*)

(* Find the youngest file or directory. *)
#load "unix.cma";;
let saved_age = ref 0.
let saved_name = ref ""
let () =
  List.iter
    (find_files
       (fun fn ->
          (match Unix.stat fn with
             | {Unix.st_mtime=age} ->
                 if age > !saved_age
                 then (saved_age := age; saved_name := fn));
          true)
       (fun _ _ -> ()))
    (match List.tl (Array.to_list Sys.argv) with
       | [] -> ["."]
       | dirs -> dirs);
  match Unix.localtime !saved_age with
    | {Unix.tm_year=year; tm_mon=month; tm_mday=day} ->
        Printf.printf "%04d-%02d-%02d %s\n"
          (year + 1900) (month + 1) day
          !saved_name

(*-----------------------------*)

(* fdirs - find all directories *)
let () =
  List.iter
    (find_files
       (fun fn ->
          if Sys.is_directory fn then print_endline fn;
          true)
       (fun _ _ -> ()))
    (match List.tl (Array.to_list Sys.argv) with
       | [] -> ["."]
       | dirs -> dirs)

Removing a Directory and Its Contents

(* rmtree - remove whole directory trees like rm -r *)
#load "unix.cma";;

let rec finddepth f roots =
  Array.iter
    (fun root ->
       (match Unix.lstat root with
          | {Unix.st_kind=Unix.S_DIR} ->
              finddepth f
                (Array.map (Filename.concat root) (Sys.readdir root))
          | _ -> ());
       f root)
    roots

let zap path =
  match Unix.lstat path with
    | {Unix.st_kind=Unix.S_DIR} ->
        Printf.printf "rmdir %s\n%!" path;
        Unix.rmdir path
    | _ ->
        Printf.printf "unlink %s\n%!" path;
        Unix.unlink path

let () =
  if Array.length Sys.argv < 2
  then (Printf.eprintf "usage: %s dir ..\n" Sys.argv.(0); exit 1);
  finddepth zap (Array.sub Sys.argv 1 (Array.length Sys.argv - 1))

Renaming Files

#load "unix.cma";;
let () = List.iter
  (fun file ->
     let newname = file in
     (* change newname *)
     Unix.rename file newname)
  names

(* rename - Larry's filename fixer *)
#load "unix.cma";;
#directory "+pcre";;
#load "pcre.cma";;
let () =
  match Array.to_list Sys.argv with
    | prog :: pat :: templ :: files ->
        let replace = Pcre.replace ~pat ~templ in
        List.iter
          (fun file ->
             let file' = replace file in
             Unix.rename file file')
          files
    | _ -> prerr_endline "Usage: rename pattern replacment [files]"

(*
  % rename '\.orig$' '' *.orig
  % rename '$' '.bad' *.f
  % rename '([^/]+)~$' '.#$1' /tmp/*~
  % find /tmp -name '*~' -exec rename '([^/]+)~$' '.#$1' {} \;
*)

Splitting a Filename into Its Component Parts

let splitext name =
  try
    let root = Filename.chop_extension name in
    let i = String.length root in
    let ext = String.sub name i (String.length name - i) in
    root, ext
  with Invalid_argument _ ->
    name, ""

let dir = Filename.dirname path
let file = Filename.basename path
let name, ext = splitext file

Program: symirror

#!/usr/bin/ocaml
(* symirror - build spectral forest of symlinks *)
#load "unix.cma";;

open Printf

let die msg = prerr_endline msg; exit 1

let () =
  if Array.length Sys.argv <> 3
  then die (sprintf "usage: %s realdir mirrordir" Sys.argv.(0))

let srcdir, dstdir = Sys.argv.(1), Sys.argv.(2)
let cwd = Unix.getcwd ()

let fix_relative path =
  if Filename.is_relative path
  then Filename.concat cwd path
  else path

let is_dir dir =
  try Some (Sys.is_directory dir)
  with Sys_error _ -> None

let () =
  match (is_dir srcdir, is_dir dstdir) with
    | (None, _) | (Some false, _) ->
        die (sprintf "%s: %s is not a directory" Sys.argv.(0) srcdir)
    | (_, Some false) ->
        die (sprintf "%s: %s is not a directory" Sys.argv.(0) dstdir)
    | (_, None) ->
        Unix.mkdir dstdir 0o7777        (* be forgiving *)
    | (Some _, Some _) ->
        ()                              (* cool *)

(* fix relative paths *)
let srcdir, dstdir = fix_relative srcdir, fix_relative dstdir

let rec find f roots =
  Array.iter
    (fun root ->
       f root;
       match Unix.lstat root with
         | {Unix.st_kind=Unix.S_DIR} ->
             find f (Array.map
                       (Filename.concat root)
                       (Sys.readdir root))
         | _ -> ())
    roots

let wanted name =
  if name <> Filename.current_dir_name
  then
    let {Unix.st_dev=dev; st_ino=ino; st_kind=kind; st_perm=perm} =
      Unix.lstat name in
    (* preserve directory permissions *)
    let perm = perm land 0o7777 in
    (* correct name *)
    let name =
      if String.length name > 2 && String.sub name 0 2 = "./"
      then String.sub name 2 (String.length name - 2)
      else name in
    if kind = Unix.S_DIR
    then
      (* make a real directory *)
      Unix.mkdir (Filename.concat dstdir name) perm
    else
      (* shadow everything else *)
      Unix.symlink
        (Filename.concat srcdir name)
        (Filename.concat dstdir name)

let () =
  Unix.chdir srcdir;
  find wanted [|"."|]

Program: lst

#!/usr/bin/ocaml
(* lst - list sorted directory contents (depth first) *)
#load "unix.cma";;

open Unix
open Printf

let opt_m = ref false
let opt_u = ref false
let opt_c = ref false
let opt_s = ref false
let opt_r = ref false
let opt_i = ref false
let opt_l = ref false
let names = ref []

let () =
  Arg.parse
    [
      "-m", Arg.Set opt_m, "Use mtime (modify time) [DEFAULT]";
      "-u", Arg.Set opt_u, "Use atime (access time)";
      "-c", Arg.Set opt_c, "Use ctime (inode change time)";
      "-s", Arg.Set opt_s, "Use size for sorting";
      "-r", Arg.Set opt_r, "Reverse sort";
      "-i", Arg.Set opt_i, "Read pathnames from stdin";
      "-l", Arg.Set opt_l, "Long listing";
    ]
    (fun name -> names := name :: !names)
    (sprintf
       "Usage: %s [-m] [-u] [-c] [-s] [-r] [-i] [-l] [dirs ...]
 or    %s -i [-m] [-u] [-c] [-s] [-r] [-l] < filelist"
       Sys.argv.(0) Sys.argv.(0));
  names :=
    match !names with
      | [] when not !opt_i -> ["."]
      | names -> names

let die msg = prerr_endline msg; exit 1

let () =
  let int_of_bool = function true -> 1 | false -> 0 in
  if (int_of_bool !opt_c
      + int_of_bool !opt_u
      + int_of_bool !opt_s
      + int_of_bool !opt_m) > 1
  then die "can only sort on one time or size"

let idx = fun {st_mtime=t} -> t
let idx = if !opt_u then fun {st_atime=t} -> t else idx
let idx = if !opt_c then fun {st_ctime=t} -> t else idx
let idx = if !opt_s then fun {st_size=s} -> float s else idx
let time_idx = if !opt_s then fun {st_mtime=t} -> t else idx

let rec find f roots =
  Array.iter
    (fun root ->
       f root;
       match lstat root with
         | {st_kind=S_DIR} ->
             find f (Array.map
                       (Filename.concat root)
                       (Sys.readdir root))
         | _ -> ())
    roots

let time = Hashtbl.create 0
let stat = Hashtbl.create 0

(* get stat info on the file, saving the desired *)
(* sort criterion (mtime, atime, ctime, or size) *)
(* in the time hash indexed by filename.         *)
(* if they want a long list, we have to save the *)
(* entire stat structure in stat.                *)
let wanted name =
  try
    let sb = Unix.stat name in
    Hashtbl.replace time name (idx sb);
    if !opt_l then Hashtbl.replace stat name sb
  with Unix_error _ -> ()

(* cache user number to name conversions *)
let user =
  let user = Hashtbl.create 0 in
  fun uid ->
    Hashtbl.replace user uid
      (try (getpwuid uid).pw_name
       with Not_found -> ("#" ^ string_of_int uid));
    Hashtbl.find user uid

(* cache group number to name conversions *)
let group =
  let group = Hashtbl.create 0 in
  fun gid ->
    Hashtbl.replace group gid
      (try (getgrgid gid).gr_name
       with Not_found -> ("#" ^ string_of_int gid));
    Hashtbl.find group gid

let days = [| "Sun"; "Mon"; "Tue"; "Wed"; "Thu"; "Fri"; "Sat" |]
let months = [| "Jan"; "Feb"; "Mar"; "Apr"; "May"; "Jun";
                "Jul"; "Aug"; "Sep"; "Oct"; "Nov"; "Dec" |]

let format_time time =
  let tm = localtime time in
  sprintf "%s %s %2d %02d:%02d:%02d %04d"
    days.(tm.tm_wday)
    months.(tm.tm_mon)
    tm.tm_mday
    tm.tm_hour
    tm.tm_min
    tm.tm_sec
    (tm.tm_year + 1900)

let () =
  if !opt_i
  then
    begin
      begin
        try
          while true do
            names := (input_line Pervasives.stdin) :: !names
          done
        with End_of_file -> ()
      end;
      List.iter wanted (List.rev !names)
    end
  else find wanted (Array.of_list (List.rev !names))

(* sort the files by their cached times, youngest first *)
let skeys =
  List.sort
    (fun a b -> compare (Hashtbl.find time b) (Hashtbl.find time a))
    (Hashtbl.fold (fun k v a -> k :: a) time [])

(* but flip the order if -r was supplied on command line *)
let skeys = if !opt_r then List.rev skeys else skeys

let () =
  List.iter
    (fun skey ->
       if !opt_l
       then
         let sb = Hashtbl.find stat skey in
         printf "%6d %04o %6d %8s %8s %8d %s %s\n"
           sb.st_ino
           (sb.st_perm land 0o7777)
           sb.st_nlink
           (user sb.st_uid)
           (group sb.st_gid)
           sb.st_size
           (format_time (time_idx sb))
           skey
       else
         print_endline skey)
    skeys