7. File Access

Introduction

#load "str.cma";;

(* Print all lines that contain the word "blue" in the input file
   /usr/local/widgets/data to stdout. *)
let () =
  let in_channel = open_in "/usr/local/widgets/data" in
  try
    while true do
      let line = input_line in_channel in
      try
        ignore (Str.search_forward (Str.regexp_string "blue") line 0);
        print_endline line
      with Not_found -> ()
    done
  with End_of_file ->
    close_in in_channel

(*-----------------------------*)

let () =
  let regexp = Str.regexp ".*[0-9]" in
  try
    while true do
      (* reads from stdin *)
      let line = input_line stdin in
      (* writes to stderr *)
      if not (Str.string_match regexp line 0)
      then prerr_endline "No digit found.";
      (* writes to stdout *)
      Printf.printf "Read: %s\n" line;
      flush stdout
    done
  with End_of_file ->
    close_out stdout

(*-----------------------------*)

(* Write to an output file the usual way. *)
let () =
  let logfile = open_out "/tmp/log" in
  output_string logfile "Countdown initiated...\n";
  close_out logfile;
  print_endline "You have 30 seconds to reach minimum safety distance."

(* Write to an output file using redirection. *)
#load "unix.cma";;
let () =
  let logfile = open_out "/tmp/log" in
  let old_descr = Unix.dup Unix.stdout in
  (* switch to logfile for output *)
  Unix.dup2 (Unix.descr_of_out_channel logfile) Unix.stdout;
  print_endline "Countdown initiated...";
  (* return to original output *)
  Unix.dup2 old_descr Unix.stdout;
  print_endline "You have 30 seconds to reach minimum safety distance."

Opening a File

(* open file "path" for reading only *)
let source =
  try open_in path
  with Sys_error msg -> failwith ("Couldn't read from " ^ msg)

(* open file "path" for writing only *)
let sink =
  try open_out path
  with Sys_error msg -> failwith ("Couldn't write to " ^ msg)

(*-----------------------------*)

#load "unix.cma";;

(* open file "path" for reading only *)
let source =
  try Unix.openfile path [Unix.O_RDONLY] 0o644
  with Unix.Unix_error (code, func, param) ->
    failwith (Printf.sprintf "Couldn't open %s for reading: %s"
                path (Unix.error_message code))

(* open file "path" for writing only *)
let sink =
  try Unix.openfile path [Unix.O_WRONLY; Unix.O_CREAT] 0o644
  with Unix.Unix_error (code, func, param) ->
    failwith (Printf.sprintf "Couldn't open %s for writing: %s"
                path (Unix.error_message code))

(*-----------------------------*)

(* open file "path" for reading and writing *)
let fh =
  try Unix.openfile filename [Unix.O_RDWR] 0o644
  with Unix.Unix_error (code, func, param) ->
    failwith (Printf.sprintf "Couldn't open %s for read and write: %s"
                filename (Unix.error_message code))

(*-----------------------------*)

(* open file "path" read only *)
let fh = open_in path
let fh = Unix.openfile path [Unix.O_RDONLY] 0o644

(*-----------------------------*)

(* open file "path" write only, create it if it does not exist *)
let fh = open_out path
let fh = Unix.openfile path [Unix.O_WRONLY; Unix.O_TRUNC; Unix.O_CREAT] 0o600

(*-----------------------------*)

(* open file "path" write only, fails if file exists *)
let fh = Unix.openfile path [Unix.O_WRONLY; Unix.O_EXCL; Unix.O_CREAT] 0o600

(*-----------------------------*)

(* open file "path" for appending *)
let fh =
  open_out_gen [Open_wronly; Open_append; Open_creat] 0o600 path
let fh =
  Unix.openfile path [Unix.O_WRONLY; Unix.O_APPEND; Unix.O_CREAT] 0o600

(*-----------------------------*)

(* open file "path" for appending only when file exists *)
let fh = Unix.openfile path [Unix.O_WRONLY; Unix.O_APPEND] 0o600

(*-----------------------------*)

(* open file "path" for reading and writing *)
let fh = Unix.openfile path [Unix.O_RDWR] 0o600

(*-----------------------------*)

(* open file "path" for reading and writing,
   create a new file if it does not exist *)
let fh = Unix.openfile path [Unix.O_RDWR; Unix.O_CREAT] 0o600

(*-----------------------------*)

(* open file "path" for reading and writing, fails if file exists *)
let fh = Unix.openfile path [Unix.O_RDWR; Unix.O_EXCL; Unix.O_CREAT] 0o600

Opening Files with Unusual Filenames

(* Nothing different needs to be done with OCaml *)

Expanding Tildes in Filenames

#load "str.cma";;
#load "unix.cma";;

let expanduser =
  let regexp = Str.regexp "^~\\([^/]*\\)" in
  let replace s =
    match Str.matched_group 1 s with
      | "" ->
          (try Unix.getenv "HOME"
           with Not_found ->
             (try Unix.getenv "LOGDIR"
              with Not_found ->
                (Unix.getpwuid (Unix.getuid ())).Unix.pw_dir))
      | user -> (Unix.getpwnam user).Unix.pw_dir in
  Str.substitute_first regexp replace

(*-----------------------------*)

    ~user
    ~user/blah
    ~
    ~/blah

Making Perl Report Filenames in Errors

#load "unix.cma";;

open Unix

(* Raises an exception on failure. *)
let file = openfile filename [ O_RDONLY ] 0o640

exception ErrString of string

let file =
  try openfile filename [ O_RDONLY ] 0o640
  with Unix_error (e, f, n) ->
    raise (ErrString
             (Printf.sprintf "Could not open %s for read: %s"
                n (error_message e)))

Creating Temporary Files

(* Open a new temporary file for writing. Filename.open_temp_file
   safeguards against race conditions and returns both the filename
   and an output channel. *)
let name, out_channel = Filename.open_temp_file "prefix-" ".suffix"

(* Install an at_exit handler to remove the temporary file when this
   program exits. *)
let () = at_exit (fun () -> Sys.remove name)

(*-----------------------------*)

#load "unix.cma";;

let () =
  (* Open a temporary file for reading and writing. *)
  let name = Filename.temp_file "prefix-" ".suffix" in
  let descr = Unix.openfile name [Unix.O_RDWR] 0o600 in

  (* Write ten lines of output. *)
  let out_channel = Unix.out_channel_of_descr descr in
  for i = 1 to 10 do
    Printf.fprintf out_channel "%d\n" i
  done;
  flush out_channel;

  (* Seek to the beginning and read the lines back in. *)
  let in_channel = Unix.in_channel_of_descr descr in
  seek_in in_channel 0;
  print_endline "Tmp file has:";
  let rec loop () =
    print_endline (input_line in_channel);
    loop () in
  try loop() with End_of_file -> ();

  (* Close the underlying file descriptor and remove the file. *)
  Unix.close descr;
  Sys.remove name

Storing Files Inside Your Program Text

#load "str.cma";;

let main data =
  List.iter
    (fun line ->
       (* process the line *)
       ())
    (Str.split (Str.regexp "\n") data)

let () = main "\
your data goes here
"

Writing a Filter

#load "str.cma";;

let parse_args () =
  match List.tl (Array.to_list Sys.argv) with
    | [] -> ["-"]
    | args -> args

let run_filter func args =
  List.iter
    (fun arg ->
       let in_channel =
         match arg with
           | "-" -> stdin
           | arg -> open_in arg in
       try
         begin
           try
             while true do
               func (input_line in_channel)
             done
           with End_of_file -> ()
         end;
         close_in in_channel
       with e ->
         close_in in_channel;
         raise e)
    args

let () =
  run_filter
    (fun line ->
       (* do something with the line *)
       ())
    (parse_args ())

(*-----------------------------*)

(* arg demo 1: Process optional -c flag *)
let chop_first = ref false
let args =
  match parse_args () with
    | "-c" :: rest -> chop_first := true; rest
    | args -> args

(* arg demo 2: Process optional -NUMBER flag *)
let columns = ref None
let args =
  match parse_args () with
    | arg :: rest
      when Str.string_match (Str.regexp "^-\\([0-9]+\\)$") arg 0 ->
        columns := Some (int_of_string (Str.matched_group 1 arg));
        rest
    | args -> args

(* arg demo 3: Process clustering -a, -i, -n, or -u flags *)
let append = ref false
let ignore_ints = ref false
let nostdout = ref false
let unbuffer = ref false
let args =
  let rec parse_flags = function
    | "" -> ()
    | s ->
        (match s.[0] with
           | 'a' -> append      := true
           | 'i' -> ignore_ints := true
           | 'n' -> nostdout    := true
           | 'u' -> unbuffer    := true
           | _ ->
               Printf.eprintf "usage: %s [-ainu] [filenames] ...\n"
                 Sys.argv.(0);
               flush stderr;
               exit 255);
        parse_flags (String.sub s 1 (String.length s - 1)) in
  List.rev
    (List.fold_left
       (fun acc ->
          function
            | "" -> acc
            | s when s.[0] = '-' ->
                parse_flags (String.sub s 1 (String.length s - 1));
                acc
            | arg -> arg :: acc)
       []
       (parse_args ()))

(*-----------------------------*)

(* findlogin - print all lines containing the string "login" *)

let () =
  run_filter
    (fun line ->
       if Str.string_match (Str.regexp ".*login.*") line 0
       then print_endline line)
    (parse_args ())

(*-----------------------------*)

(* lowercase - turn all lines into lowercase *)

let () =
  run_filter
    (fun line -> print_endline (String.lowercase line))
    (parse_args ())

(*-----------------------------*)

(* countchunks - count how many words are used *)

let chunks = ref 0
let () =
  run_filter
    (fun line ->
       if line <> "" && line.[0] == '#'
       then ()
       else chunks := !chunks
         + List.length (Str.split (Str.regexp "[ \t]+") line))
    (parse_args ());
  Printf.printf "Found %d chunks\n" !chunks

Modifying a File in Place with Temporary File

(* Modify a file in place. *)
let modify func old new' =
  let old_in = open_in old in
  let new_out = open_out new' in
  begin
    try
      while true do
        let line = input_line old_in in
        func new_out line
      done
    with End_of_file -> ()
  end;
  close_in old_in;
  close_out new_out;
  Sys.rename old (old ^ ".orig");
  Sys.rename new' old

(* Insert lines at line 20. *)
let () =
  let count = ref 0 in
  modify
    (fun out line ->
       incr count;
       if !count = 20
       then (output_string out "Extra line 1\n";
             output_string out "Extra line 2\n");
       output_string out line;
       output_string out "\n")
    old new'

(* Delete lines 20..30. *)
let () =
  let count = ref 0 in
  modify
    (fun out line ->
       incr count;
       if !count < 20 || !count > 30
       then (output_string out line;
             output_string out "\n"))
    old new'

Modifying a File in Place with -i Switch

(* An equivalent of Perl's -i switch does not exist in OCaml. *)

Modifying a File in Place Without a Temporary File

#load "str.cma";;
#load "unix.cma";;

(* Modify a file in place. *)
let modify func file =
  let in' = open_in file in
  let lines = ref [] in
  begin
    try
      while true do
        let line = input_line in' in
        lines := func line :: !lines
      done
    with End_of_file -> ()
  end;
  close_in in';
  let lines = List.rev !lines in
  let out = open_out file in
  List.iter
    (fun line ->
       output_string out line;
       output_string out "\n")
    lines;
  close_out out

(* Replace DATE with the current date. *)
let () =
  let tm = Unix.localtime (Unix.time ()) in
  let date = Printf.sprintf "%02d/%02d/%04d"
    (tm.Unix.tm_mon + 1)
    tm.Unix.tm_mday
    (tm.Unix.tm_year + 1900) in
  modify
    (Str.global_replace (Str.regexp "DATE") date)
    infile

Locking a File

#load "unix.cma";;

let descr = Unix.openfile path [Unix.O_RDWR] 0o664

let () =
  Unix.lockf descr Unix.F_LOCK 0;
  (* update file, then ... *)
  Unix.close descr

let () =
  try Unix.lockf descr Unix.F_TLOCK 0
  with Unix.Unix_error (error, _, _) ->
    Printf.eprintf
      "can't immediately write-lock the file (%s), blocking ...\n"
      (Unix.error_message error);
    flush stderr;
    Unix.lockf descr Unix.F_LOCK 0

(*-----------------------------*)

#load "unix.cma";;

let descr = Unix.openfile "numfile" [Unix.O_RDWR; Unix.O_CREAT] 0o664

let () =
  Unix.lockf descr Unix.F_LOCK 0;
  (* Now we have acquired the lock, it's safe for I/O *)
  let num =
    try int_of_string (input_line (Unix.in_channel_of_descr descr))
    with _ -> 0 in
  ignore (Unix.lseek descr 0 Unix.SEEK_SET);
  Unix.ftruncate descr 0;
  let out = Unix.out_channel_of_descr descr in
  output_string out (string_of_int (num + 1));
  output_string out "\n";
  flush out;
  Unix.close descr

Flushing Output

(* OCaml automatically flushes after calling these functions: *)
let () =
  print_endline "I get flushed.";
  print_newline (); (* Me too! *)
  prerr_endline "So do I.";
  prerr_newline () (* As do I. *)

(* The Printf functions allow a format specifier of "%!" to trigger
   an immediate flush. *)
let () = Printf.printf "I flush %s%! and %s!\n%!" "here" "there"

(*-----------------------------*)

(* seeme - demo stdio output buffering *)
#load "unix.cma";;
let () =
  output_string stdout "Now you don't see it...";
  Unix.sleep 2;
  print_endline "now you do"

(*-----------------------------*)

(* A channel can be explicitly flushed: *)
let () = flush stderr

(* All channels can be flushed at once (errors are ignored): *)
let () = flush_all ()

(* Closing a channel flushes automatically: *)
let () =
  output_string stdout "I get written.\n";
  close_out stdout

(* Calls to exit result in a flush_all, and exit is always called at
   termination even if an error occurs. *)
let () =
  output_string stderr "Bye!\n";
  exit 0

Reading from Many Filehandles Without Blocking

Doing Non-Blocking I/O

Determining the Number of Bytes to Read

Storing Filehandles in Variables

Caching Open Output Filehandles

Printing to Many Filehandles Simultaneously

Opening and Closing File Descriptors by Number

Copying Filehandles

Program: netlock

Program: lockarea