8. File Contents

Introduction

let () =
  try
    while true do
      let line = input_line datafile in
      let size = String.length line in
      Printf.printf "%d\n" size             (* output size of line *)
    done
  with End_of_file -> ()

(*-----------------------------*)

let line_stream_of_channel channel =
  Stream.from
    (fun _ -> try Some (input_line channel) with End_of_file -> None)

let output_size line =
  Printf.printf "%d\n" (String.length line) (* output size of line *)

let () =
  Stream.iter output_size (line_stream_of_channel datafile)

(*-----------------------------*)

let lines =
  let xs = ref [] in
  Stream.iter
    (fun x -> xs := x :: !xs)
    (line_stream_of_channel datafile);
  List.rev !xs

(*-----------------------------*)

let slurp_channel channel =
  let buffer_size = 4096 in
  let buffer = Buffer.create buffer_size in
  let string = String.create buffer_size in
  let chars_read = ref 1 in
  while !chars_read <> 0 do
    chars_read := input channel string 0 buffer_size;
    Buffer.add_string buffer (String.sub string 0 !chars_read)
  done;
  Buffer.contents buffer

let slurp_file filename =
  let channel = open_in_bin filename in
  let result =
    try slurp_channel channel
    with e -> close_in channel; raise e in
  close_in channel;
  result

let whole_file = slurp_file filename

(*-----------------------------*)

let () =
  (* Onetwothree *)
  List.iter (output_string handle) ["One"; "two"; "three"];

  (* Sent to default output handle *)
  print_string "Baa baa black sheep\n"

(*-----------------------------*)

let buffer = String.make 4096 '\000'
let rv = input handle buffer 0 4096
(* rv is the number of bytes read, *)
(* buffer holds the data read *)

(*-----------------------------*)

#load "unix.cma";;
let () =
  Unix.ftruncate descr length;
  Unix.truncate (Printf.sprintf "/tmp/%d.pid" (Unix.getpid ())) length

(*-----------------------------*)

let () =
  let pos = pos_in datafile in
  Printf.printf "I'm %d bytes from the start of datafile.\n" pos

(*-----------------------------*)

let () =
  seek_in in_channel pos;
  seek_out out_channel pos

#load "unix.cma";;
let () =
  Unix.lseek descr 0     Unix.SEEK_END; (* seek to the end    *)
  Unix.lseek descr pos   Unix.SEEK_SET; (* seek to pos        *)
  Unix.lseek descr (-20) Unix.SEEK_CUR; (* seek back 20 bytes *)

(*-----------------------------*)

#load "unix.cma";;
let () =
  let written =
    Unix.write datafile mystring 0 (String.length mystring) in
  let read =
    Unix.read datafile mystring 5 256 in
  if read <> 256 then Printf.printf "only read %d bytes, not 256\n" read

(*-----------------------------*)

#load "unix.cma";;
let () =
  (* don't change position *)
  let pos = Unix.lseek handle 0 Unix.SEEK_CUR in
  (* ... *)
  ()

Reading Lines with Continuation Characters

let () =
  let buffer = Buffer.create 16 in
  let rec loop () =
    let line = input_line chan in
    if line <> "" && line.[String.length line - 1] = '\\'
    then (Buffer.add_string
            buffer (String.sub line 0 (String.length line - 1));
          loop ())
    else Buffer.add_string buffer line;
    let line = Buffer.contents buffer in
    Buffer.clear buffer;
    (* process full record in line here *)
    loop () in
  try loop () with End_of_file -> ()

Counting Lines (or Paragraphs or Records) in a File

#load "unix.cma";;

let () =
  let proc = Unix.open_process_in ("wc -l < " ^ file) in
  let count = int_of_string (input_line proc) in
  ignore (Unix.close_process_in proc);
  (* count now holds the number of lines read *)
  ()

(*-----------------------------*)

let () =
  let count = ref 0 in
  let chan = open_in file in
  (try
     while true do
       ignore (input_line chan);
       incr count
     done
   with End_of_file -> close_in chan);
  (* !count now holds the number of lines read *)
  ()

(*-----------------------------*)

#load "str.cma";;

let () =
  let delim = Str.regexp "[ \n\r\t]*$" in
  let count = ref 0 in
  let in_para = ref false in
  let chan = open_in file in
  (try
     while true do
       if Str.string_match delim (input_line chan) 0
       then in_para := false
       else begin
         if not !in_para then incr count;
         in_para := true
       end
     done
   with End_of_file -> close_in chan);
  (* !count now holds the number of paragraphs read *)
  ()

Processing Every Word in a File

let word_stream_of_channel channel =
  (* Thanks to Mac Mason for figuring this out. *)
  let buffer = (Scanf.Scanning.from_channel channel) in
  Stream.from
    (fun count ->
       try
         match Scanf.bscanf buffer " %s " (fun x -> x) with
           | "" -> None
           | s -> Some s
       with End_of_file ->
         None)

(*-----------------------------*)

let () =
  Stream.iter
    (fun chunk ->
       (* do something with chunk *)
       ())
    (word_stream_of_channel stdin)

(*-----------------------------*)

(* Make a word frequency count *)
let seen = Hashtbl.create 0
let () =
  Stream.iter
    (fun word ->
       Hashtbl.replace seen word
         (try Hashtbl.find seen word + 1
          with Not_found -> 1))
    (word_stream_of_channel stdin)

(* output hash in a descending numeric sort of its values *)
let () =
  let words = ref [] in
  Hashtbl.iter (fun word _ -> words := word :: !words) seen;
  List.iter
    (fun word ->
       Printf.printf "%5d %s\n" (Hashtbl.find seen word) word)
    (List.sort
       (fun a b -> compare (Hashtbl.find seen b) (Hashtbl.find seen a))
       !words)

(*-----------------------------*)

(* Line frequency count *)

let line_stream_of_channel channel =
  Stream.from
    (fun _ -> try Some (input_line channel) with End_of_file -> None)

let seen = Hashtbl.create 0
let () =
  Stream.iter
    (fun line ->
       Hashtbl.replace seen line
         (try Hashtbl.find seen line + 1
          with Not_found -> 1))
    (line_stream_of_channel stdin)

let () =
  let lines = ref [] in
  Hashtbl.iter (fun line _ -> lines := line :: !lines) seen;
  List.iter
    (fun line ->
       Printf.printf "%5d %s\n" (Hashtbl.find seen line) line)
    (List.sort
       (fun a b -> compare (Hashtbl.find seen b) (Hashtbl.find seen a))
       !lines)

Reading a File Backwards by Line or Paragraph

let lines = ref []
let () =
  try
    while true do
      lines := input_line chan :: !lines
    done
  with End_of_file -> ()
let () =
  List.iter
    (fun line ->
       (* do something with line *)
       ())
    !lines

Trailing a Growing File

#load "unix.cma";;

let sometime = 1

let () =
  let chan = open_in file in
  while Sys.file_exists file do
    (try
       let line = input_line chan in
       (* ... *)
       ()
     with End_of_file ->
       Unix.sleep sometime)
  done;
  close_in chan

Picking a Random Line from a File

let () =
  Random.self_init ();
  let count = ref 1 in
  let line = ref "" in
  try
    while true do
      let next = input_line stdin in
      if Random.int !count < 1 then line := next;
      incr count
    done
  with End_of_file ->
    (* !line is the random line *)
    ()

Randomizing All Lines

(* assumes the fisher_yates_shuffle function from Chapter 4 *)
let shuffle list =
  let array = Array.of_list list in
  fisher_yates_shuffle array;
  Array.to_list array

let () =
  Random.self_init ();
  let lines = ref [] in
  (try
     while true do
       lines := (input_line input) :: !lines
     done
   with End_of_file -> ());
  let reordered = shuffle !lines in
  List.iter
    (fun line ->
       output_string output line;
       output_char output '\n')
    reordered

Reading a Particular Line in a File

Processing Variable-Length Text Fields

(* given "record" with field separated by "pattern",
   extract "fields". *)
#load "str.cma";;
let regexp = Str.regexp pattern
let fields = Str.split_delim regexp record

(* same as above using PCRE library, available at:
   http://www.ocaml.info/home/ocaml_sources.html#pcre-ocaml *)
#directory "+pcre";;
#load "pcre.cma";;
let fields = Pcre.split ~pat:pattern record

(*-----------------------------*)

# Str.full_split (Str.regexp "[+-]") "3+5-2";;
- : Str.split_result list =
[Str.Text "3"; Str.Delim "+"; Str.Text "5"; Str.Delim "-"; Str.Text "2"]

# Pcre.split ~pat:"([+-])" "3+5-2";;
- : string list = ["3"; "+"; "5"; "-"; "2"]

(*-----------------------------*)

let fields = Str.split_delim (Str.regexp ":") record
let fields = Str.split_delim (Str.regexp "[ \n\r\t]+") record
let fields = Str.split_delim (Str.regexp " ") record

let fields = Pcre.split ~pat:":" record
let fields = Pcre.split ~pat:"\\s+" record
let fields = Pcre.split ~pat:" " record

Removing the Last Line of a File

#load "unix.cma";;

let () =
  let descr = Unix.openfile file [Unix.O_RDWR] 0o666 in
  let in_channel = Unix.in_channel_of_descr descr in
  let position = ref 0 in
  let last_position = ref 0 in
  begin
    try
      while true do
        ignore (input_line in_channel);
        last_position := !position;
        position := pos_in in_channel;
      done
    with End_of_file -> ()
  end;
  Unix.ftruncate descr !last_position;
  Unix.close descr

Processing Binary Files

set_binary_mode_in in_channel true
set_binary_mode_out out_channel true

(*-----------------------------*)

let () =
  let gifname = "picture.gif" in
  let gif = open_in gifname in
  set_binary_mode_in gif true;
  (* now DOS won't mangle binary input from "gif" *)
  set_binary_mode_out stdout true;
  (* now DOS won't mangle binary output to "stdout" *)
  let buff = String.make 8192 '\000' in
  let len = ref (-1) in
  while !len <> 0 do
    len := input gif buff 0 8192;
    output stdout buff 0 !len
  done

Using Random-Access I/O

let () =
  let address = recsize * recno in
  seek_in fh address;
  really_input fh buffer 0 recsize

(*-----------------------------*)

let () =
  let address = recsize * (recno - 1) in
  (* ... *)
  ()

Updating a Random-Access File

Reading a String from a Binary File

let () =
  let in_channel = open_in_bin file in
  seek_in in_channel addr;
  let buffer = Buffer.create 0 in
  let ch = ref (input_char in_channel) in
  while !ch <> '\000' do
    Buffer.add_char buffer !ch;
    ch := input_char in_channel;
  done;
  close_in in_channel;
  let string = Buffer.contents buffer in
  print_endline string

(*-----------------------------*)

(* bgets - get a string from an address in a binary file *)
open Printf

let file, addrs =
  match Array.to_list Sys.argv with
    | _ :: file :: addrs when List.length addrs > 0 -> file, addrs
    | _ -> eprintf "usage: %s file addr ...\n" Sys.argv.(0); exit 0

let () =
  let in_channel = open_in_bin file in
  List.iter
    (fun addr ->
       let addr = int_of_string addr in
       seek_in in_channel addr;
       let buffer = Buffer.create 0 in
       let ch = ref (input_char in_channel) in
       while !ch <> '\000' do
         Buffer.add_char buffer !ch;
         ch := input_char in_channel;
       done;
       printf "%#x %#o %d \"%s\"\n"
         addr addr addr (Buffer.contents buffer))
    addrs;
  close_in in_channel

(*-----------------------------*)

(* strings - pull strings out of a binary file *)
#load "str.cma";;

let find_strings =
  let pat = "[\040-\176\r\n\t ]" in
  let regexp = Str.regexp (pat ^ pat ^ pat ^ pat ^ "+") in
  fun f input ->
    List.iter
      (function Str.Delim string -> f string | _ -> ())
      (Str.full_split regexp input)

let file =
  try Sys.argv.(1)
  with Invalid_argument _ ->
    Printf.eprintf "usage: %s file\n" Sys.argv.(0);
    exit 0

let () =
  let in_channel = open_in_bin file in
  try
    while true do
      let buffer = Buffer.create 0 in
      let ch = ref (input_char in_channel) in
      while !ch <> '\000' do
        Buffer.add_char buffer !ch;
        ch := input_char in_channel;
      done;
      find_strings print_endline (Buffer.contents buffer)
    done
  with End_of_file ->
    close_in in_channel

Reading Fixed-Length Records

Reading Configuration Files

#load "str.cma";;

let user_preferences = Hashtbl.create 0

let () =
  let comments = Str.regexp "#.*" in
  let leading_white = Str.regexp "^[ \t]+" in
  let trailing_white = Str.regexp "[ \t]+$" in
  let equals_delim = Str.regexp "[ \t]*=[ \t]*" in
  Stream.iter
    (fun s ->
       let s = Str.replace_first comments "" s in
       let s = Str.replace_first leading_white "" s in
       let s = Str.replace_first trailing_white "" s in
       (* anything left? *)
       if String.length s > 0 then
         match Str.bounded_split_delim equals_delim s 2 with
           | [var; value] -> Hashtbl.replace user_preferences var value
           | _ -> failwith s)
    (* defined in this chapter's introduction *)
    (line_stream_of_channel config)

(*-----------------------------*)

(* load variables from ocaml source - toplevel scripts only *)
#use ".progrc";;

Testing a File for Trustworthiness

#load "unix.cma";;

let () =
  try
    let {Unix.st_dev = dev;
         st_ino = ino;
         st_kind = kind;
         st_perm = perm;
         st_nlink = nlink;
         st_uid = uid;
         st_gid = gid;
         st_rdev = rdev;
         st_size = size;
         st_atime = atime;
         st_mtime = mtime;
         st_ctime = ctime} = Unix.stat filename in
    (* ... *)
    ()
  with Unix.Unix_error (e, _, _) ->
    Printf.eprintf "no %s: %s\n" filename (Unix.error_message e);
    exit 0

(*-----------------------------*)

let () =
  let info =
    try Unix.stat filename
    with Unix.Unix_error (e, _, _) ->
      Printf.eprintf "no %s: %s\n" filename (Unix.error_message e);
      exit 0 in
  if info.Unix.st_uid = 0
  then Printf.printf "Superuser owns %s\n" filename;
  if info.Unix.st_atime > info.Unix.st_mtime
  then Printf.printf "%s has been read since it was written.\n" filename

(*-----------------------------*)

let is_safe path =
  let info = Unix.stat path in
  (* owner neither superuser nor me *)
  (* the real uid can be retrieved with Unix.getuid () *)
  if (info.Unix.st_uid <> 0) && (info.Unix.st_uid <> Unix.getuid ())
  then false
  else
    (* check whether the group or other can write file. *)
    (* use 0o066 to detect either reading or writing *)
    if info.Unix.st_perm land 0o022 = 0
    then true  (* no one else can write this *)
    else if info.Unix.st_kind <> Unix.S_DIR
    then false (* non-directories aren't safe *)
    else if info.Unix.st_perm land 0o1000 <> 0
    then true  (* but directories with the sticky bit (0o1000) are *)
    else false

(*-----------------------------*)

let is_verysafe path =
  let rec loop path parent =
    if not (is_safe path)
    then false
    else if path <> parent
    then loop parent (Filename.dirname parent)
    else true in
  loop path (Filename.dirname path)

Program: tailwtmp

Program: tctee

Program: laston

(* laston - find out when a given user last logged on *)

#load "str.cma";;
#load "unix.cma";;

open Printf
open Unix

let lastlog = open_in "/var/log/lastlog"
let sizeof = 4 + 12 + 16
let line = String.make 12 ' '
let host = String.make 16 ' '

let days = [| "Sun"; "Mon"; "Tue"; "Wed"; "Thu"; "Fri"; "Sat" |]
let months = [| "Jan"; "Feb"; "Mar"; "Apr"; "May"; "Jun";
                "Jul"; "Aug"; "Sep"; "Oct"; "Nov"; "Dec" |]

let format_time time =
  let tm = localtime time in
  sprintf "%s %s %2d %02d:%02d:%02d %04d"
    days.(tm.tm_wday)
    months.(tm.tm_mon)
    tm.tm_mday
    tm.tm_hour
    tm.tm_min
    tm.tm_sec
    (tm.tm_year + 1900)

let trim_asciiz = Str.replace_first (Str.regexp "\000*$") ""

let () =
  Array.iter
    (fun user ->
       try
         let u =
           try getpwuid (int_of_string user)
           with Failure _ -> getpwnam user in
         seek_in lastlog (u.pw_uid * sizeof);
         let time = input_binary_int lastlog in
         really_input lastlog line 0 12;
         really_input lastlog host 0 16;
         let line = trim_asciiz line in
         let host = trim_asciiz host in
         printf "%-8s UID %5d %s%s%s\n"
           u.pw_name
           u.pw_uid
           (if time <> 0
            then format_time (float_of_int time)
            else "never logged in")
           (if line <> "" then " on " ^ line else "")
           (if host <> "" then " from " ^ host else "")
       with Not_found ->
         printf "no such uid %s\n" user)
    (Array.sub Sys.argv 1 (Array.length Sys.argv - 1))