10. Subroutines

Introduction


(* A function is bound to a variable (as with everything) with the let keyword
*)

let hello () =
  incr greeted; (* global reference *)
  printf "hi there!\n";;

(* Other forms for declaring a function are as follows *)

let hello = 
  fun () -> 
    incr greeted; (* global reference *)
    printf "hi there!\n";;

let hello = 
  function () ->
    incr greeted; (* global reference *)
    printf "hi there!\n";;

(* The typical way of calling this function is *)

hello ();;

Accessing Subroutine Arguments


(* All values passed to a function must be named in the paramater list to the
 * function *)

let hypotenuse side1 side2 =
  sqrt ((side1 ** 2.) +. (side2 ** 2.));;

(* Note, however, that if the parameters are defined/sent as a tuple then they
 * can be accessed in one of two equivalent ways *)

let hypotenuse (side1,side2) =
  sqrt ((side1 ** 2.) +. (side2 ** 2.));;

let hypotenuse sides =
  let side1,side2 = sides in
  sqrt ((side1 ** 2.) +. (side2 ** 2.));;

(* In both of these cases, however, we must pass the arguments as a tuple *)

print_float hypotenuse (3.,4.);;

(* since most data structures are immutable, one generally does not need to copy
 * the parameters into local variables *)

let nums = [1.4; 3.5; 6.7];;
let int_all l =
  List.map int_of_float l;;

(*
# let ints = int_all nums;;
val ints : int list = [1; 3; 6]

# nums;;
- : float list = [1.4; 3.5; 6.7]
*)

(* However, one needs to be careful when mutable data is passed in and
 * operations that alter that data are used *)

let nums = [|1.4; 3.5; 6.7 |];;
let int_all2 a =
  Array.iteri (fun i x -> a.(i) <- 10. *. x) a;
  a;;
let int_all3 a = 
  Array.map int_of_float a;;

(*
# let a2 = int_all2 nums;;
val a2 : int array = [|1; 3; 6|]

# nums;;
- : float array = [|1.4; 3.5; 6.7|]

# let a3 = times10 nums;;
val a3 : float array = [|14.; 35.; 67.|]

# nums;;
- : float array = [|14.; 35.; 67.|]
*)

(* To write functions that change their caller's variables, those variables must
 * be mutable structures, such as references *)
let nums = ref [1.4; 3.5; 6.7];;
let trunc_em l =
  l:= List.map floor !l;
  !l;;

(*

# let n2 = trunc_em nums;;
val n2 : float list = [1.; 3.; 6.]

# !nums;;
- : float list = [1.; 3.; 6.]
*)

Making Variables Private to a Function


(* to declare a variable local to a function, simply use let inside the function
 * body *)

let somefunc () =
  let variable = ... in
  let another,anarray,ahash = ... in
  ... ;;

let check_x x =
  let y = "whatever" in
  run_check ();
  if condition then printf "got %s" x;;

let save_array arguments =
  global_list := arguments @ !global_list;;

Creating Persistent Private Variables


let mysub =
  let variable = ... in
  fun args -> ... ;;

(* To write a counter *)
let next_counter = 
  let counter = ref 0 in
  fun () -> 
    incr counter; 
    !counter;;

let next_counter,prev_counter = 
  let counter = ref 42 in
  (fun () -> incr counter; !counter),
  (fun () -> decr counter; !counter);;

Determining Current Function Name

(* The names of functions are not available at runtime. However, using the
   camlp4 preprocessor, we can expose various pieces of static information.

   The "macro" parser provides the current file and location as __FILE__ and
   __LOCATION__, respectively. With a bit more customization, we can expose
   the current function name as well.

   To do this, we'll make a copy of camlp4/Camlp4Filters/Camlp4Profiler.ml
   from the OCaml sources and rename it to "Camlp4FuncNamer.ml". Then,
   we'll change the definition of "decorate_this_expr" to the following: *)

(*---------------------------*)

value decorate_this_expr e id =
  let _loc = Ast.loc_of_expr e in
  <:expr< let __FUNC__ = $`str:id$ in $e$ >>;

(*---------------------------*)

(* This has the effect of exposing the current function name as the
   string, __FUNC__, which we can use just like __FILE__. To build this
   syntax extension, use a command like the following: *)

ocamlc -c -pp camlp4rf -I /usr/lib/ocaml/3.10.2/camlp4 Camlp4FuncNamer.ml

(*---------------------------*)

(* Now, we'll write a simple test program called "main.ml": *)

(* Comment out this line to silence log messages. *)
DEFINE DEBUG

(* Default function name if Camlp4FuncNamer doesn't provide one. *)
let __FUNC__ = "<toplevel>"

(* Log macro with Printf formatting. *)
DEFINE LOG =
  IFDEF DEBUG THEN
    Printf.kprintf
      (Printf.eprintf "%s[%s]: %s\n%!" __FUNC__ __FILE__)
  ELSE
    Printf.kprintf (fun _ -> ())
  END

(* An example named function. *)
let test_function () =
  let str = "Hello, world!" in
  let num = 42 in
  LOG "str=\"%s\", num=%d" str num;
  print_endline "test complete"

(* Some code to run at the toplevel. *)
let () =
  LOG "not in a function";
  test_function ()

(*---------------------------*)

(* We can compile this program as follows: *)

ocamlc -pp "camlp4of Camlp4FuncNamer.cmo" \
    -I /usr/lib/ocaml/3.10.2/camlp4 \
    -o main main.ml

(* Running it, we get this output: *)

<toplevel>[main.ml]: not in a function
test_function[main.ml]: str="Hello, world!", num=42
test complete

Passing Arrays and Hashes by Reference

(* Because all OCaml variables represent pointers to their data, all function
 * arguments are implicitly passed by reference *)

array_diff array1 array2;;

let a = [| 1; 2 |];;
let b = [| 5; 8 |];;
let add_vec_pair x y =
  Array.init (Array.length x) (fun i -> x.(i) + y.(i));;

(*
# let c = add_vec_pair a b;;
val c : int array = [|6; 10|]
*)

Detecting Return Context

(* OCaml's type safety doesn't allow this kind of shenanigans unless you bring
 * union types into play -- but you still need to ensure that the return type of
 * all three contexts is the same *)

type 'a lORs =
    List of 'a list
  | Scalar of 'a
  | Void of unit ;;

let mysub arg =
  match arg with
    List l -> (* list context, do something with l *)
  | Scalar s -> (* scalar context, do something with s *)
  | Void _ -> (* void context, do something with nothing *);;

(* or equivalently *)
let mysub = function
    List l -> (* list context, do something with l *)
  | Scalar s -> s (* scalar context, do something with s *)
  | Void _ -> (* void context, do something with nothing *);;

mysub (Void ());;         (* void context *)
mysub (Scalar arg);;      (* scalar context *)
mysub (List arg);;        (* list context *)

Passing by Named Parameter

(* To name the arguments of a function, use labels *)
let thefunc ~increment ~finish ~start =
  ... ;;

(* It can be called like *)
thefunc ~increment:"20s" ~start:"+5m" ~finish:"+30m";;

(* Note that you can use different names for the labels and variables, and if
 * the application is total, the labels can be omitted *)
let divide ~numerator:x ~denominator:y = 
  x / y;;

(*
# divide ~denominator:2 ~numerator:100;;
 - : int = 50

# divide 20 4;;
 - : int = 5
*)

(* If you want to provide default values, you need to use optional arguments,
 * but this requires at least one unlabelled argument *)

let fraction ?(y = 2) x =
  x / y;;

(*
fraction 30 ~y:3;;
 - : int = 10

fraction 30;;
 - : int = 15
*)

Skipping Selected Return Values

(* Use _, which matches any pattern and throws away the value it matches *)

let a,_,c = func ();;
let _,_,d = func ();;

Returning More Than One Array or Hash

(* Just stick all of the values in a tuple and return it *)
let somefunc () =
  let arr = ... in
  let hash = ... in
    ...
    (arr,hash);;

let a,h = somefunc ();;

Returning Failure

(* Use an appropriate exception *)

let failing_routine () =
  ...
  raise Failure "Bad things happened...";;

try failing_routine () with
  Failure s -> printf "failing_routine failed because: %s\n" s;;

Prototyping Functions


(* This is pretty much unnecessary due to OCaml's type inference -- you will
 * know at compile time if you try to pass invalid arguments to a function *)

Handling Exceptions

(* To handle exceptions, which are thrown with the raise keword, wrap the
 * possibly exceptional call in a try ... with block.  You only need to do this
 * where appropriate *)

let slurp_to_list filename =
  (* Note, if filename does not exist in the current directory, it will raise a
   * Sys_error exception *)
  let ic = open_in filename and
  l = ref [] in
  let rec loop () =
    let line = input_line ic in
    l := line::!l;
    loop () in
  try loop () with End_of_file -> close_in ic; List.rev !l;;

let lfind name l =
  (* Note, if no elements in the list satisfy the predicate, List.find will
   * raise the Not_found exception *)
  List.find (fun x -> Str.string_match (Str.regexp ("$" ^ name)) x 0) l;;

let findSmurfette =
  try
    print_endline (lfind "Smurfette" (slurp_to_list "smurfs"))
  with
    Sys_error s -> prerr_endline ("Dammit! - " ^ s)
  | Not_found -> prerr_endline "Hmmm... Smurfette is not in smurfs";;

Saving Global Values

(* To do this in OCaml -- which doesn't like global state in the first place --
 * you need to manually store the old value and replace it before exiting the
 * block *)

let age = ref 18;;
if condition then
  (
    let org_age = !age in
    age := 23;
    func ();
    age := org_age
  );;

(* for local handles, just create a new channel inside your block *)
let get_motd () =
  let motd = open_in "/etc/motd" in
  let retval = 
    ... in
  close_in motd;
  retval;;

Redefining a Function


(* If you want to redefine a function... go ahead.  Functions are first class
 * members in OCaml *)

let f x y =
  x + y;;

f 5 7;;
(*  - : int = 12 *)

let f x y =
  x - y;;

f 5 7;;

(*  - : int = -2 *)

(* to do it temporarily, either save to old value and then restore it, or just
 * redefine it in the current block.  The old value will be restored when you
 * exit the scope of that block *)

let g = f
and f x y =
  x * y;;

f 5 7;;

(*  - : int = 35 *)

let f = g;;

f 5 7;;

(*  - : int = -2 *)

let g () = 
  let f x y =
    x / y in
  f 5 7;;

g ();;

(*  - : int = 0 *)

f 5 7;;

(*  - : int = -2 *)

Trapping Undefined Function Calls with AUTOLOAD

(* Since OCaml is statically typed, any attempt to call an undefined
   function will result in a compiler error. There is no way to capture
   and handle this event at runtime. *)

Nesting Subroutines


(* Just define the inner function within the outer one *)
let outer x =
  let x = x + 35 in
  let inner () =
    x * 19 in
  x + inner ();;

Program: Sorting Your Mail


let slurp_to_string filename =
  let ic = open_in filename and
  buf = Buffer.create 4096 in
  let rec loop () =
    let line = input_line ic in
    Buffer.add_string buf line;
    Buffer.add_string buf "\n";
    loop () in
  try loop () with End_of_file -> close_in ic; Buffer.contents buf;;

(* Note: The following function does something slightly different than the Perl
 * version, as it returns a subject,message #,refrence to the message tuple
 * sorted by subject -> message number instead of just a list of messages sorted
 * by subject -> message number -- it's trivial to get just what the Perl
 * version does from this... *)

let sortedMail fn =
  let msglist = 
    (* I had to add this filtering step due to some wierd structure in my mbox
     * file. go figure... *)
    List.filter (fun s -> String.sub s 0 5 = "From:")
      (List.map (fun x -> "From" ^ x) 
        (Str.split (Str.regexp "^From") (slurp_to_string fn)))
  and counter = ref (-1) in
(*  let subjList = *)
    List.sort compare
      (List.map 
        (fun s -> 
          ignore (Str.search_forward 
            (* Not positive this regex is equivalent to the Perl version, but it
             * seems to work -- you can use the third party PCRE module if you
             * want to be positive *)
            (Str.regexp "^Subject:[ \t]*\(:?[Rr][Ee]:[ \t]*\)*\(.*\)") s 0);
          incr counter;
          (try (String.lowercase (Str.matched_group 2 s)) with Not_found -> ""),
           !counter,
           ref s)
        msglist);;

List.iter (fun (_,_,rm) -> print_endline !rm) (sortedMail "mbox");;

(* To sort by using a hashtable *)

let keys h =
  let k = Hashtbl.fold (fun k v b -> k::b) h [] in
  (* filter out duplicates *)
  List.fold_left (fun b x -> if List.mem x b then b else x::b) [] k;;

let sortedMailByHash fn =
  let msglist = 
    (* I had to add this filtering step due to some wierd structure in my mbox
     * file. go figure... *)
    List.filter (fun s -> String.sub s 0 5 = "From:")
      (List.map (fun x -> "From" ^ x) 
        (Str.split (Str.regexp "^From") (slurp_to_string fn)))
  and counter = ref (-1) in
  let h = Hashtbl.create (List.length msglist) in
(*  let subjList = *)
(*    List.sort compare *)
      (List.iter 
        (fun s -> 
          ignore (Str.search_forward 
            (* Not positive this regex is equivalent to the Perl version, but it
             * seems to work -- you can use the third party PCRE module if you
             * want to be positive *)
            (Str.regexp "^Subject:[ \t]*\(:?[Rr][Ee]:[ \t]*\)*\(.*\)") s 0);
          incr counter;
          let sub =
            try 
              (String.lowercase (Str.matched_group 2 s)) 
          with Not_found -> "" in
          Hashtbl.add h sub s))
        msglist;
      List.flatten 
        (List.map (fun x -> List.rev (Hashtbl.find_all h x)) 
          (List.sort (keys h)));;

List.iter (fun m -> print_endline m) (sortedMailByHash "mbox");;