5. Hashes

Introduction

;;;-----------------------------
(setf age (make-hash-table :test 'equal))

(setf (gethash "Nat" age) 24
      (gethash "Jules" age) 25
      (gethash "Josh" age) 17)
;;-----------------------------
(mapcar #'(lambda (l)
            (setf (gethash (car l) age) (cdr l)))
        '(("Nat" . 24)
          ("Jules" . 25)
          ("Josh" . 17)))
;;-----------------------------
(defparameter *food-color* (make-hash-table :test 'equal))

(mapcar #'(lambda (l)
            (setf (gethash (car l) *food-color*) (cdr l)))
        '(("Apple" . "red")
          ("Banana" . "yellow")
          ("Lemon" . "yellow")
          ("Carrot" . "orange")))
;;;-----------------------------
(mapcar #'(lambda (l)
            (setf (gethash (car l) *food-color*) (cdr l)))
        '((Apple . "red")
          (Banana . "yellow")
          (Lemon . "yellow")
          (Carrot . "orange")))
;;;-----------------------------

Adding an Element to a Hash

;;;-----------------------------
(setf (gethash key hash) value)
;;;-----------------------------
;; *FOOD-COLOR* defined per the introduction
(setf (gethash "Raspberry" *food-color*) "pink")

(format t "Known foods:~%~{~A~%~}"
        (loop for f being the hash-keys of *food-color*
           collect f))
;;Known foods:
;;Apple
;;Banana
;;Lemon
;;Carrot
;;;-----------------------------

Testing for the Presence of a Key in a Hash

;;;-----------------------------
;; does HASH have a value for KEY ?
(if (nth-value 1 (gethash key hash))
    (progn
      ;; it exists
      )
    (progn
      ;; it doesn't
      ))
;;;-----------------------------
;; *FOOD-COLOR* per the introduction
(dolist (name '("Banana" "Martini"))
  (format t "~A is a ~A.~%"
          name
          (if (nth-value 1 (gethash name *food-color*))
              "food" "drink")))

;;Banana is a food.
;;Martini is a drink.
;;;-----------------------------
(setf age (make-hash-table :test 'equal))
(setf (gethash "Toddler" age) 3)
(setf (gethash "Unborn" age) 0)
(setf (gethash "Phantasm" age) nil)
(dolist (thing '("Toddler" "Unborn" "Phantasm" "Relic"))
  (format t "~a: " thing)
  (multiple-value-bind (defined exists)
      (gethash thing age)
      (when exists
        (format t "Exists ")
        (when defined 
          (format t "Defined ")
          ;; 0 is "true" in CL, so explicitly mimic Perl
          (unless (zerop defined)
            (format t "True ")))))
  (format t "~%"))

;;Toddler: Exists Defined True 
;;Unborn: Exists Defined 
;;Phantasm: Exists 
;;Relic: 
;;;-----------------------------
;;; @@INCOMPLETE@@

Deleting from a Hash

;;;-----------------------------
;; remove KEY and its value from HASH
(remhash key hash)
;;;-----------------------------
;; *FOOD-COLOR* as per Introduction
(defun print-foods ()
  (let ((foods (hash-keys *food-color*))) ; HASH-KEYS defined in Appendix
    (format t "Keys: ~{~A~^ ~}~%Values: ~{~A~^ ~}~%" 
            foods
            (loop for food in foods
               collect (or (gethash food *food-color*)
                           "(undef)")))))

(format t "Initially~%")
(print-foods)

(format t "~%With Banana undef~%")
(setf (gethash "Banana" *food-color*) 

(format t "~%With Banana deleted~%")
(remhash "Banana" *food-color*)
(print-foods)

;; Initially
;; Keys: Apple Banana Lemon Carrot
;; Values: red yellow yellow orange
;;
;; With Banana undef
;; Keys: Apple Banana Lemon Carrot
;; Values: red (undef) yellow orange
;; 
;; With Banana deleted
;; Keys: Apple Lemon Carrot
;; Values: red yellow orange
;;;-----------------------------
(mapc #'(lambda (key) (remhash key *food-color*)) '("Banana" "Apple" "Cabbage"))
;;;-----------------------------
    

Traversing a Hash

;;;-----------------------------
(loop for key being the hash-keys of hash using (hash-value value)
     ;; do something with KEY and VALUE
     )
;;;-----------------------------
(maphash #'(lambda (key value)
             ;; do something with KEY and VALUE
             )
         hash)
;;;-----------------------------
;; *FOOD-COLOR* per the introduction
(loop for food being the hash-keys of *food-color* using (hash-value color)
     do (format t "~A is ~A.~%" food color))
;; Apple is red.
;; Banana is yellow.
;; Lemon is yellow.
;; Carrot is orange.
;;;-----------------------------
(maphash #'(lambda (food color)
             (format t "~A is ~A.~%" food color))
         *food-color*)
;; Apple is red.
;; Banana is yellow.
;; Lemon is yellow.
;; Carrot is orange.
;;;-----------------------------
;; No equivalent
;;;-----------------------------
(loop for food in (sort (hash-keys *food-color*) 'string-lessp)
   do (format t "~A is ~A~%" food (gethash food *food-color*)))
;; Apple is red
;; Banana is yellow
;; Carrot is orange
;; Lemon is yellow
;;;-----------------------------
;; Not sure what the following Perl is supposed to do:
;;while ( ($k,$v) = each %food_color ) {
;;    print "Processing $k\n";
;;    keys %food_color;               # goes back to the start of %food_color
;;}
;;;-----------------------------
(use-package :cl-ppcre)
(use-package :iterate)

;; The following handles the case that the Perl handles where there is
;; no filename (and it then opens '-'.  To do the same thing you'd do
;; something like: (countfrom *standard-input*) and this method would
;; automatically get triggered instead of the one requiring a
;; filename.
(defmethod countfrom ((stream stream))
  (let ((from (make-hash-table :test 'equal)))
    (with-open-stream (input stream)
      (iter (for line in-stream input using 'read-line)
            (register-groups-bind (person) ("^From: (.*)\\s" line)
              (incf (gethash person from 0)))))
    (loop for person in (sort (hash-keys from) 'string-lessp)
       do (format t "~A: ~A~%" person (gethash person from)))))

;; This method is a bit of a hack in that it shouldn't really assume
;; that the string designates a filename, but for the purposes of this
;; example that seems ok.  Note that it just calls OPEN directly
;; without WITH-OPEN-FILE because it knows that the STREAM version of
;; this method will always close it.
(defmethod countfrom ((filename string))
  (countfrom (open filename)))
;;;-----------------------------

Printing a Hash

(defvar *table* (make-hash-table :test #'equal))

(dotimes (i 10)
  (setf (gethash (format nil "~R" i) *table*) i))

(maphash (lambda (key val) (format t "~A~%" (cons key val))) *table*)
; => ("nine"  . 9)
;    ("eight" . 8)
;    ("seven" . 7)
;    ("six"   . 6)
;    ("five"  . 5)
;    ("four"  . 4)
;    ("three" . 3)
;    ("two"   . 2)
;    ("one"   . 1)
;    ("zero"  . 0)

;;; Unfortunately there doesn't exist a  `FORMAT' directive for hash tables. But
;;; one can easily override existing `PRINT-OBJECT' method for hash tables.

Retrieving from a Hash in Insertion Order

;;; There doesn't exist a portable way of preserving insertion order of contents
;;; of a hash table in Common Lisp.   (Neither there is a rational behind such a
;;; feature, IMHO.)  On the  other hand, one  can keep  a history of  hash table
;;; modifications separately.

(defun make-ordered-hash-table (&rest make-hash-table-args)
  (cons (apply #'make-hash-table make-hash-table-args) 0))

(defun setkey (table key val)
  (setf (gethash key (car table)) (cons (incf (cdr table)) val)))

(defun remkey (table key)
  (remhash key (car table)))

(defun getkey (table key &optional default)
  (let* ((missing-key (gensym))
         (val (gethash key (car table) missing-key)))
    (if (eql missing-key val) default (cdr val))))

(defun keys (table)
  (let (key-id-pairs)
    (maphash
     (lambda (key val) (push (cons key (car val)) key-id-pairs))
     (car table))
    (mapcar #'car (sort key-id-pairs #'< :key #'cdr))))

(defvar *table* (make-ordered-hash-table))

(dotimes (i 10)
  (setkey *table* (/ (1+ i)) i))

(keys *table*)
; => (1 1/2 1/3 1/4 1/5 1/6 1/7 1/8 1/9 1/10)

(setkey *table* (/ 2) 20)
(setkey *table* (/ 4) 40)
(remkey *table* (/ 6))

(keys *table*)
; => (1 1/3 1/5 1/7 1/8 1/9 1/10 1/2 1/4)

Hashes with Multiple Values Per Key

;;; You can  easily create hash tables  using multiple values per  key. For this
;;; purpose you  just need to increase  the verbosity of the  used test function
;;; during hash table creation.

(defvar *employee-table* (make-hash-table :test #'equal))

(defvar *employee-list*
  '(("R&D"   "Bob"   "1000")
    ("R&D"   "Trudy" "1001")
    ("R&D"   "Alice" "1002")
    ("Sales" "Bob"   "2001")
    ("Sales" "Jane"  "2003")))

;;; Pay attention that  we have to "Bob"s in  different departments. We'll place
;;; employees into  the `*EMPLOYEE-TABLE*' using  these two (name  & department)
;;; identifier fields.

(dolist (employee *employee-list*)
  (destructuring-bind (dept name phone) employee
    (setf (gethash (list dept name) *employee-table*)
          phone)))

(maphash (lambda (key val) (format t "~S => ~S~%" key val))
         *employee-table*)
; => ("R&D" "Bob")    => "1000"
;    ("R&D" "Trudy")  => "1001"
;    ("R&D" "Alice")  => "1002"
;    ("Sales" "Bob")  => "2001"
;    ("Sales" "Jane") => "2003"

Inverting a Hash

(defun reverse-hash-table (old-table &key test)
  (let ((new-table
         (make-hash-table
          :test (or test (hash-table-test old-table))
          :size (hash-table-size old-table))))
    (maphash (lambda (key val)
               (setf (gethash val new-table) key))
             old-table)
    new-table))

(defvar *table* (make-hash-table))

(dotimes (i 10) (setf (gethash (/ (1+ i)) *table*) i))

(maphash (lambda (key val) (format t "~S => ~S~%" key val))
         *table*)
; => 1    => 0
;    1/2  => 1
;    1/3  => 2
;    1/4  => 3
;    1/5  => 4
;    1/6  => 5
;    1/7  => 6
;    1/8  => 7
;    1/9  => 8
;    1/10 => 9

(maphash (lambda (key val) (format t "~S => ~S~%" key val))
         (reverse-hash-table *table*))
; => 0 => 1
;    1 => 1/2
;    2 => 1/3
;    3 => 1/4
;    4 => 1/5
;    5 => 1/6
;    6 => 1/7
;    7 => 1/8
;    8 => 1/9
;    9 => 1/10

Sorting a Hash

;;; One cannot (and shouldn't) make assumptions  on the order of the elements in
;;; a hash table in  Common Lisp. Anyway, while you're here, below  is a quick &
;;; dirty solution.

(defun ordered-keys (table)
  (sort
   (loop for key being each hash-key of table
         collect key)
   #'<))

(defvar *table* (make-hash-table))

(dotimes (i 10)
  (setf (gethash  (/ (1+ i)) *table*) i))

(ordered-keys *table*)
; => (1/10 1/9 1/8 1/7 1/6 1/5 1/4 1/3 1/2 1)

Merging Hashes

(defun merge-hash-tables (&rest tables)
  (let ((union
         (make-hash-table
          :test (first
                 (sort (mapcar #'hash-table-test tables) #'>
                       :key (lambda (test)
                              (ecase test
                                (eq 0)
                                (eql 1)
                                (equal 2)
                                (equalp 3)))))
          :size (reduce #'max (mapcar #'hash-table-size tables)))))
    (dolist (table tables)
      (maphash (lambda (key val) (setf (gethash key union) val)) table))
    union))

(defvar *table-x* (make-hash-table))

(defvar *table-y* (make-hash-table :test #'equal))

(dotimes (i 5)
  (setf (gethash i *table-x*) (* 10 i)
        (gethash (format nil "~R" i) *table-y*) i))

(defvar *table-u* (merge-hash-tables *table-x* *table-y*))

(maphash (lambda (key val) (format t "~10S => ~S~%" key val)) *table-u*)
; => 0       => 0
;    1       => 10
;    2       => 20
;    3       => 30
;    4       => 40
;    "zero"  => 0
;    "one"   => 1
;    "two"   => 2
;    "three" => 3
;    "four"  => 4

Finding Common or Different Keys in Two Hashes

;;; Before  going  into  the  details   about  how  to  find  intersections  and
;;; differences between two hash tables,  one should note the existence of below
;;; functions operating on lists.

;;; Function <a href="http://l1sp.org/cl/set-exclusive-or">SET-EXCLUSIVE-OR</a>, <a href="http://l1sp.org/cl/nset-exclusive-or">NSET-EXCLUSIVE-OR</a>
;;; Function <a href="http://l1sp.org/cl/set-difference">SET-DIFFERENCE</a>, <a href="http://l1sp.org/cl/nset-difference">NSET-DIFFERENCE</a>
;;; Function <a href="http://l1sp.org/cl/intersection">INTERSECTION</a>, <a href="http://l1sp.org/cl/nintersection">NINTERSECTION</a>
;;; Function <a href="http://l1sp.org/cl/union">UNION</a>, <a href="http://l1sp.org/cl/nunion">NUNION</a>

;;; While  a hash  table implementation  of above  set operations  would perform
;;; better  for large  data sets,  people  report that  above functions  perform
;;; better than hash tables for collections of size 20-30 elements. On the other
;;; hand, it's  obvious that  working with  lists is much  more suitable  to the
;;; nature of a lisp program.

;;; Simple & Clean Hash Table Intersection/Difference Implementations

(defun hash-table-keys (table)
  (loop for key being each hash-key of table
        collect key))

(defun key-intersection (u v)
  (intersection
   (hash-table-keys u) (hash-table-keys v)
   :test (hash-table-test u)))

(defun key-difference (u v)
  (set-difference
   (hash-table-keys u) (hash-table-keys v)
   :test (hash-table-test u)))


;;; Hairy Hash Table Intersection/Difference Implementations

(defun hairy-key-frequency (tables &key (test 'eql))
  (let ((frequency (make-hash-table :test test)))
    (dolist (table tables)
      (maphash
       (lambda (key val)
         (declare (ignore val))
         (incf (gethash key frequency 0)))
       table))
    frequency))

(defmacro with-table-test-consistency ((tables test) &body body)
  (alexandria:with-unique-names (table)
    `(let ((,test (hash-table-test (first ,tables))))
       (unless (every (lambda (,table) (eql (hash-table-test ,table) ,test))
                      (rest ,tables))
         (error "Inconsistent test functions! (Expecting ~S.)" ,test))
       ,@body)))

(defun hairy-key-frequency-filter (tables pred)
  (when tables
    (with-table-test-consistency (tables test)
      (let (accum)
        (maphash
         (lambda (key val) (when (funcall pred val) (push key accum)))
         (hairy-key-frequency tables :test test))
        accum))))

(defun hairy-key-intersection (&rest tables)
  (hairy-key-frequency-filter tables (lambda (val) (< 1 val))))

(defun hairy-key-difference (&rest tables)
  (hairy-key-frequency-filter tables (lambda (val) (< val 2))))


;;; Example Usage

(let ((u (make-hash-table))
      (v (make-hash-table)))
  (dolist (i '(1 2 3 4 5)) (setf (gethash i u) t))
  (dolist (i '(3 4 5 6 7)) (setf (gethash i v) t))
  (list :simple (list :intersection (key-intersection u v)
                      :difference   (key-difference u v))
        :hairy  (list :intersection (hairy-key-intersection u v)
                      :difference   (hairy-key-difference u v))))
; => (:SIMPLE (:INTERSECTION (5 4 3) :DIFFERENCE (2 1))
;     :HAIRY  (:INTERSECTION (5 4 3) :DIFFERENCE (7 6 2 1)))

Hashing References

(defun consume-stream (stream &key (buffer-size 8192))
  (let ((buffer
         (make-sequence
          `(vector ,(stream-element-type stream))
          buffer-size)))
    (loop for pos = (read-sequence buffer stream)
          until (zerop pos) sum pos)))

(let* ((streams (mapcar #'open (list "/etc/passwd" "/etc/motd" "/etc/fstab"))))
  (unwind-protect
       (let ((table (make-hash-table)))
         ;; Place `(STREAM . PATHNAME)' pairs as table key and values.
         (dolist (stream streams)
           (setf (gethash stream table) (pathname stream)))
         ;; Let's find the sizes of the streams.
         (let (accum)
           (maphash
            (lambda (key val) (push (cons val (consume-stream key)) accum))
            table)
           accum))
    ;; Don't forget to close open file streams.
    (mapcar #'close streams)))
; => ((#P"/etc/fstab"  . 488)
;     (#P"/etc/motd"   . 351)
;     (#P"/etc/passwd" . 1232))

Presizing a Hash

;;; In Common Lisp,  you can create hash tables of  previously known sizes using
;;; `SIZE'             keyword             supplied            to             <a
;;; href="http://l1sp.org/cl/make-hash-table">`MAKE-HASH-TABLE'</a>.    (Moreover
;;; there are `REHASH-SIZE'  and `REHASH-THRESHOLD' keyword parameters available
;;; which  might  interest  you.) But  once  you  created  a hash  table,  these
;;; configurations  stay as  is;  in  other words  later  modifications are  not
;;; allowed.

Finding the Most Common Anything

;;; See  `HAIRY-KEY-FREQUENCY'  in  "Hairy  Hash  Table  Intersection/Difference
;;; Implementations".

Representing Relationships Between Data

;;; We construct  the family  tree (in s-expressions)  extracted from  the given
;;; parent-child relationship table.

(defvar *relations*
  '(("Cain"      . "Adam")
    ("Abel"      . "Adam")
    ("Seth"      . "Adam")
    ("Enoch"     . "Cain")
    ("Irad"      . "Enoch")
    ("Mehujael"  . "Irad")
    ("Methusael" . "Mehujael")
    ("Lamech"    . "Methusael")
    ("Jabal"     . "Lamech")
    ("Jubal"     . "Lamech")
    ("Tubalcain" . "Lamech")))

(defun construct-relation-tree (rels)
  (labels ((construct (parent)
             (cons parent
                   (mapcar #'construct
                           (mapcar #'car
                                   (remove parent rels
                                           :key #'cdr
                                           :test (complement #'string-equal)))))))
    (mapcar #'construct
            (remove-duplicates
             (mapcar #'cdr
                     (remove-if
                      (lambda (pair)
                        (find (cdr pair) rels :test #'string-equal :key #'car))
                      rels))
             :test #'string-equal))))

(construct-relation-tree *relations*)
; => (("Adam"
;      ("Cain"
;       ("Enoch"
;        ("Irad"
;         ("Mehujael" ("Methusael" ("Lamech" ("Jabal") ("Jubal") ("Tubalcain")))))))
;      ("Abel")
;      ("Seth")))

;;; In a similar way, we can  construct the dependency tree between C source and
;;; header files.

(defun parse-file-dependencies (pathname)
  (let (deps)
    (with-open-file (in pathname)
      (loop for line = (read-line in nil nil)
            while line
            do (ppcre:do-scans
                   (m-s m-e r-ss r-es
                    "\\s*#\\s*include\\s*[<\"]([^>\"]+)[>\"]" line)
                 (push (subseq line (elt r-ss 0) (elt r-es 0)) deps))))
    deps))

(defun construct-file-relations (pathnames)
  (mapcan
   (lambda (child)
     (mapcar (lambda (parent)
               (cons
                (format nil "~A.~A" (pathname-name child) (pathname-type child))
                parent))
             (parse-file-dependencies child)))
   pathnames))

(construct-file-relations
 '("/tmp/c/hello.c" "/tmp/c/hello.h" "/tmp/c/main.c"))
; => (("hello.c" . "hello.h")
;     ("hello.c" . "stdio.h")
;     ("main.c" . "hello.h")
;     ("main.c" . "stdlib.h")
;     ("main.c" . "stdio.h"))

(construct-relation-tree
 (construct-file-relations
  '("/tmp/c/hello.c" "/tmp/c/hello.h" "/tmp/c/main.c")))
; => (("hello.h" ("hello.c") ("main.c"))
;     ("stdlib.h" ("main.c"))
;     ("stdio.h" ("hello.c") ("main.c")))

;;; Actually, whole PLEAC  example was related with hash  tables in its original
;;; demonstration.  But  the more I  think about it,  the more I find  that list
;;; structures are  more suitable for  this kind of  a tasks. But it  is obvious
;;; that, `REMOVE-DUPLICATES' kind of  tricks will perform poorly when duplicate
;;; eliminations will consume the majority  of the computation power and in such
;;; a situation you'll inevitably need hash tables.

Program: dutree

(defstruct hash-branch (weight 0) (tree nil))

(defun hash-tree-add (branch path weight)
  (let ((branch (or branch (make-hash-branch))))
    (prog1 branch
      (incf (hash-branch-weight branch) weight)
      (when path
        (setf (hash-branch-tree branch)
              (or (hash-branch-tree branch)
                  (make-hash-table :test #'equal)))
        (setf (gethash (first path) (hash-branch-tree branch))
              (hash-tree-add
               (gethash (first path) (hash-branch-tree branch))
               (rest path) weight))))))

(defun hash-tree->list-tree (branch)
  (when branch
    (cons (hash-branch-weight branch)
          (let ((tree (hash-branch-tree branch))
                (accum))
            (when tree
              (maphash
               (lambda (key val)
                 (push (cons key (hash-tree->list-tree val)) accum))
               tree)
              accum)))))

(defun print-list-tree (branches &optional (depth 0))
  (dolist (branch (sort (copy-list branches) #'> :key #'second))
    (format t "~8D |" (second branch))
    (loop repeat depth do (format t "--"))
    (format t "-> ~A~%" (first branch))
    (print-list-tree (cddr branch) (1+ depth))))

(defun du-output->size-path-pair (string)
  (with-input-from-string (in string)
    (loop for line = (read-line in nil nil)
          while line
          collect (ppcre:register-groups-bind (size path)
                      ("^\(\[^\\s\]+\)\\s+\(\[^\\s\]+\)$" line)
                    (cons (parse-integer size)
                          (delete-if #'zerop (ppcre:split "/" path)
                                     :key #'length))))))

(defun du-output->list-tree (string)
  (let ((branch (make-hash-branch)))
    (dolist (size-path-pair (du-output->size-path-pair string))
      (hash-tree-add branch (cdr size-path-pair) (car size-path-pair)))
    (rest (hash-tree->list-tree branch))))

(du-output->list-tree
  (trivial-shell:shell-command "du -ab /tmp/c"))
; => (("tmp" 21026
;      ("c" 21026 ("hello.h" 71) ("hello.c" 128) ("main.c" 123) ("main" 9168)
;       ("out" 1212 ("stdout" 44) ("stderr" 246) ("debug" 256)))))

(print-list-tree
 (du-output->list-tree
  (trivial-shell:shell-command "du -ab /tmp/c")))
; =>    21026 |-> tmp
;       21026 |---> c
;        9168 |-----> main
;        1212 |-----> out
;         256 |-------> debug
;         246 |-------> stderr
;          44 |-------> stdout
;         128 |-----> hello.c
;         123 |-----> main.c
;          71 |-----> hello.h