5. Hashes

Introduction

-- using simple association list
age = [ ("Nat", 24)
      , ("Jules", 25)
      , ("Josh", 17) ]

-- access by key:
Just jules_age = lookup "Jules" age    -- 25

-- using Data.Map
import qualified Data.Map as Map
age = Map.fromList [ ("Nat", 24)
                   , ("Jules", 25)
                   , ("Josh", 17) ]

-- access by key:
jules_age' = age' Map.! "Jules"   -- 25

-- same, incrementally
age1 = Map.empty
age2 = Map.insert "Nat"   24 age1
age3 = Map.insert "Jules" 25 age2
age4 = Map.insert "Josh"  17 age3
-- => Map.fromList [("Josh",17),("Jules",25),("Nat",24)]

food_color = [ ("Apple" , "red")
             , ("Banana", "yellow")
             , ("Lemon" , "yellow")
             , ("Carrot", "orange") ]

food_color' = Map.fromList food_color

Adding an Element to a Hash

food_color2 = ("Raspberry", "pink") : food_color

t = putStrLn $ "Know foods: " ++ unwords (map fst food_color2)
-- => Know foods: Raspberry Apple Banana Lemon Carrot

-- nb: it doesn't remove duplicates
food_color3 = ("Raspberry", "red") : food_color2
-- => [ ("Raspberry","red"), ("Raspberry","pink"), ("Apple","red"), ... ]
-- (see section "Finding Common or Different Keys in Two Hashes" for a solution removing duplicates)


-- same using Data.Map
food_color2' = Map.insert "Raspberry" "pink" food_color'

t' = putStrLn $ "Know foods: " ++ unwords (Map.keys food_color2')
-- => Know foods: Apple Banana Carrot Lemon Raspberry

-- nb: it does remove duplicates
food_color3' = Map.insert "Raspberry" "red" food_color2'
-- Map.fromList [("Apple","red"),("Banana","yellow"),("Carrot","orange"),("Lemon","yellow"),("Raspberry","red")]

Testing for the Presence of a Key in a Hash

s = map describe ["Banana", "Martini"]
    where describe name = name ++ " is a " ++ kind name ++ "."
          kind name = if isJust (lookup name food_color) then "food" else "drink"
-- => ["Banana is a food.","Martini is a drink."]

--   isJust (lookup name food_color)   
-- can be replaced by
--   name `elem` map fst food_color

-- for Data.Map, replace (isJust . lookup) with Map.member:
s' = map describe ["Banana", "Martini"]
    where describe name = name ++ " is a " ++ kind name ++ "."
          kind name = if Map.member name food_color' then "food" else "drink"
-- => ["Banana is a food.","Martini is a drink."]

Deleting from a Hash

-- removing key on association list is done with a simple filter:
food_color2 = filter ((/= "Banana") . fst) food_color

print_foods foods = do putStrLn $ "Keys: " ++ (unwords $ map fst foods)
                       putStrLn $ "Values: " ++ (unwords $ map snd foods)

before = print_foods food_color
-- Keys: Apple Banana Lemon Carrot
-- Values: red yellow yellow orange
after = print_foods food_color2
-- Keys: Apple Lemon Carrot
-- Values: red yellow orange


-- using Data.Map
food_color2' = Map.delete "Banana" food_color'

print_foods' foods = do putStrLn $ "Keys: " ++ (unwords $ Map.keys foods)
                        putStrLn $ "Values: " ++ (unwords $ Map.elems foods)

before' = print_foods' food_color'
-- Keys: Apple Banana Carrot Lemon
-- Values: red yellow orange yellow
after' = print_foods' food_color2'
-- Keys: Apple Carrot Lemon
-- Values: red orange yellow

-- to remove a list of keys, simply use foldr:
food_color3' = foldr Map.delete food_color' [ "Banana", "Apple", "Cabbage" ]

Traversing a Hash

t  = map (\(food, color) -> food ++ " is " ++ color) food_color
-- ["Apple is red","Banana is yellow","Lemon is yellow","Carrot is orange"]
t2 = map (\(food, color) -> food ++ " is " ++ color) $ sort food_color
-- ["Apple is red","Banana is yellow","Carrot is orange","Lemon is yellow"]

-- using Data.Map
t'  = map (\(food, color) -> food ++ " is " ++ color) $ Map.assocs food_color'
-- ["Apple is red","Banana is yellow","Carrot is orange","Lemon is yellow"]

t2' = map format_it $ sort $ Map.assocs food_color'
    where format_it (food, color) = food ++ " is " ++ color
-- ["Apple is red","Banana is yellow","Carrot is orange","Lemon is yellow"]

-- you can of course get keys first, and use Map.!
t2' = map format_it $ sort $ Map.keys food_color'
    where format_it food = food ++ " is " ++ food_color' Map.! food

-- countfrom - count number of messages from each sender
#!/usr/bin/runghc

import System
import Maybe
import List

any_input [] = getContents
any_input (f:_) = readFile f

get_senders = catMaybes . map get_sender
    where get_sender line = if prefix `isPrefixOf` line 
                            then Just (drop (length prefix) line) 
                            else Nothing
          prefix = "From: "

main = do s <- getArgs >>= any_input
          let senders = group $ sort $ get_senders (lines s)
          mapM_ (putStrLn . format_it) $ senders
    where format_it froms@(name:_) = name ++ ": " ++ show (length froms)

Printing a Hash

-- show gives a string representation
s = show food_color
t = putStrLn s
-- => [("Apple","red"),("Banana","yellow"),("Lemon","yellow"),("Carrot","orange")]

s2  = map (\(food, color) -> food ++ " => " ++ color) food_color
-- see "Traversing a Hash" for more

Retrieving from a Hash in Insertion Order

-- association list as explained in previous sections keeps order

Hashes with Multiple Values Per Key

-- one can use simple list functions
lookupAll k = map snd . filter ((== k) . fst)

color_to_foods color = map fst $ filter ((== color) . snd) food_color
yellow_food = color_to_foods "yellow" -- ["Banana","Lemon"]

-- one can precompute it:
food_of_color = map (\color -> (color, color_to_foods color)) colors
                where colors = nub (map snd food_color)
-- => [("red",["Apple"]), ("yellow",["Banana","Lemon"]), ("orange",["Carrot"])]
-- then use it
yellow_food_ = fromMaybe [] (lookup "yellow" food_of_color)

-- another way to pre-compute
assocListGroupByKey :: Ord a => [(a,b)] -> [(a,[b])]
assocListGroupByKey = map factorize . groupBy same_key . sortBy (comparing fst)
    where factorize l@((user, _) : _) = (user, map snd l)          
          same_key (a,_) (b,_) = a == b
-- you can write: factorize = fst . head Arrow.&&& map snd
food_of_color = assocListGroupByKey food_color

-- or one can use a full fledged data structure
-- (need: import qualified Data.Set as Set)
food_of_color' = foldl (\l (v, k) -> Map.alter (add_it v) k l) Map.empty food_color
    where add_it val prev = Just (Set.insert val $ fromMaybe Set.empty prev)
yellow_food' = food_of_color' Map.! "yellow"
-- => Set.fromList ["Banana","Lemon"]

-- for every logged-in user, display the ttys on which it is logged
t = do Right s <- readProcess "who" [] ""
       let l = assocListGroupByKey $ map (parse_it . words) (lines s)
       mapM_ (putStrLn . format_it) l
    where format_it l (user, ttys) = user ++ ": " ++ unwords ttys
          parse_it (user : tty : _) = (user, tty)

Inverting a Hash

surname = [ ("Mickey", "Mantle"), ("Babe", "Ruth") ];
first_name = map swap surname
swap (a,b) = (b,a)

Just name = lookup "Mantle" first_name -- "Mickey"

-- see also section "Hashes with Multiple Values Per Key"

Sorting a Hash

t = map (\(food, color) -> food ++ " is " ++ color) $ sort food_color
-- ["Apple is red","Banana is yellow","Carrot is orange","Lemon is yellow"]

-- sorting on length of color (sortByKey is in "Helpers")
t2 = map (\(food, color) -> food ++ " is " ++ color) $ sortByKey (length . snd) food_color
-- => ["Apple is red","Banana is yellow","Lemon is yellow","Carrot is orange"]

-- using Data.Map, the simplest is to use Map.assocs and do as above
t' = map format_it $ sort $ Map.assocs food_color'
    where format_it (food, color) = food ++ " is " ++ color
-- ["Apple is red","Banana is yellow","Carrot is orange","Lemon is yellow"]

-- you can of course get keys first, and use Map.!
t_' = map format_it $ sort $ Map.keys food_color'
    where format_it food = food ++ " is " ++ food_color' Map.! food

Merging Hashes

-- food_color as per the introduction
drink_color = [ ("Galliano", "yellow"), ("Mai Tai", "blue") ]
ingested_color = drink_color ++ food_color
-- => [("Galliano","yellow"),("Mai Tai","blue"),("Apple","red"),("Banana","yellow"),("Lemon","yellow"),("Carrot","orange")]
-- (it doesn't remove duplicates, see section "Finding Common or Different Keys in Two Hashes" for a solution removing duplicates)

-- using Data.Map
drink_color' = Map.fromList drink_color
ingested_color' = Map.union drink_color' food_color'
-- => Map.fromList [("Apple","red"),("Banana","yellow"),("Carrot","orange"),("Galliano","yellow"),("Lemon","yellow"),("Mai Tai","blue")]

Finding Common or Different Keys in Two Hashes

-- food_color as per the introduction
citrus_color = [ ("Lemon", "yellow"), ("Orange", "orange"), ("Lime", "green") ]
common = food_color ++ citrus_color
-- => [("Apple","red"),("Banana","yellow"),("Lemon","yellow"),("Carrot","orange"),("Lemon","yellow"),("Orange","orange"),("Lime","green")]
-- the duplicates are still here, to remove them:
nubByKey f = nubBy (\a b -> f a == f b)
common_ = nubByKey fst common
-- => [("Apple","red"),("Banana","yellow"),("Lemon","yellow"),("Carrot","orange"),("Orange","orange"),("Lime","green")]
-- no more duplicates

non_citrus = food_color \\ citrus_color
-- => [("Apple","red"),("Banana","yellow"),("Carrot","orange")]

-- using Data.Map
citrus_color' = Map.fromList citrus_color
common' = Map.union food_color' citrus_color'
-- => Map.fromList [("Apple","red"),("Banana","yellow"),("Carrot","orange"),("Lemon","yellow"),("Lime","green"),("Orange","orange")]
non_citrus' = Map.difference food_color' citrus_color'
-- => Map.fromList [("Apple","red"),("Banana","yellow"),("Carrot","orange")]

Hashing References

-- no problem here, haskell handles any kind of object for key-ing

name = mapM open_files [ "/etc/termcap", "/etc/passwd", "/bin/cat" ]
       where open_files file = do fh <- openFile file ReadMode
                                  return (fh, file)
t = do files <- fmap (map snd) name
       putStrLn $ "open files: " ++ intercalate ", " files
t2 = name >>= mapM_ display_size
    where display_size (fh, file) = 
              do size <- hFileSize fh
                 printf "%s is %d bytes long.\n" file size

Presizing a Hash

-- presizing is only useful for mutable maps

-- about performance, one may use Map.IntMap when the keys are Int

Finding the Most Common Anything

-- from http://haskell.org/haskellwiki/99_questions/31_to_41#Problem_36
encode :: Eq a => [a] -> [(a, Int)]
encode = map (\x -> (head x, length x)) . group

s = encode $ sort "Apple Banana Lemon"
-- => [(' ',2),('A',1),('B',1),('L',1),('a',3),('e',2),('l',1),('m',1),('n',3),('o',1),('p',2)]

Representing Relationships Between Data

import Data.List

father = [ ("Cain",      "Adam")
         , ("Abel",      "Adam")
         , ("Seth",      "Adam")
         , ("Enoch",     "Cain")
         , ("Irad",      "Enoch")
         , ("Mehujael",  "Irad")
         , ("Methusael", "Mehujael")
         , ("Lamech",    "Methusael")
         , ("Jabal",     "Lamech")
         , ("Jubal",     "Lamech")
         , ("Tubalcain", "Lamech")
         , ("Enos",      "Seth")
         ]

upline person = person : parents
    where parents = unfoldr (\name -> fmap (\x -> (x,x)) $ lookup name father) person
-- you can replace (\x -> (x,x)) with (join (,))   ("join" is from Control.Monad)

from_Irad = upline "Irad"       -- ["Irad","Enoch","Cain","Adam"]

ifNull default_ [] = default_
ifNull _ l = l

downline person = person ++ " begat " ++ (ifNull "Nobody" $ intercalate ", " $ children)
    where children = map fst $ filter ((== person) . snd) father

to_Tubalcain = downline "Tubalcain" -- Tubalcain begat Nobody
to_Adam      = downline "Adam"      -- Adam begat Cain, Abel, Seth


-- the perl example is doing something quite weird,
-- include_free is the list of files which are not included by other files,
-- but that do include a file.
dir = "/usr/include"
include_free =
    do files <- getDirectoryContents dir
       let h_files = filter (isSuffixOf ".h") files
       file2includes <- mapM get_includes h_files
       -- getting the files that do include something:
       let files_ = map fst $ filter (not . null . snd) file2includes
       -- removing files included by other files:
       return $ files_ \\ (concatMap snd file2includes)
    where get_includes h_file = fmap (with_content h_file) (readFile $ dir ++ "/" ++ h_file)
          with_content h_file content = (h_file, content_to_includes content)
          content_to_includes = catMaybes . map (fmap head . matchRegex regex) . lines
          regex = mkRegex "^\\s*#\\s*include\\s*<([^>]+)>"

Program: dutree

import Data.Ord (comparing)
import qualified Data.Tree as Tree

dirname_basename file = (if dir == "" then "." else (reverse $ tail dir), reverse base)
    where (base, dir) = break (== '/') $ reverse file

dirname = fst . dirname_basename
basename = snd . dirname_basename

duProcessFakedInput = 
    "11732   groovysoap/lib\n\
    \68      groovysoap/src/main/groovy/net/soap\n\
    \71      groovysoap/src/main/groovy/net\n\
    \74      groovysoap/src/main/groovy\n\
    \77      groovysoap/src/main\n\
    \9       groovysoap/src/examples\n\
    \8       groovysoap/src/examples/groovy\n\
    \102     groovysoap/src/test\n\
    \202     groovysoap/src\n\
    \11966   groovysoap\n"

sortByKeyRev f = map snd . sortBy (flip $ comparing fst) . map (\x -> (f x, x))

dirs_tree :: Tree.Tree (String, Int)
dirs_tree = Tree.unfoldTree get_info (last dir2size)
    where get_info (dir, size) = 
              let subdirs = filter ((== dir) . dirname . fst) dir2size in
              ((dir, size), 
               if null subdirs then [] else 
                   sortByKeyRev snd $ subdirs ++ [ (dir ++ "/.", size - sum (map snd subdirs)) ])
          dir2size = map ((\(size:dir:_) -> (dir, read size)) . words) $ lines duProcessFakedInput

t = putStr $ Tree.drawTree$ fmap (\(f,s)-> show s ++ " " ++ basename f) dirs_tree
-- using Tree.drawTree, the result is not as expected, but it's quite nice:
--
-- 11966 groovysoap
-- |
-- +- 11732 lib
-- |
-- +- 202 src
-- |  |
-- |  +- 102 test
-- |  |
-- |  +- 77 main
-- |  |  |
-- |  |  +- 74 groovy
-- |  |  |  |
-- |  |  |  +- 71 net
-- |  |  |  |  |
-- |  |  |  |  +- 68 soap
-- |  |  |  |  |
-- |  |  |  |  `- 3 .
-- |  |  |  |
-- |  |  |  `- 3 .
-- |  |  |
-- |  |  `- 3 .
-- |  |
-- |  +- 14 .
-- |  |
-- |  `- 9 examples
-- |     |
-- |     +- 8 groovy
-- |     |
-- |     `- 1 .
-- |
-- `- 32 .

-- now the exact same solution 
mydraw :: Tree.Tree (String, Int) -> String
mydraw tree = unlines $ drawSubTrees [tree]
  where drawSubTrees [] = []
        drawSubTrees l@(Tree.Node (_,size_max) _ : _) =
            let format = "%" ++ show (length $ show size_max) ++ "s %s" in
            let draw_one (Tree.Node (name, size) sub) =
                    let name' = basename name in
                    printf format (show size) name' :
                           map (\s -> printf format "|" (map (\_ -> ' ') name') ++ s) (drawSubTrees sub) in
            concatMap draw_one l

t2 = putStr $ mydraw dirs_tree

--11966 groovysoap
--    |           11732 lib
--    |             202 src
--    |               |    102 test
--    |               |     77 main
--    |               |      |     74 groovy
--    |               |      |      |       71 net
--    |               |      |      |        |    68 soap
--    |               |      |      |        |     3 .
--    |               |      |      |        3 .
--    |               |      |      3 .
--    |               |     14 .
--    |               |      9 examples
--    |               |      |         8 groovy
--    |               |      |         1 .
--    |              32 .