5. Hashes

Introduction

//----------------------------------------------------------------------------------
// quotes are optional around the key
age = [ Nat:24, Jules:25, Josh:17 ]

assert age['Nat']  == 24
// alternate syntax
assert age."Jules" == 25

foodColor = [
    Apple:  'red',
    Banana: 'yellow',
    Lemon:  'yellow',
    Carrot: 'orange'
]
assert foodColor.size() == 4
//----------------------------------------------------------------------------------

Adding an Element to a Hash

//----------------------------------------------------------------------------------
foodColor['Lemon'] = 'green'
assert foodColor.size() == 4
assert foodColor['Lemon'] == 'green'
foodColor['Raspberry'] = 'pink'
assert foodColor.size() == 5
//----------------------------------------------------------------------------------

Testing for the Presence of a Key in a Hash

//----------------------------------------------------------------------------------
assert ['Banana', 'Martini'].collect{ foodColor.containsKey(it)?'food':'drink' } == [ 'food', 'drink' ]

age = [Toddler:3, Unborn:0, Phantasm:null]
['Toddler', 'Unborn', 'Phantasm', 'Relic'].each{ key ->
    print "$key: "
    if (age.containsKey(key)) print 'has key '
    if (age.containsKey(key) && age[key]!=null) print 'non-null '
    if (age.containsKey(key) && age[key]) print 'true '
    println ''
}
// =>
// Toddler: has key non-null true
// Unborn: has key non-null
// Phantasm: has key
// Relic:
//----------------------------------------------------------------------------------

Deleting from a Hash

//----------------------------------------------------------------------------------
assert foodColor.size() == 5
foodColor.remove('Banana')
assert foodColor.size() == 4
//----------------------------------------------------------------------------------

Traversing a Hash

//----------------------------------------------------------------------------------
hash = [:]
hash.each { key, value ->
    // do something with key and value
}

hash.each { entry ->
    // do something with entry
}

hash.keySet().each { key ->
    // do something with key
}

sb = new StringBuffer()
foodColor.each { food, color ->
    sb << "$food is $color\n"
}
assert '\n' + sb.toString() == '''
Lemon is green
Carrot is orange
Apple is red
Raspberry is pink
'''

foodColor.each { entry ->
    assert entry.key.size() > 4 && entry.value.size() > 2
}

foodColorsSortedByFood = []
foodColor.keySet().sort().each { k -> foodColorsSortedByFood << foodColor[k] }
assert foodColorsSortedByFood == ["red", "orange", "green", "pink"]

fakedInput = '''
From: someone@somewhere.com
From: someone@spam.com
From: someone@somewhere.com
'''

from = [:]
fakedInput.split('\n').each{
    matcher = (it =~ /^From:\s+([^\s>]*)/)
    if (matcher.matches()) {
        sender = matcher[0][1]
        if (from.containsKey(sender)) from[sender] += 1
        else from[sender] = 1
    }
}

// More useful to sort by number of received mail by person
from.entrySet().sort { a,b -> b.value<=>a.value}.each { e->
    println "${e.key}: ${e.value}"
}
// =>
// someone@somewhere.com: 2
// someone@spam.com: 1
//----------------------------------------------------------------------------------

Printing a Hash

//----------------------------------------------------------------------------------
hash = [a:1, b:2, c:3]
// Map#toString already produce a pretty decent output:
println hash
// => ["b":2, "a":1, "c":3]

// Or do it by longhand for customised formatting
hash.each { k,v -> println "$k => $v" }
// =>
// b => 2
// a => 1
// c => 3
//----------------------------------------------------------------------------------

Retrieving from a Hash in Insertion Order

//----------------------------------------------------------------------------------
// java.util.LinkedHashMap "maintains a doubly-linked list running through all of its entries.
// This linked list defines the iteration ordering, which is normally the order in which keys
// were inserted into the map (insertion-order)".
foodColor = new LinkedHashMap()
foodColor['Banana'] = 'Yellow'
foodColor['Apple'] = 'Green'
foodColor['Lemon'] = 'Yellow'

foodColor.keySet().each{ key -> println key }
// =>
// Banana
// Apple
// Lemon
//----------------------------------------------------------------------------------

Hashes with Multiple Values Per Key

//----------------------------------------------------------------------------------
foodsOfColor = [ Yellow:['Banana', 'Lemon'], Green:['Apple'] ]
foodsOfColor['Green'] += 'Melon'
assert foodsOfColor == ["Green":["Apple", "Melon"], "Yellow":["Banana", "Lemon"]]
//----------------------------------------------------------------------------------

Inverting a Hash

//----------------------------------------------------------------------------------
surname = [Mickey: 'Mantle', Babe: 'Ruth']
assert surname.findAll{ it.value == 'Mantle' }.collect{ it.key } == ["Mickey"]

firstname = [:]
surname.each{ entry -> firstname[entry.value] = entry.key }
assert firstname == ["Ruth":"Babe", "Mantle":"Mickey"]

// foodfindScript:
#!/usr/bin/groovy
// usage: foodfind food_or_color"
color = [Apple:'red', Banana:'yellow', Lemon:'yellow', Carrot:'orange']
given = args[0]
if (color.containsKey(given))
    println "$given is a food with color ${color[given]}."
if (color.containsValue(given)) {
    // could use commify() here - see 4.2
    foods = color.findAll{it.value == given}.collect{it.key}
    join = foods.size() == 1 ? 'is a food' : 'are foods'
    println "${foods.join(', ')} $join with color ${given}."
}
// foodfind red
// => Apple is a food with color red.
// foodfind yellow
// => Lemon, Banana are foods with color yellow.
// foodfind Carrot
// => Carrot is a food with color orange.
//----------------------------------------------------------------------------------

Sorting a Hash

//----------------------------------------------------------------------------------
foodColor = [Apple:'red', Carrot:'orange', Banana:'yellow', Cherry:'black']

// Sorted by keys
assert foodColor.keySet().sort() == ["Apple", "Banana", "Carrot", "Cherry"]
// you could now iterate through the hash with the sorted keys
assert foodColor.values().sort() == ["black", "orange", "red", "yellow"]
assert foodColor.values().sort{it.size()} == ["red", "black", "orange", "yellow"]
//----------------------------------------------------------------------------------

Merging Hashes

//----------------------------------------------------------------------------------
//merged = a.clone.update(b)        # because Hash#update changes object in place

drinkColor = [Galliano:'yellow', 'Mai Tai':'blue']
ingestedColor = [:]
ingestedColor.putAll(drinkColor)
// overrides any common keys
ingestedColor.putAll(foodColor)

totalColors = ingestedColor.values().sort().unique()
assert totalColors == ["black", "blue", "orange", "red", "yellow"]
//----------------------------------------------------------------------------------

Finding Common or Different Keys in Two Hashes

//----------------------------------------------------------------------------------
foodColor['Lemon']='yellow'
citrusColor = [Lemon:'yellow', Orange:'orange', Lime:'green']
println foodColor
println citrusColor
common = foodColor.keySet().intersect(citrusColor.keySet())
assert common == ["Lemon"]

foodButNotCitrus = foodColor.keySet().toList() - citrusColor.keySet().toList()
assert foodButNotCitrus == ["Carrot", "Apple", "Banana", "Cherry"]
//----------------------------------------------------------------------------------

Hashing References

//----------------------------------------------------------------------------------
// no problem here, Groovy handles any kind of object for key-ing
//----------------------------------------------------------------------------------

Presizing a Hash

//----------------------------------------------------------------------------------
// Groovy uses Java implementations for storing hashes and these
// support setting an initial capacity and load factor (which determines
// at what point the hash will be resized if needed)
hash = [:]                              // Groovy shorthand gets defaults
hash = new HashMap()                    // default capacity and load factor
println hash.capacity()
// => 16
('A'..'Z').each{ hash[it] = it }
println hash.capacity()
// => 64
hash = new HashMap(100)                 // initial capacity of 100 and default load factor
hash = new HashMap(100, 0.8f)    // initial capacity of 100 and 0.8 load factor
//----------------------------------------------------------------------------------

Finding the Most Common Anything

//----------------------------------------------------------------------------------
count = [:]
letters = []
foodColor.values().each{ letters.addAll((it as String[]).toList()) }
letters.each{ if (count.containsKey(it)) count[it] += 1 else count[it] = 1 }
assert count == ["o":3, "d":1, "k":1, "w":2, "r":2, "c":1, "l":5, "g":1, "b":1, "a":2, "y":2, "n":1, "e":4]
//----------------------------------------------------------------------------------

Representing Relationships Between Data

//----------------------------------------------------------------------------------
father = [
    Cain:'Adam',
    Abel:'Adam',
    Seth:'Adam',
    Enoch:'Cain',
    Irad:'Enoch',
    Mehujael:'Irad',
    Methusael:'Mehujael',
    Lamech:'Methusael',
    Jabal:'Lamech',
    Jubal:'Lamech',
    Tubalcain:'Lamech',
    Enos:'Seth'
]

def upline(person) {
    while (father.containsKey(person)) {
        print person + ' '
        person = father[person]
    }
    println person
}

upline('Irad')
// => Irad Enoch Cain Adam

children = [:]
father.each { k,v ->
    if (!children.containsKey(v)) children[v] = []
    children[v] += k
}
def downline(person) {
    println "$person begat ${children.containsKey(person)?children[person].join(', '):'Nobody'}.\n"
}
downline('Tubalcain')
// => Tubalcain begat Nobody.
downline('Adam')
// => Adam begat Abel, Seth, Cain.

// This one doesn't recurse through subdirectories (as a simplification)
// scriptToFindIncludeFilesWhichContainNoIncludesScript:
dir = '<path_to_usr/include>'
includes = [:]
new File(dir).eachFile{ file ->
    if (file.directory) return
    file.eachLine{ line ->
        matcher = (line =~ '^\\s*#\\s*include\\s*<([^>]+)>')
        if (matcher.matches()) {
            if (!includes.containsKey(file.name)) includes[file.name] = []
            includes[file.name] += matcher[0][1]
        }
    }
}
// find referenced files which have no includes; assumes all files
// were processed and none are missing
println includes.values().sort().flatten().unique() - includes.keySet()
//----------------------------------------------------------------------------------

Program: dutree

//----------------------------------------------------------------------------------
// dutree - print sorted indented rendition of du output
// obtaining this input is not shown, it is similar to other examples
// on some unix systems it will be: duProcessFakedInput = "du options".process().text
duProcessFakedInput = '''
11732   groovysoap/lib
68      groovysoap/src/main/groovy/net/soap
71      groovysoap/src/main/groovy/net
74      groovysoap/src/main/groovy
77      groovysoap/src/main
9       groovysoap/src/examples
8       groovysoap/src/examples/groovy
102     groovysoap/src/test
202     groovysoap/src
11966   groovysoap
'''

// The DuNode class collects all information about a directory,
class DuNode {
    def name
    def size
    def kids = []

    // support for sorting nodes with side
    def compareTo(node2) { size <=> node2.size }

    def getBasename() {
        name.replaceAll(/.*\//, '')
    }

    // returns substring before last "/", otherwise null
    def getParent() {
        def p = name.replaceAll(/\/[^\/]+$/,'')
        return (p == name) ? null : p
    }
}

// The DuTree does the actual work of
// getting the input, parsing it, building up a tree
// and formatting it for output
class DuTree {
    def input
    def topdir
    def nodes = [:]
    def dirsizes = [:]
    def kids = [:]

    // get a node by name, create it if it does not exist yet
    def getOrCreateNode(name) {
        if (!nodes.containsKey(name))
            nodes[name] = new DuNode(name:name)
        return nodes[name]
    }

    // figure out how much is taken in each directory
    // that isn't stored in the subdirectories. Add a new
    // fake kid called "." containing that much.
    def getDots(node) {
        def cursize = node.size
        for (kid in node.kids) {
            cursize -=  kid.size
            getDots(kid)
        }
        if (node.size != cursize) {
            def newnode = getOrCreateNode(node.name + "/.")
            newnode.size = cursize
            node.kids += newnode
        }
    }

    def processInput() {
        def name = ''
        input.split('\n').findAll{it.trim()}.each{ line ->
            def tokens = line.tokenize()
            def size = tokens[0]
            name = tokens[1]
            def node = getOrCreateNode(name)
            node.size = size.toInteger()
            nodes[name] = node
            def parent = node.parent
            if (parent)
                getOrCreateNode(parent).kids << node
        }
        topdir = nodes[name]
    }

    // recursively output everything
    // passing padding and number width as well
    // on recursive calls
    def output(node, prefix='', width=0) {
        def line = node.size.toString().padRight(width) + ' ' + node.basename
        println (prefix + line)
        prefix += line.replaceAll(/\d /, '| ')
        prefix = prefix.replaceAll(/[^|]/, ' ')
        if (node.kids.size() > 0) {    // not a bachelor node
            kids = node.kids
            kids.sort{ a,b -> b.compareTo(a) }
            width = kids[0].size.toString().size()
            for (kid in kids) output(kid, prefix, width)
        }
    }
}

tree = new DuTree(input:duProcessFakedInput)
tree.processInput()
tree.getDots(tree.topdir)
tree.output(tree.topdir)
// =>
// 11966 groovysoap
//     |           11732 lib
//     |           202   src
//     |             |      102 test
//     |             |      77  main
//     |             |       |      74 groovy
//     |             |       |       |       71 net
//     |             |       |       |        |    68 soap
//     |             |       |       |        |    3  .
//     |             |       |       |       3  .
//     |             |       |      3  .
//     |             |      14  .
//     |             |      9   examples
//     |             |      |           8 groovy
//     |             |      |           1 .
//     |           32    .
//----------------------------------------------------------------------------------