5. Hashes

Introduction

# Build the whole hash at once with a literal; each pair is written
# key => value (a plain comma between key and value is not valid syntax).
age = { "Nat"   => 24,
        "Jules" => 25,
        "Josh"  => 17  }

# Or fill the hash in one key at a time.
age["Nat"]   = 24
age["Jules"] = 25
age["Josh"]  = 17

# Each food name (key) is paired with the name of its color (value).
food_color = {
    "Apple" => "red",    "Banana" => "yellow",
    "Lemon" => "yellow", "Carrot" => "orange",
}

# In Ruby, you cannot omit the double or single quotes around string keys
# while manipulating hashes

Adding an Element to a Hash

# General form: assigning through [] adds the key, or replaces its value
# when the key is already present.
hash[key] = value

# Concrete case: record one more food, then list every key known so far.
food_color.store("Raspberry", "pink")
puts "Known foods:"
puts food_color.keys

Testing for the Presence of a Key in a Hash

# does hash have a value for key ?  (Hash#key? is the same method as
# Hash#has_key?)
if hash.key?(key)
    # it exists
else
    # it doesn't
end

# A name listed in food_color is reported as a food, any other name as a drink.
%w[Banana Martini].each do |name|
    kind = food_color.key?(name) ? "food" : "drink"
    puts "#{name} is a #{kind}"
end

# Three stored ages - a positive number, zero and nil - to contrast with a
# name ('Relic') that is never stored at all.
age = {
    'Toddler'  => 3,
    'Unborn'   => 0,
    'Phantasm' => nil,
}

%w[Toddler Unborn Phantasm Relic].each do |thing|
    value = age[thing]
    flags = []
    flags << "Has-key " if age.key?(thing)
    flags << "True " if value                       # only nil and false are falsy
    flags << "Nonzero " if value && value.nonzero?  # nonzero? is nil for 0
    print "#{thing}: ", flags.join, "\n"
end

#=>
# Toddler: Has-key True Nonzero 
# Unborn: Has-key True 
# Phantasm: Has-key 
# Relic: 

# You use Hash#has_key? when you use Perl's exists -> it checks
# for existence of a key in a hash.
# All Numeric are "True" in ruby, so the test doesn't have the
# same semantics as in Perl; you would use Numeric#nonzero? to
# achieve the same semantics (false if 0, true otherwise).

Deleting from a Hash

# Remove the "Banana" entry from food_color.  Hash#delete returns the value
# that was removed, or nil when the key was not present.
food_color.delete("Banana")

Traversing a Hash

# Template: visit every key/value pair.
hash.each do |key, value|
    # do something with key and value
end

# Template: visit the keys only.
hash.each_key do |key|
    # do something with key
end

# Each pair arrives already split into food and color.
food_color.each_pair { |food, color| puts "#{food} is #{color}" }

# Keys only; the color is looked up again for every food.
food_color.each_key { |food| puts "#{food} is #{food_color[food]}" }

# IMO this demonstrates that OO style is by far more readable
food_color.keys.sort.each do |food|
    puts "#{food} is #{food_color[food]}."
end

#-----------------------------
#!/usr/bin/ruby
# countfrom - count number of messages from each sender
#
# Reads lines with Kernel#gets (the files named on the command line, or
# standard input) and prints one "count: sender" line for every distinct
# value found after "From: " at the start of a line.

# Default value is 0, so an unseen sender can be incremented directly
from = Hash.new(0)
while (line = gets)
    # Match explicitly against the line just read instead of relying on an
    # implicit match of a bare regexp literal against $_, so that $1 always
    # belongs to this line.
    from[$1] += 1 if line =~ /^From: (.*)/
end

# More useful to sort by number of received mail by person (largest first)
from.sort_by { |_sender, count| -count }.each do |sender, count|
    puts "#{count}: #{sender}"
end
#-----------------------------

Printing a Hash

# You may use the built-in 'inspect' method this way:
p hash

# Or do it the Cookbook way:
hash.each_pair { |key, value| puts "#{key} => #{value}" }

# Sorted by keys (Hash#sort yields [key, value] pairs, split here by the block)
hash.sort.each { |key, value| puts "#{key} => #{value}" }
# Sorted by values (the comparison looks only at the second item of each pair)
hash.sort { |(_, left), (_, right)| left <=> right }.each do |key, value|
    puts "#{key} => #{value}"
end

Retrieving from a Hash in Insertion Order

# Since Ruby 1.9 the built-in Hash remembers the order in which its keys
# were inserted, so no additional library is needed:
hash = {}
# manipulate hash
keys = hash.keys                # keys is in insertion order

# initialize
food_color = {}
food_color["Banana"] = "Yellow"
food_color["Apple"]  = "Green"
food_color["Lemon"]  = "Yellow"

puts "In insertion order, the foods are:"
food_color.each_key { |food|
    puts "  #{food}"
}

puts "Still in insertion order, the foods' colors are:"
food_color.each { |food, color|
    puts "#{food} is colored #{color}."
}

Hashes with Multiple Values Per Key

# Map every logged-in user to the list of terminals that user occupies.
ttys = {}
# Backticks return the command's output as a single String; a String cannot
# be looped over directly, so walk it one line at a time.
`who`.each_line do |entry|
    user, tty = entry.split
    next if user.nil?                        # blank line: nothing to record
    (ttys[user] ||= []) << tty               # first sighting creates the list
end
ttys.keys.sort.each { |k|
    puts "#{k}: #{commify_series(ttys[k])}"  # from 4.2
}

Inverting a Hash

surname = { "Mickey" => "Mantle", "Babe" => "Ruth" }
# Hash#key returns the first key stored with the given value (nil when no
# entry has that value).  It replaces Hash#index, which current Ruby no
# longer provides.
puts surname.key("Mantle")

# If you really needed to 'invert' the whole hash, use Hash#invert

#-----------------------------
#!/usr/bin/ruby -w
# foodfind - find match for food or color
#
# Takes one command-line argument and reports it as a food name, as a
# color, or as both when it matches both.  Raises with a usage message
# when no argument is given.

given = ARGV.shift or raise "usage: foodfind food_or_color"

color = {
    "Apple"  => "red",
    "Banana" => "yellow",
    "Lemon"  => "yellow",
    "Carrot" => "orange",
}

# The argument names a food: report its color.
if color.key?(given)
    puts "#{given} is a food with color #{color[given]}."
end
# The argument names a color: Hash#key finds the first food stored with
# that color (Hash#index, used for this formerly, is gone from current Ruby).
if color.value?(given)
    puts "#{color.key(given)} is a food with color #{given}."
end
#-----------------------------

Sorting a Hash

# Sorted by keys (Hash#sort gives an Array of pairs made of each key,value)
food_color.sort.each do |food, color|
    puts "#{food} is #{color}."
end

# Sorted by values (compare the second item of each pair)
food_color.sort { |(_, left), (_, right)| left <=> right }.each do |food, color|
    puts "#{food} is #{color}."
end

# Sorted by length of values
food_color.sort { |(_, left), (_, right)| left.length <=> right.length }.each do |food, color|
    puts "#{food} is #{color}."
end

Merging Hashes

# Hash#merge returns a new hash and leaves both operands untouched
# (Hash#update, by contrast, changes its receiver in place).
merged = a.merge(b)

drink_color = { "Galliano"  => "yellow", "Mai Tai" => "blue" }
# When a key occurs in both hashes the argument's value wins, so food_color
# takes precedence over drink_color here.
ingested_color = drink_color.merge(food_color)

# Merge by hand so that duplicate keys are reported; the first definition
# encountered is the one that is kept.
substance_color = {}
[food_color, drink_color].each do |colors|
    colors.each do |name, color|
        if substance_color.key?(name)
            puts "Warning: #{name} seen twice.  Using the first definition."
            next
        end
        # Store the color itself, not a placeholder, so the merged hash
        # can be used like its sources.
        substance_color[name] = color
    end
end

Finding Common or Different Keys in Two Hashes

# Keys present in both hashes (Array#& keeps the elements common to both arrays)
common = hash1.keys & hash2.keys

# Keys of hash1 that are absent from hash2 (Array#- drops every element that
# also appears in the right-hand array)
this_not_that = hash1.keys - hash2.keys

Hashing References

# no problem here, Ruby handles any kind of object for key-ing
# (it uses Object#hash, which by default is derived from the object's identity)

Presizing a Hash

# Not possible in older versions of Ruby; since Ruby 3.4,
# Hash.new(capacity: n) preallocates room for n entries

Finding the Most Common Anything

# Hash.new(0) answers 0 for every key that has not been stored yet.  Sharing
# that single default is safe here because `+= 1` stores a new integer under
# the key instead of modifying the default itself.
count = array.each_with_object(Hash.new(0)) do |element, tally|
    tally[element] += 1
end

Representing Relationships Between Data

# Maps each person (key) to that person's father (value); pairs are written
# key => value (a plain comma between key and value is not valid syntax).
father = {
    "Cain"      => "Adam",
    "Abel"      => "Adam",
    "Seth"      => "Adam",
    "Enoch"     => "Cain",
    "Irad"      => "Enoch",
    "Mehujael"  => "Irad",
    "Methusael" => "Mehujael",
    "Lamech"    => "Methusael",
    "Jabal"     => "Lamech",
    "Jubal"     => "Lamech",
    "Tubalcain" => "Lamech",
    "Enos"      => "Seth",
}

# For every name read from input, print that name followed by each recorded
# ancestor in turn, all on one line.
while (name = gets)
    # Strip the line ending explicitly: a bare `chomp` acting on $_ is only
    # available when ruby runs with -n or -p.
    name = name.chomp
    # Climb the father table until a name has no recorded father.
    while name
        print name, " "
        name = father[name]
    end
    puts
end

# Invert the father table: each father maps to the list of his children.
children = {}
father.each { |child, dad| (children[dad] ||= []) << child }

# For every name read from input, list that person's children
# ("Nobody" when none are recorded).
while (name = gets)
    # Strip the line ending explicitly: a bare `chomp` acting on $_ is only
    # available when ruby runs with -n or -p.
    name = name.chomp
    offspring = children[name] || ['Nobody']
    puts "#{name} begat #{offspring.join(', ')}."
end

# Map every name found in an `#include <...>` line to the list of files
# that contain such a line.
includes = {}
files.each do |path|
    begin
        IO.readlines(path).each do |line|
            header = line[/^\s*#\s*include\s*<([^>]+)>/, 1]
            next unless header
            (includes[header] ||= []) << path
        end
    rescue SystemCallError => error
        # an unreadable file is reported and skipped; the scan continues
        $stderr.puts "#{error} (skipping)"
    end
end

# Files that include something but are never named in an #include themselves.
include_free = includes.values.flatten.uniq - includes.keys

Program: dutree

# dutree - print sorted indented rendition of du output
#% dutree
#% dutree /usr
#% dutree -a
#% dutree -a /bin

# The DuNode class collects all information about a directory,
# and provides some convenience methods
class DuNode

    attr_reader :name
    attr_accessor :size, :kids

    # name is the node's path; a new node starts with size 0 and no kids
    def initialize(name)
        @name = name
        @size = 0
        @kids = []
    end

    # support for sorting nodes by size: the result of <=> between this
    # node's size and node2's size
    def size_compare(node2)
        size <=> node2.size
    end

    # the name with everything up to and including the last "/" removed
    def basename
        name.sub(/.*\//, "")
    end

    # returns substring before last "/", nil if not there
    def parent
        trimmed = name.sub(/\/[^\/]+$/, "")
        trimmed == name ? nil : trimmed
    end

end

# The Dutree class does the actual work of
# getting the input, parsing it, building up a tree
# and formatting it for output
class Dutree

    # node for the last line du reported; nil until input has run, and
    # also nil when du printed nothing
    attr_reader :topdir

    def initialize
        @nodes = {}     # name => DuNode, one entry per name seen so far
    end

    # get a node by name, create it if it does not exist yet
    def get_create_node(name)
        @nodes[name] ||= DuNode.new(name)
    end

    # run du, read in input, save sizes and kids
    # stores last directory read in instance variable topdir
    #
    # arguments is the list of options and paths handed to du.  It is passed
    # as an argument vector, without going through a shell, so names that
    # contain spaces or shell metacharacters reach du unchanged.
    def input(arguments)
        last_name = ""
        IO.popen(["du", *arguments]) do |pipe|
            pipe.each_line do |line|
                # each line is "<size><whitespace><name>"; split only once
                # so that whitespace inside the name survives
                size, last_name = line.chomp.split(/\s+/, 2)
                node = get_create_node(last_name)
                node.size = size.to_i
                parent = node.parent
                get_create_node(parent).kids.push(node) if parent
            end
        end
        @topdir = @nodes[last_name]
    end

    # figure out how much is taken in each directory
    # that isn't stored in the subdirectories. Add a new
    # fake kid called "." containing that much.
    def get_dots(node)
        own_size = node.size
        node.kids.each do |kid|
            own_size -= kid.size
            get_dots(kid)
        end
        # nothing was subtracted (no kids, or kids adding up to zero):
        # no "." entry is needed
        return if own_size == node.size
        dot = get_create_node(node.name + "/.")
        dot.size = own_size
        node.kids.push(dot)
    end

    # recursively output everything
    # passing padding and number width as well
    # on recursive calls
    def output(node, prefix="", width=0)
        line = sprintf("%#{width}d %s", node.size, node.basename)
        puts(prefix + line)
        return if node.kids.empty?      # bachelor node
        # Turn the line just printed into the kids' indentation: put a "|"
        # where the first digit-followed-by-space occurs, then blank out
        # every other character.
        kid_prefix = (prefix + line.sub(/\d /, "| ")).gsub(/[^|]/, " ")
        # largest kid first; its size fixes the column width for all kids
        kids = node.kids.sort! { |a, b| b.size_compare(a) }
        kid_width = kids.first.size.to_s.length
        kids.each { |kid| output(kid, kid_prefix, kid_width) }
    end

end

# Build the tree from du's report on the command-line arguments, then print it.
tree = Dutree.new
tree.input(ARGV)
# When du printed nothing there is no top directory to show; stop with a
# message instead of failing on a nil node.
abort "dutree: no output from du" unless tree.topdir
tree.get_dots(tree.topdir)
tree.output(tree.topdir)