5. Hashes

Introduction

#-----------------------------
# dictionaries
age = {"Nat": 24,
       "Jules": 24,
       "Josh": 17}
#-----------------------------
age = {}
age["Nat"] = 24
age["Jules"] = 25
age["Josh"] = 17
#-----------------------------
food_color = {"Apple":  "red",
              "Banana": "yellow",
              "Lemon":  "yellow",
              "Carrot": "orange"
             }
#-----------------------------
# NOTE: keys must be quoted in Python

Adding an Element to a Hash

mydict[key] = value
#-----------------------------
# food_color defined per the introduction
food_color["Raspberry"] = "pink"
print "Known foods:"
for food in food_color:
    print food

#=> Known foods:
#=> Raspberry
#=> Carrot
#=> Lemon
#=> Apple
#=> Banana
#-----------------------------

Testing for the Presence of a Key in a Hash

# does mydict have a value for key?
if key in mydict:
    pass # it exists
else:
    pass # it doesn't

#-----------------------------
# food_color per the introduction
for name in ("Banana", "Martini"):
    if name in food_color:
        print name, "is a food."
    else:
        print name, "is a drink."

#=> Banana is a food.
#=> Martini is a drink.
#-----------------------------
age = {}
age["Toddler"] = 3
age["Unborn"] = 0
age["Phantasm"] = None

for thing in ("Toddler", "Unborn", "Phantasm", "Relic"):
    print ("%s:"%thing),
    if thing in age:
        print "Exists",
        if age[thing] is not None:
            print "Defined",
        if age[thing]:
            print "True",
    print
#=> Toddler: Exists Defined True
#=> Unborn: Exists Defined
#=> Phantasm: Exists
#=> Relic:
#-----------------------------
# Get file sizes for the requested filenames
import fileinput, os
size = {}
for line in fileinput.input():
    filename = line.rstrip()
    if filename in size:
        continue
    size[filename] = os.path.getsize(filename)

Deleting from a Hash

# remove key and its value from mydict
del mydict[key]
#-----------------------------
# food_color as per Introduction
def print_foods():
    foods = food_color.keys()

    print "Keys:", " ".join(foods)
    print "Values:",

    for food in foods:
        color = food_color[food]
        if color is not None:
            print color,
        else:
            print "(undef)",
    print

print "Initially:"
print_foods()

print "\nWith Banana set to None"
food_color["Banana"] = None
print_foods()

print "\nWith Banana deleted"
del food_color["Banana"]
print_foods()

#=> Initially:
#=> Keys: Carrot Lemon Apple Banana
#=> Values: orange yellow red yellow
#=> 
#=> With Banana set to None
#=> Keys: Carrot Lemon Apple Banana
#=> Values: orange yellow red (undef)
#=> 
#=> With Banana deleted
#=> Keys: Carrot Lemon Apple
#=> Values: orange yellow red
#-----------------------------
for key in ["Banana", "Apple", "Cabbage"]:
    del food_color[key]
#-----------------------------

Traversing a Hash

#-----------------------------
for key, value in mydict.items():  
    pass # do something with key and value

# If mydict is large, use iteritems() instead
for key, value in mydict.iteritems():  
    pass # do something with key and value

#-----------------------------
# DON'T DO THIS:
for key in mydict.keys():
    value = mydict[key]
    # do something with key and value
#-----------------------------
# food_color per the introduction
for food, color in food_color.items():
    print "%s is %s." % (food, color)

# DON'T DO THIS:
for food in food_color:
    color = food_color[food]
    print "%s is %s." % (food, color)

#-----------------------------
print """%(food)s

is

%(color)s.
""" % vars()
#-----------------------------
for food, color in sorted(food_color.items()):
    print "%s is %s." % (food, color)

#-----------------------------
#!/usr/bin/env python
# countfrom - count number of messages from each sender

import sys
if len(sys.argv) > 1:
    infile = open(sys.argv[1])
else:
    infile = sys.stdin

counts = {}
for line in infile:
    if line.startswith("From: "):
        name = line[6:-1]
        counts[name] = counts.get(name, 0) + 1

for (name, count) in sorted(counts.items()):
    print "%s: %s" % (name, count)

#-----------------------------

Printing a Hash

for key, val in mydict.items():
    print key, "=>", val
#-----------------------------
print "\n".join([("%s => %s" % item) for item in mydict.items()])
#-----------------------------
print mydict
#=> {'firstname': 'Andrew', 'login': 'dalke', 'state': 'New Mexico', 'lastname': 'Dalke'}
#-----------------------------
import pprint
pprint.pprint(dict)
#=> {'firstname': 'Andrew',
#=>  'lastname': 'Dalke',
#=>  'login': 'dalke',
#=>  'state': 'New Mexico'}
#-----------------------------

Retrieving from a Hash in Insertion Order

#-----------------------------
class SequenceDict(dict):
    """
    Dictionary that remembers the insertion order.
    The lists returned by keys(), values() and items() are
    in the insertion order.
    """
    def __init__(self, *args):
        self._keys={} # key --> id
        self._ids={}      # id  --> key
        self._next_id=0
        
    def __setitem__(self, key, value):
        self._keys[key]=self._next_id
        self._ids[self._next_id]=key
        self._next_id+=1
        return dict.__setitem__(self, key, value)
    
    def __delitem__(self, key):
        id=self._keys[key]
        del(self._keys[key])
        del(self._ids[id])
        return dict.__delitem__(self, key)

    def values(self):
        values=[]
        ids=list(self._ids.items())
        ids.sort()
        for id, key in ids:
            values.append(self[key])
        return values

    def items(self):
        items=[]
        ids=list(self._ids.items())
        ids.sort()
        for id, key in ids:
            items.append((key, self[key]))
        return items

    def keys(self):
        ids=list(self._ids.items())
        ids.sort()
        keys=[]
        for id, key in ids:
            keys.append(key)
        return keys

    def update(self, d):
        for key, value in d.items():
            self[key]=value

    def clear(self):
        dict.clear(self)
        self._keys={}
        self._ids={}
        self._next_id=0
        
def testSequenceDict():
    sd=SequenceDict()

    # First Test
    sd[3]="first"
    sd[2]="second"
    sd[1]="third"
    print sd.keys()
    print sd.items()
    print sd.values()

    del(sd[1])
    del(sd[2])
    del(sd[3])

    print sd.keys(), sd.items(), sd.values()
    print sd._ids, sd._keys

    print "---------------"
    # Second Test
    sd["b"]="first"
    sd["a"]="second"
    sd.update({"c": "third"})
    print sd.keys()
    print sd.items()
    print sd.values()

    del(sd["b"])
    del(sd["a"])
    del(sd["c"])

    print sd.keys(), sd.items(), sd.values()
    print sd._ids, sd._keys

def likePerlCookbook():
    food_color=SequenceDict()
    food_color["Banana"]="Yellow";
    food_color["Apple"]="Green";
    food_color["Lemon"]="Yellow"
    print "In insertion order, the foods' color are:"
    for food, color in food_color.items():
        print "%s is colored %s" % (food, color)

if __name__=="__main__":
    #testSequenceDict()
    likePerlCookbook()
    

Hashes with Multiple Values Per Key

import os
ttys = {}

who = os.popen("who")

for line in who:
    user, tty = line.split()[:2]
    ttys.setdefault(user, []).append(tty)

for (user, tty_list) in sorted(ttys.items()):
    print user + ": " + " ".join(tty_list)
#-----------------------------
import pwd
for (user, tty_list) in ttys.items():
    print user + ":", len(tty_list), "ttys."
    for tty in sorted(tty_list):
        try:
            uid = os.stat("/dev/" + tty).st_uid
            user = pwd.getpwuid(uid)[0]
        except os.error:
            user = "(not available)"
        print "\t%s (owned by %s)" % (tty, user)

Inverting a Hash

# lookup_dict maps keys to values
reverse = dict([(val, key) for (key, val) in lookup_dict.items()])
#-----------------------------
surname = {"Mickey": "Mantle", "Babe": "Ruth"}
first_name = dict([(last, first) for (first, last) in surname.items()])

print first_name["Mantle"]
#=> Mickey
#-----------------------------
#!/usr/bin/perl -w
# foodfind - find match for food or color

import sys
if not sys.argv[1:]:
    raise SystemExit("usage: foodfind food_or_color")
given = sys.argv[1]

color_dict = {"Apple":  "red",
              "Banana": "yellow",
              "Lemon":  "yellow",
              "Carrot": "orange",
             }
food_dict = dict([(color, food) for (food, color) in color_dict.items()])

if given in color_dict:
    print given, "is a food with color", color_dict[given]
elif given in food_dict:
    print food_dict[given], "is a food with color", given
#-----------------------------
# food_color as per the introduction
foods_with_color = {}
for food, color in food_color.items():
    foods_with_color.setdefault(color, []).append(food)

print " ".join(foods_with_color["yellow"]), "were yellow foods."
#-----------------------------

Sorting a Hash

#-----------------------------
# mydict is the hash to sort
for key, value in sorted(mydict.items()):
    # do something with key, value
#-----------------------------
# food_color as per section 5.8
for food, color in sorted(food_color.items()):
    print "%s is %s." % (food, color)
#-----------------------------
# NOTE: alternative version
for item in sorted(food_color.items()):
    print "%s is %s." % item
#-----------------------------
# NOTE: alternative version showing a user-defined function
def food_cmp(x, y):
    return cmp(x, y)

for food, color in sorted(food_color, cmp=food_cmp):
    print "%s is %s." % (food, color)
#-----------------------------
def food_len_cmp(x, y):
    return cmp(len(x), len(y))

for food in sorted(food_color, cmp=food_len_cmp):
    print "%s is %s." % (food, food_color[food])

# In this instance, however, the following is both simpler and faster:
for food in sorted(food_color, key=len):
    print "%s is %s." % (food, food_color[food])
#-----------------------------

Merging Hashes

#-----------------------------
merged = {}
merged.update(a_dict)
merged.update(b_dict)

#-----------------------------
# NOTE: alternative version
merged = a_dict.copy()
merged.update(b_dict)
#-----------------------------
# DON'T DO THIS:

merged = {}
for k, v in a_dict.items():
    merged[k] = v
for k, v in b_dict.items():
    merged[k] = v
#-----------------------------
# food_color as per section 5.8
drink_color = {"Galliano": "yellow",
               "Mai Tai": "blue"}

ingested_color = drink_color.copy()
ingested_color.update(food_color)
#-----------------------------
# DON'T DO THIS:
drink_color = {"Galliano": "yellow",
               "Mai Tai": "blue"}

substance_color = {}
for k, v in food_color.items():
    substance_color[k] = v
for k, v in drink_color.items():
    substance_color[k] = v
#-----------------------------
# DON'T DO THIS:
substance_color = {}
for mydict in (food_color, drink_color):
    for k, v in mydict:
        substance_color[k] = v
#-----------------------------
# DON'T DO THIS:
substance_color = {}
for item in food_color.items() + drink_color.items():
    for k, v in mydict:
        substance_color[k] = v
#-----------------------------
substance_color = {}
for mydict in (food_color, drink_color):
    for k, v in mydict.items():
        if substance_color.has_key(k):
            print "Warning:", k, "seen twice.  Using the first definition."
            continue
        substance_color[k] = v

# I think it's a copy, in which case
all_colors = new_colors.copy()

Finding Common or Different Keys in Two Hashes

common = [k for k in dict1 if k in dict2]
#-----------------------------
this_not_that = [k for k in dict1 if k not in dict2]
#-----------------------------
# citrus_color is a dict mapping citrus food name to its color.
citrus_color = {"Lemon":  "yellow",
                "Orange": "orange",
                "Lime":   "green"}

# build up a list of non-citrus foods
non_citrus = [k for k in food_color if k not in citruscolor]
#-----------------------------

Hashing References

#-----------------------------
# references as keys of dictionaries is no pb in python

name = {}
for filename in ("/etc/termcap", "/vmunix", "/bin/cat"):
    try:
        myfile = open(filename)
    except IOError:
        pass
    else:
        names[myfile] = filename

print "open files:", ", ".join(name.values())
for f, fname in name.items():
    f.seek(0, 2)       # seek to the end
    print "%s is %d bytes long." % (fname, f.tell())
#-----------------------------

Presizing a Hash

# Python doesn't allow presizing of dicts, but hashing is efficient -
# it only re-sizes at intervals, not every time an item is added.

Finding the Most Common Anything

count = {}
for element in mylist:
    count[element] = count.get(element, 0) + 1

Representing Relationships Between Data

#-----------------------------
import fileinput

father = {'Cain': 'Adam',
          'Abel': 'Adam',
          'Seth': 'Adam',
          'Enoch': 'Cain',
          'Irad': 'Enoch',
          'Mehujael': 'Irad',
          'Methusael': 'Mehujael',
          'Lamech': 'Methusael',
          'Jabal': 'Lamech',
          'Tubalcain': 'Lamech',
          'Enos': 'Seth',
         }

for line in fileinput.input():
    person = line.rstrip()
    while person:                    # as long as we have people,
        print person,                # print the current name
        person = father.get(person)  # set the person to the person's father
    print

#-----------------------------
import fileinput

children = {}
for k, v in father.items():
    children.setdefault(v, []).append(k)

for line in fileinput.input():
    person = line.rstrip()
    kids = children.get(person, ["nobody"])
    print person, "begat", ", ".join(kids)

#-----------------------------
import sys, re
pattern = re.compile(r'^\s*#\s*include\s*<([^>]+)')
includes = {}
for filename in filenames:
    try:
        infile = open(filename)
    except IOError, err:
        print>>sys.stderr, err
        continue
    for line in infile:
        match = pattern.match(line)
        if match:
            includes.setdefault(match.group(1), []).append(filename)
#-----------------------------
# list of files that don't include others
mydict = {}
for e in reduce(lambda a,b: a + b, includes.values()):
    if not includes.has_key(e):
        mydict[e] = 1
include_free = mydict.keys()
include_free.sort()

Program: dutree

#-----------------------------
#!/usr/bin/env python -w
# dutree - print sorted indented rendition of du output
import os, sys

def get_input(args):
    # NOTE: This is insecure - use only from trusted code!
    cmd = "du " + " ".join(args)
    infile = os.popen(cmd)

    dirsize = {}
    kids = {}
    for line in infile:
        size, name = line[:-1].split("\t", 1)
        dirsize[name] = int(size)
        parent = os.path.dirname(name)
        kids.setdefault(parent, []).append(name)
    # Remove the last field added, which is the root
    kids[parent].pop()
    if not kids[parent]: 
        del kids[parent]

    return name, dirsize, kids

def getdots(root, dirsize, kids):
    size = cursize = dirsize[root]
    if kids.has_key(root):
        for kid in kids[root]:
            cursize -= dirsize[kid]
            getdots(kid, dirsize, kids)
    if size != cursize:
        dot = root + "/."
        dirsize[dot] = cursize
        kids[root].append(dot)

def output(root, dirsize, kids, prefix = "", width = 0):
    path = os.path.basename(root)
    size = dirsize[root]
    fmt = "%" + str(width) + "d %s"
    line = fmt % (size, path)
    print prefix + line

    prefix += (" " * (width-1)) + "| "  + (" " * len(path))

    if kids.has_key(root):
        kid_list = kids[root]
        kid_list.sort(lambda x, y, dirsize=dirsize:
                          cmp(dirsize[x], dirsize[y]))
        width = len(str(dirsize[kid_list[-1]]))
        for kid in kid_list:
            output(kid, dirsize, kids, prefix, width)

def main():
    root, dirsize, kids = get_input(sys.argv[1:])
    getdots(root, dirsize, kids)
    output(root, dirsize, kids)

if __name__ == "__main__":
    main()