11. References and Records

Introduction

#Introduction.
#   In Python, all names are references.
#   All objects are inherently anonymous, they don't know what names refer to them.
print ref   # prints the value that the name ref refers to. 
ref = 3     # assigns the name ref to the value 3.
#-----------------------------
aref = mylist
#-----------------------------
aref = [3, 4, 5]    # aref is a name for this list
href = {"How": "Now", "Brown": "Cow"} # href is a name for this dictionary
#-----------------------------
#   Python doesn't have autovivification as (for simple types) there is no difference between a name and a reference.
#   If we try the equivalent of the Perl code we get the list, not a reference to the list.
#-----------------------------
#   To handle multidimensional arrays, you should use an extension to Python,
#   such as numarray (http://www.stsci.edu/resources/software_hardware/numarray)
#-----------------------------
#   In Python, assignment doesn't return anything. 
#-----------------------------
Nat = { "Name": "Leonhard Euler",
        "Address": "1729 Ramanujan Lane\nMathworld, PI 31416",
        "Birthday": 0x5bb5580
}
#-----------------------------

Taking References to Arrays

aref = mylist
anon_list = [1, 3, 5, 7, 9]
anon_copy = anon_list
implicit_creation = [2, 4, 6, 8, 10]
#-----------------------------
anon_list.append(11)
#-----------------------------
two = implicit_creation[0]
#-----------------------------
#  To get the last index of a list, you can use len() 
# [or list.__len__() - but don't] directly
last_idx = len(aref) - 1

# Normally, though, you'd use an index of -1 for the last
# element, -2 for the second last, etc.
print implicit_creation[-1]
#=> 10

num_items = len(aref)
#-----------------------------
last_idx = aref.__len__() - 1
num_items = aref.__len__()
#-----------------------------
if not isinstance(someVar, type([])):
    print "Expected a list"
#-----------------------------
print list_ref
#-----------------------------
#   sort is in place.
list_ref.sort()
#-----------------------------
list_ref.append(item)
#-----------------------------
def list_ref():
    return []

aref1 = list_ref()
aref2 = list_ref()
#   aref1 and aref2 point to different lists.
#-----------------------------
list_ref[N] # refers to the Nth item in the list_ref list.
#-----------------------------
# The following two statements are equivalent and return up to 3 elements
# at indices 3, 4, and 5 (if they exist).
pie[3:6]
pie[3:6:1]
#-----------------------------
#   This will insert 3 elements, overwriting elements at indices 3,4, or 5 - if they exist.
pie[3:6] = ["blackberry", "blueberry", "pumpkin"]
#-----------------------------
for item in pie:
    print item

# DON'T DO THIS (this type of indexing should be done with enumerate)
# xrange does not create a list 0..len(pie) - 1, it creates an object 
# that returns one index at a time.
for idx in xrange(len(pie)):
    print pie[idx]

Making Hashes of Arrays

# Making Hashes of Arrays

hash["KEYNAME"].append("new value")

for mystr in hash.keys():
    print "%s: %s" % (mystr, hash[mystr])

hash["a key"] = [3, 4, 5]

values = hash["a key"]

hash["a key"].append(value)

# autovivification also does not work in python.
residents = phone2name[number]
# do this instead
residents = phone2name.get(number, [])

Taking References to Hashes

# Taking References to Hashes

href = hash
anon_hash = { "key1":"value1", "key2" : "value2 ..." }
anon_hash_copy = anon_hash.copy()

hash = href
value = href[key]
slice = [href[k] for k in (key1, key2, key3)]
keys = hash.keys()

import types
if type(someref) != types.DictType:
    raise "Expected a dictionary, not %s" % type(someref)
if isinstance(someref,dict):
    raise "Expected a dictionary, not %s" % type(someref)

for href in ( ENV, INC ):
    for key in href.keys():
        print "%s => %s" % (key, href[key])

values = [hash_ref[k] for k in (key1, key2, key3)]

for key in ("key1", "key2", "key3"):
    hash_ref[k] += 7    # not like in perl but the same result.
#-----------------------------

Taking References to Functions

#-----------------------------
cref = func
cref = lambda a, b: ...
#-----------------------------
returned = cref(arguments)
#-----------------------------
funcname = "thefunc"
locals()[funcname]();
#-----------------------------
commands = {
    'happy': joy,
    'sad': sullen,
    'done': (lambda : sys.exit()),  # In this case "done: sys.exit" would suffice
    'mad': angry,
    }

print "How are you?",
cmd = raw_input()
if cmd in commands:
    commands[cmd]()
else:
    print "No such command: %s" % cmd
#-----------------------------
def counter_maker():
    start = [0]
    def counter_function():
        # start refers to the variable defined in counter_maker, but
        # we can't reassign or increment variables in parent scopes.
        # By using a one-element list we can modify the list without
        # reassigning the variable.  This way of using a list is very
        # like a reference.
        start[0] += 1
        return start[0]-1
    return counter_function

counter = counter_maker()
for i in range(5):
    print counter()
#-----------------------------
counter1 = counter_maker()
counter2 = counter_maker()

for i in range(5):
    print counter1()
print counter1(), counter2()
#=> 0
#=> 1
#=> 2
#=> 3
#=> 4
#=> 5 0
#-----------------------------
import time
def timestamp():
    start_time = time.time()
    def elapsed():
        return time.time() - start_time
    return elapsed
early = timestamp()
time.sleep(20)
later = timestamp()
time.sleep(10)
print "It's been %d seconds since early" % early()
print "It's been %d seconds since later" % later()
#=> It's been 30 seconds since early.
#=> It's been 10 seconds since later.
#-----------------------------

Taking References to Scalars

# A name is a reference to an object and an object can be referred to
# by any number of names. There is no way to manipulate pointers or
# an object's id.  This section is thus inapplicable.
x = 1
y = x
print x, id(x), y, id(y)
x += 1    # "x" now refers to a different object than y
print x, id(x), y, id(y)
y = 4     # "y" now refers to a different object than it did before
print x, id(x), y, id(y)

# Some objects (including ints and strings) are immutable, however, which
# can give the illusion of a by-value/by-reference distinction:
a = x = [1]
b = y = 1
c = z = "s"
print a, b, c
#=> [1] 1 s

x += x      # calls list.__iadd__ which is inplace.
y += y      # can't find int.__iadd__ so calls int.__add__ which isn't inplace
z += z      # can't find str.__iadd__ so calls str.__add__ which isn't inplace              
print a, b, c
#=> [1, 1] 1 s

Creating Arrays of Scalar References

# As indicated by the previous section, everything is referenced, so
# just create a list as normal, and beware that augmented assignment
# works differently with immutable objects to mutable ones:
mylist = [1, "s", [1]]
print mylist
#=> [1, s, [1]]

for elem in mylist:
    elem *= 2
print mylist
#=> [1, s, [1, 1]]

mylist[0] *= 2
mylist[-1] *= 2
print mylist
#=> [1, s, [1, 1, 1, 1]]

# If you need to modify every value in a list, you should use a list comprehension
# which does NOT modify inplace:
import math
mylist = [(val**3 * 4/3*math.pi) for val in mylist]

Using Closures Instead of Objects

#-----------------------------
c1 = mkcounter(20)
c2 = mkcounter(77)

print "next c1: %d" % c1['next']()  # 21
print "next c2: %d" % c2['next']()  # 78
print "next c1: %d" % c1['next']()  # 22
print "last c1: %d" % c1['prev']()  # 21
print "old  c2: %d" % c2['reset']() # 77
#-----------------------------
# DON'T DO THIS.  Use an object instead  
def mkcounter(start):
    count = [start]
    def next():
        count[0] += 1
        return count[0]
    def prev():
        count[0] -= 1
        return count[0]
    def get():
        return count[0]
    def set(value):
        count[0] = value
        return count[0]
    def bump(incr):
        count[0] += incr
        return count[0]
    def reset():
        count[0] = start
        return count[0]
    return {
        'next': next, 'prev': prev, 'get': get, 'set': set,
        'bump': bump, 'reset': reset, 'last': prev}
#-----------------------------

Creating References to Methods

#-----------------------------
mref = obj.meth
# later...
mref("args", "go", "here")
#-----------------------------

Constructing Records

#-----------------------------
record = {
    "name": "Jason",
    "empno": 132,
    "title": "deputy peon",
    "age": 23,
    "salary": 37000,
    "pals": ["Norbert", "Rhys", "Phineas"],
}
print "I am %s, and my pals are %s." % (record["name"],
                                        ", ".join(record["pals"]))
#-----------------------------
byname = {}
byname[record["name"]] = record

rp = byname.get("Aron")
if rp:
     print "Aron is employee %d."% rp["empno"]

byname["Jason"]["pals"].append("Theodore")
print "Jason now has %d pals." % len(byname["Jason"]["pals"])

for name, record in byname.items():
    print "%s is employee number %d." % (name, record["empno"])

employees = {}
employees[record["empno"]] = record;

# lookup by id
rp = employees.get(132)
if (rp):
    print "Employee number 132 is %s." % rp["name"]

byname["Jason"]["salary"] *= 1.035

peons = [r for r in employees.values() if r["title"] == "peon"]
tsevens = [r for r in employees.values() if r["age"] == 27]

# Go through all records
print employees.values()

for rp in sorted(employees.values(), key=lambda x:x["age"]):
    print "%s is age %d."%(rp["name"], rp["age"])

# use @byage, an array of arrays of records
byage = {}
byage[record["age"]] = byage.get(record["age"], []) + [record]

for age, records in byage.items():
    print records
    print "Age %s:"%age,
    for rp in records:
        print rp["name"],
    print
#-----------------------------

Reading and Writing Hash Records to Text Files

#-----------------------------
FieldName: Value
#-----------------------------
for record in list_of_records:
    # Note: sorted added in Python 2.4
    for key in sorted(record.keys()):
        print "%s: %s" % (key, record[key])
    print
#-----------------------------
import re
list_of_records = [{}]
while True:
    line = sys.stdin.readline()
    if not line:
        # EOF
        break
    # Remove trailing \n:
    line = line[:1]
    if not line.strip():
        # New record
        list_of_records.append({})
        continue
    key, value = re.split(r':\s*', line, 1)
    # Assign the key/value to the last item in the list_of_records:
    list_of_records[-1][key] = value
#-----------------------------

Printing Data Structures

import pprint

mylist = [[1,2,3], [4, [5,6,7], 8,9, [0,3,5]], 7, 8]
mydict = {"abc": "def", "ghi":[1,2,3]}
pprint.pprint(mylist, width=1)

fmtdict = pprint.pformat(mydict, width=1)
print fmtdict
# "import pprint; help(pprint)" for more details

# @@INCOMPLETE@@
# Note that pprint does not currently handle user objects

#-----------------------------

Copying Data Structures

newlist = list(mylist) # shallow copy
newdict = dict(mydict) # shallow copy

# Pre 2.3:
import copy
newlist = copy.copy(mylist) # shallow copy
newdict = copy.copy(mydict) # shallow copy

# shallow copies copy a data structure, but don't copy the items in those
# data structures so if there are nested data structures, both copy and
# original will refer to the same object
mylist = ["1", "2", "3"]
newlist = list(mylist)
mylist[0] = "0"
print mylist, newlist
#=> ['0', '2', '3'] ['1', '2', '3']

mylist = [["1", "2", "3"], 4]
newlist = list(mylist)
mylist[0][0] = "0"
print mylist, newlist
#=> [['0', '2', '3'], 4] [['0', '2', '3'], 4]
#-----------------------------
import copy
newlist = copy.deepcopy(mylist) # deep copy
newdict = copy.deepcopy(mydict) # deep copy

# deep copies copy a data structure recursively:
import copy

mylist = [["1", "2", "3"], 4]
newlist = copy.deepcopy(mylist)
mylist[0][0] = "0"
print mylist, newlist
#=> [['0', '2', '3'], 4] [['1', '2', '3'], 4]
#-----------------------------

Storing Data Structures to Disk

import pickle
class Foo(object):
    def __init__(self):
        self.val = 1

x = Foo()
x.val = 3
p_x = pickle.dumps(x)  # Also pickle.dump(x, myfile) which writes to myfile
del x
x = pickle.loads(p_x)  # Also x = pickle.load(myfile) which loads from myfile
print x.val
#=> 3
#-----------------------------

Transparently Persistent Data Structures

import os, shelve
fname = "testfile.db"
if not os.path.exists(fname):
    d = shelve.open("testfile.db")
    for i in range(100000):
        d[str(i)] = i
    d.close()

d = shelve.open("testfile.db")
print d["100"]
print d["1212010201"] # KeyError
#-----------------------------

Program: Binary Trees

# bintree - binary tree demo program
# Use the heapq module instead?
import random
import warnings

class BTree(object):
    def __init__(self):
        self.value = None
    
    ### insert given value into proper point of
    ### the tree, extending this node if necessary.
    def insert(self, value):
        if self.value is None:
            self.left = BTree()
            self.right = BTree()
            self.value = value
        elif self.value > value:
            self.left.insert(value)
        elif self.value < value:
            self.right.insert(value)
        else:
            warnings.warn("Duplicate insertion of %s."%value)
            
    # recurse on left child, 
    # then show current value, 
    # then recurse on right child.
    def in_order(self):
       if self.value is not None:
           self.left.in_order()
           print self.value,
           self.right.in_order()

    # show current value, 
    # then recurse on left child, 
    # then recurse on right child.
    def pre_order(self):
        if self.value is not None:
            print self.value,
            self.left.pre_order()
            self.right.pre_order()
    
    # recurse on left child, 
    # then recurse on right child,
    # then show current value. 
    def post_order(self):
        if self.value is not None:
            self.left.post_order()
            self.right.post_order()
            print self.value,

    # find out whether provided value is in the tree.
    # if so, return the node at which the value was found.
    # cut down search time by only looking in the correct
    # branch, based on current value.
    def search(self, value):
        if self.value is not None:
            if self.value == value:
                return self
            if value < self.value:
                return self.left.search(value)
            else:
                return self.right.search(value)

def test():
    root = BTree()

    for i in range(20):
        root.insert(random.randint(1, 1000))

    # now dump out the tree all three ways
    print "Pre order: ", root.pre_order()
    print "In order:  ", root.in_order()
    print "Post order:", root.post_order()

    ### prompt until empty line
    while True:
        val = raw_input("Search? ").strip()
        if not val:
            break
        val = int(val)
        found = root.search(val)
        if found:
            print "Found %s at %s, %s"%(val, found, found.value)
        else:
            print "No %s in tree" % val
            
if __name__ == "__main__":
    test()