5. Hashes

Introduction

#-----------------------------
%age = ( "Nat",   24,
         "Jules", 25,
         "Josh",  17  );
#-----------------------------
$age{"Nat"}   = 24;
$age{"Jules"} = 25;
$age{"Josh"}  = 17;
#-----------------------------
%food_color = (
               "Apple"  => "red",
               "Banana" => "yellow",
               "Lemon"  => "yellow",
               "Carrot" => "orange"
              );
#-----------------------------
%food_color = (
                Apple  => "red",
                Banana => "yellow",
                Lemon  => "yellow",
                Carrot => "orange"
               );
#-----------------------------

Adding an Element to a Hash

#-----------------------------
$HASH{$KEY} = $VALUE;
#-----------------------------
# %food_color defined per the introduction
$food_color{Raspberry} = "pink";
print "Known foods:\n";
foreach $food (keys %food_color) {
    print "$food\n";
}

# Known foods:
# 
# Banana
# 
# Apple
# 
# Raspberry
# 
# Carrot
# 
# Lemon
#-----------------------------

Testing for the Presence of a Key in a Hash

#-----------------------------
# does %HASH have a value for $KEY ?
if (exists($HASH{$KEY})) {
    # it exists
} else {
    # it doesn't
}
#-----------------------------
# %food_color per the introduction
foreach $name ("Banana", "Martini") {
    if (exists $food_color{$name}) {
        print "$name is a food.\n";
    } else {
        print "$name is a drink.\n";
    }
}

# Banana is a food.
# 
# Martini is a drink.
#-----------------------------
%age = ();
$age{"Toddler"}  = 3;
$age{"Unborn"}   = 0;
$age{"Phantasm"} = undef;

foreach $thing ("Toddler", "Unborn", "Phantasm", "Relic") {
    print "$thing: ";
    print "Exists " if exists $age{$thing};
    print "Defined " if defined $age{$thing};
    print "True " if $age{$thing};
    print "\n";
}

# Toddler: Exists Defined True 
# 
# Unborn: Exists Defined 
# 
# Phantasm: Exists 
# 
# Relic: 
#-----------------------------
%size = ();
while (<>) {
    chomp;
    next if $size{$_};              # WRONG attempt to skip
    $size{$_} = -s $_;
}
#-----------------------------
    next if exists $size{$_};
#-----------------------------

Deleting from a Hash

#-----------------------------
# remove $KEY and its value from %HASH
delete($HASH{$KEY});
#-----------------------------
# %food_color as per Introduction
sub print_foods {
    my @foods = keys %food_color;
    my $food;

    print "Keys: @foods\n";
    print "Values: ";

    foreach $food (@foods) {
        my $color = $food_color{$food};

        if (defined $color) {
            print "$color ";
        } else {
            print "(undef) ";
        }
    }
    print "\n";
}

print "Initially:\n";
print_foods();


print "\nWith Banana undef\n";
undef $food_color{"Banana"};
print_foods();


print "\nWith Banana deleted\n";
delete $food_color{"Banana"};
print_foods();


# Initially:
# 
# Keys: Banana Apple Carrot Lemon
# 
# Values: yellow red orange yellow 
# 
# 
# With Banana undef
# 
# Keys: Banana Apple Carrot Lemon
# 
# Values: (undef) red orange yellow 
# 
# 
# With Banana deleted
# 
# Keys: Apple Carrot Lemon
# 
# Values: red orange yellow 
#-----------------------------
delete @food_color{"Banana", "Apple", "Cabbage"};
#-----------------------------

Traversing a Hash

#-----------------------------
while(($key, $value) = each(%HASH)) {
    # do something with $key and $value
}
#-----------------------------
foreach $key (keys %HASH) {
    $value = $HASH{$key};
    # do something with $key and $value
}
#-----------------------------
# %food_color per the introduction
while(($food, $color) = each(%food_color)) {
    print "$food is $color.\n";
}
# Banana is yellow.
# 
# Apple is red.
# 
# Carrot is orange.
# 
# Lemon is yellow.

foreach $food (keys %food_color) {
    my $color = $food_color{$food};
    print "$food is $color.\n";
}
# Banana is yellow.
# 
# Apple is red.
# 
# Carrot is orange.
# 
# Lemon is yellow.
#-----------------------------
print
 
"$food
 
is
 
$food_color{$food}.\n"
 
#-----------------------------
foreach $food (sort keys %food_color) {
    print "$food is $food_color{$food}.\n";
}
# Apple is red.
# 
# Banana is yellow.
# 
# Carrot is orange.
# 
# Lemon is yellow.
#-----------------------------
while ( ($k,$v) = each %food_color ) {
    print "Processing $k\n";
    keys %food_color;               # goes back to the start of %food_color
}
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# countfrom - count number of messages from each sender

$filename = $ARGV[0] || "-";

open(FILE, "<$filename")         or die "Can't open $filename : $!";

while(<FILE>) {
    if (/^From: (.*)/) { $from{$1}++ }
}

foreach $person (sort keys %from) {    
    print "$person: $from{$person}\n";
}

#-----------------------------

Printing a Hash

#-----------------------------
while ( ($k,$v) = each %hash ) {
    print "$k => $v\n";
}
#-----------------------------
print map { "$_ => $hash{$_}\n" } keys %hash;
#-----------------------------
print "@{[ %hash ]}\n";
#-----------------------------
{
    my @temp = %hash;
    print "@temp";
}
#-----------------------------
foreach $k (sort keys %hash) {
    print "$k => $hash{$k}\n";
}
#-----------------------------

Retrieving from a Hash in Insertion Order

#-----------------------------
use Tie::IxHash;
tie %HASH, "Tie::IxHash";
# manipulate %HASH
@keys = keys %HASH;         # @keys is in insertion order
#-----------------------------
# initialize
use Tie::IxHash;

tie %food_color, "Tie::IxHash";
$food_color{Banana} = "Yellow";
$food_color{Apple}  = "Green";
$food_color{Lemon}  = "Yellow";

print "In insertion order, the foods are:\n";
foreach $food (keys %food_color) {
    print "  $food\n";
}

print "Still in insertion order, the foods' colors are:\n";
while (( $food, $color ) = each %food_color ) {
    print "$food is colored $color.\n";
}

#In insertion order, the foods are:
#
#  Banana
#
#  Apple
#
#  Lemon
#
#Still in insertion order, the foods' colors are:
#
#Banana is colored Yellow.
#
#Apple is colored Green.
#
#Lemon is colored Yellow.
#-----------------------------

Hashes with Multiple Values Per Key

#-----------------------------
%ttys = ();

open(WHO, "who|")                   or die "can't open who: $!";
while (<WHO>) {
    ($user, $tty) = split;
    push( @{$ttys{$user}}, $tty );
}

foreach $user (sort keys %ttys) {
    print "$user: @{$ttys{$user}}\n";
}
#-----------------------------
foreach $user (sort keys %ttys) {
    print "$user: ", scalar( @{$ttys{$user}} ), " ttys.\n";
    foreach $tty (sort @{$ttys{$user}}) {
        @stat = stat("/dev/$tty");
        $user = @stat ? ( getpwuid($stat[4]) )[0] : "(not available)";
        print "\t$tty (owned by $user)\n";
    }
}
#-----------------------------
sub multihash_delete {
    my ($hash, $key, $value) = @_;
    my $i;

    return unless ref( $hash->{$key} );
    for ($i = 0; $i < @{ $hash->{$key} }; $i++) {
        if ($hash->{$key}->[$i] eq $value) {
            splice( @{$hash->{$key}}, $i, 1);
            last;
        }
    }

    delete $hash->{$key} unless @{$hash->{$key}};
}
#-----------------------------

Inverting a Hash

#-----------------------------
# %LOOKUP maps keys to values
%REVERSE = reverse %LOOKUP;
#-----------------------------
%surname = ( "Mickey" => "Mantle", "Babe" => "Ruth" );
%first_name = reverse %surname;
print $first_name{"Mantle"}, "\n";
Mickey
#-----------------------------
("Mickey", "Mantle", "Babe", "Ruth")
#-----------------------------
("Ruth", "Babe", "Mantle", "Mickey")
#-----------------------------
("Ruth" => "Babe", "Mantle" => "Mickey")
#-----------------------------
# download the following standalone program
#!/usr/bin/perl -w
# foodfind - find match for food or color

$given = shift @ARGV or die "usage: foodfind food_or_color\n";

%color = (
           "Apple"  => "red",
           "Banana" => "yellow",
           "Lemon"  => "yellow",           
           "Carrot" => "orange"
         );    

%food = reverse %color;    

if (exists $color{$given}) {
        print "$given is a food with color $color{$given}.\n";
}    
if (exists $food{$given}) {
        print "$food{$given} is a food with color $given.\n";
}


#-----------------------------
# %food_color as per the introduction
while (($food,$color) = each(%food_color)) {
    push(@{$foods_with_color{$color}}, $food);
}

print "@{$foods_with_color{yellow}} were yellow foods.\n";
# Banana Lemon were yellow foods.
#-----------------------------

Sorting a Hash

#-----------------------------
# %HASH is the hash to sort
@keys = sort { criterion() } (keys %hash);
foreach $key (@keys) {
    $value = $hash{$key};
    # do something with $key, $value
}
#-----------------------------
foreach $food (sort keys %food_color) {
    print "$food is $food_color{$food}.\n";
}
#-----------------------------
foreach $food (sort { $food_color{$a} cmp $food_color{$b} }
                keys %food_color) 
{
    print "$food is $food_color{$food}.\n";
}
#-----------------------------
@foods = sort { length($food_color{$a}) <=> length($food_color{$b}) } 
    keys %food_color;
foreach $food (@foods) {
    print "$food is $food_color{$food}.\n";
}
#-----------------------------

Merging Hashes

#-----------------------------
%merged = (%A, %B);
#-----------------------------
%merged = ();
while ( ($k,$v) = each(%A) ) {
    $merged{$k} = $v;
}
while ( ($k,$v) = each(%B) ) {
    $merged{$k} = $v;
}
#-----------------------------
# %food_color as per the introduction
%drink_color = ( Galliano  => "yellow",
                 "Mai Tai" => "blue" );

%ingested_color = (%drink_color, %food_color);
#-----------------------------
# %food_color per the introduction, then
%drink_color = ( Galliano  => "yellow",
                 "Mai Tai" => "blue" );

%substance_color = ();
while (($k, $v) = each %food_color) {
    $substance_color{$k} = $v;
} 
while (($k, $v) = each %drink_color) {
    $substance_color{$k} = $v;
} 
#-----------------------------
foreach $substanceref ( \%food_color, \%drink_color ) {
    while (($k, $v) = each %$substanceref) {
        $substance_color{$k} = $v;
    }
}
#-----------------------------
foreach $substanceref ( \%food_color, \%drink_color ) {
    while (($k, $v) = each %$substanceref) {
        if (exists $substance_color{$k}) {
            print "Warning: $k seen twice.  Using the first definition.\n";
            next;
        }
        $substance_color{$k} = $v;
    }
}
#-----------------------------
@all_colors{keys %new_colors} = values %new_colors;
#-----------------------------

Finding Common or Different Keys in Two Hashes

#-----------------------------
my @common = ();
foreach (keys %hash1) {
    push(@common, $_) if exists $hash2{$_};
}
# @common now contains common keys
#-----------------------------
my @this_not_that = ();
foreach (keys %hash1) {
    push(@this_not_that, $_) unless exists $hash2{$_};
}
#-----------------------------
# %food_color per the introduction

# %citrus_color is a hash mapping citrus food name to its color.
%citrus_color = ( Lemon  => "yellow",
                  Orange => "orange",
                  Lime   => "green" );

# build up a list of non-citrus foods
@non_citrus = ();

foreach (keys %food_color) {
    push (@non_citrus, $_) unless exists $citrus_color{$_};
}
#-----------------------------

Hashing References

#-----------------------------
use Tie::RefHash;
tie %hash, "Tie::RefHash";
# you may now use references as the keys to %hash
#-----------------------------
# Class::Somewhere=HASH(0x72048)
# 
# ARRAY(0x72048)
#-----------------------------
use Tie::RefHash;
use IO::File;

tie %name, "Tie::RefHash";
foreach $filename ("/etc/termcap", "/vmunix", "/bin/cat") {
    $fh = IO::File->new("< $filename") or next;
    $name{$fh} = $filename;
}
print "open files: ", join(", ", values %name), "\n";
foreach $file (keys %name) {
    seek($file, 0, 2);      # seek to the end
    printf("%s is %d bytes long.\n", $name{$file}, tell($file));
}
#-----------------------------

Presizing a Hash

#-----------------------------
# presize %hash to $num
keys(%hash) = $num;
#-----------------------------
# will have 512 users in %users
keys(%users) = 512;
#-----------------------------
keys(%users) = 1000;
#-----------------------------

Finding the Most Common Anything

#-----------------------------
%count = ();
foreach $element (@ARRAY) {
    $count{$element}++;
}
#-----------------------------

Representing Relationships Between Data

#-----------------------------
%father = ( 'Cain'      => 'Adam',
            'Abel'      => 'Adam',
            'Seth'      => 'Adam',
            'Enoch'     => 'Cain',
            'Irad'      => 'Enoch',
            'Mehujael'  => 'Irad',
            'Methusael' => 'Mehujael',
            'Lamech'    => 'Methusael',
            'Jabal'     => 'Lamech',
            'Jubal'     => 'Lamech',
            'Tubalcain' => 'Lamech',
            'Enos'      => 'Seth' );
#-----------------------------
while (<>) {
    chomp;
    do {
        print "$_ ";        # print the current name
        $_ = $father{$_};   # set $_ to $_'s father
    } while defined;        # until we run out of fathers
    print "\n";
}
#-----------------------------
while ( ($k,$v) = each %father ) {
    push( @{ $children{$v} }, $k );
}

$" = ', ';                  # separate output with commas
while (<>) {
    chomp;
    if ($children{$_}) {
        @children = @{$children{$_}};
    } else {
        @children = "nobody";
    }
    print "$_ begat @children.\n";
}
#-----------------------------
foreach $file (@files) {
    local *F;               # just in case we want a local FH
    unless (open (F, "<$file")) {
        warn "Couldn't read $file: $!; skipping.\n";
        next;
    }
    
    while (<F>) {
        next unless /^\s*#\s*include\s*<([^>]+)>/;
        push(@{$includes{$1}}, $file);
    }
    close F;
}
#-----------------------------
@include_free = ();                 # list of files that don't include others
@uniq{map { @$_ } values %includes} = undef;
foreach $file (sort keys %uniq) {
        push( @include_free , $file ) unless $includes{$file};
}
#-----------------------------

Program: dutree

#-----------------------------
#% du pcb
#19      pcb/fix
#
#20      pcb/rev/maybe/yes
#
#10      pcb/rev/maybe/not
#
#705     pcb/rev/maybe
#
#54      pcb/rev/web
#
#1371    pcb/rev
#
#3       pcb/pending/mine
#
#1016    pcb/pending
#
#2412    pcb
#-----------------------------
#2412 pcb
#
#   
#|
#    1371 rev
#
#   
#|       |
#    705 maybe
#
#   
#|       |      |
#      675 .
#
#   
#|       |      |
#       20 yes
#
#   
#|       |      |
#       10 not
#
#   
#|       |
#    612 .
#
#   
#|       |
#     54 web
#
#   
#|
#    1016 pending
#
#   
#|       |
#        1013 .
#
#   
#|       |
#           3 mine
#
#   
#|
#      19 fix
#
#   
#|
#       6 .
#-----------------------------
#% dutree
#% dutree /usr
#% dutree -a 
#% dutree -a /bin
#-----------------------------
# download the following standalone program
#!/usr/bin/perl -w
# dutree - print sorted indented rendition of du output
use strict;

my %Dirsize;
my %Kids;

getdots(my $topdir = input());
output($topdir);

# run du, read in input, save sizes and kids
# return last directory (file?) read
sub input { 
    my($size, $name, $parent);
    @ARGV = ("du @ARGV |");         # prep the arguments
    while (<>) {                    # magic open is our friend
        ($size, $name) = split;
        $Dirsize{$name} = $size;
        ($parent = $name) =~ s#/[^/]+$##;   # dirname
        push @{ $Kids{$parent} }, $name unless eof;
    } 
    return $name;
}

# figure out how much is taken up in each directory
# that isn't stored in subdirectories.  add a new
# fake kid called "." containing that much.
sub getdots {
    my $root = $_[0];
    my($size, $cursize);
    $size = $cursize = $Dirsize{$root};
    if ($Kids{$root}) {
        for my $kid (@{ $Kids{$root} }) { 
            $cursize -= $Dirsize{$kid};
            getdots($kid);
        }
    } 
    if ($size != $cursize) {
        my $dot = "$root/.";
        $Dirsize{$dot} = $cursize;
        push @{ $Kids{$root} }, $dot;
    } 
} 

# recursively output everything,
# passing padding and number width in as well
# on recursive calls
sub output {
    my($root, $prefix, $width) = (shift, shift || '', shift || 0);
    my $path;
    ($path = $root) =~ s#.*/##;     # basename
    my $size = $Dirsize{$root};
    my $line = sprintf("%${width}d %s", $size, $path);
    print $prefix, $line, "\n";
    for ($prefix .= $line) {        # build up more output
        s/\d /| /;
        s/[^|]/ /g;
    }
    if ($Kids{$root}) {             # not a bachelor node
        my @Kids = @{ $Kids{$root} };
        @Kids = sort { $Dirsize{$b} <=> $Dirsize{$a} } @Kids;
        $Dirsize{$Kids[0]} =~ /(\d+)/;
        my $width = length $1;
        for my $kid (@Kids) { output($kid, $prefix, $width) }
    }
} 

#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# dutree_orig: the old version pre-perl5 (early 90s)

@lines = `du @ARGV`;
chop(@lines);
&input($top = pop @lines);
&output($top);
exit;

sub input {
    local($root, *kid, $him) = @_[0,0];
    while (@lines && &childof($root, $lines[$#lines])) {
        &input($him = pop(@lines));
        push(@kid, $him);
    i} 
    if (@kid) {
        local($mysize) = ($root =~ /^(\d+)/);
        for (@kid) { $mysize -= (/^(\d+)/)[0]; } 
        push(@kid, "$mysize .") if $size != $mysize;
    } 
    @kid = &sizesort(*kid);
} 

sub output {
    local($root, *kid, $prefix) = @_[0,0,1];
    local($size, $path) = split(' ', $root);
    $path =~ s!.*/!!;
    $line = sprintf("%${width}d %s", $size, $path);
    print $prefix, $line, "\n";
    $prefix .= $line;
    $prefix =~ s/\d /| /;
    $prefix =~ s/[^|]/ /g;
    local($width) = $kid[0] =~ /(\d+)/ && length("$1");
    for (@kid) { &output($_, $prefix); };
} 

sub sizesort {
    local(*list, @index) = shift;
    sub bynum { $index[$b] <=> $index[$a]; }
    for (@list) { push(@index, /(\d+)/); } 
    @list[sort bynum 0..$#list];
} 

sub childof {
    local(@pair) = @_;
    for (@pair) { s/^\d+\s+//g; s/$/\//; }          
    index($pair[1], $pair[0]) >= 0;
}

#-----------------------------