5. Hashes

Introduction

// creating a mapping from arrays

mapping age = mkmapping( ({ "Nat", "Jules", "Josh", }), ({ 24, 25, 17 }) );

// initialize one index at a time

mapping age = ([]);
age["Nat"] = 24;
age["Jules"] = 25;
age["Josh"] = 17;

// if your index names are valid identifiers:

age->Nat = 24;
age->Jules = 25;
age->Josh = 17;

// the traditional way to initialize mappings

mapping age = ([ "Nat":24, "Jules":25, "Josh":17 ]);

mapping(string:string) food_color = ([ 
                                      "Apple":"red",
                                      "Banana":"yellow",
                                      "Lemon":"yellow",
                                      "Carrot":"orange"
                                     ]);

// a index may be of any type

mapping any = ([ "1":"a string", 1:"an int", 1.0:"a float" ]);

// you may use other types too, but be aware that they are matched by

// reference, and not by value.

Adding an Element to a Hash


mapping[mixed] = mixed;
mapping->string = mixed;  //any string that is a valid identifier


// food_color as per section 5.0

food_color->Raspberry = "pink";
write("Known foods:\n");
foreach(food_color; string food; )
{
  write(food+"\n");
}
// Lemon

// Banana

// Apple

// Carrot

// Raspberry

Testing for the Presence of a Key in a Hash

// an undefined value in a mapping gets turned to 0.

// assigning 0 as a value is allowed and will not remove the index.

// checking for the index will of course return 0 and be interpreted as false.

// to check if the index is really there, use zero_type()


if(!zero_type(mapping->index))
{
  // it exists

}
else
{
  // it doesn't

}

// food_color as per section 5.0

foreach( ({ "Banana", "Milk" }) ;; string name)
{
  if(!zero_type(food_color[name]))
    write("%s is a food.\n", name);
  else
    write("%s is a drink.\n", name);
}
// Banana is a food.

// Milk is a drink.


// ---------------------------------------------------------

mapping age = ([ "Toddler":3, 
                 "Unborn":0, 
                 "Newborn":0.0, 
                 "Phantasm":UNDEFINED ]);

foreach( ({ "Toddler", "Unborn", "Newborn", "Phantasm", "Relic" });; string thing) 
{
    write(thing+":");
    if(!zero_type(age[thing]))
      write(" Exists");
    if(age[thing])
      write(" True");
    write("\n");
}
// Toddler: Exists True

// Unborn: Exists

// Newborn: Exists True

// Phantasm: Exists

// Relic:


// age->Toddler exists, because zero_type() is only true if the index is not in

// the mapping. it is true because the value is not 0.

// age->Unborn exists, but is false because 0 is false

// age->Newborn exists and is true, because 0.0 is not false

// age->Phantasm exists and is false, like Unborn

// age->Relic does not exist


// we can not test for defined. UNDEFINED is a special value used internally by

// the compiler. it gets converted to 0 as soon as it is assigned in a mapping


// however we can create something equivalent that can be treated like any

// other value, except that it is false:


class Nil
{
  // this is a minimal example. 

  // a more complete one would also handle casting


  int `!() {return 1;}
  string _sprintf() {return "Nil";}

  // we could have this function externally, but this is more convenient

  int defined(mixed var)
  {
    return !zero_type(var) && var!=this;
  }
}

Nil NIL = Nil();                    // create an instance so we can use it

function defined = NIL->defined;  // just for symetry

 
mapping age = ([ "Toddler":3, 
                 "Unborn":0, 
                 "Phantasm":NIL ]);

foreach( ({ "Toddler", "Unborn", "Phantasm", "Relic" });; string thing) 
{
    write(thing+":");
    if(!zero_type(age[thing]))
      write(" Exists");
    if(defined(age[thing]))
      write(" Defined");
    if(age[thing])
      write(" True");
    write("\n");
}

// Toddler: Exists Defined True

// Unborn: Exists Defined

// Phantasm: Exists

// Relic:

 
// age->Toddler exists, because zero_type() is only true if the index is not in

// the mapping. it is defined because it exists an is not NIL. 

// it is true because the value is not 0.

// age->Unborn exists, and is defined, but is false because 0 is false

// age->Phantasm exists, is not defined because it is NIL, 

// it is not true because NIL is false.

// age->Relic does not exist, it is not defined even though it is not NIL

// because it doesn't exist. it is also not true, because it does not exist.


// ----------------------------------------------------

mapping size = ([]);
string filename;
while(filename = Stdio.stdin->gets())
{
  filename -= "\n";
  if(size[filename])                // wrong

    continue;
  if(!zero_type(size[filename]))    // right

    continue
  object stat = file_stat(filename)  
  if(stat)
    size[filename] = stat->size;
  else
    size[filename] = -1; // since sizes can't be negative, this will do.

                         // if -1 is a valid value, use NIL

}

Deleting from a Hash

// users occasionally may get the idea that mapping[index]=0; may remove index

// from mapping. the normal way to remove a index from a mapping is to

// subtract: mapping -= ([ index:0 ]); the following shall demonstrate the

// difference between subtracting a index and assigning 0 to it.


// food_color as per section 5.0

void print_foods()
{
  write("Foods:%{ %s%}\n", indices(food_color));
  write("Values: ");

  foreach(food_color; string food; string color)
  {
    if(color)
      write(color+" ");
    else
      write("(no value) ");
  }
  write("\n");
}

write("Initially:\n");
print_foods();

write("\nWith Banana set to 0\n");
food_color->Banana = 0;
print_foods();

write("\nWith Banana deleted\n");
food_color -= ([ "Banana":"the value is irrelevant" ]);
print_foods();

// Initially:

// Foods: Lemon Banana Apple Carrot

// Values: yellow yellow red orange

//

// With Banana set to 0

// Foods: Banana Lemon Apple Carrot

// Values: (no value) yellow red orange 

//  

// With Banana deleted

// Foods: Lemon Carrot Apple

// Values: yellow orange red


// you can also subtract multiple indices:

food_color -= ([ "Banana":0, "Apple":0, "Cabbage":0 ]);

// note that subtracting a mapping from another creates a new mapping.

// thus any references you have to a mapping will be broken.

// in most cases this is what you want anyways. if it is not, you can also

// remove indices using m_delete();


m_delete(food_color, "Banana");

Traversing a Hash


foreach( mapping; type index; type value)
{
  //do something with index and value

}

// food_color as per 5.0

foreach(food_color; string food; string color)
{
  write("%s is %s.\n", food, color);
}

// Banana is yellow.

// Lemon is yellow.

// Carrot is orange.

// Apple is red.



foreach(sort(indices(food_color));; string food)
{
  write("%s is %s.\n", food, food_color[food]);
}

// Apple is red.

// Banana is yellow.

// Carrot is orange.

// Lemon is yellow.


// since pike does not have any equivalent to each() its problems do not apply


// download the following standalone program
#!/usr/bin/pike
// countfrom - count number of messages from each sender

void main(int argc, array argv)
{
  object file;
  mapping from = ([]);

  if(sizeof(argv)>1)
    file = Stdio.File(argv[1], "r");
  else
    file = Stdio.stdin;

  
  foreach(file; int count; string line)
  {
    array email = array_sscanf(line, "From: %s");
    if(sizeof(email))
      from[email[0]]++;
  }
  write("end\n");

  foreach(sort(indices(from));; string person)
  {
    write("%s: %d\n", person, from[person]);
  }
}

Printing a Hash

// perls problems and solutions do not apply to pike

// here are a few ways to print a mapping:

// food_color as per 5.0


// debugging style

write("%O\n", food_color);
// ([ /* 4 elements */

//   "Apple": "red",

//   "Banana": "yellow",

//   "Carrot": "orange",

//   "Lemon": "yellow"

// ])


// one element at a time:

foreach(food_color; string food; string color)
{
  write("%s is %s\n", food, color);
}
// Lemon is yellow

// Carrot is orange

// Banana is yellow

// Apple is red


// with the help of an array

write("%{%s is %s\n%}", sort((array)food_color));
// Apple is red

// Banana is yellow

// Carrot is orange

// Lemon is yellow

Retrieving from a Hash in Insertion Order

// for this we need to first create an OrderedMapping class.

// work for this is in progress

// @@INCOMPLETE@@

// @@INCOMPLETE@@

Hashes with Multiple Values Per Key


mapping(string:array(string)) ttys = ([]);

object pipe = Stdio.File();
Process.create_process(({ "who" }), ([ "stdout":pipe->pipe() ]));

foreach(pipe->line_iterator();; string line)
{
  array tty=(line/" ")-({ "" });
  if(!ttys[tty[0]])
    ttys[tty[0]] = ({ tty[1] });
  else
    ttys[tty[0]] += ({ tty[1] });
}

foreach(sort(indices(ttys));; string user)
{
  write("%s: %{%s %}\n", user, ttys[user]);
}

foreach(sort(indices(ttys));; string user)
{
  write("%s: %d ttys.\n", user, sizeof(ttys[user]));
  foreach(ttys[user];; string tty)
  {
    object stat = file_stat("/dev/"+tty);
    string user;
    if(stat)
      user = getpwuid(stat->uid)[0];
    else
      user = "(not available)";
    write("\t%s (owned by %s)\n", tty, user);
  }
}

mapping multihash_delete(mapping hash, mixed key, mixed value)
{
  if(arrayp(hash[key]))
    hash[key]-=({ value });
  if(!sizeof(hash[key]))
    m_delete(hash, key);
  return hash;
}

Inverting a Hash

// search for a value in a mapping

mapping lookup;
mixed value;
mixed key = search(lookup, value);

// transposing: (this will break if there are multiple occurances of a value)

mapping lookup;
mapping reverse = mkmapping(values(lookup), indices(lookup));
//----------------------------------------------------------

mapping surname = ([ "Mickey":"Mantle", "Babe":"Ruth" ]);
write("%s\n", search(surname, "Mantle"));
// Mikey


// with a transposed mapping (only worth doing if you'd have to search a lot)

mapping first_name = mkmapping(values(surname), indices(surname));
write("%s\n", first_name->Mantle);
// Mikey

//----------------------------------------------------------

// download the following standalone program
#!/usr/bin/pike

void main(int argc, array(string) argv)
{
  if(argc < 2)
  {
    write("usage: foodfind food_or_color\n");
  }

  string given = argv[1];
  mapping color = ([ "Apple":"red",
                     "Banana":"yellow",
                     "Lemon":"yellow",
                     "Carrot":"orange"
                  ]);
  if(color[given])
    write("%s is a food with color %s\n", given, color[given]);
    
  string food = search(color, given);
  if(food)
    write("%s is a food with color %s\n", food, given);

// search will only find one value, 
// but it can be given a place where it should start searching

  array foods = ({ search(color, given) });
  food = 0;
  while(food = search(color, given, foods[-1]))
  { 
    foods += ({ food });
  }
  write("%{%s %}were %s foods.\n", foods, given);

}

//----------------------------------------------------------


// food_color as per 5.0

mapping foods_with_color = ([]);
foreach(food_color; string food; string color)
{
  if(!foods_with_color[color])
    foods_with_color[color] = ({ food });
  else
    foods_with_color[color] += ({ food });
}

write("%{%s %}were yellow foods.\n", foods_with_color->yellow);

Sorting a Hash

mapping hash;
foreach(sort(indices(hash));; mixed key)
{
  mixed value = hash[key];
  // do something with key, value

}

foreach(sort(indices(food_color));; string food)
{
  write("%s is %s.\n", food, food_color[food]);
}

array foods = indices(food_color);
sort(sizeof(values(food_color)[*]), foods);

foreach(foods;; string food)
{
  write("%s is %s.\n", food, food_color[food]);
}

Merging Hashes

// the natural way to merge two mappings is to use + or |

// for mappings both operations are equivalent.

mapping A, B;
mapping merged = A + B;
mapping merged = A | B;

// if an index is in both mappings, the value will be taken from the second

// one. 


mapping drink_color = ([ "Milk":"white",
                         "Tomato juice":"red" ]);

mapping ingested_color = drink_color + food_color;

//  Result: ([ /* 6 elements */

//             "Apple": "red",

//             "Banana": "yellow",

//             "Carrot": "orange",

//             "Lemon": "yellow",

//             "Milk": "white",

//             "Tomato juice": "red"

//           ])

Finding Common or Different Keys in Two Hashes


// create a mapping where indices are in both A and B

mapping both = A & B;

// in A or B, but not in both

mapping one = A ^ B;

// in A, but not in B

mapping exA = A - B;

mapping citrus_color = ([ "Lemon":"yellow",
                          "Orange":"orange",
                          "Lime":"green" 
                       ]);

array non_citrus = indices(food_color - citrus_color);

Hashing References

// this problem does not apply to pike

// any value may be used as an index, if you get an object reference from

// anywhere, if will work, if the index in the mapping is actually the same

// object.

// however note that the same value is not the same reference:

array a = ({ 1,2 });
array b = ({ 1,2 });
mapping m = ([ a:"a" ]);
m[b];  
// Result: 0 (b will not be found.)

m[b]="b";
m;
// Result: ([ ({ 1, 2 }): "a",

//            ({ 1, 2 }): "b"

//           ])

// this looks as if the mapping has the same index twice

// but since they are references this is not the case.

Presizing a Hash

// this problem does not apply to pike

// pike uses a smart preallocation algorythm that will avoid the need to

// allocate memory everytime an element is added

Finding the Most Common Anything

mapping count = ([]);
foreach(ARRAY;; mixed element)
{
    count[element]++;
}

Representing Relationships Between Data

mapping father = ([ "Cain":"Adam",
                    "Abel":"Adam",
                    "Seth":"Adam",
                    "Enoch":"Cain",
                    "Irad":"Enoch",
                    "Mehujael":"Irad",
                    "Methusael":"Mehujael",
                    "Lamech":"Methusael",
                    "Jabal":"Lamech",
                    "Jubal":"Lamech",
                    "Tubalcain":"Lamech",
                    "Enos":"Seth" 
                 ]);

foreach(Stdio.stdin;; string name)
{
  do
  {
    write("%s ", name);
  } while(name = father[name]);
  write("\n");
}

mapping children = ([]);
foreach(father; string k; string v)
{
  if(!children[v])
    children[v] = ({ k });
  else
    children[v] += ({ k });
}

foreach(Stdio.stdin;; string name)
{
  write("%s begat %s.\n", name, (children[name]||({ "nobody" }))*", ");
}

//-----------------------------------------------------


mapping includes = ([]);
foreach(files, string file)
{
  string F = Stdio.read_file(file);
  if(!F)
    werror("Couldn't read %s; skipping.\n", file);

  foreach(F/"\n";; string line)
  {
    array included = array_sscanf(line, "%*[ \t]#include%*[ \t]%*[<\"]%s%*[>\"]%*[ \t]");
    if(sizeof(included))
    {
      if(!includes[included[0]])
        includes[included[0]] = ({ file });
      else
        includes[included[0]] += ({ file });
    }
  }
}
//-----------------------------------------------------


array uniq = `|( @values(includes) );
array include_free = sort( uniq - indices(includes) );

// values(includes) is an array of arrays.

// @ splices values(includes) as arguments into `|()

// `|() is a function that represents the | operator: a|b|c is `|(a,b,c)

// | on arrays creates a new array with elements in a or b

// the resulting array is unique as long as each array only has unique elements

// from the uniq array we remove all those that are used as indices in the

// includes mapping.

// at last we sort the remaining list.

Program: dutree

// download the following standalone program
#!/usr/bin/pike
// dutree - print sorted indented rendition of du output
// as a slight deviation from the perl version, the sizes are still in one
// column, to make it more readable.

array input(array(string) args)
{
  mapping Dirsize=([]);
  mapping Kids=([]);

  object pipe = Stdio.File();
  Process.create_process(({ "du" })+args, ([ "stdout":pipe->pipe() ]));

  string name;
  foreach(pipe->line_iterator();; string line)
  {
    int size;
    [size, name] = array_sscanf(line, "%d%*[ \t]%s");
    Dirsize[name] = size;
    array path = name/"/";
    string parent = path[..sizeof(path)-2]*"/";
    if(!Kids[parent])
      Kids[parent] = ({ name });
    else
      Kids[parent] += ({ name });
  }
  return ({ name, Dirsize, Kids });
}

void getdots(string root, mapping Dirsize, mapping Kids)
{
  int size, cursize;
  size = cursize = Dirsize[root];
  if(Kids[root])
  {
    foreach(Kids[root];; string kid)
    {
      cursize -= Dirsize[kid]; 
      getdots(kid, Dirsize, Kids);
    }
  }
  else
    Kids[root] = ({});

  if(size != cursize)
  {
    string dot = root+"/.";    
    Dirsize[dot] = cursize;
    Kids[root] += ({ dot });
  }
}

void output(string root, mapping Dirsize, mapping Kids, 
            int width, void|string prefix)
{
  if(!prefix)
    prefix="";
  string path = (root/"/")[-1];
  int size = Dirsize[root];
  write("%*d %s%s\n", width, size, prefix, path);
  prefix += "|" + " "*(sizeof(path)-1);
  if(Kids[root])
  {
    array kids = Kids[root];
    // get the dirsize for each kid and sort by that
    sort(Dirsize[kids[*]], kids); 
    
    // make the output for each kid
    output(kids[*], Dirsize, Kids, width, prefix);
  }
}

void main(int argc, array(string) argv)
{
  mapping Dirsize;
  mapping Kids;
  string topdir;

  [ topdir, Dirsize, Kids ] = input(argv[1..]);
  getdots(topdir, Dirsize, Kids);
  output(topdir, Dirsize, Kids, sizeof((string)sort(values(Dirsize))[-1]));
}