1. Strings

Introduction

// in pike only double quotes are used for strings

// they are not interpolated.

// single quotes are used for chars (the integer value of a character)

// see chapter 1.4

//-----------------------------

string str;                     // declare a variable of type string

str = "\n";                     // a "newline" character

str = "Jon \"Maddog\" Orwant";  // literal double quotes

//-----------------------------

str =
#"This is a multiline string
terminated by a double-quote like any other string";
//-----------------------------

Accessing Substrings

// accessing part of a string

//------------------------------

string str, value;
int offset, count;
// pike ranges include both end points, so the count characters starting
// at offset are offset .. offset+count-1
value = str[offset..offset+count-1];
// everything from offset to the end of the string
value = str[offset..];

string newstring, newtail;
// replace the count characters starting at offset with newstring
str = str[..offset-1]+newstring+str[offset+count..];
// replace everything from offset onwards with newtail
str = str[..offset-1]+newtail;

//------------------------------

// get a 5-byte string, skip 3, then grab 2 8-byte strings, then the rest

string leading, s1, s2, trailing;
[leading, s1, s2, trailing] = array_sscanf(str, "%5s%*3s%8s%8s%s");

// split at five byte boundaries

array(string) fivers = str/5;

// chop string into individual characters

array(string) chars = str/"";

//------------------------------


str = "This is what you have"; 

string first, start, rest, last, end, piece;
int t = str[0];
// 84

first = str[0..0];                     
// "T"          

start = str[5..5+1]; 
// "is"

rest  = str[13..];   
// "you have"

last  = str[sizeof(str)-1..sizeof(str)-1];
// "e"

end   = str[sizeof(str)-4..]; 
// "have"                 

piece = str[sizeof(str)-8..sizeof(str)-8+2];
// "you"

               
str = "This is what you have";               
str = replace(str, ([ " is ":" wasn't " ]) );
// "This wasn't what you have"

str = str[..sizeof(str)-13]+"ondrous";       
// "This wasn't wondrous"

str = str[1..]; 
// "his wasn't wondrous"               

str = str[..sizeof(str)-11];
// "his wasn'"            

str = "This is what you have";
str = replace(str[..4], ([ "is":"at" ]) )+str[5..];
// "That is what you have"    

str = "make a hat";
// "make a hat"          

[str[0], str[-1]] = ({ str[-1], str[0] });
// "take a ham"


string a, b, c;
a = "To be or not to be";    
b = a[6..11];      
// "or not"                

b = a[6..7]; c=a[3..4];            
write("%s\n%s\n", b, c);
/*
or                  
be
*/
//------------------------------------------------


// build an unpack-style template such as "A7 A6 A6 A6 A4 A*" from a list
// of column positions (counted from 1): one "A<width>" field per position,
// where width is the distance from the previous position, followed by a
// final "A*" for the rest of the line.
string cut2fmt(int ... positions)
{
  array(string) fields = ({});
  int previous = 1;
  foreach(positions, int column)
  {
    fields += ({ "A" + (column - previous) });
    previous = column;
  }
  fields += ({ "A*" });
  return fields * " ";
}

string fmt = cut2fmt(8, 14, 20, 26, 30);
write("%s\n", fmt);
//A7 A6 A6 A6 A4 A*         

Establishing a Default Value

// set a default, ie, only set the value if no other value is set.

//-----------------------------

// use b if b is true, else c

a = b || c;

// set x to y unless x is already true

if(!x)
  x = y;

// use b if b is defined, else c

// an undefined variable would be a compile time error so this

// does not really apply. 


// return b if b is defined (was supplied by the caller), else c

// b is optional: when the caller supplied it (even as 0) it is returned,
// otherwise the fallback c is returned.
int foo(int c, int|void b)
{
  // zero_type() is true only for an argument that was left out
  if(!zero_type(b))
    return b;
  return c;
}

foo = bar || "DEFAULT VALUE";
argv = argv[1..];              // remove program, as that is always set.

// and see if anything is left...
// indexing an empty array is a runtime error in pike, so the size has to
// be checked before argv[0] is looked at
dir = (sizeof(argv) && argv[0]) || "/tmp";

dir = sizeof(argv) ? argv[0] : "/tmp";
count[shell||"/bin/sh"]++;     

user = getenv("USER") || getenv("LOGNAME") || getpwuid(getuid())[0] ||
"Unknown uid number "+getuid();

if(!starting_point)
  starting_point = "Greenwich";

if(!sizeof(a))                  
  a = b;              // copy only if empty              

a = (sizeof(b)?b:c);  // assign b if nonempty, else c

Exchanging Values Without Using Temporary Variables

[var1, var2] = ({ var2, var1 });  // gee, i love this example.        

                                  // it didn't even occur to me before

                                  // :-)

temp = a;
a    = b;             
b    = temp;

a = "alpha";
b = "omega";
[a, b] = ({ b, a });

[alpha, beta, production] = "January March August"/" ";
[alpha, beta, production] = ({ beta, production, alpha });

Converting Between ASCII Characters and Values

// print the ascii value of a char, or the char from its ascii value

int i;                          // declare a variable of type int             

i = 'a';                        // the ascii value of "a"

i = '\n';                       // the ascii value of a "newline"

//-----------------------------

string char = "foo";
int num = char[0];   // gets the ascii value from the first char (that's

                     // what ord() in perl does)

char = String.int2char(num);

char = sprintf("%c",num);   // the same as String.int2char(num) :-)

write("Number %d is character %[0]c\n", num);

// Number 102 is character f

string str;
array(int) arr;
arr = (array)str;
str = (string)arr;
int ascii_value = 'e';                      // now 101

string character = String.int2char(101);    // now "e"                  


write("Number %d is character %[0]c\n", 101);


array(int) ascii_character_numbers = (array(int))"sample";
write("%s\n", (array(string))ascii_character_numbers*" ");    

string word = (string)ascii_character_numbers;
string word = (string)({ 115, 97, 109, 112, 108, 101 });  // same

write(word+"\n");
// sample           


string hal ="HAL";
array(int) ascii = (array)hal;
array(int) ibm = ascii[*]+1;       // add 1 to each element in the array.

array(int) ibm = map(ascii, `+, 1); // same: apply the function +, with the
                                    // argument 1, to each element in the
                                    // array.

// ibm is an array of character values; it has to be cast back to a string
// before it can be concatenated with "\n"
write((string)ibm+"\n");           // prints "IBM"

Processing a String One Character at a Time

string hello = "Hello world!";
array(string) chars = hello/"";     // array of characters as strings


foreach(chars;; string char)        // this also matches newlines

  ;  // do stuff with char

//-----------------------------

string data = "an apple a day";
array(string) chars = data/"";
mapping(string:int) seen = ([]);

foreach(chars ;; string char)
  seen[char]++; 

write("unique chars are: %s\n", sort(indices(seen))*"");
// unique chars are:  adelnpy

//-----------------------------

string data = "an apple a day";
string result = sort(indices(mkmapping(data/"", allocate(sizeof(data))))*"";

write("unique chars are: %s\n", result);
// unique chars are:  adelnpy

//-----------------------------

string data = "an apple a day";
int sum;

foreach(data ;; int char)
  sum += char;

write("sum is %d\n", sum);
// sum is 1248

//-----------------------------

string data = "an apple a day";
int sum=`+(@(array)data);  

write("sum is %d\n", sum);
// sum is 1248

//-----------------------------

// download the following standalone program
#!/usr/bin/pike
// chapter 1.5
// print a 16-bit checksum of a file: the sum of all character values,
// modulo 2^16-1.
// usage: checksum file
void main(int argc, array(string) argv)
{
  if(argc < 2)
  {
    werror("usage: %s file\n", argv[0]);
    exit(1);
  }

  string data = Stdio.read_file(argv[1]);
  if(!data)
  {
    // read_file() gives 0 instead of a string when there is nothing to read
    werror("%s: cannot read %s\n", argv[0], argv[1]);
    exit(1);
  }

  int checksum;

  // iterating over a string yields the integer value of each character
  foreach(data ;; int char)
    checksum += char;

  checksum %= pow(2,16)-1;
  write("%d\n", checksum);
}

//-----------------------------

// alternate version

// download the following standalone program
#!/usr/bin/pike
// chapter 1.5
// same checksum as the loop version, computed by splicing all character
// values into a single call of the + function.
void main(int argc, array(string) argv)
{
  array(int) values = (array)Stdio.read_file(argv[1]);
  int modulus = (1<<16)-1;
  write("%d\n", `+(@values) % modulus);
}

//-----------------------------

// download the following standalone program
#!/usr/bin/pike
// chapter 1.5
// slowcat - emulate a   s l o w  line printer
// usage: slowcat [-DELAY] [files ...]
// print the given files one character at a time, pausing 0.005*DELAY
// seconds after each character. DELAY defaults to 1 and may be given as
// a leading -DELAY argument.
void main(int argc, array argv)
{
  array(string) files = argv[1..];
  int delay = 1;

  // only inspect the first argument if there is one (and it is not empty);
  // indexing past the end of an array or string is a runtime error
  if(sizeof(files) && sizeof(files[0]) && files[0][0] == '-')
  {
    delay = (int)files[0][1..];
    files = files[1..];
  }

  foreach(files, string file)
  {
    string data = Stdio.read_file(file);
    if(!data)
    {
      // nothing could be read; report it and carry on with the next file
      werror("slowcat: cannot read %s\n", file);
      continue;
    }
    foreach(data/"", string char)
    {
      write(char);
      sleep(0.005*delay);
    }
  }
}

Reversing a String by Word or Character

// #1.6 (reverse a string by char/word)

// by Olivier Girondel


string s = "This is  a string";
// Result: "This is  a string"


reverse(s);
// Result: "gnirts a  si sihT"


reverse(s/" ") * " ";               // preserve whitespace

// Result: "string a  is This"


(reverse(s/" ")-({ "" })) * " ";    // collapse whitespace

// Result: "string a is This"

//-----------------------------

string word = "reviver";
int is_palindrome = word==reverse(word);
//-----------------------------

// download the following standalone program
#!/usr/bin/pike
// chapter 1.6
// print every line of the given file that reads the same in both
// directions and is longer than 5 characters.
void main(int argc, array(string) argv)
{
  array(string) lines = Stdio.read_file(argv[1])/"\n";
  foreach(lines, string candidate)
  {
    if(sizeof(candidate)<=5)
      continue;
    if(candidate==reverse(candidate))
      write("%s\n", candidate);
  }
}

Expanding and Compressing Tabs

string s = "This         is                a    \n   string";

string notabs=String.expand_tabs(s);
// Result: "This     is                a    \n   string"


string notabs=String.expand_tabs(s, 4);
// Result: "This     is            a    \n   string"


string notabs=String.expand_tabs(s, 4, "-");
// Result: "This   - is --------   a ---\n   string"

//-----------------------------

string s = "This     is      a     string";
string tabs="";

// compress: walk through the text one 8-column tab stop at a time (the
// division is by a float so that the shorter final piece is kept) and
// replace the spaces at the end of each full stop with a single tab.
foreach(s/8.0 ;; string stop)
{ 
  // number of spaces at the end of this stop
  int spaces=sizeof(String.common_prefix(({ reverse(stop), "        "}))); 
  if(spaces && sizeof(stop)==8)
    tabs+=stop[..7-spaces]+"\t";
  else
    tabs+=stop;   // nothing to compress, or a short final piece whose
                  // trailing spaces do not reach the next tab stop
}
// Result: "This\t is\t a     string"


// expand: every piece except the last one was followed by a tab, so pad
// it with spaces up to the next multiple of 8 columns.
string notabs=""; 
array(string) stops=tabs/"\t";
foreach(stops; int i; string stop)
{ 
  notabs+=stop; 
  if(i<sizeof(stops)-1) 
    notabs+=" "*(8-sizeof(stop)%8); 
}
// Result: "This     is      a     string"

Expanding Variables in User Input

// since variable names in pike do not have a special notation we need to 

// "invent" one for this.

// there are a few ways to solve this problem.

// here is one:


mapping(string:string) vars = ([ "$fruit$":"apple", "$desert$":"pudding" ]);
string template  = "Todays fruit is $fruit$, and for desert we have $desert$";
string menu = replace(template, vars);

// Result: "Todays fruit is apple, and for desert we have pudding"


Controlling Case

string upper, lower, result;
upper = "DON'T SHOUT!";
result = lower_case(upper);
// Result: "don't shout!"

//-----------------------------

lower = "speak up";
result = upper_case(lower);
// Result: "SPEAK UP"

//-----------------------------

result = String.capitalize(lower);
// Result: "Speak up"


//-----------------------------

string text = "thIS is a loNG liNE";
array(string) words = text/" ";       // splits the line into words

words = lower_case(words[*]);         // lower_case each word

words = String.capitalize(words[*]);  // capitalize each word

text = words*" ";                     // join back

// you may do the same in one short line:

text = String.capitalize(lower_case((text/" ")[*])[*])*" ";

// download the following standalone program
#!/usr/bin/pike
// chapter 1.9
// randcap: filter to randomly capitalize 20% of the letters

// copy stdin to stdout one character at a time, passing each character
// through randcap().
void main()
{
  string input;
  // read() returns "" at end of file, and in pike every string (even the
  // empty one) is true, so the length has to be tested explicitly or the
  // loop never ends
  while((input=Stdio.stdin.read(1)) && sizeof(input))
    write(randcap(input));
}

// return char capitalized in 20% of the calls (chosen at random) and
// unchanged in the rest.
string randcap(string char)
{
  return random(100)<20 ? String.capitalize(char) : char;
}

Interpolating Functions and Expressions Within Strings

// since pike does not provide any string interpolation 

// there are no sneaky tricks here.

// a solution could be similar to the one in chapter 1.8

// putting functions into the mapping instead of string values, or use xml and

// callbacks

// TODO: provide an example of using the xml parser here

Indenting Here Documents

// we believe that indenting the string and then removing that indent does not

// actually enhance readability of the code.

// but if you insist the following will remove all whitespace at the beginning

// of each line:

string here=#"your text
              goes here";
    
string there=array_sscanf((here/"\n")[*], "%*[\t ]%s")[*][0]*"\n";

// expanded version:

array tmp=({});
foreach(here/"\n";; string line)
{
  tmp+=array_sscanf(line, "%*[\t ]%s");
}
string there=tmp*"\n";

Reformatting Paragraphs

// pike sprintf() provides a facility for wrapping (column mode):

// sprintf("%-=<int width>s", text);

// download the following standalone program
#!/usr/bin/pike
// chapter 1.12
// wrapdemo - show how wrapping with sprintf works
// wrap a sample text to a fixed number of columns and print it below a
// ruler so the line width can be checked by eye.
void main()
{
  array(string) input = ({ "Folding and splicing is the work of an editor,",
                           "not a mere collection of silicon",
                           "and",
                           "mobile electrons!"});
  int columns = 20;

  // ruler of exactly columns digits: 0123456789 repeated and cut to length
  write(("0123456789"*((columns+9)/10))[..columns-1]+"\n");
  write(wrap(input*" ", columns, "  ", "  ")+"\n");
}

// unlike the perl version here leadtab is relative to nexttab, 
// to get a shorter lead use a negative int value. this allows the default of 0
// to be a lead indent that is the same as nexttab, and it also has the
// advantage of allowing you to change the indent without having to worry about
// the lead getting messed up.
// a negative lead will cut away from the nexttab which will be visible if you
// use something other than spaces
// wrap text into lines, breaking between words.
//   text    - the text to wrap
//   width   - total line width including the indent; when omitted (or 0)
//             the width of the terminal on stdout is used
//   nexttab - indent for every line: a string, or a number of spaces
//   leadtab - additional indent for the first line, relative to nexttab:
//             a string or a positive number of spaces is put in front of
//             the text; a negative number shortens the lead by that many
//             columns
// returns the wrapped text as one string.
string wrap(string text, void|int width, 
            void|string|int nexttab, void|string|int leadtab)
{
  string leadindent="";
  string indent=""; 
  string indent2="";

  if(!width)
  {
    // tcgetattr() gives no mapping when stdout is not a terminal (e.g. a
    // pipe); fall back to the customary 80 columns instead of failing
    mapping attr=Stdio.stdout->tcgetattr();
    width=(attr && attr->columns) || 80;
  }

  if(stringp(nexttab))
  {
    indent=nexttab;
    width-=sizeof(nexttab);  // this will be off if there are chars that have a
                             // different width than 1.
  }
  else if(intp(nexttab))
  {
    indent=" "*nexttab;
    width-=nexttab;
  }

  if(stringp(leadtab))
    leadindent=leadtab;
  else if(intp(leadtab))
  {
    if(leadtab > 0)
      leadindent=" "*leadtab;
    else if(leadtab < 0)
    {
      // cut -leadtab columns off the indent and move the first -leadtab
      // characters of the text into a column of their own
      indent=indent[..(sizeof(indent)+leadtab)-1];
      indent2=text[..-leadtab-1]; 
      text=text[-leadtab..];
    }
  }

  // %^s  repeats the indent on every line produced
  // %=*s column holding the characters moved out by a negative lead
  // %-=*s the text itself, left adjusted and wrapped in column mode
  return sprintf("%^s%=*s%-=*s", indent, sizeof(indent2), indent2, 
                                 width, leadindent+text);
}

//----------------------------------------------

$ ./wrapdemo 
01234567890123456789
    Folding and     
  splicing is the   
  work of an editor,
  not a mere        
  collection of     
  silicon and mobile
  electrons!        

// merge multiple lines into one, then wrap one long line

inherit "wrapdemo.pike";
wrap(replace(text, "\n", " "));

// read stdin and split by paragraph,

// remove \n in paragraphs

// reformat

// add paragraph break

foreach(Stdio.stdin->read()/"\n\n";; string para)
  write(wrap(replace(para, "\n", " "))+"\n\n");

Escaping Characters

// we need to escape the \ for this example, ironic, eh?

array(string) charlist=({ "%", "\\" }); 
string var="some input % text with \\";

// backslash

var=replace(var, charlist, "\\"+charlist[*]);

// double

var=replace(var, charlist, charlist[*]+charlist[*]);

Trimming Blanks from the Ends of a String

string line=" foo\n\t ";
array(string) many=({ " bar\n\t ", " baz\t " });

// remove spaces and tabs

line=String.trim_whites(line);
many=String.trim_whites(many[*]);

//remove spaces, tabs, newlines and carriage returns

line=String.trim_all_whites(line);
many=String.trim_all_whites(many[*]);

Parsing Comma-Separated Data

Soundex Matching

// contributed by martin nilsson


// ask for a name and list every user whose login name, first name or last
// name has the same soundex code as the name entered.
write("Lookup user: ");
string user = String.soundex(Stdio.stdin.gets());
foreach(get_all_users(), array u) 
{
  string firstname="", lastname="";
  // presumably u[0] is the login name and u[4] the full-name (gecos) field
  // of the passwd entry -- verify against get_all_users().
  // the names stay "" when the field does not look like "first last,"
  sscanf(u[4], "%s %s,", firstname, lastname);
  if( user==String.soundex(u[0]) ||
      user==String.soundex(firstname) ||
      user==String.soundex(lastname) )
    write("%s: %s %s\n", u[0], firstname, lastname);
}

Program: fixstyle

Program: psgrep