1. Strings

Introduction

#-----------------------------
$string = '\n';                     # two characters, \ and an n
$string = 'Jon \'Maddog\' Orwant';  # literal single quotes
$string = 'Jon "Maddog" Orwant';    # literal double quotes
#-----------------------------
$string = "\n";                     # a "newline" character
$string = "Jon \"Maddog\" Orwant";  # literal double quotes
$string = "Jon 'Maddog' Orwant";    # literal single quotes
#-----------------------------
$a = 
"This is a multiline
here document";

$a = <<<EOF
This is a multiline here document
terminated by EOF on a line by itself
EOF;
#-----------------------------

Accessing Substrings

#-----------------------------
$value = substr($string, $offset, $count);
$value = substr($string, $offset);

$string = substr_replace($string, $newstring, $offset, $count);
$string = substr_replace($string, $newtail, $offset);
#-----------------------------
# get a 5-byte string, skip 3, then grab 2 8-byte strings, then the rest
# (each format code is followed by the name of its result key: a, b, c, d;
# array_values() turns the keyed result into a plain list for list())
list($leading, $s1, $s2, $trailing) =
    array_values(unpack("A5a/x3/A8b/A8c/A*d", $data));

# split at five byte boundaries (a trailing group shorter than 5 is not matched)
preg_match_all ("/.{5}/", $data, $f, PREG_PATTERN_ORDER);
$fivers = $f[0];

# chop string into individual characters
$chars  = str_split($string);
#-----------------------------
$string = "This is what you have";
#         +012345678901234567890  Indexing forwards  (left to right)
#          109876543210987654321- Indexing backwards (right to left)
#           note that 0 means 10 or 20, etc. above

$first  = substr($string, 0, 1);  # "T"
$start  = substr($string, 5, 2);  # "is"
$rest   = substr($string, 13);    # "you have"
$last   = substr($string, -1);    # "e"
$end    = substr($string, -4);    # "have"
$piece  = substr($string, -8, 3); # "you"
#-----------------------------
$string = "This is what you have";
print $string;
#This is what you have

$string = substr_replace($string, "wasn't", 5, 2);  # change "is" to "wasn't"
#This wasn't what you have

$string = substr_replace($string, "ondrous", -12);  # "This wasn't wondrous"
#This wasn't wondrous

$string = substr_replace($string, "", 0, 1);        # delete first character
#his wasn't wondrous

$string = substr_replace($string, "", -10);         # delete last 10 characters
#his wasn'
#-----------------------------
# test the pattern against the last 10 characters only
if (preg_match("/pattern/", substr($string, -10))) {
    print "Pattern matches in last 10 characters\n";
}

# substitute "at" for "is", restricted to first five characters:
# rewrite the 5-character prefix, then splice it back over the original prefix
$string = substr_replace($string, preg_replace("/is/", "at", substr($string, 0, 5)), 0, 5);
#-----------------------------
# exchange the first and last letters in a string
$a = "make a hat";
list($a[0], $a[strlen($a)-1]) = Array(substr($a,-1), substr($a,0,1));
print $a;

#-----------------------------
# extract column with unpack
$a = "To be or not to be";
$b = unpack("x6/A6a", $a);  # skip 6, grab 6
print $b['a'];


$b = unpack("x6/A2b/X5/A2c", $a); # forward 6, grab 2; backward 5, grab 2
print $b['b']."\n".$b['c']."\n";

#-----------------------------
/**
 * Build an unpack-style template from a list of column positions.
 *
 * Takes any number of ascending column numbers (counted from 1) and
 * returns one "A<width>" field per position, where the width is the distance
 * from the previous position, followed by a final "A*" for the remainder.
 * Fields are separated by single spaces.
 */
function cut2fmt() {
    $fields   = array();
    $previous = 1;                       // columns are counted from 1
    foreach (func_get_args() as $column) {
        $fields[] = "A" . ($column - $previous);
        $previous = $column;
    }
    $fields[] = "A*";                    // whatever is left after the last column
    return implode(" ", $fields);
}

$fmt = cut2fmt(8, 14, 20, 26, 30);
print "$fmt\n";
#A7 A6 A6 A6 A4 A*
#-----------------------------

Establishing a Default Value

#-----------------------------
# use $b if $b is true, else $c
$a = $b?$b:$c;

# set $x to $y unless $x is already true
$x || $x=$y;
#-----------------------------
# use $b if $b is set, else $c
# (defined() tests named constants; isset() is the test for variables)
$a = isset($b) ? $b : $c;
#-----------------------------
# PHP's || always produces a boolean, so "$foo = $bar || ..." would store
# true/false in $foo; the ternary keeps the actual value.
$foo = $bar ? $bar : "DEFAULT VALUE";
#-----------------------------
$dir = array_shift($_SERVER['argv']);
if (!$dir) {
    $dir = "/tmp";
}
#-----------------------------
$dir = !empty($_SERVER['argv'][0]) ? $_SERVER['argv'][0] : "/tmp";
#-----------------------------
$dir = isset($_SERVER['argv'][0]) ? array_shift($_SERVER['argv']) : "/tmp";
#-----------------------------
$dir = count($_SERVER['argv']) ? $_SERVER['argv'][0] : "/tmp";
#-----------------------------
$count[$shell?$shell:"/bin/sh"]++;
#-----------------------------
# find the user name on Unix systems: try each source in turn and stop at
# the first one that yields a non-empty value
$user = !empty($_ENV['USER']) ? $_ENV['USER'] : null;
if (!$user && !empty($_ENV['LOGNAME'])) {
    $user = $_ENV['LOGNAME'];
}
if (!$user) {
    $user = posix_getlogin();
}
if (!$user) {
    $uid   = posix_getuid();
    $pwent = posix_getpwuid($uid);
    # the password entry is an associative array; the login is under 'name'
    $user  = !empty($pwent['name']) ? $pwent['name'] : "Unknown uid number $uid";
}
#-----------------------------
$starting_point || $starting_point = "Greenwich";
#-----------------------------
count($a) || $a = $b;          # copy only if empty
$a = count($b) ? $b : $c;          # assign $b if nonempty, else $c
#-----------------------------

Exchanging Values Without Using Temporary Variables

#-----------------------------
list($VAR1, $VAR2) = array($VAR2, $VAR1);
#-----------------------------
$temp    = $a;
$a       = $b;
$b       = $temp;
#-----------------------------
$a       = "alpha";
$b       = "omega";
list($a, $b) = array($b, $a);        # the first shall be last -- and versa vice
#-----------------------------
list($alpha, $beta, $production) = Array("January","March","August");
# move beta       to alpha,
# move production to beta,
# move alpha      to production
list($alpha, $beta, $production) = array($beta, $production, $alpha);
#-----------------------------

Converting Between ASCII Characters and Values

#-----------------------------
$num  = ord($char);
$char = chr($num);
#-----------------------------
$char = sprintf("%c", $num);                # slower than chr($num)
printf("Number %d is character %c\n", $num, $num);
#-----------------------------
# byte values of $string as an array
$ASCII = unpack("C*", $string);
# hand the values back to pack() as separate arguments; this avoids building
# and eval()ing PHP source text
$STRING = call_user_func_array('pack', array_merge(array("C*"), $ASCII));
#-----------------------------
$ascii_value = ord("e");    # now 101
$character   = chr(101);    # now "e"
#-----------------------------
printf("Number %d is character %c\n", 101, 101);
#-----------------------------
$ascii_character_numbers = unpack("C*", "sample");
print implode(" ", $ascii_character_numbers)."\n";
# 115 97 109 112 108 101

# pass the byte values to pack() as individual arguments
$word = call_user_func_array('pack', array_merge(array("C*"), $ascii_character_numbers));
$word = pack("C*", 115, 97, 109, 112, 108, 101);   # same
print "$word\n";
#-----------------------------
$hal = "HAL";
$ascii = unpack("C*", $hal);
# write through the key: a plain "foreach ... as $val" only changes a copy
foreach ($ascii as $index => $val) {
    $ascii[$index] = $val + 1;   # add one to each ASCII value
}
$ibm = call_user_func_array('pack', array_merge(array("C*"), $ascii));
print "$ibm\n";             # prints "IBM"
#-----------------------------

Processing a String One Character at a Time

#-----------------------------
// using perl regexp
$array = preg_split('//', $string ,-1, PREG_SPLIT_NO_EMPTY);
// using PHP function: $array = str_split($string);

// Cannot use unpack with a format of 'U*' in PHP.
#-----------------------------
// visit the string by re-running the match from each successive offset
for ($offset = 0; preg_match('/(.)/', $string, $matches, 0, $offset) > 0; $offset++) {
    // $matches[1] has character, ord($matches[1]) its number
}
#-----------------------------
# collect each distinct character as an array key, then print the keys sorted
$seen = array();
$string = "an apple a day";
foreach (str_split($string) as $char) {
    $seen[$char] = 1;
}
$keys = array_keys($seen);
sort($keys);
print "unique chars are: " . implode('', $keys) . "\n";
# unique chars are:  adelnpy
#-----------------------------
# same result as the str_split() version, but stepping through the string
# with a regular expression match at each offset
$seen = array();
$string = "an apple a day";
for ($offset = 0; preg_match('/(.)/', $string, $matches, 0, $offset) > 0; $offset++) {
    $seen[$matches[1]] = 1;
}
$keys = array_keys($seen);
sort($keys);
print "unique chars are: " . implode('', $keys) . "\n";
# unique chars are:  adelnpy
#-----------------------------
$sum = 0;
foreach (unpack("C*", $string) as $byteval) {
    $sum += $byteval;
}
print "sum is $sum\n";
// prints "1248" if $string was "an apple a day"
#-----------------------------
$sum = array_sum(unpack("C*", $string));
#-----------------------------

// sum - compute 16-bit checksum of all input files
$handle = isset($argv[1]) ? @fopen($argv[1], 'r') : false;
if ($handle === false) {
    // without a readable file there is nothing to sum
    fwrite(STDERR, "sum: cannot open input file\n");
    exit(1);
}
$checksum = 0;
// fgets() returns false at end of file, so test its result directly rather
// than feeding a failed read to unpack()
while (($line = fgets($handle)) !== false) {
    $checksum += array_sum(unpack("C*", $line));
}
fclose($handle);
$checksum %= pow(2,16) - 1;
print "$checksum\n";

# download the following standalone program
#!/usr/bin/php
<?php
// slowcat - emulate a   s l o w   line printer

// usage: php slowcat.php [-DELAY] file

$delay = 1;
// A first argument of the form -NUMBER sets the delay factor; it is then
// removed from $argv so that the file name is always found in $argv[1].
if (isset($argv[1]) && preg_match('/^-([.\d]+)/', $argv[1], $matches)) {
    $delay = (float) $matches[1];
    array_shift($argv);
}
$handle = isset($argv[1]) ? @fopen($argv[1], 'r') : false;
if ($handle === false) {
    fwrite(STDERR, "usage: php slowcat.php [-DELAY] file\n");
    exit(1);
}
// fgets() returns false at end of file; stop there instead of splitting it
while (($line = fgets($handle)) !== false) {
    foreach (str_split($line) as $char) {
        print $char;
        usleep((int) (5000 * $delay));   // 5 ms per character, scaled by the delay
    }
}
fclose($handle);
#-----------------------------

Reversing a String by Word or Character

#-----------------------------
$revchars = strrev($string);
#-----------------------------
$revwords = implode(" ", array_reverse(explode(" ", $string)));
#-----------------------------
// reverse word order
$string = 'Yoda said, "can you see this?"';
$allwords    = explode(" ", $string);
$revwords    = implode(" ", array_reverse($allwords));
print $revwords . "\n";
# this?" see you "can said, Yoda
#-----------------------------
$revwords = implode(" ", array_reverse(explode(" ", $string)));
#-----------------------------
$revwords = implode(" ", array_reverse(preg_split("/(\s+)/", $string)));
#-----------------------------
$word = "reviver";
$is_palindrome = ($word === strrev($word));
#-----------------------------
// quite a one-liner since "php" does not have a -n switch
% php -r 'while (!feof(STDIN)) { $word = rtrim(fgets(STDIN)); if ($word == strrev($word) && strlen($word) > 5) print $word; }' < /usr/dict/words
#-----------------------------

Expanding and Compressing Tabs

Expanding Variables in User Input

#-----------------------------
# Replace every $name in $text with the value of the variable called name.
# The /e (evaluate) pattern modifier no longer exists in PHP 7+, so the
# substitution is done by a callback that looks the name up in a snapshot of
# the variables currently in scope.
$vars = get_defined_vars();
$text = preg_replace_callback('/\$(\w+)/', function ($m) use ($vars) {
    return isset($vars[$m[1]]) ? $vars[$m[1]] : '';
}, $text);
#-----------------------------
list($rows, $cols) = Array(24, 80);
$text = 'I am $rows high and $cols long';
$vars = get_defined_vars();
$text = preg_replace_callback('/\$(\w+)/', function ($m) use ($vars) {
    return isset($vars[$m[1]]) ? $vars[$m[1]] : '';
}, $text);
print $text;
# I am 24 high and 80 long

#-----------------------------
$text = "I am 17 years old";
# double every number found in the text
$text = preg_replace_callback('/(\d+)/', function ($m) {
    return (string) (2 * $m[1]);
}, $text);
# I am 34 years old
#-----------------------------
# expand variables in $text, but put an error message in
# if the variable isn't defined
$vars = get_defined_vars();
$text = preg_replace_callback('/\$(\w+)/', function ($m) use ($vars) {
    return isset($vars[$m[1]]) ? $vars[$m[1]] : '[NO VARIABLE: $' . $m[1] . ']';
}, $text);
#-----------------------------

// As PHP arrays are used as hashes too, separation of section 4
// and section 5 makes little sense.

Controlling Case

#-----------------------------
$big = strtoupper($little);
$little = strtolower($big);
// PHP does not have the \L and \U string escapes.
#-----------------------------
$big = ucfirst($little);
$little = strtolower(substr($big, 0, 1)) . substr($big, 1);
#-----------------------------
$beast   = "dromedary";
// capitalize various parts of $beast
// (PHP does not have the \L and \U string escapes, so the case functions
// are applied explicitly)
$capit   = ucfirst($beast);              // Dromedary
$capall  = strtoupper($beast);           // DROMEDARY
$caprest = lcfirst(strtoupper($beast));  // dROMEDARY
#-----------------------------
// titlecase each word's first character, lowercase the rest
$text = "thIS is a loNG liNE";
$text = ucwords(strtolower($text));
print $text;
# This Is A Long Line
#-----------------------------
if (strtoupper($a) == strtoupper($b)) { // or strcasecmp($a, $b) == 0
    print "a and b are the same\n";
}
#-----------------------------
# download the following standalone program
#!/usr/bin/php
<?php
// randcap: filter to randomly capitalize 20% of the letters

// lcfirst() is built into PHP since 5.3. Only define the fallback when it is
// missing, because declaring a function that already exists is a fatal error.
if (!function_exists('lcfirst')) {
  function lcfirst($word) {
    return strtolower(substr($word, 0, 1)) . substr($word, 1);
  }
}

/**
 * Return $word with its first character upper-cased in 20 out of 100 cases
 * and lower-cased otherwise.
 */
function randcase($word) {
  // mt_rand(0, 99) has exactly 100 possible results, 20 of them below 20
  return mt_rand(0, 99) < 20 ? ucfirst($word) : lcfirst($word);
}

// The /e pattern modifier no longer exists in PHP 7+, so each word character
// is passed to randcase() through a callback instead.
while (($line = fgets(STDIN)) !== false) {
  print preg_replace_callback('/(\w)/', function ($m) {
    return randcase($m[1]);
  }, $line);
}

// % php randcap.php < genesis | head -9
#-----------------------------

Interpolating Functions and Expressions Within Strings

#-----------------------------
echo $var1 . func() . $var2; // scalar only
#-----------------------------
// PHP can only handle variable expression without operators
$answer = "STRING ${[ VAR EXPR ]} MORE STRING";
#-----------------------------
$phrase = "I have " . ($n + 1) . " guanacos.";
// PHP cannot handle the complex expression: ${\($n + 1)}
#-----------------------------
// Rest of Discussion is not applicable to PHP
#-----------------------------
// Interpolating functions not available in PHP
#-----------------------------

Indenting Here Documents

# @@INCOMPLETE@@
# @@INCOMPLETE@@

Reformatting Paragraphs

#-----------------------------
$output = wordwrap($str, $width, $break, $cut);
#-----------------------------
# download the following standalone program
#!/usr/bin/php
<?php
// wrapdemo - show how wordwrap works

$input = "Folding and splicing is the work of an editor, " .
         "not a mere collection of silicon " .
         "and " .
         "mobile electrons!";
$columns = 20;
print str_repeat("0123456789", 2) . "\n";
print wordwrap('    ' . $input, $columns - 3, "\n  ") . "\n";
#-----------------------------
// merge multiple lines into one, then wrap one long line
print wordwrap(str_replace("\n", " ", file_get_contents('php://stdin')));
#-----------------------------
while(!feof(STDIN)) {
    print wordwrap(str_replace("\n", " ", stream_get_line(STDIN, 0, "\n\n")));
    print "\n\n";
}
#-----------------------------

Escaping Characters

#-----------------------------
//backslash
$var = preg_replace('/([CHARLIST])/', '\\\$1', $var);
// double
$var = preg_replace('/([CHARLIST])/', '$1$1', $var);
#-----------------------------
$var = preg_replace('/%/', '%%', $var);
#-----------------------------
$string = 'Mom said, "Don\'t do that."';
$string = preg_replace('/([\'"])/', '\\\$1', $string);
// in PHP you can also use the addslashes() function
#-----------------------------
$string = 'Mom said, "Don\'t do that."';
$string = preg_replace('/([\'"])/', '$1$1', $string);
#-----------------------------
$string = preg_replace('/([^A-Z])/', '\\\$1', $string);
#-----------------------------
// PHP does not have the \Q and \E string metacharacters
$string = "this is\\ a\\ test\\!";
// PHP's quotemeta() function is not the same as perl's quotemeta() function
$string = preg_replace('/(\W)/', '\\\$1', 'is a test!');
#-----------------------------

Trimming Blanks from the Ends of a String

#-----------------------------
$string = trim($string);
#-----------------------------
// print what's typed, but surrounded by > < symbols
while (!feof(STDIN)) {
    print ">" . substr(fgets(STDIN), 0, -1) . "<\n";
}
#-----------------------------
$string = preg_replace('/\s+/', ' ', $string); // finally, collapse middle
#-----------------------------
$string = trim($string);
$string = preg_replace('/\s+/', ' ', $string);
#-----------------------------
// 1. trim leading and trailing white space
// 2. collapse internal whitespace to single space each
/**
 * Normalise the whitespace of a string.
 *
 * Leading and trailing whitespace is stripped first; every remaining run of
 * whitespace inside the string is then replaced by one space.
 */
function sub_trim($string) {
    return preg_replace('/\s+/', ' ', trim($string));
}
#-----------------------------

Parsing Comma-Separated Data

# @@INCOMPLETE@@
# @@INCOMPLETE@@

Soundex Matching

#-----------------------------
$code = soundex($string);
#-----------------------------
$phoned_words = metaphone("Schwern");
#-----------------------------
// substitution function for getpwent():
// returns an array of user entries,
// each entry contains the username and the full name
/**
 * Read the file "passwd" in the current directory and return its entries.
 *
 * Lines starting with "#" and lines with fewer than five colon-separated
 * fields are ignored.
 *
 * @return array  first field (user name) => fifth field (full name);
 *                empty when the file cannot be opened
 */
function getpwent() {
    $pwents = array();
    $handle = fopen("passwd", "r");
    if ($handle === false) {
        return $pwents;
    }
    // fgets() returns false at end of file; testing it directly avoids
    // processing a failed read as if it were a line
    while (($line = fgets($handle)) !== false) {
        if (preg_match("/^#/", $line)) continue;
        $cols = explode(":", rtrim($line, "\r\n"));
        if (count($cols) < 5) continue;      // blank or malformed line
        $pwents[$cols[0]] = $cols[4];
    }
    fclose($handle);
    return $pwents;
}

// Ask for a name and list the password entries whose login, first name or
// last name has the same soundex code.
print "Lookup user: ";
$user = rtrim(fgets(STDIN));
if (empty($user)) exit;
$name_code = soundex($user);
$pwents = getpwent();
foreach($pwents as $username => $fullname) {
    // first and last word of the part of the full name before any comma
    if (!preg_match("/(\w+)[^,]*\b(\w+)/", $fullname, $matches)) {
        // the pattern needs two words; without them only the login can match
        $matches = array($fullname, "", "");
    }
    list(, $firstname, $lastname) = $matches;
  
    if ($name_code == soundex($username) ||
        $name_code == soundex($lastname) ||
        $name_code == soundex($firstname))
    {
        printf("%s: %s %s\n", $username, $firstname, $lastname);
    }
}
#-----------------------------

Program: fixstyle

#-----------------------------
# download the following standalone program
#!/usr/bin/php
<?php
$data = <<<DATA
  analysed=> analyzed
  built-in=> builtin
  chastized   => chastised
  commandline => command-line
  de-allocate => deallocate
  dropin  => drop-in
  hardcode=> hard-code
  meta-data   => metadata
  multicharacter  => multi-character
  multiway=> multi-way
  non-empty   => nonempty
  non-profit  => nonprofit
  non-trappable   => nontrappable
  pre-define  => predefine
  preextend   => pre-extend
  re-compiling=> recompiling
  reenter => re-enter
  turnkey => turn-key
DATA;

$scriptName = $argv[0];
// "-v" as first argument switches on verbose mode; array_shift() then removes
// one entry from the front of $argv, so a file name (if given) is in $argv[1]
$verbose = ($argc > 1 && $argv[1] == "-v" && array_shift($argv));

// build the replacement table from the "old => new" lines in $data
$change = array();
foreach (preg_split("/\n/", $data) as $pair) {
  $parts = preg_split("/\s*=>\s*/", trim($pair));
  // skip blank or malformed lines
  if (count($parts) < 2 || !$parts[0] || !$parts[1]) continue;
  list($in, $out) = $parts;
  $change[$in] = $out;
}

if (count($argv) > 1)  {
  // no in-place edit in PHP

  // preserve old files: copy FILE to FILE.orig, read the copy and
  // overwrite FILE with the corrected text

  $source = $argv[1];
  $orig = $source . ".orig";
  if (!copy($source, $orig)) {
    fwrite(STDERR, "$scriptName: cannot back up $source\n");
    exit(1);
  }
  $input = fopen($orig, "r");
  $output = fopen($source, "w");
  if ($input === false || $output === false) {
    fwrite(STDERR, "$scriptName: cannot open $source\n");
    exit(1);
  }
} else {
  // no file given: act as a filter from stdin to stdout
  $source = "-";
  $input = STDIN;
  $output = STDOUT;
  if ($scriptName != "-") {
    trigger_error("$scriptName: Reading from stdin\n", E_USER_WARNING);
  }
}

$ln = 1;
// fgets() returns false at end of file; stop there
while (($line = fgets($input)) !== false) {
  foreach ($change as $in => $out) {
    // the table keys are literal words, so quote them for use in a pattern
    $line = preg_replace("/" . preg_quote($in, "/") . "/", $out, $line, -1, $count);
    if ($count > 0 && $verbose) {
      fwrite(STDERR, "$in => $out at $source line $ln.\n");
    }
  }
  fwrite($output, $line);
  $ln++;
}
#-----------------------------
# download the following standalone program
#!/usr/bin/php
<?php
$data = <<<DATA
  analysed=> analyzed
  built-in=> builtin
  chastized   => chastised
  commandline => command-line
  de-allocate => deallocate
  dropin  => drop-in
  hardcode=> hard-code
  meta-data   => metadata
  multicharacter  => multi-character
  multiway=> multi-way
  non-empty   => nonempty
  non-profit  => nonprofit
  non-trappable   => nontrappable
  pre-define  => predefine
  preextend   => pre-extend
  re-compiling=> recompiling
  reenter => re-enter
  turnkey => turn-key
DATA;

$scriptName = $argv[0];
// "-v" as first argument switches on verbose mode; array_shift() then removes
// one entry from the front of $argv, so a file name (if given) is in $argv[1]
$verbose = ($argc > 1 && $argv[1] == "-v" && array_shift($argv));
if (count($argv) > 1)  {
  // no in-place edit in PHP

  // preserve old files: copy FILE to FILE.orig, read the copy and
  // overwrite FILE with the corrected text

  $orig = $argv[1] . ".orig";
  if (!copy($argv[1], $orig)) {
    fwrite(STDERR, "$scriptName: cannot back up $argv[1]\n");
    exit(1);
  }
  $input = fopen($orig, "r");
  $output = fopen($argv[1], "w");
  if ($input === false || $output === false) {
    fwrite(STDERR, "$scriptName: cannot open $argv[1]\n");
    exit(1);
  }
} else {
  // no file given: act as a filter from stdin to stdout
  $input = STDIN;
  $output = STDOUT;
  if ($scriptName != "-") {
    trigger_error("$scriptName: Reading from stdin\n", E_USER_WARNING);
  }
}

// build the replacement table from the "old => new" lines in $data
$config = array();
foreach (preg_split("/\n/", $data) as $pair) {
  $parts = preg_split("/\s*=>\s*/", trim($pair));
  // skip blank or malformed lines
  if (count($parts) < 2 || !$parts[0] || !$parts[1]) continue;
  list($in, $out) = $parts;
  $config[$in] = $out;
}

// fgets() returns false at end of file; stop there
while (($line = fgets($input)) !== false) {
  $i = 0;
  // /s lets (.*) include the line's final newline, so it is written back out
  preg_match("/^(\s*)(.*)/s", $line, $matches); // emit leading whitespace

  fwrite($output, $matches[1]);
  // With PREG_SPLIT_DELIM_CAPTURE the whitespace runs are returned between
  // the words (odd positions) and are copied through unchanged; only the
  // words (even positions) are looked up in the table.
  foreach (preg_split("/(\s+)/", $matches[2], -1, PREG_SPLIT_DELIM_CAPTURE) as $token) { // preserve trailing whitespace

    fwrite($output, ($i++ & 1) ? $token : (array_key_exists($token, $config) ? $config[$token] : $token));
  }
}
#-----------------------------
// very fast, but whitespace collapse
// fgets() returns false at end of file; stopping there avoids writing an
// extra line for the failed read
while (($line = fgets($input)) !== false) {
  preg_match("/^(\s*)(.*)/", $line, $matches); // emit leading whitespace
  fwrite($output, $matches[1]);
  // the separators are not captured here, so each run of whitespace between
  // words is replaced by the single space appended below
  foreach (preg_split("/(\s+)/", $matches[2]) as $token) {
    fwrite($output, (array_key_exists($token, $config) ? $config[$token] : $token) . " ");
  }
  fwrite($output, "\n");
}
#-----------------------------

Program: psgrep