#-----------------------------
match( $string, $pattern );
subst( $string, $pattern, $replacement );
#-----------------------------
$meadow =~ m/sheep/; # True if $meadow contains "sheep"
$meadow !~ m/sheep/; # True if $meadow doesn't contain "sheep"
$meadow =~ s/old/new/; # Replace "old" with "new" in $meadow
#-----------------------------
# Fine bovines demand fine toreadors.
# Muskoxen are a polar ovibovine species.
# Grooviness went out of fashion decades ago.
#-----------------------------
# Ovines are found typically in oviaries.
#-----------------------------
if ($meadow =~ /\bovines?\b/i) { print "Here be sheep!" }
#-----------------------------
$string = "good food";
$string =~ s/o*/e/;
#-----------------------------
# good food
#
# geod food
#
# geed food
#
# geed feed
#
# ged food
#
# ged fed
#
# egood food
#-----------------------------
#% echo ababacaca | perl -ne 'print "$&\n" if /(a|ba|b)+(a|ac)+/'
#ababa
#-----------------------------
#% echo ababacaca |
# awk 'match($0,/(a|ba|b)+(a|ac)+/) { print substr($0, RSTART, RLENGTH) }'
#ababacaca
#-----------------------------
while (m/(\d+)/g) {
print "Found number $1\n";
}
#-----------------------------
@numbers = m/(\d+)/g;
#-----------------------------
$digits = "123456789";
@nonlap = $digits =~ /(\d\d\d)/g;
@yeslap = $digits =~ /(?=(\d\d\d))/g;
print "Non-overlapping: @nonlap\n";
print "Overlapping: @yeslap\n";
# Non-overlapping: 123 456 789
# Overlapping: 123 234 345 456 567 678 789
#-----------------------------
$string = "And little lambs eat ivy";
$string =~ /l[^s]*s/;
print "($`) ($&) ($')\n";
# (And ) (little lambs) ( eat ivy)
#-----------------------------
|
#-----------------------------
$dst = $src;
$dst =~ s/this/that/;
#-----------------------------
($dst = $src) =~ s/this/that/;
#-----------------------------
# strip to basename
($progname = $0) =~ s!^.*/!!;
# Make All Words Title-Cased
($capword = $word) =~ s/(\w+)/\u\L$1/g;
# /usr/man/man3/foo.1 changes to /usr/man/cat3/foo.1
($catpage = $manpage) =~ s/man(?=\d)/cat/;
#-----------------------------
@bindirs = qw( /usr/bin /bin /usr/local/bin );
for (@libdirs = @bindirs) { s/bin/lib/ }
print "@libdirs\n";
# /usr/lib /lib /usr/local/lib
#-----------------------------
($a = $b) =~ s/x/y/g; # copy $b and then change $a
$a = ($b =~ s/x/y/g); # change $b, count goes in $a
#-----------------------------
|
#-----------------------------
if ($var =~ /^[A-Za-z]+$/) {
# it is purely alphabetic
}
#-----------------------------
use locale;
if ($var =~ /^[^\W\d_]+$/) {
print "var is purely alphabetic\n";
}
#-----------------------------
use locale;
use POSIX 'locale_h';
# the following locale string might be different on your system
unless (setlocale(LC_ALL, "fr_CA.ISO8859-1")) {
die "couldn't set locale to French Canadian\n";
}
while (<DATA>) {
chomp;
if (/^[^\W\d_]+$/) {
print "$_: alphabetic\n";
} else {
print "$_: line noise\n";
}
}
#__END__
#silly
#façade
#coöperate
#niño
#Renée
#Molière
#hæmoglobin
#naïve
#tschüß
#random!stuff#here
#-----------------------------
|
#----------------------------- #/\S+/ # as many non-whitespace bytes as possible #/[A-Za-z'-]+/ # as many letters, apostrophes, and hyphens #----------------------------- #/\b([A-Za-z]+)\b/ # usually best #/\s([A-Za-z]+)\s/ # fails at ends or w/ punctuation #----------------------------- |
#----------------------------- # download the following standalone program #!/usr/bin/perl -p # resname - change all "foo.bar.com" style names in the input stream # into "foo.bar.com [204.148.40.9]" (or whatever) instead use Socket; # load inet_addr s{ # ( # capture the hostname in $1 (?: # these parens for grouping only (?! [-_] ) # lookahead for neither underscore nor dash [\w-] + # hostname component \. # and the domain dot ) + # now repeat that whole thing a bunch of times [A-Za-z] # next must be a letter [\w-] + # now trailing domain part ) # end of $1 capture }{ # replace with this: "$1 " . # the original bit, plus a space ( ($addr = gethostbyname($1)) # if we get an addr ? "[" . inet_ntoa($addr) . "]" # format it : "[???]" # else mark dubious ) }gex; # /g for global # /e for execute # /x for nice formatting #----------------------------- s/ # replace \# # a pound sign (\w+) # the variable name \# # another pound sign /${$1}/xg; # with the value of the global variable ##----------------------------- s/ # replace \# # a pound sign (\w+) # the variable name \# # another pound sign /'$' . $1/xeeg; # ' with the value of *any* variable #----------------------------- |
#-----------------------------
# One fish two fish red fish blue fish
#-----------------------------
$WANT = 3;
$count = 0;
while (/(\w+)\s+fish\b/gi) {
if (++$count == $WANT) {
print "The third fish is a $1 one.\n";
# Warning: don't `last' out of this loop
}
}
# The third fish is a red one.
#-----------------------------
/(?:\w+\s+fish\s+){2}(\w+)\s+fish/i;
#-----------------------------
# simple way with while loop
$count = 0;
while ($string =~ /PAT/g) {
$count++; # or whatever you'd like to do here
}
# same thing with trailing while
$count = 0;
$count++ while $string =~ /PAT/g;
# or with for loop
for ($count = 0; $string =~ /PAT/g; $count++) { }
# Similar, but this time count overlapping matches
$count++ while $string =~ /(?=PAT)/g;
#-----------------------------
$pond = 'One fish two fish red fish blue fish';
# using a temporary
@colors = ($pond =~ /(\w+)\s+fish\b/gi); # get all matches
$color = $colors[2]; # then the one we want
# or without a temporary array
$color = ( $pond =~ /(\w+)\s+fish\b/gi )[2]; # just grab element 3
print "The third fish in the pond is $color.\n";
# The third fish in the pond is red.
#-----------------------------
$count = 0;
$_ = 'One fish two fish red fish blue fish';
@evens = grep { $count++ % 2 == 1 } /(\w+)\s+fish\b/gi;
print "Even numbered fish are @evens.\n";
# Even numbered fish are two blue.
#-----------------------------
$count = 0;
s{
\b # makes next \w more efficient
( \w+ ) # this is what we'll be changing
(
\s+ fish \b
)
}{
if (++$count == 4) {
"sushi" . $2;
} else {
$1 . $2;
}
}gex;
# One fish two fish red fish sushi fish
#-----------------------------
$pond = 'One fish two fish red fish blue fish swim here.';
$color = ( $pond =~ /\b(\w+)\s+fish\b/gi )[-1];
print "Last fish is $color.\n";
# Last fish is blue.
#-----------------------------
m{
A # find some pattern A
(?! # mustn't be able to find
.* # something
A # and A
)
$ # through the end of the string
}x
#-----------------------------
$pond = 'One fish two fish red fish blue fish swim here.';
if ($pond =~ m{
\b ( \w+) \s+ fish \b
(?! .* \b fish \b )
}six )
{
print "Last fish is $1.\n";
} else {
print "Failed!\n";
}
# Last fish is blue.
#-----------------------------
|
#----------------------------- # download the following standalone program #!/usr/bin/perl # killtags - very bad html tag killer undef $/; # each read is whole file while (<>) { # get one whole file at a time s/<.*?>//gs; # strip tags (terribly) print; # print file to STDOUT } #----------------------------- # download the following standalone program #!/usr/bin/perl # headerfy: change certain chapter headers to html $/ = ''; while ( <> ) { # fetch a paragraph s{ \A # start of record ( # capture in $1 Chapter # text string \s+ # mandatory whitespace \d+ # decimal number \s* # optional whitespace : # a real colon . * # anything not a newline till end of line ) }{<H1>$1</H1>}gx; print; } #----------------------------- #% perl -00pe 's{\A(Chapter\s+\d+\s*:.*)}{<H1>$1</H1>}gx' datafile #----------------------------- $/ = ''; # paragraph read mode for readline access while (<ARGV>) { while (m#^START(.*?)^END#sm) { # /s makes . span line boundaries # /m makes ^ match near newlines print "chunk $. in $ARGV has <<$1>>\n"; } } #----------------------------- |
#-----------------------------
undef $/;
@chunks = split(/pattern/, <FILEHANDLE>);
#-----------------------------
# .Ch, .Se and .Ss divide chunks of STDIN
{
local $/ = undef;
@chunks = split(/^\.(Ch|Se|Ss)$/m, <>);
}
print "I read ", scalar(@chunks), " chunks.\n";
#-----------------------------
|
#-----------------------------
while (<>) {
if (/BEGIN PATTERN/ .. /END PATTERN/) {
# line falls between BEGIN and END in the
# text, inclusive.
}
}
while (<>) {
if ($FIRST_LINE_NUM .. $LAST_LINE_NUM) {
# operate only between first and last line, inclusive.
}
}
#-----------------------------
while (<>) {
if (/BEGIN PATTERN/ ... /END PATTERN/) {
# line is between BEGIN and END on different lines
}
}
while (<>) {
if ($FIRST_LINE_NUM ... $LAST_LINE_NUM) {
# operate only between first and last line, but not same
}
}
#-----------------------------
# command-line to print lines 15 through 17 inclusive (see below)
perl -ne 'print if 15 .. 17' datafile
# print out all <XMP> .. </XMP> displays from HTML doc
while (<>) {
print if m#<XMP>#i .. m#</XMP>#i;
}
# same, but as shell command
# perl -ne 'print if m#<XMP>#i .. m#</XMP>#i' document.html
#-----------------------------
# perl -ne 'BEGIN { $top=3; $bottom=5 } print if $top .. $bottom' /etc/passwd # previous command FAILS
# perl -ne 'BEGIN { $top=3; $bottom=5 } \
# print if $. == $top .. $. == $bottom' /etc/passwd # works
# perl -ne 'print if 3 .. 5' /etc/passwd # also works
#-----------------------------
print if /begin/ .. /end/;
print if /begin/ ... /end/;
#-----------------------------
while (<>) {
$in_header = 1 .. /^$/;
$in_body = /^$/ .. eof();
}
#-----------------------------
%seen = ();
while (<>) {
next unless /^From:?\s/i .. /^$/;
while (/([^<>(),;\s]+\@[^<>(),;\s]+)/g) {
print "$1\n" unless $seen{$1}++;
}
}
#-----------------------------
|
#-----------------------------
sub glob2pat {
my $globstr = shift;
my %patmap = (
'*' => '.*',
'?' => '.',
'[' => '[',
']' => ']',
);
$globstr =~ s{(.)} { $patmap{$1} || "\Q$1" }ge;
return '^' . $globstr . '$'; #'
}
#-----------------------------
|
#-----------------------------
while ($line = <>) {
if ($line =~ /$pattern/o) {
# do something
}
}
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# popgrep1 - grep for abbreviations of places that say "pop"
# version 1: slow but obvious way
@popstates = qw(CO ON MI WI MN);
LINE: while (defined($line = <>)) {
for $state (@popstates) {
if ($line =~ /\b$state\b/) {
print; next LINE;
}
}
}
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# popgrep2 - grep for abbreviations of places that say "pop"
# version 2: eval strings; fast but hard to quote
@popstates = qw(CO ON MI WI MN);
$code = 'while (defined($line = <>)) {';
for $state (@popstates) {
$code .= "\tif (\$line =~ /\\b$state\\b/) { print \$line; next; }\n";
}
$code .= '}';
print "CODE IS\n----\n$code\n----\n" if 0; # turn on to debug
eval $code;
die if $@;
#-----------------------------
while (defined($line = <>)) {
if ($line =~ /\bCO\b/) { print $line; next; }
if ($line =~ /\bON\b/) { print $line; next; }
if ($line =~ /\bMI\b/) { print $line; next; }
if ($line =~ /\bWI\b/) { print $line; next; }
if ($line =~ /\bMN\b/) { print $line; next; }
}
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# popgrep3 - grep for abbreviations of places that say "pop"
# version 3: use build_match_func algorithm
@popstates = qw(CO ON MI WI MN);
$expr = join('||', map { "m/\\b\$popstates[$_]\\b/o" } 0..$#popstates);
$match_any = eval "sub { $expr }";
die if $@;
while (<>) {
print if &$match_any;
}
#-----------------------------
sub {
m/\b$popstates[0]\b/o || m/\b$popstates[1]\b/o ||
m/\b$popstates[2]\b/o || m/\b$popstates[3]\b/o ||
m/\b$popstates[4]\b/o
}
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# grepauth - print lines that mention both Tom and Nat
$multimatch = build_match_all(q/Tom/, q/Nat/);
while (<>) {
print if &$multimatch;
}
exit;
sub build_match_any { build_match_func('||', @_) }
sub build_match_all { build_match_func('&&', @_) }
sub build_match_func {
my $condition = shift;
my @pattern = @_; # must be lexical variable, not dynamic one
my $expr = join $condition => map { "m/\$pattern[$_]/o" } (0..$#pattern);
my $match_func = eval "sub { local \$_ = shift if \@_; $expr }";
die if $@; # propagate $@; this shouldn't happen!
return $match_func;
}
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# popgrep4 - grep for abbreviations of places that say "pop"
# version 4: use Regexp module
use Regexp;
@popstates = qw(CO ON MI WI MN);
@poppats = map { Regexp->new( '\b' . $_ . '\b') } @popstates;
while (defined($line = <>)) {
for $patobj (@poppats) {
print $line if $patobj->match($line);
}
}
#-----------------------------
|
#-----------------------------
do {
print "Pattern? ";
chomp($pat = <>);
eval { "" =~ /$pat/ };
warn "INVALID PATTERN $@" if $@;
} while $@;
#-----------------------------
sub is_valid_pattern {
my $pat = shift;
return eval { "" =~ /$pat/; 1 } || 0;
}
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# paragrep - trivial paragraph grepper
die "usage: $0 pat [files]\n" unless @ARGV;
$/ = '';
$pat = shift;
eval { "" =~ /$pat/; 1 } or die "$0: Bad pattern $pat: $@\n";
while (<>) {
print "$ARGV $.: $_" if /$pat/o;
}
#-----------------------------
$pat = "You lose @{[ system('rm -rf *')]} big here";
#-----------------------------
$safe_pat = quotemeta($pat);
something() if /$safe_pat/;
#-----------------------------
something() if /\Q$pat/;
#-----------------------------
|
#----------------------------- use locale; #----------------------------- # download the following standalone program #!/usr/bin/perl -w # localeg - demonstrate locale effects use locale; use POSIX 'locale_h'; $name = "andreas k\xF6nig"; @locale{qw(German English)} = qw(de_DE.ISO_8859-1 us-ascii); setlocale(LC_CTYPE, $locale{English}) or die "Invalid locale $locale{English}"; @english_names = (); while ($name =~ /\b(\w+)\b/g) { push(@english_names, ucfirst($1)); } setlocale(LC_CTYPE, $locale{German}) or die "Invalid locale $locale{German}"; @german_names = (); while ($name =~ /\b(\w+)\b/g) { push(@german_names, ucfirst($1)); } print "English names: @english_names\n"; print "German names: @german_names\n"; English names: Andreas K Nig German names: Andreas König #----------------------------- |
#-----------------------------
use String::Approx qw(amatch);
if (amatch("PATTERN", @list)) {
# matched
}
@matches = amatch("PATTERN", @list);
#-----------------------------
use String::Approx qw(amatch);
open(DICT, "/usr/dict/words") or die "Can't open dict: $!";
while(<DICT>) {
print if amatch("balast");
}
ballast
balustrade
blast
blastula
sandblast
#-----------------------------
|
#-----------------------------
while (/(\d+)/g) {
print "Found $1\n";
}
#-----------------------------
$n = " 49 here";
$n =~ s/\G /0/g;
print $n;
00049 here
#-----------------------------
while (/\G,?(\d+)/g) {
print "Found number $1\n";
}
#-----------------------------
$_ = "The year 1752 lost 10 days on the 3rd of September";
while (/(\d+)/gc) {
print "Found number $1\n";
}
if (/\G(\S+)/g) {
print "Found $1 after the last number.\n";
}
#Found number 1752
#
#Found number 10
#
#Found number 3
#
#Found rd after the last number.
#-----------------------------
print "The position in \$a is ", pos($a);
pos($a) = 30;
print "The position in \$_ is ", pos;
pos = 30;
#-----------------------------
|
#-----------------------------
# greedy pattern
s/<.*>//gs; # try to remove tags, very badly
# non-greedy pattern
s/<.*?>//gs; # try to remove tags, still rather badly
#-----------------------------
#<b><i>this</i> and <i>that</i> are important</b> Oh, <b><i>me too!</i></b>
#-----------------------------
m{ <b><i>(.*?)</i></b> }sx
#-----------------------------
/BEGIN((?:(?!BEGIN).)*)END/
#-----------------------------
m{ <b><i>( (?: (?!</b>|</i>). )* ) </i></b> }sx
#-----------------------------
m{ <b><i>( (?: (?!</[ib]>). )* ) </i></b> }sx
#-----------------------------
m{
<b><i>
[^<]* # stuff not possibly bad, and not possibly the end.
(?:
# at this point, we can have '<' if not part of something bad
(?! </?[ib]> ) # what we can't have
< # okay, so match the '<'
[^<]* # and continue with more safe stuff
) *
</i></b>
}sx
#-----------------------------
|
#-----------------------------
$/ = ''; # paragrep mode
while (<>) {
while ( m{
\b # start at a word boundary (begin letters)
(\S+) # find chunk of non-whitespace
\b # until another word boundary (end letters)
(
\s+ # separated by some whitespace
\1 # and that very same chunk again
\b # until another word boundary
) + # one or more sets of those
}xig
)
{
print "dup word '$1' at paragraph $.\n";
}
}
#-----------------------------
This is a test
test of the duplicate word finder.
#-----------------------------
$a = 'nobody';
$b = 'bodysnatcher';
if ("$a $b" =~ /^(\w+)(\w+) \2(\w+)$/) {
print "$2 overlaps in $1-$2-$3\n";
}
body overlaps in no-body-snatcher
#-----------------------------
/^(\w+?)(\w+) \2(\w+)$/,
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# prime_pattern -- find prime factors of argument using pattern matching
for ($N = ('o' x shift); $N =~ /^(oo+?)\1+$/; $N =~ s/$1/o/g) {
print length($1), " ";
}
print length ($N), "\n";
#-----------------------------
# solve for 12x + 15y + 16z = 281, maximizing x
if (($X, $Y, $Z) =
(('o' x 281) =~ /^(o*)\1{11}(o*)\2{14}(o*)\3{15}$/))
{
($x, $y, $z) = (length($X), length($Y), length($Z));
print "One solution is: x=$x; y=$y; z=$z.\n";
} else {
print "No solution.\n";
}
#One solution is: x=17; y=3; z=2.
#-----------------------------
('o' x 281) =~ /^(o+)\1{11}(o+)\2{14}(o+)\3{15}$/;
#One solution is: x=17; y=3; z=2
('o' x 281) =~ /^(o*?)\1{11}(o*)\2{14}(o*)\3{15}$/;
#One solution is: x=0; y=7; z=11.
('o' x 281) =~ /^(o+?)\1{11}(o*)\2{14}(o*)\3{15}$/;
#One solution is: x=1; y=3; z=14.
#-----------------------------
|
#-----------------------------
chomp($pattern = <CONFIG_FH>);
if ( $data =~ /$pattern/ ) { ..... }
#-----------------------------
/ALPHA|BETA/;
#-----------------------------
/^(?=.*ALPHA)(?=.*BETA)/s;
#-----------------------------
/ALPHA.*BETA|BETA.*ALPHA/s;
#-----------------------------
/^(?:(?!PAT).)*$/s;
#-----------------------------
/(?=^(?:(?!BAD).)*$)GOOD/s;
#-----------------------------
if (!($string =~ /pattern/)) { something() } # ugly
if ( $string !~ /pattern/) { something() } # preferred
#-----------------------------
if ($string =~ /pat1/ && $string =~ /pat2/ ) {
something
() }
#-----------------------------
if ($string =~ /pat1/ || $string =~ /pat2/ ) {
something
() }
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# minigrep - trivial grep
$pat = shift;
while (<>) {
print if /$pat/o;
}
#-----------------------------
"labelled" =~ /^(?=.*bell)(?=.*lab)/s
#-----------------------------
$string =~ /bell/ && $string =~ /lab/
#-----------------------------
if ($murray_hill =~ m{
^ # start of string
(?= # zero-width lookahead
.* # any amount of intervening stuff
bell # the desired bell string
) # rewind, since we were only looking
(?= # and do the same thing
.* # any amount of intervening stuff
lab # and the lab part
)
}sx ) # /s means . can match newline
{
print "Looks like Bell Labs might be in Murray Hill!\n";
}
#-----------------------------
"labelled" =~ /(?:^.*bell.*lab)|(?:^.*lab.*bell)/
#-----------------------------
$brand = "labelled";
if ($brand =~ m{
(?: # non-capturing grouper
^ .*? # any amount of stuff at the front
bell # look for a bell
.*? # followed by any amount of anything
lab # look for a lab
) # end grouper
| # otherwise, try the other direction
(?: # non-capturing grouper
^ .*? # any amount of stuff at the front
lab # look for a lab
.*? # followed by any amount of anything
bell # followed by a bell
) # end grouper
}sx ) # /s means . can match newline
{
print "Our brand has bell and lab separate.\n";
}
#-----------------------------
$map =~ /^(?:(?!waldo).)*$/s
#-----------------------------
if ($map =~ m{
^ # start of string
(?: # non-capturing grouper
(?! # look ahead negation
waldo # is he ahead of us now?
) # is so, the negation failed
. # any character (cuzza /s)
) * # repeat that grouping 0 or more
$ # through the end of the string
}sx ) # /s means . can match newline
{
print "There's no waldo here!\n";
}
#-----------------------------
7:15am up 206 days, 13:30, 4 users, load average: 1.04, 1.07, 1.04
USER TTY FROM LOGIN@ IDLE JCPU PCPU WHAT
tchrist tty1 5:16pm 36days 24:43 0.03s xinit
tchrist tty2 5:19pm 6days 0.43s 0.43s -tcsh
tchrist ttyp0 chthon 7:58am 3days 23.44s 0.44s -tcsh
gnat ttyS4 coprolith 2:01pm 13:36m 0.30s 0.30s -tcsh
#-----------------------------
#% w | minigrep '^(?!.*ttyp).*tchrist'
#-----------------------------
m{
^ # anchored to the start
(?! # zero-width look-ahead assertion
.* # any amount of anything (faster than .*?)
ttyp # the string you don't want to find
) # end look-ahead negation; rewind to start
.* # any amount of anything (faster than .*?)
tchrist # now try to find Tom
}x
#-----------------------------
#% w | grep tchrist | grep -v ttyp
#-----------------------------
#% grep -i 'pattern' files
#% minigrep '(?i)pattern' files
#-----------------------------
|
#-----------------------------
my $eucjp = q{ # EUC-JP encoding subcomponents:
[\x00-\x7F] # ASCII/JIS-Roman (one-byte/character)
| \x8E[\xA0-\xDF] # half-width katakana (two bytes/char)
| \x8F[\xA1-\xFE][\xA1-\xFE] # JIS X 0212-1990 (three bytes/char)
| [\xA1-\xFE][\xA1-\xFE] # JIS X 0208:1997 (two bytes/char)
};
#-----------------------------
/^ (?: $eucjp )*? \xC5\xEC\xB5\xFE/ox # Trying to find Tokyo
#-----------------------------
/^ ( (?:eucjp)*? ) $Tokyo/$1$Osaka/ox
#-----------------------------
/\G ( (?:eucjp)*? ) $Tokyo/$1$Osaka/gox
#-----------------------------
@chars = /$eucjp/gox; # One character per list element
#-----------------------------
while (<>) {
my @chars = /$eucjp/gox; # One character per list element
for my $char (@chars) {
if (length($char) == 1) {
# Do something interesting with this one-byte character
} else {
# Do something interesting with this multiple-byte character
}
}
my $line = join("",@chars); # Glue list back together
print $line;
}
#-----------------------------
$is_eucjp = m/^(?:$eucjp)*$/xo;
#-----------------------------
$is_eucjp = m/^(?:$eucjp)*$/xo;
$is_sjis = m/^(?:$sjis)*$/xo;
#-----------------------------
while (<>) {
my @chars = /$eucjp/gox; # One character per list element
for my $euc (@chars) {
my $uni = $euc2uni{$char};
if (defined $uni) {
$euc = $uni;
} else {
## deal with unknown EUC->Unicode mapping here.
}
}
my $line = join("",@chars);
print $line;
}
#-----------------------------
|
#----------------------------- 1 while $addr =~ s/\([^()]*\)//g; #----------------------------- Dear someuser@host.com, Please confirm the mail address you gave us Wed May 6 09:38:41 MDT 1998 by replying to this message. Include the string "Rumpelstiltskin" in that reply, but spelled in reverse; that is, start with "Nik...". Once this is done, your confirmed address will be entered into our records. #----------------------------- |
#-----------------------------
chomp($answer = <>);
if ("SEND" =~ /^\Q$answer/i) { print "Action is send\n" }
elsif ("STOP" =~ /^\Q$answer/i) { print "Action is stop\n" }
elsif ("ABORT" =~ /^\Q$answer/i) { print "Action is abort\n" }
elsif ("LIST" =~ /^\Q$answer/i) { print "Action is list\n" }
elsif ("EDIT" =~ /^\Q$answer/i) { print "Action is edit\n" }
#-----------------------------
use Text::Abbrev;
$href = abbrev qw(send abort list edit);
for (print "Action: "; <>; print "Action: ") {
chomp;
my $action = $href->{ lc($_) };
print "Action is $action\n";
}
#-----------------------------
$name = 'send';
&$name();
#-----------------------------
# assumes that &invoke_editor, &deliver_message,
# $file and $PAGER are defined somewhere else.
use Text::Abbrev;
my($href, %actions, $errors);
%actions = (
"edit" => \&invoke_editor,
"send" => \&deliver_message,
"list" => sub { system($PAGER, $file) },
"abort" => sub {
print "See ya!\n";
exit;
},
"" => sub {
print "Unknown command: $cmd\n";
$errors++;
},
);
$href = abbrev(keys %actions);
local $_;
for (print "Action: "; <>; print "Action: ") {
s/^\s+//; # trim leading white space
s/\s+$//; # trim trailing white space
next unless $_;
$actions->{ $href->{ lc($_) } }->();
}
#-----------------------------
$abbreviation = lc($_);
$expansion = $href->{$abbreviation};
$coderef = $actions->{$expansion};
&$coderef();
#-----------------------------
|
#----------------------------- #% gunzip -c ~/mail/archive.gz | urlify > archive.urlified #----------------------------- #% urlify ~/mail/*.inbox > ~/allmail.urlified #----------------------------- # download the following standalone program #!/usr/bin/perl # urlify - wrap HTML links around URL-like constructs $urls = '(http|telnet|gopher|file|wais|ftp)'; $ltrs = '\w'; $gunk = '/#~:.?+=&%@!\-'; $punc = '.:?\-'; $any = "${ltrs}${gunk}${punc}"; while (<>) { s{ \b # start at word boundary ( # begin $1 { $urls : # need resource and a colon [$any] +? # followed by on or more # of any valid character, but # be conservative and take only # what you need to.... ) # end $1 } (?= # look-ahead non-consumptive assertion [$punc]* # either 0 or more punctuation [^$any] # followed by a non-url char | # or else $ # then end of the string ) }{<A HREF="$1">$1</A>}igox; print; } #----------------------------- |
#----------------------------- #% tcgrep -ril '^From: .*kate' ~/mail #----------------------------- # download the following standalone program #!/usr/bin/perl -w # tcgrep: tom christiansen's rewrite of grep # v1.0: Thu Sep 30 16:24:43 MDT 1993 # v1.1: Fri Oct 1 08:33:43 MDT 1993 # v1.2: Fri Jul 26 13:37:02 CDT 1996 # v1.3: Sat Aug 30 14:21:47 CDT 1997 # v1.4: Mon May 18 16:17:48 EDT 1998 use strict; # globals use vars qw($Me $Errors $Grand_Total $Mult %Compress $Matches); my ($matcher, $opt); # matcher - anon. sub to check for matches # opt - ref to hash w/ command line options init(); # initialize globals ($opt, $matcher) = parse_args(); # get command line options and patterns matchfile($opt, $matcher, @ARGV); # process files exit(2) if $Errors; exit(0) if $Grand_Total; exit(1); ################################### sub init { ($Me = $0) =~ s!.*/!!; # get basename of program, "tcgrep" $Errors = $Grand_Total = 0; # initialize global counters $Mult = ""; # flag for multiple files in @ARGV $| = 1; # autoflush output %Compress = ( # file extensions and program names z => 'gzcat', # for uncompressing gz => 'gzcat', Z => 'zcat', ); } ################################### sub usage { die <<EOF usage: $Me [flags] [files] Standard grep options: i case insensitive n number lines c give count of lines matching C ditto, but >1 match per line possible w word boundaries only s silent mode x exact matches only v invert search sense (lines that DON'T match) h hide filenames e expression (for exprs beginning with -) f file with expressions l list filenames matching Specials: 1 1 match per file H highlight matches u underline matches r recursive on directories or dot if none t process directories in 'ls -t' order p paragraph mode (default: line mode) P ditto, but specify separator, e.g. -P '%%\\n' a all files, not just plain text files q quiet about failed file and dir opens T trace files as opened May use a TCGREP environment variable to set default options. EOF } ################################### sub parse_args { use Getopt::Std; my ($optstring, $zeros, $nulls, %opt, $pattern, @patterns, $match_code); my ($SO, $SE); if ($_ = $ENV{TCGREP}) { # get envariable TCGREP s/^([^\-])/-$1/; # add leading - if missing unshift(@ARGV, $_); # add TCGREP opt string to @ARGV } $optstring = "incCwsxvhe:f:l1HurtpP:aqT"; $zeros = 'inCwxvhelut'; # options to init to 0 (prevent warnings) $nulls = 'pP'; # options to init to "" (prevent warnings) @opt{ split //, $zeros } = ( 0 ) x length($zeros); @opt{ split //, $nulls } = ( '' ) x length($nulls); getopts($optstring, \%opt) or usage(); if ($opt{f}) { # -f patfile open(PATFILE, $opt{f}) or die qq($Me: Can't open '$opt{f}': $!); # make sure each pattern in file is valid while ( defined($pattern = <PATFILE>) ) { chomp $pattern; eval { 'foo' =~ /$pattern/, 1 } or die "$Me: $opt{f}:$.: bad pattern: $@"; push @patterns, $pattern; } close PATFILE; } else { # make sure pattern is valid $pattern = $opt{e} || shift(@ARGV) || usage(); eval { 'foo' =~ /$pattern/, 1 } or die "$Me: bad pattern: $@"; @patterns = ($pattern); } if ($opt{H} || $opt{u}) { # highlight or underline my $term = $ENV{TERM} || 'vt100'; my $terminal; eval { # try to look up escapes for stand-out require POSIX; # or underline via Term::Cap use Term::Cap; my $termios = POSIX::Termios->new(); $termios->getattr; my $ospeed = $termios->getospeed; $terminal = Tgetent Term::Cap { TERM=>undef, OSPEED=>$ospeed } }; unless ($@) { # if successful, get escapes for either local $^W = 0; # stand-out (-H) or underlined (-u) ($SO, $SE) = $opt{H} ? ($terminal->Tputs('so'), $terminal->Tputs('se')) : ($terminal->Tputs('us'), $terminal->Tputs('ue')); } else { # if use of Term::Cap fails, ($SO, $SE) = $opt{H} # use tput command to get escapes ? (`tput -T $term smso`, `tput -T $term rmso`) : (`tput -T $term smul`, `tput -T $term rmul`) } } if ($opt{i}) { @patterns = map {"(?i)$_"} @patterns; } if ($opt{p} || $opt{P}) { @patterns = map {"(?m)$_"} @patterns; } $opt{p} && ($/ = ''); $opt{P} && ($/ = eval(qq("$opt{P}"))); # for -P '%%\n' $opt{w} && (@patterns = map {'\b' . $_ . '\b'} @patterns); $opt{'x'} && (@patterns = map {"^$_\$"} @patterns); if (@ARGV) { $Mult = 1 if ($opt{r} || (@ARGV > 1) || -d $ARGV[0]) && !$opt{h}; } $opt{1} += $opt{l}; # that's a one and an ell $opt{H} += $opt{u}; $opt{c} += $opt{C}; $opt{'s'} += $opt{c}; $opt{1} += $opt{'s'} && !$opt{c}; # that's a one @ARGV = ($opt{r} ? '.' : '-') unless @ARGV; $opt{r} = 1 if !$opt{r} && grep(-d, @ARGV) == @ARGV; $match_code = ''; $match_code .= 'study;' if @patterns > 5; # might speed things up a bit foreach (@patterns) { s(/)(\\/)g } if ($opt{H}) { foreach $pattern (@patterns) { $match_code .= "\$Matches += s/($pattern)/${SO}\$1${SE}/g;"; } } elsif ($opt{v}) { foreach $pattern (@patterns) { $match_code .= "\$Matches += !/$pattern/;"; } } elsif ($opt{C}) { foreach $pattern (@patterns) { $match_code .= "\$Matches++ while /$pattern/g;"; } } else { foreach $pattern (@patterns) { $match_code .= "\$Matches++ if /$pattern/;"; } } $matcher = eval "sub { $match_code }"; die if $@; return (\%opt, $matcher); } ################################### sub matchfile { $opt = shift; # reference to option hash $matcher = shift; # reference to matching sub my ($file, @list, $total, $name); local($_); $total = 0; FILE: while (defined ($file = shift(@_))) { if (-d $file) { if (-l $file && @ARGV != 1) { warn "$Me: \"$file\" is a symlink to a directory\n" if $opt->{T}; next FILE; } if (!$opt->{r}) { warn "$Me: \"$file\" is a directory, but no -r given\n" if $opt->{T}; next FILE; } unless (opendir(DIR, $file)) { unless ($opt->{'q'}) { warn "$Me: can't opendir $file: $!\n"; $Errors++; } next FILE; } @list = (); for (readdir(DIR)) { push(@list, "$file/$_") unless /^\.{1,2}$/; } closedir(DIR); if ($opt->{t}) { my (@dates); for (@list) { push(@dates, -M) } @list = @list[sort { $dates[$a] <=> $dates[$b] } 0..$#dates]; } else { @list = sort @list; |