1. Strings

Introduction


%% A character is simply an integer. For Latin-1 (or any Latin-n) and
%% thus ASCII we have some syntactic sugar: $x denotes the code of x.
%% Each line below is an independent example. C is single-assignment,
%% so binding it to a second, different value fails with badmatch.

C = 110.					% an ASCII 'n'
C = $ñ.						% code 241: Latin1 tilde-n, also Latin2 acute-n
C = $\n.					% a newline
C = $\^j.					% another newline (control-J)
C = $\116.					% ASCII 'N' in octal

%% Strings are simply represented as lists of characters. This is
%% usually enough. Sometimes, however, strings are better represented
%% as binaries (usually where Perl would use pack/unpack). Here I use
%% whatever feels natural.

%% The next three lines all denote the same two-element list.
String = "\\n".					% two characters, \ and an n
String = [$\\, $n].				% ditto, less sugar ($\\ is the backslash)
String = [92, 110].				% ditto, no sugar

String = "\n".					% a "newline" character
String = "Jon \"Maddog\" Orwant".		% literal double quotes

String = "Jon 'Maddog' Orwant".			% single quotes, no \ needed
String = "Jon \'Maddog\' Orwant".		% ...but you can if you want

%% Each statement below binds A to a different value, so read them as
%% independent alternatives rather than as a sequence.

%% An escaped newline inside a single literal.
A = "This is a multiline string\nspanning two lines". % use of '\n'

%% Adjacent string literals are joined into one string.
A = "This is a multiline string that spans two lines\n"
    "using two source lines for convenience".	% string concatenation

%% A line break typed inside the quotes becomes part of the string.
A = "This is a multiline string that spans two lines
using two source lines in a here-document style". % "real" newline

%% The same kind of literal wrapped in << >> gives a binary, not a list.
A = << "This string is really a binary" >>.	% literal binary string

%% This won't work well with "wide" encodings
%% binary_to_list/1 produces one list element per byte, and
%% list_to_binary/1 only accepts integers that fit in a byte, so
%% characters above 255 cannot make the round trip.
%% BinaryString is assumed to be bound already when the first line runs.
ListString = binary_to_list(BinaryString).
BinaryString = list_to_binary(ListString).

%% @@INCOMPLETE@@
%% TODO: Maybe mention atoms.

Accessing Substrings


%% Extract Count characters starting at the 1-based position Offset,
%% or everything from Offset to the end of the string.
Value = string:substr(String, Offset, Count).
Value = string:substr(String, Offset).

%% Strings are immutable, so "assigning to a substring" means building a
%% new string around the part being replaced. Offset is 1-based here
%% too, so the expressions below replace exactly what Value held above.

%% replace the Count characters starting at Offset with Newstring
string:substr(String, 1, Offset-1)
  ++ Newstring
  ++ string:substr(String, Offset+Count).
%% replace everything from Offset onwards with Newtail
string:substr(String, 1, Offset-1)
  ++ Newtail.

%%-----------------------------
%% Carve Data up by fixed widths: a 5-byte field, 3 bytes that are
%% skipped, two 8-byte fields, and finally whatever remains.
<<Leading:5/bytes,
  _:3/bytes,
  S1:8/bytes,
  S2:8/bytes,
  Trailing/bytes>> = Data.

%% split at five-byte boundaries.
%% Returns the consecutive 5-byte chunks of a binary. A shorter final
%% chunk is kept as the last element; an empty binary yields [], so an
%% input whose size is a multiple of five gets no trailing <<>>.
split_five(<<>>) ->
    [];
split_five(<<Bytes:5/binary, Rest/binary>>) ->
    [Bytes | split_five(Rest)];
split_five(<<All/binary>>) ->
    [All].
%% split_five/1 only matches binaries, so a list string is converted
%% first (every character must fit in a byte).
Fivers = split_five(iolist_to_binary(String)).

%%-----------------------------
S = "This is what you have",
%%  +123456789012345678901   Indexing forwards  (left to right)
%%   098765432109876543210-  Indexing backwards (right to left)
%% The rulers show only the last digit of each index, so 0 stands for
%% 10, 20, and so on. Forward positions start at 1, while the
%% (simulated) backward positions start at 0.
Len = string:len(S),				% 21

First = string:substr(S, 1, 1),			% "T"
Start = string:substr(S, 6, 2),			% "is"
Rest  = string:substr(S, 14),			% "you have"
Last  = string:substr(S, Len),			% "e"
End   = string:substr(S, Len - 3),		% "have"
Piece = string:substr(S, Len - 7, 3).		% "you"

%%-----------------------------
String = "This is what you have",
io:put_chars(String),
% -| This is what you have

%% change "is" to "wasn't": keep the five characters in front of it
%% and everything from position 8 onwards
Before = string:substr(String, 1, 5),
After = string:substr(String, 8),
String2 = lists:append([Before, "wasn't", After]),
% => This wasn't what you have

%% replace the last 12 characters with "ondrous"
Keep = string:len(String2) - 12,
String3 = string:substr(String2, 1, Keep) ++ "ondrous",
% => This wasn't wondrous

%% delete first character
String4 = string:substr(String3, 2),
% => his wasn't wondrous

%% delete last 10 characters
String5 = string:substr(String4, 1, string:len(String4) - 10).
% => his wasn'

%%-----------------------------
%% you can test substrings with re:run/3
%% (re replaces the old regexp module, which is no longer part of OTP;
%% note that re uses Perl-compatible pattern syntax)
%% Starting at len-9 selects exactly the last 10 characters; max/2 keeps
%% the start position valid when String is shorter than that.
Last10 = string:substr(String, max(1, string:len(String) - 9)),
case re:run(Last10, Pattern, [{capture, none}]) of
    match ->
        io:put_chars("Pattern matches in last 10 characters");
    nomatch -> ok
end.

%% substitute "at" for "is", restricted to first five characters
%% ({return, list} makes re:replace/4 hand back an ordinary string)
NewSub = re:replace(string:substr(String, 1, 5), "is", "at",
                    [global, {return, list}]),
NewString = NewSub ++ string:substr(String, 6).

%%-----------------------------
%% swap the first and the last letter of a string
A = "make a hat",
Len = string:len(A),
FirstLetter = string:substr(A, 1, 1),
Middle = string:substr(A, 2, Len - 2),
LastLetter = string:substr(A, Len),
B = LastLetter ++ Middle ++ FirstLetter,
io:put_chars(B).
% -| take a ham

%%-----------------------------
%% extract a column with bitsyntax: skip 6 bytes, take the next 6
A = <<"To be or not to be">>,
<<_:6/bytes, B:6/bytes, _/bytes>> = A.
% B = <<"or not">>

%% Bitsyntax cannot do Perl unpack's backward skip.

%%-----------------------------

%% This function returns a function (actually a closure) instead of an
%% unpack format string --- so I call it cut2fun instead.
%%
%% Positions is an ascending list of 1-based column positions, each one
%% marking where a new field starts. The returned fun takes a binary
%% and returns the fields as a list of binaries, with whatever follows
%% the last position as the final element. It fails with badmatch when
%% the binary is too short for the requested fields.

cut2fun(Positions) ->
    %% Translate from positions to field widths: every width is the
    %% distance from the previous position, counting from column 1.
    {Distances, _LastPos} =
        lists:mapfoldl(fun(Pos, PrevPos) -> {Pos - PrevPos, Pos} end,
                       1,
                       Positions),
    fun(Bin) -> cut_fields(Distances, Bin) end.

%% Split Bin into one field per width; the remainder is the last element.
cut_fields([], Rest) ->
    [Rest];
cut_fields([Dist | Dists], Bin) ->
    <<Field:Dist/binary, Rest/binary>> = Bin,
    [Field | cut_fields(Dists, Rest)].

%% Build the cutting fun once and keep it in a variable.
Fun = cut2fun([8, 14, 20, 26, 30]).		% no point in printing it
% => #Fun<tmp.3.14582076>
%% Bin must already be bound to a binary; the result lists one binary
%% per field followed by the remainder.
Fun(Bin).					% so use it instead
% => [Bin1, Bin2, Bin3, Bin4, Bin5, BinRest]
%% The parentheses around the call to cut2fun/1 let its result be applied directly.
(cut2fun([8, 14, 20, 26, 30]))(Bin).		% or use it instantly (somewhat messy syntax)

Establishing a Default Value

%%-----------------------------
%% use B if B is true, else C
%% (only the atom 'true' selects B; any other value of B selects C)
A = case B of
        true -> B;
        _ -> C
    end.

%%-----------------------------
%% The Erlang language proper has no concept of "not defined", but most
%% lookup functions have it in some guise: erlang:get/1, for example,
%% uses the atom 'undefined'.

%% use B if B is defined, else C
A = if
        B =:= undefined -> C;
        true -> B
    end.
	    
%%-----------------------------
%% fall back to a default value when Bar is the empty string
Foo = case Bar =:= "" of
          true -> "DEFAULT VALUE";
          false -> Bar
      end.

%%-----------------------------
%% Take the directory from the first plain command-line argument and
%% default to "/tmp" when there is none.
Dir = case init:get_plain_arguments() of
          [] -> "/tmp";
          [Str | _ ] -> Str
      end.

%% The same with hd/1. The argument list is fetched once and matched
%% against [] instead of being measured with length/1, which walks the
%% whole list just to find out whether it is empty.
Args = init:get_plain_arguments(),
Dir = case Args of
          [] -> "/tmp";
          _ -> hd(Args)
      end.

%%-----------------------------
%% count one more use of Shell, treating an empty name as "/bin/sh"
Key = case Shell of
          "" -> "/bin/sh";
          _ -> Shell
      end,
NewCount = dict:update_counter(Key, 1, Count).

%%-----------------------------
%% find the user name on Unix systems
%% The candidates are tried in order and the first one that yields a
%% non-empty string wins. throw/1 leaves lists:foreach/2 early; only
%% that tagged throw is caught, so a genuine error still surfaces
%% instead of ending up in User as an {'EXIT', _} tuple. If no
%% candidate produces a name, the fallback text is used.
User = try
           lists:foreach(
             fun(Lookup) ->
                     case Lookup() of
                         false -> go_on;    % os:getenv/1: variable not set
                         "" -> go_on;
                         Found -> throw({found, Found})
                     end
             end,
             [fun() -> os:getenv("USER") end,
              fun() -> os:getenv("LOGNAME") end,
              %% os:cmd/1 returns the command output including its
              %% trailing newline, which is not part of the name
              fun() -> string:strip(os:cmd("id -n -u"), right, $\n) end]),
           "Unknown user"
       catch
           throw:{found, Name} -> Name
       end.
%% As can be seen, chaining alternatives together is not trivial.

%% TODO: Find out some way to get the uid (and do getlogin() and
%% getpwuid() syscalls). Should I dare count on env-var UID?
	      
%%-----------------------------
%% Emulating a default argument: the zero-arity version supplies
%% 'greenwich' as the place.
starting_point() ->
    starting_point(greenwich).

starting_point(Place) ->
    starting_point_(Place).

Exchanging Values Without Using Temporary Variables

%%-----------------------------

%% As Erlang has write-once variables, a new scope is needed to swap
%% values around. Here, I use anonymous functions to get such a scope.
%% A fun needs a body to be valid Erlang, so each one below returns its
%% (now exchanged) arguments.

fun(V1, V2) ->
        %% Swapped here: V1 holds the outer V2 and vice versa.
        {V1, V2}
end(V2, V1).


{Alpha, Beta, Production} = {january, march, august},
fun(Alpha, Beta, Production) ->
        %% Rotated here: the parameters shadow the outer variables, so
        %% Alpha is march, Beta is august and Production is january.
        {Alpha, Beta, Production}
end(Beta, Production, Alpha).

%% And here I use named functions

%% f1/2 hands its two arguments to f2/2 in the opposite order.
f1(First, Second) ->
    f2(Second, First).

%% Inside f2/2 the two values appear exchanged.
f2(V1, V2) ->
    %% Swapped here...
    ok.


%% The caller: passes three values that are to be rotated.
f() ->
    %%...
    f1(january, march, august),
    %%...
    ok.

%% f1/3 moves its first argument to the end before calling f2/3.
f1(A, B, C) ->
    f2(B, C, A).

%% Inside f2/3 the three values appear rotated by one place.
f2(Alpha, Beta, Production) ->
    %% Rotated here...
    ok.

Converting Between ASCII Characters and Values

%%-----------------------------

%% XXX: Maybe move character stuff here from 1.1 

%% ~w writes the integer itself, ~c writes the character with that code.
io:format("Number ~w is character ~c\n", [101, 101]).
% -| Number 101 is character e

%% With ~w a string shows up as what it is: a list of character codes.
io:format("~w~n", ["sample"]).
% -| [115,97,109,112,108,101]

%% With ~s a list of character codes is written as text.
io:format("~s~n", [[115, 97, 109, 112, 108, 101]]).
% -| sample

%% add one to every character code: H -> I, A -> B, L -> M
io:format("~s~n", [[Ch + 1 || Ch <- "HAL"]]).
% -| IBM

Processing a String One Character at a Time

%%-----------------------------

%% visit every character of a string with a tail-recursive loop
f([]) ->
    ok;
f([Char | Rest]) ->
    %% do something with Char
    f(Rest).

f(String).

%% or hand an anonymous function to lists:foreach/2
EachChar = fun(Char) ->
                   %% do something with Char here
                   ok
           end,
lists:foreach(EachChar, String).

%%-----------------------------
%% collect the distinct characters of a string in a set
String = "an apple a day",
Seen = sets:from_list(String),
Unique = lists:sort(sets:to_list(Seen)),
io:format("unique chars are: ~s\n", [Unique]).
% -| unique chars are:  adelnpy

%%-----------------------------
%% 32-bit checksum: add up the character codes, keeping the low 32 bits
Mask = (1 bsl 32) - 1,
Sum = lists:foldl(fun(Ch, Acc) -> (Acc + Ch) band Mask end, 0, String),
io:format("sum is ~w\n", [Sum]).
% prints "sum is 1248" if String is "an apple a day".

%% ...and with binaries: the fun takes one byte off the front and
%% returns the new 32-bit sum together with the rest of the binary

Sum = binaries_fold(fun(Bin, Acc) ->
                            <<Ch, Rest/binary>> = Bin,
                            {(Acc + Ch) band 16#FFFFFFFF, Rest}
                    end, 0, BinString).

%%-----------------------------

%% TODO: Make this a standalone program. For now, say
%%
%%    sum:sum(["file", "file", "file"]).
%%
%% at the Erlang prompt.

-module(sum).
-author('sverkerw').

-compile(export_all).

%% Print the checksum of the named files: the sum of all their bytes,
%% reduced modulo 2^16 - 1. A file that cannot be read is reported in
%% brackets and contributes nothing.
sum(FNames) ->
    Sum = lists:foldl(fun sum_/2, 0, FNames),
    io:format("~w\n", [Sum]).

%% Add the bytes of the file FName to the checksum Acc and return the
%% new checksum. On a read error the reason is printed and Acc is
%% returned unchanged.
sum_(FName, Acc) ->
    case file:read_file(FName) of
        {error, Err} ->
            io:format("[~w]", [Err]),
            Acc;
        {ok, Bin} ->
            binaries_fold(
              fun(<<Ch:1/unit:8, Rest/binary>>, S) ->
                      %% S is the running checksum. Adding to the outer
                      %% Acc instead would throw away every byte but
                      %% the last one.
                      {(S + Ch) rem (1 bsl 16 - 1), Rest}
              end, Acc, Bin)
    end.

%% Fold over a binary. Fun(Bin, Acc) consumes some prefix of Bin and
%% returns {NewAcc, RestOfBin}; the fold ends when nothing is left.
binaries_fold(_, Acc, << >>) ->
    Acc;
binaries_fold(Fun, Acc, Bin) ->
    {Acc1, Bin1} = Fun(Bin, Acc),
    binaries_fold(Fun, Acc1, Bin1).

%%-----------------------------

%% TODO: Make this a standalone program.

-module(slowcat).
-author('sverkerw').

-compile(export_all).

%% Print the given files slowly, using a delay factor of 1.
slowcat(Files) ->
    slowcat(1, Files).

%% Print the files one after the other, pausing Delay*5 milliseconds
%% before each byte.
slowcat(Delay, Files) ->
    PrintFile = fun(File) -> slowcat_(Delay, File) end,
    lists:foreach(PrintFile, Files).

%% Print a single file byte by byte; a read error is shown in brackets.
slowcat_(Delay, File) ->
    PrintByte = fun(<<Ch, Rest/binary>>) ->
                        timer:sleep(Delay * 5),
                        io:put_chars([Ch]),
                        Rest
                end,
    case file:read_file(File) of
        {ok, Bin} ->
            binaries_foreach(PrintByte, Bin);
        {error, Err} ->
            io:format("[~w]", [Err])
    end.

%% Apply Fun to the binary over and over, each time continuing with the
%% remainder Fun returns, until the binary is empty.
binaries_foreach(_, <<>>) ->
    ok;
binaries_foreach(Fun, Bin) ->
    binaries_foreach(Fun, Fun(Bin)).

Reversing a String by Word or Character

%%-----------------------------

%% reversing by character
Revbytes = lists:reverse(String).

%% reversing by word: split on spaces, reverse the list of words and
%% glue it back together with single spaces using string:join/2
Revwords = string:join(lists:reverse(string:tokens(String, " ")), " ").

Expanding and Compressing Tabs

Expanding Variables in User Input

Controlling Case

Interpolating Functions and Expressions Within Strings

Indenting Here Documents

Reformatting Paragraphs

Escaping Characters

Trimming Blanks from the Ends of a String

Parsing Comma-Separated Data

Soundex Matching

Program: fixstyle

Program: psgrep