1. Strings

Introduction


-- Haskell string literals are always written with double quotes; there is
-- no single-quoted string form and no q/qq/qw-style quoting operators.
str :: String
str  = "\\n"                             -- a backslash followed by 'n': two characters
str2 :: String
str2 = "Jon 'Maddog' Orwant"             -- single quotes need no escaping inside "..."
str3 :: String
str3 = "\n"                              -- one character: a newline
str4 :: String
str4 = "Jon \"Maddog\" Orwant"           -- embedded double quotes are escaped with \

-- There are no here-documents either: build multi-line text from pieces
-- that each end in \n (gluing them with ++, or using unlines, works too).
str5 :: String
str5 = concat
       [ "This is a multiline here document\n"
       , "terminated by  on a line by itself\n"
       ]

Accessing Substrings

-- Substring access with !! applied to a list of indices; as the results
-- below show, negative indices count back from the end of the string.
s     = "This is what you have"
first = s!![0]                            -- "T"
start = s!![5,6]                          -- "is"
rest  = s!![13..]                         -- "you have"
las   = s!![-1]                           -- "e"
las2  = s.last                            -- 'e' (a Char, not a one-character String)
end   = s!![-4 .. -1]                     -- "have"
-- Index ranges are inclusive at both ends (see end above), so the three
-- characters of "you" are -8 .. -6; stopping at -5 would also pick up the
-- space that follows the word.
piece = s!![-8 .. -6]                     -- "you"


-- Split s into four fields with an unpack format list.
-- NOTE(review): presumably Grab n takes n characters, Forward n skips n and
-- GrabAll takes whatever is left - confirm against unpack's definition.
[lead,ss1,ss2,trailing]     = s.unpack [Grab 2, Forward 3, Grab 1, Grab 2, GrabAll] 
-- Regexps are better suited to this; why introduce an extra concept (unpack)?
[lead2,ss12,ss22,trailing2] = s.match "^(..)...(.)(..)(.*)"
[lead3,ss13,ss23,trailing3] = s.match "^(.{2}).{3}(.)(.{2})(.*)" -- same pattern, written with repetition counts

-- One Grab 5 for every complete group of five characters in s.
fivers                      = s.unpack ((Grab 5).replicate (s.length `div` 5))

-- In Haskell String == [Char], so a string already is its list of characters;
-- there is no need for Perl's unpack("A1" x length($string), $string).
characters = s

-- Strings cannot be modified in place in Haskell, so each step binds a new name.
-- As used here, substr takes an offset, a length and a replacement, while
-- substr2 takes only an offset and a replacement.
s'    = s.substr 5 2 "wasn't"       -- "This wasn't what you have"
s''   = s'.substr2 (-12) "ondrous"  -- "This wasn't wondrous"; note the () around the negative number :(
s'''  = s''.substr 0 1 ""           -- "his wasn't wondrous", deletes the first character <=> tail
s'''' = s'''.substr2 (-10) ""       -- "his wasn'", deletes the last 10 characters <=> reverse . drop 10 . reverse

-- Print a message when the last 10 characters of s match "have".
m     = if s!![-10 .. -1] =~ "have" 
        then putStrLn "Pattern matches in last 10 characters" 
        else return ()
-- The same test written with when used infix: action on the left, condition on the right.
m'    = putStrLn "Pattern matches in last 10 characters" `when` (s!![-10 .. -1] =~ "have" )

-- This cannot be as short as in Perl because Haskell has no in-place assignment:
-- run the substitution on a prefix of s only, then glue the untouched rest back on.
-- NOTE(review): with the inclusive indexing shown by s!![5,6] == "is", the
-- prefix s!![0..5] is "This i", so only the first "is" would be replaced and
-- the result would be "That is what you have" rather than the value noted
-- on the line below - confirm.
news = s!![0..5].(substS "is" "at" [Global_Match]) ++ s!![6..] -- "That at what you have"

a1  = "make a hat"
-- Exchange the first and last characters: last ++ middle ++ first.
a1' = a1!![-1] ++ a1!![1..(a1.length -2)] ++ a1!![0] -- "take a ham"

b1 = "to be or not to be"
-- Presumably: skip 6 characters, then take the next 6.
-- NOTE(review): elsewhere in this file the result of unpack is matched against
-- a list pattern, so b2 is presumably ["or not"] rather than a bare string - confirm.
b2      = b1.unpack [Forward 6, Grab 6]  -- "or not"
-- Presumably: skip 6, take 2, step back 5, take 2.
[b3,c3] = b1.unpack [Forward 6, Grab 2, Backward 5, Grab 2] -- ["or","be"]

-- Turn a list of cut columns into an unpack format: one Grab per gap between
-- consecutive columns (the first gap is measured from column 1), followed by
-- a final GrabAll.
cut2fmt xs = zipWith gap (1 : xs) xs ++ [GrabAll]
    where gap from to = Grab (to - from)
fmt = cut2fmt [8,14,20,26,30] -- [Grab 7,Grab 6,Grab 6,Grab 6,Grab 4,GrabAll]

Establishing a Default Value


-- ||| yields its left operand unless that operand is empty, in which case it
-- falls back to the right one (see the results noted on each line).
v3 = "a" ||| "b" -- -> "a"
v4 = ""  ||| "b" -- -> "b"

-- &&& yields its right operand when the left one is non-zero and an empty
-- value otherwise; the numeric literal needs an explicit type annotation :(
v5 = (2::Int) &&& "ee" -- -> "ee"
v6 = (0::Int) &&& "ee" -- -> ""

-- There is no x ||= y in Haskell; bind a new name instead.
v3' = v3 ||| v4

-- Take the first element of argv's result, falling back to "/tmp" when that result is empty.
-- NOTE(review): argv is presumably the program's argument list and >>> applies
-- a function to the result of an action - confirm.
dir  = argv >>> (\x -> (x ||| ["/tmp"]).head)
dir' = argv >>> ((||| ["/tmp"]) $ head) -- the same, in an even more cryptic form

Exchanging Values Without Using Temporary Variables

-- no side effects in Haskell => swapping two variables in place is meaningless

Converting Between ASCII Characters and Values

i1 = ord 'a' -- character to its numeric code
c1 = chr 97 -- numeric code back to a character

-- ascii = string.unpack("C*") and string = ascii.pack("C*") are not needed in Haskell, since String = [Char].
-- There is also PackedString, which is more efficient but less convenient because it is no longer a list.
ibm = "HAL".map next_char                   -- "IBM"

Processing a String One Character at a Time

-- The characters of the phrase after unique and sort (presumably the distinct characters, in sorted order).
s1 = "unique chars are: " ++ ("an apple a day".unique.sort)
-- Sums the character codes of s1 (the whole labelled string, prefix included) and prints the total after "sum is ".
sum1 = ("sum is " ^ s1.map ord.sum).putStrLn

-- TODO, script.hs HERE

Reversing a String by Word or Character

-- Reverse character by character.
s2 = s1.reverse

-- Reverse word by word.
-- can also be written: s3 = (unwords . reverse . words) s1
--                      s3 = (words $ reverse $ unwords) s1
s3 = s1.words.reverse.unwords 

s3' = s1.split " ".reverse.join " " -- not the same, because words behaves like split "\s+"
s3''= s1.split "\\s+".reverse.join " " 

-- Entries of /usr/share/dict/words that read the same in both directions and
-- are more than 4 characters long.
-- To see them: evaluate long_palindromes, or long_palindromes >>= (mapM putStrLn)
long_palindromes = cat "/usr/share/dict/words" >>> filter isLongPalindrome
    where isLongPalindrome w = w == reverse w && length w > 4

Expanding and Compressing Tabs

-- Expand tabs to 8-column tab stops: split the input on "\t", then glue the
-- pieces back together, padding every piece except the last with spaces up
-- to the next multiple of 8.
-- The empty-list guard keeps foldr1 from failing should split yield no
-- pieces at all (e.g. for an empty input).
-- NOTE(review): the padding is computed from each piece's own length, so the
-- input is assumed to be a single line without embedded newlines - confirm.
expand_tabs = split "\t" $ (\pieces -> if null pieces then [] else foldr1 pad pieces) -- more efficient
    where pad a b = a ++ (times (8 - a.length `mod` 8) ' ') ++ b
-- expand_tabs = fixpoint (break_char '\t' (\a b -> a ++ (' '.times (8 - a.length `mod` 8)) ++ b))
                                

Expanding Variables in User Input

-- There is no eval in Haskell (too meta, too reflective), so substitute with a
-- function instead: the text matched by \d+ is read as a number, doubled and shown.

s4 = "I am 17 years old".subst "\\d+" (\n -> show ((read n) * 2))

Controlling Case

-- Case conversions chained one after the other: s6 starts from s5, s7 from s6.
s5 = "bo beep".upcase
s6 = s5.downcase
s7 = s6.capitalize

-- capitalize is applied to each word separately; the result stays a list of
-- words (it is not joined back into one string).
s8 = "thIS is a loNG liNE".words.map capitalize

Interpolating Functions and Expressions Within Strings

-- Plain Haskell spelling of the same thing:
-- s9 = "I have " ++ (show (10+1)) ++ " guanacos."
-- NOTE(review): ^ appears to be this file's concatenation operator that also
-- shows non-string operands, binding more loosely than + - confirm.
s9 = "I have "^10+1^" guanacos."

Indenting Here Documents

-- The text lines cannot start at the beginning of a line because of the layout
-- rule, so they are given as an indented list of strings and joined with unlines.
-- Multi_Line makes ^ match at the beginning of every line, not just at the
-- beginning of the string; the leading whitespace of each line is then removed.
multi = (substS "^\\s+" "" [Global_Match,Multi_Line] ° unlines) [
 "       This is a multiline here document",
 "       terminated by nothing"
 ]

Reformatting Paragraphs

-- Greedily wrap the words of s into lines of at most width characters (a
-- single word longer than width still gets a line of its own) and join the
-- lines with "\n".
-- The fold carries (finished lines, line being built).  The line being built
-- starts out empty, and a word is only prefixed with a space when it is
-- appended to a non-empty line, so the result never begins with a stray
-- space or with an empty first line.
wrap width s = let (done, cur) = (words s).foldl (\ (done, cur) w -> 
                                            if null cur
                                            then (done, w)
                                            else if (cur++" "++w).length > width 
                                            then (done++[cur], w)
                                            else (done, cur++" "++w)
                                            ) ([],"") in
               join "\n" (done++[cur])

Escaping Characters

escaping = "Mom Said, \"Don't do that.\""
-- Each gsubst call hands the matched text to the section ("\\"++), i.e.
-- (\x -> "\\" ++ x), which puts a backslash in front of it.
esc1 = escaping.gsubst "['\"]" ("\\"++) -- matches single and double quote characters
esc3 = escaping.gsubst "[^A-Z]" ("\\"++) -- matches every character outside A-Z
esc4 = escaping.gsubst "[^\\w]" ("\\"++) -- matches every non-word character

Trimming Blanks from the Ends of a String

s10 = substS "^\\s+" "" [] "    titi" -- whitespace at the start removed
s11 = substS "\\s+$" "" [] "  titi  " -- whitespace at the end removed

-- Strip both ends: trailing whitespace first, then leading whitespace.
trim str = substS "^\\s+" "" [] (substS "\\s+$" "" [] str)

Parsing Comma-Separated Data

-- Split one line of comma-separated data into its fields.
-- The regular expression is three alternatives joined with "|":
--   1. a double-quoted field (backslash escapes allowed inside), optionally followed by a comma;
--   2. an unquoted run of non-comma characters, optionally followed by a comma;
--   3. a lone comma.
-- For every match the result is element 0 of the match or, when that is
-- empty, element 2.
-- NOTE(review): elements 0 and 2 are presumably the capture groups holding the
-- quoted content and the unquoted field respectively - confirm against gmatch.
parse_csv s = let re = ["\"([^\"\\\\]*(\\\\.[^\"\\\\]*)*)\",?" -- "toto,titi"
                       ,"([^,]+),?" -- a field that does not start with " cannot contain a comma => read up to the next comma
                       ,","
                       ].join "|"
                  xxs  = s.gmatch re in
               xxs.map (\xs -> (xs!(0::Int)) ||| (xs!(2::Int)))
                         
-- Sample line mixing plain fields, an empty quoted field, quoted fields that
-- contain commas and one with backslash-escaped quotes.
testline = "XYZZY,\"\",\"O'Reilly, Inc\",\"Wall, Larry\",\"a \\\"glug\\\" bit,\",5,\"Error, Core Dumped\""
-- Print each parsed field as "index : field"; the fold threads the running index, starting at 0.
mparse = testline.parse_csv.foldM(\i x -> putStrLn (i^" : "^x) >>> (\_ -> i+1)) 0

Soundex Matching

-- Based on the explanation at http://www.myatt.demon.co.uk/sxalg.htm
-- Builds a code of the form <first letter>-<three digits>: the first character
-- of the cleaned-up input, a '-', then the first three codes of the remaining
-- letters, padded with zeros when fewer than three are produced.
-- The pattern (x:xs) below does not match when nothing is left after cleaning
-- (e.g. an empty input).
soundex s = x:'-':((res++[0,0,0]).take 3.map show.concat) -- note that since Haskell is lazy, this code is efficient
    -- Letter groups and their codes; group 0 holds the letters that produce no digit.
    where group      =  [("AEIOUYHW",0)
                        ,("BFPV",1)
                        ,("CGJKQSXZ",2)
                        ,("DT",3)
                        ,("L",4)
                        ,("MN",5)
                        ,("R",6)
                        ]
          -- Code of the group containing x.
          -- NOTE(review): just presumably fails when x is in no group (e.g. a digit
          -- or '_', which the [^\w] clean-up below does not remove) - confirm.
          code x = group.find (\(letters,_) -> x `elem` letters).just.snd
          -- Upper-case the input and drop every non-word character; x is the
          -- initial letter, xs the rest.
          (x:xs) = s.upcase.substS "[^\\w]" "" [Global_Match]
          -- A consonant immediately following an initial letter from the same code group is ignored
          xs'    = if xs.length > 0 && xs.head.code == x.code && x.code > 0 then xs.tail else xs
          -- Skip leading letters whose code is 0.
          trim xs = xs.dropWhile (\x -> x.code == 0)
          -- Code of the next letter with a non-zero code, or 0 when none is left.
          next_code xs = xs.trim.(\xs -> if length xs > 0 then xs.head.code else 0)
          -- Emit the code of each remaining letter, dropping a letter when the next
          -- coded letter (code-0 letters in between are skipped) has the same code.
          aux xs = case trim xs of
                   []     -> []
                   (x:xs) -> if x.code == xs.next_code then aux xs else x.code:aux xs
          res    = aux xs'
          
-- Sample names; the comment next to each one gives the code noted for it.
soundexes = map soundex
    [ "holmes"           -- H-452
    , "adomomi"          -- A-350 -- 355
    , "vonderlehr"       -- V-536
    , "ball"             -- B-400
    , "shaw"             -- S-000
    , "jackson"          -- J-250
    , "scanlon"          -- S-545
    , "saintjohn"        -- S-532
    , "kingsmith"        -- K-525
    , "booth-davis"      -- B-312
    , "Knuth"            -- K-530
    , "Kant"             -- K-530
    , "Lloyd"            -- L-300
    , "Ladd"             -- L-300
    ]
-- TODO, not complete ($quota,$comment,$gcos,$expire) = getpw*
-- One password-file entry: five text fields plus the numeric uid and gid.
data Passwd_entry = Passwd_entry {pw_name,pw_passwd,pw_comment,pw_dir,pw_shell::String, pw_uid,pw_gid::Int}
                  deriving (Eq,Show)
-- Build a Passwd_entry from one ":"-separated line.
-- The list pattern only matches when the split yields exactly seven fields;
-- the uid and gid fields are converted with read.
getpwent s = let [name1,passwd1,uid1,gid1,comment1,dir1,shell1] = s.split ":" in
                 Passwd_entry {pw_name=name1,pw_passwd=passwd1,pw_uid=read uid1,pw_gid=read gid1,
                               pw_comment=comment1,pw_dir=dir1,pw_shell=shell1} 

-- Prompt for a user name, then go through /etc/passwd and print
-- "login: firstname lastname" for every entry whose login name, first name or
-- last name has the same soundex code as the name that was typed in.
-- firstname and lastname are taken from the comment field with a regular
-- expression; the two-element list pattern assumes that match yields exactly
-- two strings.
-- soundex' maps the empty string to "" instead of passing it to soundex.
msoundex = do putStr "Lookup user: "
              user  <- getLine
              cat "/etc/passwd" >>=
                  each (\l -> let Passwd_entry {pw_name=name,pw_comment=comment} = l.getpwent
                                  [firstname,lastname] = comment.match "(\\w+)[^,]*\\b(\\w+)"
                                  soundex' xs = if xs == [] then "" else xs.soundex in
                               if user.soundex' `elem` (map soundex' [name,firstname,lastname])
                               then putStrLn (name++": "++firstname++" "++lastname)
                               else return ()
                        )
                         
-- TODO, pstyle and psgrep                       
                         
----------------------------------------------------------------------------------------------------

Program: fixstyle

Program: psgrep