-- -*- haskell -*-

-- @@PLEAC@@_NAME
-- @@SKIP@@ Haskell

-- @@PLEAC@@_WEB
-- @@SKIP@@ http://www.haskell.org/

-- @@PLEAC@@_INTRO
-- @@SKIP@@ Please see http://haskell.org/haskellwiki/Cookbook for a more Haskell centered cookbook

-- @@PLEAC@@_1.0
str  = "\\n"                             -- two characters, \ and an n
str2 = "Jon 'Maddog' Orwant"             -- in haskell we can do string only with ", no single quote
str3 = "\n"                              -- a "newline" character
str4 = "Jon \"Maddog\" Orwant"           -- literal double quotes
str5 = "Multiline string must use a backslash at end of line\n\
\and a backslash at beginning of line\n";
str6 = "It is a common practice\n\
       \to indent lines\n\
       \(the indentation doesn't change the value of the string)\n"

-- @@PLEAC@@_1.1
groupNelem n = unfoldr f
    where f [] = Nothing
          f s = Just (splitAt n s)

-- split at five byte boundaries
fivers = groupNelem 5

-- chop string into individual characters:
-- nothing to do

s     = "This is what you have"
first = take 1 s                          -- "T"
start = take 2 $ drop 5 s                 -- "is"
rest  = drop 13 s                         -- "you have"
last' = last s                            -- 'e'

import Text.Regex
s = "This is what you have"

-- strings are immutable
s2 = a ++ "wasn't" ++ drop 2 b
     where (a, b) = splitAt 5 s
s3 = subRegex (mkRegex " is ") s " wasn't " -- "This wasn't what you have"

-- substitute "at" for "is", restricted to first five characters
s4 = a' ++ b
    where (a, b) = splitAt 5 s
          a' = subRegex (mkRegex "is") a "at"
-- do it another way
import Text.Regex
import qualified Control.Arrow as Arrow
f = uncurry (++) . Arrow.first subst . splitAt 5
    where subst s = subRegex (mkRegex "is") s "at"

-- unpack chars using parsec
import Text.ParserCombinators.Parsec
fromRight (Right v) = v
fromRight v = error ("fromRight: " ++ show v)
parseStr parser s = fromRight $ parse parser "" s
nchars n = count n anyToken

a = "To be or not to be"
b = parseStr (nchars 6 >> nchars 6) a -- "or not"

-- not same example as perl's unpack with "X5" which means "back up a byte"
-- here is an example using lookAhead where we start again at some point
[b3,c3] = parseStr (sequence [ lookAhead p1, p2 ]) a -- ["or","be"]
          where p1 = nchars 6 >> nchars 2
                p2 = nchars 3 >> nchars 2


import List
cut2fmt l = zipWith (-) l (1:l)
applyfmt fmt s = l ++ [s']
    where (s', l) = mapAccumL f s fmt
          f s n = (s',subs)
              where (subs,s') = splitAt n s

fmt = cut2fmt [8,14,20,26,30] -- [7,6,6,6,4]
l = applyfmt fmt "12345678912345678901234567890123456789" 
-- 1234567 891234 567890 123456 7890 123456789

-- @@PLEAC@@_1.2
-- boolean operators only work on booleans
a = b || c

-- if you have an optional value, use type Maybe
import Maybe
v1 = fromMaybe "b" $ Just "a"   -- "a"
v2 = fromMaybe "b" $ Nothing    -- "b"

-- to combine Maybe values, you can use mplus
import Control.Monad (mplus)

v1' = (Just "a") `mplus` (Just "b") -- Just "a"
v2' = Nothing `mplus` (Just "b")    -- Just "b"

-- you could also define some perlish things:
class Default_val a where
    default_val :: a
    is_default_val :: a -> Bool

instance Default_val [a] where 
    default_val = []
    is_default_val = null
instance Default_val Num where 
    default_val = 0
    is_default_val = (== 0)

a &&& b = if a.is_default_val then default_val else b
a ||| b = if a.is_default_val then b else a

foo = bar ||| "DEFAULT VALUE"

--
import Maybe
import System
argv0 = fmap listToMaybe getArgs
dir = fmap (fromMaybe "/tmp") argv0

-- or
dir' = fmap (head . (++ ["/tmp"])) getArgs

-- @@PLEAC@@_1.3
-- no side effect in haskell => swap is a nonsense

-- @@PLEAC@@_1.4
import Char
i = ord 'e'                     -- 101
c = chr 101                     -- 'e'

import Text.Printf
printf "Number %d is character %c\n" 101 101

ascii_character_numbers = map ord "sample" -- [115,97,109,112,108,101]
word = map chr ascii_character_numbers

ibm = map (chr . (+ 1) . ord) "HAL" -- "IBM"

-- @@PLEAC@@_1.5
s = "an apple a day"
msg1 = "unique chars are: " ++ sort (nub s)
msg2 = "sum is " ++ (show $ sum $ map ord $ s)


-- slowcat -----------------------
#!/usr/bin/runghc
{-# OPTIONS_GHC -fglasgow-exts #-}

import System
import System.IO
import System.Posix
import Text.Regex

any_input [] = getContents
any_input (f:_) = readFile f

main = do (time, args') <- fmap get_time getArgs
          s <- any_input args'
          hSetBuffering stdout NoBuffering
          mapM_ (\c -> putChar c >> usleep (5000 * time)) s
    where get_time (x:args)
              | (Just [d]) <- matchRegex (mkRegex "^-([0-9]+)") x = (read d, args)
          get_time args = (1, args)


-- @@PLEAC@@_1.6
string = "Yoda said, \"can you see this?\""
allwords = words string
revwords = unwords (reverse allwords)

revwords = (unwords . reverse . words) string

-- another version of revwords which works for spaces
revwords' = (unwords . reverse . splitRegex (mkRegex " ")) string

-- yet another version using a home made version of splitRegex' which keeps the matched string
import List
import Text.Regex

-- special unfoldr (unfoldr only returns [b] whereas we also need the rest)
-- nb: we can use Control.Arrow.first which is \f (a,b) -> (f a, b)
--     and write:  Arrow.first (a :) (unfoldr' f b')
unfoldr' :: (a -> Maybe (b, a)) -> a -> ([b], a)
unfoldr' f b  =
  case f b of
   Just (a, b') -> let (l, b'') = unfoldr' f b' in (a:l, b'')
   Nothing      -> ([], b)

splitRegex' :: Regex -> String -> [(String, String)]
splitRegex' re s = 
    case unfoldr' f s of
      (l, "") -> l
      (l, s) -> l ++ [(s, "")]
    where 
      -- small helper functions which tranform matchRegexAll output
      -- to the one wanted by unfoldr'
      f = fmap f' . matchRegexAll re
      f' (before, matched, after, _) = ((before, matched), after)

words' = concatMap (\(a,b) -> [a,b]) . splitRegex' (mkRegex "\\s+")

revwords' = (concat . reverse . words') string

{-
import Test.QuickCheck
import Char

instance Arbitrary Char where
    arbitrary     = fmap chr $ choose (32,255)
    coarbitrary n = variant (ord n)

property_words' s = (concat . words') s == s
verif = quickCheck property_words'
-}
--

word = "reviver";
is_palindrome s = s == reverse s

long_palindromes = fmap (filter (\s -> s == reverse s && length s > 5) . lines) $
                      readFile "/usr/share/dict/words"

-- @@PLEAC@@_1.7

expand_tabs "" = ""
expand_tabs s = foldr1 ((++) . adjust) $ splitRegex (mkRegex "\t") s
    where adjust a = a ++ replicate (8 - (length a) `mod` 8) ' '

-- replace spaces with tab
unexpand = concat . map (reverse . unexp . reverse) . groupNelem 8 where
    unexp s = if head s == ' ' && length s == 8 
              then '\t' : dropWhile (== ' ') s
              else s
{- 
   Here is a property that can be given to quickCheck 
   property_expand s = (expand_tabs . unexpand) s == s
-}
                        
-- @@PLEAC@@_1.8
-- can't do eval in haskell

-- subRegex only allow a fixed string
-- subRegexOnceWith below takes a (String -> String) function to compute a result
-- (and do the substitution only once)
subRegexOnceWith re new s = 
    case matchRegexAll re s of
      Nothing -> s
      Just (before, matched, after, _) -> before ++ new matched ++ after
s = subRegexOnceWith (mkRegex "[0-9]+") (show . (* 2) . read) "I am 17 years old"

-- @@PLEAC@@_1.9
import Char
s1 = map toUpper "dromedary"       -- "DROMEDARY"
s2 = map toLower s1              -- "dromedary"
s3 = toUpper (head s2) : tail s2

capitalize "" = ""
capitalize (x:xs) = toUpper x : map toLower xs

-- capitalize each word's first character, downcase the rest
s4 = map capitalize $ words "thIS is a loNG liNE"

-- randcap: filter to randomly capitalize 20% of the letters
#!/usr/bin/runghc
import System
import System.IO
import Random
import Char

any_input [] = getContents
any_input (f:_) = readFile f

rand :: Int -> Int -> IO Int
rand low high = getStdRandom (randomR (low, high))

randcap :: String -> IO String
randcap = sequence . map f where
      f c = fmap (modify_char c) $ rand(1,5)
      modify_char c n = (if n == 1 then toUpper else toLower) c

main = do s <- getArgs >>= any_input
          randcap s >>= putStr

-- @@PLEAC@@_1.10
s = "I have " ++ show (n+1) ++ " guanacos."

-- multiline strings are quite ugly in haskell
-- so skipping the send_mail example

-- @@PLEAC@@_1.11
var = "your text\n\
      \goes here\n"

-- @@PLEAC@@_1.12
import Data.List

input = "Folding and splicing is the work of an editor,\
        \ not a mere collection of silicon and mobile electrons!"

wrap columns first_indent indent s =
        intercalate "\n" $ wgroup (tail first_indent) (words s)
        where wgroup current [] = [current]
              wgroup current (w : ws) =
                if length current + length w + 1 < columns 
                then wgroup (current ++ " " ++ w) ws 
                else current : wgroup (indent ++ w) ws


-- another version
wrap_simple columns s =
    intercalate "\n" $ map unwords $ groupWhile pred $ words s
    where pred = (<= columns) . sum . map ((+1) . length)

wrap columns first_indent indent s =
    first_indent ++ intercalate ("\n" ++ indent) (map unwords $ first : next)
    where (first, rest) = spanWhile (pred $ columns - length first_indent) (words s)
          next = groupWhile (pred $ columns - length indent) rest 
          pred width = (<= width) . sum . map ((+1) . length)

spanWhile :: ([a] -> Bool) -> [a] -> ([a],[a])
spanWhile p l = spanWhile' [] l              
    where spanWhile' seen [] = (seen, [])
          spanWhile' seen (x:xs) =
              let seen' = seen ++ [x] in
              if p seen' then spanWhile' seen' xs else (seen, x:xs)

groupWhile :: ([a] -> Bool) -> [a] -> [[a]]
groupWhile p l = case spanWhile p l of
                   ([], []) -> []
                   (l', []) -> [l']
                   (l', rest) -> l' : groupWhile p rest

-- @@PLEAC@@_1.13
import Text.Regex
import Char

subRegexWith re new s =
    case matchRegexAll re s of
      Nothing -> s
      Just (before, matched, after, _) -> before ++ new matched ++ (subRegexWith re new after)

quoteMeta :: String -> String
quoteMeta = concatMap (\c -> if (isAlphaNum c) then [c] else ['\\', c])

t1 = subRegex (mkRegex $ quoteMeta "^") "foo^bar" "+" -- "foo+bar" (without the quoteMeta, it goes into a dead loop)
t2 = subRegex (mkRegex $ quoteMeta "${") "${foo}bar" "{" -- "{foo}bar"
t3 = subRegexWith (mkRegex "\\$\\{[^}]*\\}") (map toUpper) "${foo}bar}" -- "${FOO}bar}"

-- @@PLEAC@@_1.14
trim_beg, trim_end, trim :: String -> String
trim_beg = snd . span isSpace
trim_end = reverse . trim_beg . reverse
trim = trim_end . trim_beg

trimmed = ">" ++ trim "\t       `Here' \t \t \n\n\n" ++ "<"

-- @@PLEAC@@_1.15
parse_csv :: String -> [String]
parse_csv s = case lex s of
        (x, "") -> [x]
        (x, xs) -> x : parse_csv xs
      where
           lex "" = ("", "")
           lex (',':xs) = ("",xs)
           lex ('"':xs) = (x++x',xs'') where (x,xs') = lexString xs
                                             (x',xs'') = lex xs'         
           lex (other:xs) = (other:x,xs') where (x,xs') = lex xs                                       

           lexString ('"':xs) = ("",xs)
           lexString ('\\':c:xs) = ('\\':c:x , xs') where (x,xs') = lexString xs
           lexString (c:xs) = (c:x,xs') where (x,xs') = lexString xs

-- the same using Parsec
import Text.ParserCombinators.Parsec

fromRight (Right v) = v
fromRight v = error ("fromRight: " ++ show v)
parseStr parser s = fromRight $ parse parser "" s
toList = fmap (\c -> [c])
many_ = fmap concat . many

parse_csv' = parseStr $ (quoted_string <|> raw_string) `sepBy` char ','
    where quoted_string = between (char '"') (char '"') (chars_until '"')
          raw_string = chars_until ','
          chars_until c = many_ (sequence [ char '\\', anyChar ] <|> toList (noneOf [c]))

test_string = "XYZZY,\"\",\"O'Reilly, Inc\",\"Wall, Larry\",\"a \\\"glub\\\" bit,\",5,\"Error, Core Dumped\""

output = concatMap format_it $ zip [0..] (parse_csv test_string)
    where format_it (line, s) = show line ++ " : " ++ s ++ "\n"

-- @@PLEAC@@_1.16
import List
import Maybe
import Char
import System.Posix.User

soundex name = (chars!!0) : concatMap show codes'
    where
      chars = map toUpper (filter isAlpha name)

      -- duplicate consecutive soundex digits are skipped
      codes = map head $ group $ map letter_to_code chars

      -- remove first, remove codes 0, add right-pad with 0
      codes' = take 3 (filter (/= 0) (tail codes) ++ [0, 0..])

letter_to_code :: Char -> Int
letter_to_code c = snd $ fromJust $ find (\(letters, _) -> c `elem` letters) letters_code
letters_code =
    [ ("AEIOUYHW", 0) , ("BFPV", 1) , ("CGJKQSXZ", 2) , ("DT", 3) , ("L", 4) , ("MN", 5) , ("R", 6) ]
          
errors = filter (\(code,name) -> code /= soundex name)
            [ ("H452", "holmes")
            , ("A355", "adomomi")
            , ("V536", "vonderlehr")
            , ("B400", "ball")
            , ("S000", "shaw")
            , ("J250", "jackson")
            , ("S545", "scanlon")
            , ("S532", "saintjohn")
            , ("K525", "kingsmith")
            , ("B331", "booth-davis")
            , ("K530", "Knuth")
            , ("K530", "Kant")
            , ("L300", "Lloyd")
            , ("L300", "Ladd")
            ]


-- one need to flush between putStr and getLine,
--   it would not be needed if we had printer a newline
-- the other solution is to disable buffering with hSetBuffering
putStr_getLine s = do putStr s
                      hFlush stdout
                      getLine

msoundex = do user <- putStr_getLine "Lookup user: "
              matching <- fmap (filter $ matches user) getAllUserEntries
              mapM_ (putStrLn . format_user) matching
    where 
      matches wanted user = soundex wanted `elem` (map soundex $ userName user : words (userGecos user))
      format_user user = userName user ++ ": " ++ userGecos user
                         
-- TODO, pstyle and psgrep                       
                         
----------------------------------------------------------------------------------------------------
-- @@PLEAC@@_2.1
-- using readIO
is_integer s = catch (do readIO s :: IO Int
                         putStrLn "is an integer")
                     (\_ -> putStrLn "is not")

-- using regexp
is_integer = isJust . matchRegex (mkRegex "^[+-]?[0-9]+$")

-- using reads
read_maybe s = case reads s of
                 [ (v, "") ] -> Just v
                 _ -> Nothing

is_integer s = isJust (read_maybe s :: Maybe Int)
is_float s = isJust (read_maybe s :: Maybe Double)

-- @@PLEAC@@_2.2

-- equal_num num1 num2 accuracy : returns true if num1 and num2 are
--   equal to accuracy number of decimal places
equal_num n1 n2 accuracy = abs (n1 - n2) < 10 ^^ (-accuracy)

wage = 536          -- $5.36/hour
week = 40 * wage    -- $214.40
weekwage :: String
weekwage = printf "One week's wage is: $%.2f\n" (week / 100 :: Double)

-- @@PLEAC@@_2.3
rounded = round unrounded

a = 0.255 :: Double
b = printf "%.2f" a
t = do putStrLn ("Unrounded: " ++ show a ++ "\nRounded: " ++ b)
       printf "Unrounded: %f\nRounded: %.2f\n" a a
-- Unrounded: 0.255
-- Rounded: 0.26
-- Unrounded: 0.255
-- Rounded: 0.26


a = [3.3, 3.5, 3.7, -3.3]
t = let l = map (\n -> printf "%.1f\t%d\t%d\t%d" (n :: Double) (truncate n :: Int)
                                                 (floor n :: Int) (ceiling n :: Int)) a in
    putStrLn (unlines $ "number\ttrncate\tfloor\tceiling" : l)

-- nb: to have a better looking output, use %4.1f and %2d instead of %.1f and %d

-- @@PLEAC@@_2.4
bin2dec :: String -> Integer
bin2dec = foldr (\c s -> s * 2 + c) 0 . reverse . map c2i
    where c2i c = if c == '0' then 0 else 1
-- bin2dec "0110110" == 54

dec2bin = map i2c . reverse . unfoldr decomp
    where decomp n = if n == 0 then Nothing else Just(n `mod` 2, n `div` 2)
          i2c i = if i == 0 then '0' else '1'
-- dec2bin 54 == "110110"

{-
import Test.QuickCheck
property_bindec n = n >= 0 ==> (bin2dec . dec2bin) n == n
verif = quickCheck property_bindec
-}

-- @@PLEAC@@_2.5

-- clean & pure way:
m1 = putStrLn $ "Infancy is: " ++ unwords (map show [0..2])
-- Infancy is: 0 1 2 

-- imperative way:
m2 = do putStr "Infancy is: "
        mapM_ (printf "%d ") [0 :: Int .. 2]
        putStrLn ""

-- imperative way':
m2' = do putStr "Infancy is: "
         mapM_ (\n -> putStr $ show n ++ " ") [0 :: Int .. 2]
         putStrLn ""

-- [0,2..8] == [0,2,4,6,8]

-- @@PLEAC@@_2.6
roman n = concat $ reverse $ snd $ mapAccumL transform n sets
    where
      transform n set = (n `div` 10, roman set (n `mod` 10))
      roman (i,v,x) n = l !! n
          where l = [ [], [i], [i,i], [i,i,i], [i,v], [v], [v,i], [v,i,i], [v,i,i,i], [i,x] ]
      sets = [('I','V','X'), ('X','L','C'), ('C','D','M'), ('M',too_big,too_big)] 
      too_big = error "roman: number greater than 3999"

arabic = sum . snd . mapAccumL set_sign 0 . map c2i . reverse
    where
      -- if the roman digit is smaller than biggest digit so far, substract it (eg: I is -1 in IV)
      set_sign max i = if i >= max then (i, i) else (max, -i)
      c2i c = case toUpper c of 
                'I' -> 1; 'V' -> 5; 'X' -> 10; 'L' -> 50 
                'C' -> 100; 'D' -> 500; 'M' -> 1000

roman_fifteen = roman 15 -- "XV"
s1 = "Roman for fifteen is " ++ roman_fifteen
arabic_fifteen = arabic roman_fifteen
s2 = "Converted back, " ++ roman_fifteen ++ " is " ++ show arabic_fifteen

{-
property_roman_arabic n = n >= 0 && n < 4000 ==> (arabic . roman) n == n
verif = quickCheck property_roman_arabic
-}

-- @@PLEAC@@_2.7
import Random
import Control.Monad (replicateM)

rand :: Int -> Int -> IO Int
rand low high = getStdRandom (randomR (low, high))

n = rand 25 75 -- [25,75]

random_elt l = do i <- rand 0 (length l - 1)
                  return (l !! i)

password = replicateM 8 (random_elt chars)
    where chars = concat [ ['A'..'Z'], ['a'..'z'], ['0'..'9'], "!@$%^&*" ]


-- random_elt can be also be written
random_elt' l = fmap (l !!) (rand 0 $ length l - 1)

-- @@PLEAC@@_2.8
srand = setStdGen . mkStdGen

randfixed = do srand 2
               rand 1 10

-- @@PLEAC@@_2.9
-- you can provide your own random generator by playing with the StdGen type

-- @@PLEAC@@_2.10
std_rand :: IO Double
std_rand = getStdRandom (randomR (0,1))

gaussian_rand = 
    do r1 <- std_rand
       r2 <- std_rand
       let u1 = 2*r1 - 1
       let u2 = 2*r2 - 1
       let w  = u1*u1 + u2*u2 -- variance
       if w >= 1 
         then gaussian_rand
         else let w2 = sqrt ((-2 * log w) / w) in 
              return (u2*w2, u1*w2) -- gaussian-distributed numbers

mean = 25
sdev = 2
t = do (r, _) <- gaussian_rand 
       let salary = r * sdev + mean
       printf "You have been hired at $%.2f\n" salary

-- @@PLEAC@@_2.11
deg2rad d = d * pi / 180
rad2deg r = r * 180 / pi

degree_sine = sin . deg2rad

-- @@PLEAC@@_2.12
asin_val = asin 1
acos_val = acos 1

-- @@PLEAC@@_2.13
v = log 10

log10 = logBase 10

t = putStrLn $ "log10(10,000)=" ++ log10 10000
-- log10(10,000) = 4

-- @@PLEAC@@_2.14
-- no standard matrix function in haskell library
-- here is a simple implementation
-- see also http://darcs.haskell.org/hugs98/demos/Matrix.hs
--       or http://darcs.haskell.org/nofib/ghc/matrix/Matrix.hs

sum_product :: Num a => [a] -> [a] -> a
sum_product u v = sum (zipWith (*) u v)

matrix_zipWith f a b = [zipWith f ak bk | (ak,bk) <- zip a b]

add_matrices a b = matrix_zipWith (+)

inner_product :: Num a => [[a]] -> [[a]] -> [[a]]
inner_product a b = mult a (transpose b)
    where mult [] _ = []
          mult (a_x:a_xs) b = [sum_product a_x bi | bi <- b] : mult a_xs b

a = [ [3, 2, 3]
    , [5, 9, 8] ]
b = [ [4, 7]
    , [9, 3]
    , [8, 1] ]
c = inner_product a b

-- @@PLEAC@@_2.15
import Complex

a = 3 :+ 5    -- 3 + 5i
b = 2 :+ (-2) -- 2 - 2i
c = a * b     -- 16 + 4i

t1 = (realPart c, imagPart c, conjugate c) --  16, 4, 16-4i

d = 3 :+ 4
t2 = sqrt d  -- 2 + i

-- @@PLEAC@@_2.16
-- "read" handles both octal and hexadecimal when prefixed with 0x or 0o
-- here are versions adding the prefix and calling "read"
hex s = read ("0x" ++ s) :: Integer
oct s = read ("0o" ++ s) :: Integer

-- hex "45" == 69
-- oct "45" == 37
-- hex "45foo" => Exception: Prelude.read: no parse

-- calling explicitly readHex or readOct:
hex = fst . head . Numeric.readHex
oct = fst . head . Numeric.readOct

-- one need to flush between putStr and getLine,
--   it would not be needed if we had printer a newline
-- the other solution is to disable buffering with hSetBuffering
putStr_getLine s = do putStr s
                      hFlush stdout
                      getLine

t = do s <- putStr_getLine "Gimme a number in decimal, octal, or hex: "
       let n = read s :: Integer
       printf "%d %x %o\n" n n n

t' = do permissions <- putStr_getLine "Enter file permission in octal: "
        putStrLn $ "The decimal value is " ++ show (oct permissions)


-- @@PLEAC@@_2.17
import Data.List
-- (using groupNelem from "Helpers")
commify = reverse . intercalate "," . groupNelem 3 . reverse
-- commify "-1740525205" == "-1,740,525,205"

-- a version handling decimal numbers (using subRegexOnceWith from "Helpers")
commify' = subRegexOnceWith (mkRegex "[0-9]+") commify
-- commify' "-1740525205.000001" == "-1,740,525,205.000001"


-- @@PLEAC@@_2.18
hours = 2
s = "It took " ++ show hours ++ " hour" ++ if hours == 1 then "" else "s"

s2 = printf fmt (hours :: Int) :: String
    where fmt = if hours == 1 then "%d hour is enough.\n" else "%d hours are enough.\n"


-- subRegex doesn't return wether it succeeded or not
-- calling matchRegex first
subRegexMaybe :: Regex -> String -> String -> Maybe String
subRegexMaybe re s repla = do matchRegex re s -- discard the result
                              Just (subRegex re s repla)

subRegexMany :: [(String, String)] -> String -> Maybe String
subRegexMany regexps s = msum (map try_one regexps)
    where try_one (re, repla) = subRegexMaybe (mkRegex re) s repla
-- note the use of msum to take the first (Just _) in the list
-- here msum is used with type [Maybe a] -> Maybe a

noun_plural s = fromMaybe (error "can't get here") (subRegexMany regexps s)
    where regexps = [ ("ss$", "sses")
                    , ("([psc]h)$", "\\1es")
                    , ("z$", "zes")
                    , ("ff$", "ffs")
                    , ("f$", "ves")
                    , ("ey$", "eys")
                    , ("y$", "ies")
                    , ("ix$", "ices")
                    , ("([sx])$", "\\1es")
                    , ("(.)$", "\\1s") -- note that subRegex is unsafe if the regexp matches an empty strings, cf documentation
                    ]

s = unlines $ map (\s -> "One " ++ s ++ ", two " ++ noun_plural s ++ ".") $ words test_words
    where test_words = "fish fly ox \
                       \species genus phylum \
                       \cherub radius jockey \
                       \index matrix mythos \
                       \phenomenon formula"

-- @@PLEAC@@_2.19
#!/usr/bin/runghc

import List
import System
import Text.Printf

-- from http://haskell.org/haskellwiki/99_questions/31_to_41#Problem_36
encode :: Eq a => [a] -> [(a, Int)]
encode = map (\x -> (head x, length x)) . group

primeFactors n = factor n primes
  where factor n (p:ps) | p*p > n = [n]
                        | n `mod` p /= 0 = factor n ps
                        | otherwise = p : factor (n `div` p) (p:ps)
        primes = 2 : filter ((==1) . length . primeFactors) [3,5..]

main = getArgs >>= mapM_ do_one
    where do_one n = printf "%-10s %s\n" n (to_string $ encode $ primeFactors $ read n)
          to_string [(_, 1)] = "PRIME"
          to_string l = unwords $ map (\(n,power) -> show n ++ to_string_power power) l
          to_string_power p = if p == 1 then "" else "**" ++ show p

{-
% bigfact  8 9 96 2178
8          2**3
9          3**2
96         2**5 3
2178       2 3**2 11**2
% bigfact 239322000000000000000000
239322000000000000000000 2**19 3 5**18 39887
% bigfact 25000000000000000000000000
25000000000000000000000000 2**24 5**26
-}


-- @@PLEAC@@_3.0
-- you can use haskell-98 standard module:
import Time

date = fmap show getClockTime   -- "Wed Apr 25 19:43:29 CEST 2007"
localtime = getClockTime >>= toCalendarTime
-- => CalendarTime {ctYear = 2007, ctMonth = April, ctDay = 25, ctHour = 19, ctMin = 46, ctSec = 41, ctPicosec = 214805000000, ctWDay = Wednesday, ctYDay = 114, ctTZName = "CEST", ctTZ = 7200, ctIsDST = True}

utc_time = fmap toUTCTime getClockTime
-- => CalendarTime {ctYear = 2007, ctMonth = April, ctDay = 25, ctHour = 17, ctMin = 47, ctSec = 59, ctPicosec = 325921000000, ctWDay = Wednesday, ctYDay = 114, ctTZName = "UTC", ctTZ = 0, ctIsDST = False}

t = do tm <- localtime
       putStrLn $ "Today is day " ++ show (ctYDay tm) ++ " of the current year"
-- => Today is day 114 of the current year


-- or you can use new "time library":
import Data.Time

date = getCurrentTime
-- "2008-04-18 14:11:22.476894 UTC"

t = do today <- fmap utctDay getCurrentTime      
       let (year, _, _) = toGregorian today
       let days = diffDays today (fromGregorian year 0 0)
       putStrLn $ "Today is day " ++ show days ++ " of the current year"
-- => Today is day 108 of the current year

-- @@PLEAC@@_3.1
import Data.Time
import System.Locale (defaultTimeLocale)

t = do tm <- getCurrentTime
       let (year, month, day) = toGregorian (utctDay tm)
       printf "The current date is %04d %02d %02d\n" year month day

t2 = do tm <- getCurrentTime
        return $ "The current date is " ++ show (utctDay tm)
-- The current date is 2008-04-18

t3 = fmap (formatTime defaultTimeLocale "%Y-%m-%d") getCurrentTime
-- => 2008-04-18

-- @@PLEAC@@_3.2
import Data.Time
import Data.Time.Clock.POSIX

-- !! epoch is not the "base" value in Data.Time, so do not use it unless you !!
-- !! really need it !!

-- if you want epoch, use:
t = getPOSIXTime                -- 1208529250.856017s

-- if you want to get epoch from a time, use:
t2 = fmap utcTimeToPOSIXSeconds getCurrentTime -- 1208529250.856017s

-- @@PLEAC@@_3.3
import System.Time
import Data.Time
import Data.Time.Clock.POSIX

epoch = 111111
t1 = posixSecondsToUTCTime epoch      -- 1970-01-02 06:51:51 UTC
t2 = timeToTimeOfDay (utctDayTime t1) -- 06:51:51

-- @@PLEAC@@_3.4
import Data.Time

ten_seconds_before = addUTCTime (-10)
t = do now <- getCurrentTime
       return (now, ten_seconds_before now)
-- (2008-04-18 14:48:33.075113 UTC,
--  2008-04-18 14:48:23.075113 UTC)

-- ten_seconds_before can also be written:
ten_seconds_before (UTCTime day time) = UTCTime day (time - 10)
ten_seconds_before t = t { utctDayTime = utctDayTime t - 10 }


birth_date = fromGregorian 1973 1 18
t  = "Nat was 55 days old on: " ++ show (addDays 55 birth_date)
-- Nat was 55 days old on: 1973-03-14

-- @@PLEAC@@_3.5
import Data.Time
import Data.Time.Clock.POSIX

bree = UTCTime (fromGregorian 1981 6 16) (timeOfDayToTime $ TimeOfDay 4 35 25) -- 1981-06-16 04:35:25 UTC
nat  = UTCTime (fromGregorian 1973 1 18) (timeOfDayToTime $ TimeOfDay 3 45 50) -- 1973-01-18 03:45:50 UTC
-- or simpler:
bree' = read "1981-06-16 04:35:25" :: UTCTime
nat'  = read "1973-01-18 03:45:50" :: UTCTime

difference = diffUTCTime bree nat / posixDayLength
t = "There were " ++ (show $ round difference) ++ " days between Nat and Bree"
-- There were 3071 days between Nat and Bree

toFloat n = realToFrac n :: Float
t2 = printf "There were %.2f days between Nat and Bree" (toFloat difference) :: String
-- There were 3071.03 days between Nat and Bree

-- @@PLEAC@@_3.6
import Data.Time
import Data.Time.Calendar.MonthDay
import Data.Time.Calendar.OrdinalDate
import Locale

(year, month, day) = (1981, 6, 16)

t = do printf "%d/%d/%d was a %s\n" year month day week_day_name
       printf "%d/%d/%d was day %d of the week %d\n" year month day week_day week
       printf "%d/%d/%d was day %d of month %d\n" year month day month_day month_
       printf "%d/%d/%d was day %d of year %d\n" year month day year_day year_
    where date = (fromGregorian year month day)
          (week, week_day) = sundayStartWeek date
          (_, month_, month_day) = toGregorian date
          (year_, year_day) = toOrdinalDate date
          (week_day_name, _) = wDays defaultTimeLocale !! week_day
-- 1981/6/16 was a Tuesday
-- 1981/6/16 was day 2 of the week 24
-- 1981/6/16 was day 16 of month 6
-- 1981/6/16 was day 167 of year 1981

-- @@PLEAC@@_3.7
import Data.Time
import Data.Time.Format
import Data.Time.Clock.POSIX
import Locale

day :: Day
day = readTime defaultTimeLocale "%F" "1998-06-03"

-- calculate epoch seconds at midnight on that day in UTC
epoch = utcTimeToPOSIXSeconds (UTCTime day 0)
-- 896832000s

-- readTime can return a UTCTime:
epoch_ = utcTimeToPOSIXSeconds (readTime defaultTimeLocale "%F" "1998-06-03")

-- it seems readTime is not flexible, 
-- for example "%d/%m/%Y" can't handle 16/6/1981, only 16/06/1981
--
-- here is an alternative solution, using regexp
import Data.Time
import Data.Time.Clock.POSIX
import Text.Regex

day = fromGregorian (read year) (read month) (read day)
      where Just [year, month, day] = matchRegex (mkRegex "(.*)-(.*)-(.*)") "1998-06-03"
-- 1998-06-03

-- @@PLEAC@@_3.8
-- formatTime from Data.Time.Format allow powerful time formatting:
import Data.Time
import Data.Time.Format
import Locale

t = do now <- getCurrentTime
       return $ formatTime defaultTimeLocale "The date is %A (%a) %d/%m/%Y" now
-- "The date is Tuesday (Tue) 28/10/2008"

-- @@PLEAC@@_3.9
-- getCurrentTime/UTCTime has a precision of 1 picosecond, full precision is used by default
import Data.Time
import System.Posix.Unistd

t = do t1 <- getCurrentTime
       usleep 100000 -- 100ms
       t2 <- getCurrentTime
       return (diffUTCTime t2 t1)
-- 0.111262s

-- @@PLEAC@@_3.10
-- short sleeps:
t = do usleep 3100000

-- for even more precision, nanosleep is available


-- @@PLEAC@@_4.0
single_level = [ "this", "that", "the", "other" ]

-- there is no non-homogeneous list. You can do things with tuples:
nested = ("this", "that", ["the", "other"]) -- (String, String, [String])

-- arrays are generally eschewed in Haskell, lists are prefered.
-- arrays are useful if you desperately need:
-- * huge amounts of raw data.
-- * constant (O(1)) lookup => use Data.Array
-- * constant (O(1)) update => use Data.Array.IO, Data.Array.ST or Data.Array.Storable
--                             (or even Data.Array.Diff)
-- see http://haskell.org/haskellwiki/Cookbook#Arrays
-- and http://www.haskell.org/haskellwiki/Modern_array_libraries for more

-- creating an Array from a list:
import Data.Array
listToArray :: [a] -> Array Int a
listToArray l = listArray (0, length l - 1) l

a1 = listToArray l1
-- => array (0,3) [(0,"Crosby"),(1,"Stills"),(2,"Nash"),(3,"Young")]

-- getting back the list
l1' = elems a1


-- creating an IOArray from a list:
import Data.Array.IO
listToIOArray :: [a] -> IO (IOArray Int a)
listToIOArray l = newListArray (0, length l - 1) l

a1 = listToIOArray l1

l1' = a1 >>= getElems 


-- @@PLEAC@@_4.1
a  = [ "quick", "brown", "fox" ]
a' = words "Why are you teasing me?"

l = lines 
      "The boy stood on the burning deck\n\
      \It was as hot as glass."

big_array = do s <- readFile "mydatafile"
               return (lines s)
-- more compact:
big_array = fmap lines (readFile "mydatafile")


-- using readProcess from "Helpers"
his_host = "www.haskell.org"
host_info = do Right info <- readProcess "nslookup" [his_host] ""
               return info

-- calling ps on our running process
import System
import System.Posix (getProcessID)

t = do id <- getProcessID
       system ("ps " ++ show id)

-- @@PLEAC@@_4.2
import Data.List
commify_series :: [String] -> String
commify_series []  = ""
commify_series [x]  = x
commify_series xs  = intercalate ", " (init xs) ++ " and " ++ (last xs)

array = ["red", "yellow", "green"]
s1 = "I have " ++ commify_series array ++ " marbles" -- I have red, yellow and green marbles
s2 = "I have " ++ concat  array ++ " marbles" -- I have redyellowgreen marbles
s3 = "I have " ++ unwords array ++ " marbles" -- I have red yellow green marbles

-- @@PLEAC@@_4.3
-- arrays can't be resized
-- similar behaviour for lists explained below

what_about_that_list x = unlines
    [ "The list now has " ++ show (length x) ++ " elements."
    , "The index of the last element is " ++ show (length x - 1) ++ "."
    , "Element .3 is`" ++ x!!3 ++ "'."
    ]

l1 = [ "Crosby", "Stills", "Nash", "Young" ]

-- remove last element
l2 = init l1    -- ["Crosby", "Stills", "Nash"]

-- keep 3 elements
l3 = take 3 l1  -- ["Crosby", "Stills", "Nash"]

-- calling what_about_that_list on l2 or l3 would raise
--   *** Exception: Prelude.(!!): index too large
-- when accessing element .3

-- grow list
l4 = take 10001 (l1 ++ repeat "")
-- same, but much less haskell'ish, prefer above
l4' = l1 ++ replicate (10001 - length l1) ""

-- writing a function that grow and shrink a list is easy:
resize_list n default_val l = take n (l ++ repeat default_val)



-- @@PLEAC@@_4.4
bad_users = filter is_valid_user all_users

t = mapM_ complain bad_users

t' = mapM_ do_it all_users
    where do_it user = do usage <- get_usage user
                          when (usage > max_quota) (complain user)

-- equivalent of "env | sort"
import System.Environment
import List
t = getEnvironment >>= putStr . format
    where format = unlines . map (\(var, val) -> var ++ "=" ++ val) . sort

-- display lines from "who" matching "simonpj" (readProcess is in "Helpers")
t = do Right s <- readProcess "who" [] ""
       mapM_ putStrLn (filter ("simonpj" `isPrefixOf`) $ lines s)

-- print file with words reversed:
t = readFile "/etc/fstab" >>= putStr . reverse_words_of_each_line
    where reverse_words_of_each_line = unlines . map (unwords . reverse . words) . lines

-- modify every element
l = map (-1 +) [1,2,3]          -- [0,1,2]

-- multiply everything in a and b by seven
l = [ ("a", [ 0.5, 3 ]), ("b", [ 0, 1 ]) ]
l' = map (\(name, xs) -> (name, map (* 7) xs)) l
-- => [ ("a", [ 3.5, 21 ]), ("b", [ 0, 7 ]) ]
-- same using Arrow.second
l_' = map (Arrow.second $ map (* 7)) l
-- for a more cryptic version, replace Arrow.second with fmap

-- same using Data.Map
import qualified Data.Map as Map
m = Map.fromList [ ("a", [ 0.5, 3 ]), ("b", [ 0, 1 ]) ]
m' = Map.map (map (* 7)) m 


-- @@PLEAC@@_4.5
-- not relevant in haskell since we have always kind of references


-- @@PLEAC@@_4.6
l = nub [ 1, 1, 2, 2, 3, 3, 3, 5 ]  -- [ 1, 2, 3, 5 ]

-- generate a list of users logged in, removing duplicates (readProcess is in "Helpers")
t = do Right s <- readProcess "who" [] ""
       let users = sort $ nub $ map (head . words) $ lines s
       putStrLn ("users logged in: " ++ unwords users)


-- @@PLEAC@@_4.7
l = [ 1, 2, 4 ] \\ [ 4, 1 ]         -- [ 2 ]

-- beware of duplicates:
xs = [ 1, 1, 2, 3, 3, 4, 5 ]
ys = [ 1, 2, 4 ]
l1 = xs \\ ys                   -- [ 1, 3, 3, 5 ]
l2 = nub xs \\ ys               -- [ 3, 5 ]
l3 = filter (`notElem` ys) xs   -- [ 3, 3, 5 ]

-- for faster operations, use Data.Set:
import qualified Data.Set as Set
l = filter (`Set.notMember` Set.fromList [ 4, 1 ]) [ 1, 2, 4 ]
-- => [ 2 ]

s1 = Set.fromList [ 1, 2, 4 ]
s2 = Set.fromList [ 4, 1 ]
s3 = Set.\\ s1 s2


-- @@PLEAC@@_4.8
a = [1, 3, 5, 6, 7, 8]
b = [2, 3, 5, 7, 9]

l1 = a `intersect` b            -- [ 3, 5, 7 ]
l2 = a `union` b                -- [ 1, 3, 5, 6, 7, 8, 2, 9 ]
l3 = a \\ b                     -- [ 1, 6, 8 ]

-- see also union and intersection in Data.Set


-- @@PLEAC@@_4.9
l = [ "Time", "Flies" ]
l2 = l ++ [ "An", "Arrow" ]   -- ["Time", "Flies", "An", "Arrow"]

insertAt n e l = before ++ [e] ++ after
    where (before, after) = splitAt n l

l3 = insertAt 2 "Like" l2 -- ["Time", "Flies", "Like", "An", "Arrow"]

replaceAt n e l = before ++ [e] ++ tail after
    where (before, after) = splitAt n l

replaceAllAt assoc l = foldr (\(n, e) -> replaceAt n e) l assoc

l4 = replaceAt 0 "Fruit" l3
l5 = replaceAllAt [ (3,"A"), (4,"Banana") ] l4
-- => ["Fruit", "Flies", "Like", "A", "Banana"]

-- arrays are better suited for modifying an element,
-- but arrays are not suited for adding elements
-- (using listToArray from "Helpers")
l3' = listToArray l3
l4' = l3' Array.// [ (0, "Fruit") ]
l5' = l4' Array.// [ (3,"A"), (4,"Banana") ]


-- @@PLEAC@@_4.10
l = reverse [1..5]       -- [5,4,3,2,1]

reverse_sort   = reverse . sort
reverse_sort'  = sortBy (\a b -> b <=> a)
reverse_sort'' = sortBy (flip (<=>))

l = reverse_sort [3,2,5,1]      -- [5,3,2,1]


-- @@PLEAC@@_4.11
-- remove n elements from front of array
(front,array') = splitAt array n

-- remove n elements from the end of array
(array',end)  = splitAt array (array.length array - n)

(this : that : friends') = [ "Peter", "Paul", "Mary", "Jim", "Tim" ]
-- this contains "Peter", that has "Paul", and friends' has "Mary", "Jim", and "Tim"

beverages = words "Dew Jolt Cola Sprite Fresca"
(beverages', pair) = splitAt (length beverages - 2) beverages 
-- pair contains ["Sprite", "Fresca"]
-- and beverages' has ["Dew", "Jolt", "Cola"]


-- @@PLEAC@@_4.12
data Employee = Employee { name, position :: String
                         , salary :: Int } deriving (Show,Eq)

employees = [ Employee { name="Jim",  position="Manager",  salary=26000 }
            , Employee { name="Jill", position="Engineer", salary=24000 }
            , Employee { name="Jack", position="Engineer", salary=22000 } ]

(Just engineer) = find ((== "Engineer") . position) employees

s = name engineer ++ " is engineer"


-- @@PLEAC@@_4.13
bigs = filter (> 1000000) nums

-- display lines from "who" matching "simonpj" (readProcess is in "Helpers")
t = do Right s <- readProcess "who" [] ""
       mapM_ putStrLn (filter ("simonpj" `isPrefixOf`) $ lines s)

-- using employees defined in previous section
engineers  = [ x | x <- employees, position x == "Engineer"]
engineers' = filter ((== "Engineer") . position) employees
-- => [ Employee { name="Jill", ... }, Employee { name="Jack", ... } ]

secondary_assistance = filter ((\x -> x >= 26000 && x < 30000) . salary) employees
-- => [ Employee { name="Jim", position="Manager", salary=26000 } ]


-- @@PLEAC@@_4.14
-- sort works for numbers
l1 = sort [100, 3, 20]          -- [3, 20, 100]
-- strings representing numbers will be sorted alphabetically
l2 = sort ["100", "3", "20"]    -- ["100", "20", "3"]

-- you may specify another sort method (not lexicographic ordering):
-- for comparing: import Data.Ord (comparing)
l3 = sortBy (comparing (read :: String -> Int)) ["100", "3", "20"] -- ["3", "20", "100"]
-- same, but may typically run faster
sortByKey f = map snd . sortBy (comparing fst) . map (\x -> (f x, x))
l3' = sortByKey (read :: String -> Int) ["100", "3", "20"]

-- more cryptic: replace (\x -> (f x, x)) with (f Arrow.&&& id)


-- @@PLEAC@@_4.15
data Employee = Employee { name, position :: String
                         , salary :: Int } deriving (Show,Eq)

employees = [ Employee { name="Jim",  position="Manager",  salary=26000 }
            , Employee { name="Jill", position="Engineer", salary=24000 }
            , Employee { name="Jack", position="Engineer", salary=22000 } ]

employee_sorted = sortBy (comparing name) employees
-- => [ Employee { name="Jack", ... }, Employee { name="Jill", ... }, Employee { name="Jim", ... } ]

sortByKey f = map snd . sortBy (comparing fst) . map (\x -> (f x, x))
sortByKeyRev f = map snd . sortBy (flip $ comparing fst) . map (\x -> (f x, x))

employee_sorted' = sortByKey name employees

ordered     = sortByKey    abs [ 1, -3, 2, 6 ] -- [ 1, 2, -3, 6 ]
ordered_rev = sortByKeyRev abs [ 1, -3, 2, 6 ] -- [ 6, -3, 2, 1 ]

names = [ "Azzz", "Za", "Bbbbbbb" ]
sort_by_2nd_char  = sortByKey (!! 1) names -- ["Za","Bbbbbbb","Azzz"]
sort_by_last_char = sortByKey last names   -- ["Za","Bbbbbbb","Azzz"]
sort_by_length    = sortByKey length names -- ["Za","Azzz","Bbbbbbb"]

--
import Text.Regex
m = readFile "/etc/passwd" >>= (putStr . unlines . sortByKey key . lines)
    where key line = case splitRegex (mkRegex ":") line of
                       (name : _ : uid : gid : _) ->
                           (read gid :: Int, read uid :: Int, name) 


-- @@PLEAC@@_4.16
import System.Posix (sleep)

-- circular lists are easily defined using infinite list
circular = [1,2,3,4,5] ++ circular

m = mapM_ do_it (take 10 circular)
    where do_it n = do putStrLn ("Handling process " ++ show n)
                       sleep 1


-- @@PLEAC@@_4.17
rand :: Int -> Int -> IO Int
rand low high = getStdRandom (randomR (low, high))

randomPermute :: [a] -> IO [a]
randomPermute []  = return []
randomPermute xs  = 
    do n <- rand 0 (length xs - 1)
       let (a, e : b) = splitAt n xs -- taking n-th element and putting it first
       ab <- randomPermute (a ++ b)
       return $ e : ab

l = randomPermute [1..5]        -- example of result: [2,3,5,1,4]


-- @@PLEAC@@_4.18
-- using groupNelem from "Helpers"
make_columns :: Int -> [String] -> [String]
make_columns screen_width mylist = 
    map concat $ transpose $ groupNelem rows $ map pad_it mylist
        where maxlen = maximum(map length mylist) + 1 -- 1 needed to make extra space
              cols = max 1 (screen_width `div` maxlen)
              rows = 1 + (length mylist `div` cols)
              pad_it s = take maxlen (s ++ repeat ' ')

-- using external program "resize -u" to get terminal size (readProcess is in "Helpers")
getTerminalSize :: IO (Maybe (Int, Int))
getTerminalSize =
    do Right s <- readProcess "resize" ["-u"] ""
       let vars = parse_shell_vars s
       return $ do x <- lookup "COLUMNS" vars
                   y <- lookup "LINES" vars
                   return (read x, read y)
    where parse_shell_vars = catMaybes . map break_it . lines
          break_it s = case break (== '=') s of
                         (_, "") -> Nothing
                         (var, _:val) -> Just (var, if last val == ';' then init val else val)

main = do size <- getTerminalSize
          let width = maybe 80 fst size
          interact (unlines . make_columns width . words)


-- @@PLEAC@@_4.19
permutations []     =  [[]]
permutations (x:xs) = [zs | ys <- permutations xs, zs <- interleave x ys ]
  where interleave x []     =  [[x]]
        interleave x (y:ys) =  [x:y:ys] ++ map (y:) (interleave x ys)
-- the list comprehension can be replaced by: concatMap (interleave x) $ permutations xs

t = mapM_ (putStrLn . unwords) $ permutations $ words "man bites dog" 
-- man bites dog
-- bites man dog
-- bites dog man
-- man dog bites
-- dog man bites
-- dog bites man


-- @@PLEAC@@_5.0
-- using simple association list
age = [ ("Nat", 24)
      , ("Jules", 25)
      , ("Josh", 17) ]

-- access by key:
Just jules_age = lookup "Jules" age    -- 25

-- using Data.Map
import qualified Data.Map as Map
age = Map.fromList [ ("Nat", 24)
                   , ("Jules", 25)
                   , ("Josh", 17) ]

-- access by key:
jules_age' = age' Map.! "Jules"   -- 25

-- same, incrementally
age1 = Map.empty
age2 = Map.insert "Nat"   24 age1
age3 = Map.insert "Jules" 25 age2
age4 = Map.insert "Josh"  17 age3
-- => Map.fromList [("Josh",17),("Jules",25),("Nat",24)]

food_color = [ ("Apple" , "red")
             , ("Banana", "yellow")
             , ("Lemon" , "yellow")
             , ("Carrot", "orange") ]

food_color' = Map.fromList food_color


-- @@PLEAC@@_5.1
food_color2 = ("Raspberry", "pink") : food_color

t = putStrLn $ "Know foods: " ++ unwords (map fst food_color2)
-- => Know foods: Raspberry Apple Banana Lemon Carrot

-- nb: it doesn't remove duplicates
food_color3 = ("Raspberry", "red") : food_color2
-- => [ ("Raspberry","red"), ("Raspberry","pink"), ("Apple","red"), ... ]
-- (see section "Finding Common or Different Keys in Two Hashes" for a solution removing duplicates)


-- same using Data.Map
food_color2' = Map.insert "Raspberry" "pink" food_color'

t' = putStrLn $ "Know foods: " ++ unwords (Map.keys food_color2')
-- => Know foods: Apple Banana Carrot Lemon Raspberry

-- nb: it does remove duplicates
food_color3' = Map.insert "Raspberry" "red" food_color2'
-- Map.fromList [("Apple","red"),("Banana","yellow"),("Carrot","orange"),("Lemon","yellow"),("Raspberry","red")]


-- @@PLEAC@@_5.2
s = map describe ["Banana", "Martini"]
    where describe name = name ++ " is a " ++ kind name ++ "."
          kind name = if isJust (lookup name food_color) then "food" else "drink"
-- => ["Banana is a food.","Martini is a drink."]

--   isJust (lookup name food_color)   
-- can be replaced by
--   name `elem` map fst food_color

-- for Data.Map, replace (isJust . lookup) with Map.member:
s' = map describe ["Banana", "Martini"]
    where describe name = name ++ " is a " ++ kind name ++ "."
          kind name = if Map.member name food_color' then "food" else "drink"
-- => ["Banana is a food.","Martini is a drink."]


-- @@PLEAC@@_5.3
-- removing key on association list is done with a simple filter:
food_color2 = filter ((/= "Banana") . fst) food_color

print_foods foods = do putStrLn $ "Keys: " ++ (unwords $ map fst foods)
                       putStrLn $ "Values: " ++ (unwords $ map snd foods)

before = print_foods food_color
-- Keys: Apple Banana Lemon Carrot
-- Values: red yellow yellow orange
after = print_foods food_color2
-- Keys: Apple Lemon Carrot
-- Values: red yellow orange


-- using Data.Map
food_color2' = Map.delete "Banana" food_color'

print_foods' foods = do putStrLn $ "Keys: " ++ (unwords $ Map.keys foods)
                        putStrLn $ "Values: " ++ (unwords $ Map.elems foods)

before' = print_foods' food_color'
-- Keys: Apple Banana Carrot Lemon
-- Values: red yellow orange yellow
after' = print_foods' food_color2'
-- Keys: Apple Carrot Lemon
-- Values: red orange yellow

-- to remove a list of keys, simply use foldr:
food_color3' = foldr Map.delete food_color' [ "Banana", "Apple", "Cabbage" ]


-- @@PLEAC@@_5.4
t  = map (\(food, color) -> food ++ " is " ++ color) food_color
-- ["Apple is red","Banana is yellow","Lemon is yellow","Carrot is orange"]
t2 = map (\(food, color) -> food ++ " is " ++ color) $ sort food_color
-- ["Apple is red","Banana is yellow","Carrot is orange","Lemon is yellow"]

-- using Data.Map
t'  = map (\(food, color) -> food ++ " is " ++ color) $ Map.assocs food_color'
-- ["Apple is red","Banana is yellow","Carrot is orange","Lemon is yellow"]

t2' = map format_it $ sort $ Map.assocs food_color'
    where format_it (food, color) = food ++ " is " ++ color
-- ["Apple is red","Banana is yellow","Carrot is orange","Lemon is yellow"]

-- you can of course get keys first, and use Map.!
t2' = map format_it $ sort $ Map.keys food_color'
    where format_it food = food ++ " is " ++ food_color' Map.! food

-- countfrom - count number of messages from each sender
#!/usr/bin/runghc

import System
import Maybe
import List

any_input [] = getContents
any_input (f:_) = readFile f

get_senders = catMaybes . map get_sender
    where get_sender line = if prefix `isPrefixOf` line 
                            then Just (drop (length prefix) line) 
                            else Nothing
          prefix = "From: "

main = do s <- getArgs >>= any_input
          let senders = group $ sort $ get_senders (lines s)
          mapM_ (putStrLn . format_it) $ senders
    where format_it froms@(name:_) = name ++ ": " ++ show (length froms)


-- @@PLEAC@@_5.5
-- show gives a string representation
s = show food_color
t = putStrLn s
-- => [("Apple","red"),("Banana","yellow"),("Lemon","yellow"),("Carrot","orange")]

s2  = map (\(food, color) -> food ++ " => " ++ color) food_color
-- see "Traversing a Hash" for more


-- @@PLEAC@@_5.6
-- association list as explained in previous sections keeps order


-- @@PLEAC@@_5.7
-- one can use simple list functions
lookupAll k = map snd . filter ((== k) . fst)

color_to_foods color = map fst $ filter ((== color) . snd) food_color
yellow_food = color_to_foods "yellow" -- ["Banana","Lemon"]

-- one can precompute it:
food_of_color = map (\color -> (color, color_to_foods color)) colors
                where colors = nub (map snd food_color)
-- => [("red",["Apple"]), ("yellow",["Banana","Lemon"]), ("orange",["Carrot"])]
-- then use it
yellow_food_ = fromMaybe [] (lookup "yellow" food_of_color)

-- another way to pre-compute
assocListGroupByKey :: Ord a => [(a,b)] -> [(a,[b])]
assocListGroupByKey = map factorize . groupBy same_key . sortBy (comparing fst)
    where factorize l@((user, _) : _) = (user, map snd l)          
          same_key (a,_) (b,_) = a == b
-- you can write: factorize = fst . head Arrow.&&& map snd
food_of_color = assocListGroupByKey food_color

-- or one can use a full fledged data structure
-- (need: import qualified Data.Set as Set)
food_of_color' = foldl (\l (v, k) -> Map.alter (add_it v) k l) Map.empty food_color
    where add_it val prev = Just (Set.insert val $ fromMaybe Set.empty prev)
yellow_food' = food_of_color' Map.! "yellow"
-- => Set.fromList ["Banana","Lemon"]

-- for every logged-in user, display the ttys on which it is logged
t = do Right s <- readProcess "who" [] ""
       let l = assocListGroupByKey $ map (parse_it . words) (lines s)
       mapM_ (putStrLn . format_it) l
    where format_it l (user, ttys) = user ++ ": " ++ unwords ttys
          parse_it (user : tty : _) = (user, tty)


-- @@PLEAC@@_5.8
surname = [ ("Mickey", "Mantle"), ("Babe", "Ruth") ];
first_name = map swap surname
swap (a,b) = (b,a)

Just name = lookup "Mantle" first_name -- "Mickey"

-- see also section "Hashes with Multiple Values Per Key"


-- @@PLEAC@@_5.9
t = map (\(food, color) -> food ++ " is " ++ color) $ sort food_color
-- ["Apple is red","Banana is yellow","Carrot is orange","Lemon is yellow"]

-- sorting on length of color (sortByKey is in "Helpers")
t2 = map (\(food, color) -> food ++ " is " ++ color) $ sortByKey (length . snd) food_color
-- => ["Apple is red","Banana is yellow","Lemon is yellow","Carrot is orange"]

-- using Data.Map, the simplest is to use Map.assocs and do as above
t' = map format_it $ sort $ Map.assocs food_color'
    where format_it (food, color) = food ++ " is " ++ color
-- ["Apple is red","Banana is yellow","Carrot is orange","Lemon is yellow"]

-- you can of course get keys first, and use Map.!
t_' = map format_it $ sort $ Map.keys food_color'
    where format_it food = food ++ " is " ++ food_color' Map.! food


-- @@PLEAC@@_5.10
-- food_color as per the introduction
drink_color = [ ("Galliano", "yellow"), ("Mai Tai", "blue") ]
ingested_color = drink_color ++ food_color
-- => [("Galliano","yellow"),("Mai Tai","blue"),("Apple","red"),("Banana","yellow"),("Lemon","yellow"),("Carrot","orange")]
-- (it doesn't remove duplicates, see section "Finding Common or Different Keys in Two Hashes" for a solution removing duplicates)

-- using Data.Map
drink_color' = Map.fromList drink_color
ingested_color' = Map.union drink_color' food_color'
-- => Map.fromList [("Apple","red"),("Banana","yellow"),("Carrot","orange"),("Galliano","yellow"),("Lemon","yellow"),("Mai Tai","blue")]


-- @@PLEAC@@_5.11
-- food_color as per the introduction
citrus_color = [ ("Lemon", "yellow"), ("Orange", "orange"), ("Lime", "green") ]
common = food_color ++ citrus_color
-- => [("Apple","red"),("Banana","yellow"),("Lemon","yellow"),("Carrot","orange"),("Lemon","yellow"),("Orange","orange"),("Lime","green")]
-- the duplicates are still here, to remove them:
nubByKey f = nubBy (\a b -> f a == f b)
common_ = nubByKey fst common
-- => [("Apple","red"),("Banana","yellow"),("Lemon","yellow"),("Carrot","orange"),("Orange","orange"),("Lime","green")]
-- no more duplicates

non_citrus = food_color \\ citrus_color
-- => [("Apple","red"),("Banana","yellow"),("Carrot","orange")]

-- using Data.Map
citrus_color' = Map.fromList citrus_color
common' = Map.union food_color' citrus_color'
-- => Map.fromList [("Apple","red"),("Banana","yellow"),("Carrot","orange"),("Lemon","yellow"),("Lime","green"),("Orange","orange")]
non_citrus' = Map.difference food_color' citrus_color'
-- => Map.fromList [("Apple","red"),("Banana","yellow"),("Carrot","orange")]


-- @@PLEAC@@_5.12
-- no problem here, haskell handles any kind of object for key-ing

name = mapM open_files [ "/etc/termcap", "/etc/passwd", "/bin/cat" ]
       where open_files file = do fh <- openFile file ReadMode
                                  return (fh, file)
t = do files <- fmap (map snd) name
       putStrLn $ "open files: " ++ intercalate ", " files
t2 = name >>= mapM_ display_size
    where display_size (fh, file) = 
              do size <- hFileSize fh
                 printf "%s is %d bytes long.\n" file size


-- @@PLEAC@@_5.13
-- presizing is only useful for mutable maps

-- about performance, one may use Map.IntMap when the keys are Int


-- @@PLEAC@@_5.14
-- from http://haskell.org/haskellwiki/99_questions/31_to_41#Problem_36
encode :: Eq a => [a] -> [(a, Int)]
encode = map (\x -> (head x, length x)) . group

s = encode $ sort "Apple Banana Lemon"
-- => [(' ',2),('A',1),('B',1),('L',1),('a',3),('e',2),('l',1),('m',1),('n',3),('o',1),('p',2)]


-- @@PLEAC@@_5.15
import Data.List

father = [ ("Cain",      "Adam")
         , ("Abel",      "Adam")
         , ("Seth",      "Adam")
         , ("Enoch",     "Cain")
         , ("Irad",      "Enoch")
         , ("Mehujael",  "Irad")
         , ("Methusael", "Mehujael")
         , ("Lamech",    "Methusael")
         , ("Jabal",     "Lamech")
         , ("Jubal",     "Lamech")
         , ("Tubalcain", "Lamech")
         , ("Enos",      "Seth")
         ]

upline person = person : parents
    where parents = unfoldr (\name -> fmap (\x -> (x,x)) $ lookup name father) person
-- you can replace (\x -> (x,x)) with (join (,))   ("join" is from Control.Monad)

from_Irad = upline "Irad"       -- ["Irad","Enoch","Cain","Adam"]

ifNull default_ [] = default_
ifNull _ l = l

downline person = person ++ " begat " ++ (ifNull "Nobody" $ intercalate ", " $ children)
    where children = map fst $ filter ((== person) . snd) father

to_Tubalcain = downline "Tubalcain" -- Tubalcain begat Nobody
to_Adam      = downline "Adam"      -- Adam begat Cain, Abel, Seth


-- the perl example is doing something quite weird,
-- include_free is the list of files which are not included by other files,
-- but that do include a file.
dir = "/usr/include"
include_free =
    do files <- getDirectoryContents dir
       let h_files = filter (isSuffixOf ".h") files
       file2includes <- mapM get_includes h_files
       -- getting the files that do include something:
       let files_ = map fst $ filter (not . null . snd) file2includes
       -- removing files included by other files:
       return $ files_ \\ (concatMap snd file2includes)
    where get_includes h_file = fmap (with_content h_file) (readFile $ dir ++ "/" ++ h_file)
          with_content h_file content = (h_file, content_to_includes content)
          content_to_includes = catMaybes . map (fmap head . matchRegex regex) . lines
          regex = mkRegex "^\\s*#\\s*include\\s*<([^>]+)>"


-- @@PLEAC@@_5.16
import Data.Ord (comparing)
import qualified Data.Tree as Tree

dirname_basename file = (if dir == "" then "." else (reverse $ tail dir), reverse base)
    where (base, dir) = break (== '/') $ reverse file

dirname = fst . dirname_basename
basename = snd . dirname_basename

duProcessFakedInput = 
    "11732   groovysoap/lib\n\
    \68      groovysoap/src/main/groovy/net/soap\n\
    \71      groovysoap/src/main/groovy/net\n\
    \74      groovysoap/src/main/groovy\n\
    \77      groovysoap/src/main\n\
    \9       groovysoap/src/examples\n\
    \8       groovysoap/src/examples/groovy\n\
    \102     groovysoap/src/test\n\
    \202     groovysoap/src\n\
    \11966   groovysoap\n"

sortByKeyRev f = map snd . sortBy (flip $ comparing fst) . map (\x -> (f x, x))

dirs_tree :: Tree.Tree (String, Int)
dirs_tree = Tree.unfoldTree get_info (last dir2size)
    where get_info (dir, size) = 
              let subdirs = filter ((== dir) . dirname . fst) dir2size in
              ((dir, size), 
               if null subdirs then [] else 
                   sortByKeyRev snd $ subdirs ++ [ (dir ++ "/.", size - sum (map snd subdirs)) ])
          dir2size = map ((\(size:dir:_) -> (dir, read size)) . words) $ lines duProcessFakedInput

t = putStr $ Tree.drawTree$ fmap (\(f,s)-> show s ++ " " ++ basename f) dirs_tree
-- using Tree.drawTree, the result is not as expected, but it's quite nice:
--
-- 11966 groovysoap
-- |
-- +- 11732 lib
-- |
-- +- 202 src
-- |  |
-- |  +- 102 test
-- |  |
-- |  +- 77 main
-- |  |  |
-- |  |  +- 74 groovy
-- |  |  |  |
-- |  |  |  +- 71 net
-- |  |  |  |  |
-- |  |  |  |  +- 68 soap
-- |  |  |  |  |
-- |  |  |  |  `- 3 .
-- |  |  |  |
-- |  |  |  `- 3 .
-- |  |  |
-- |  |  `- 3 .
-- |  |
-- |  +- 14 .
-- |  |
-- |  `- 9 examples
-- |     |
-- |     +- 8 groovy
-- |     |
-- |     `- 1 .
-- |
-- `- 32 .

-- now the exact same solution 
mydraw :: Tree.Tree (String, Int) -> String
mydraw tree = unlines $ drawSubTrees [tree]
  where drawSubTrees [] = []
        drawSubTrees l@(Tree.Node (_,size_max) _ : _) =
            let format = "%" ++ show (length $ show size_max) ++ "s %s" in
            let draw_one (Tree.Node (name, size) sub) =
                    let name' = basename name in
                    printf format (show size) name' :
                           map (\s -> printf format "|" (map (\_ -> ' ') name') ++ s) (drawSubTrees sub) in
            concatMap draw_one l

t2 = putStr $ mydraw dirs_tree

--11966 groovysoap
--    |           11732 lib
--    |             202 src
--    |               |    102 test
--    |               |     77 main
--    |               |      |     74 groovy
--    |               |      |      |       71 net
--    |               |      |      |        |    68 soap
--    |               |      |      |        |     3 .
--    |               |      |      |        3 .
--    |               |      |      3 .
--    |               |     14 .
--    |               |      9 examples
--    |               |      |         8 groovy
--    |               |      |         1 .
--    |              32 .

-- @@PLEAC@@_6.0
-- for info about the various regexp libraries, see http://haskell.org/haskellwiki/Regular_expressions
--
-- [Oct 2008] Text.Regex.Posix/Text.Regex.Base provides =~ which is quite powerful
-- but it misses simple things like subRegex and splitRegex so in many cases
-- Text.Regex must be used
--
-- with Text.Regex.Posix:
import Text.Regex.Posix
import Text.Regex.Base

meadow = [ "sheeps are ovine", "Sheeps are ovine"
         , "Fine bovines demand fine toreadors."
         , "Muskoxen are polar ovibovine species."
         , "Grooviness went out of fashion decades ago." ]

t1 = filter (=~ "sheep") meadow
-- ["sheeps are ovine"]

t2 = filter (=~ "\\<ovines?") meadow
-- ["sheeps are ovine","Sheeps are ovine"]

-- below require "import Data.Bits" for .|. :
ignoreCase = makeRegexOpts (defaultCompOpt .|. compIgnoreCase) defaultExecOpt
t3 = filter (matchTest $ ignoreCase "sheep") meadow
-- ["sheeps are ovine","Sheeps are ovine"]


-- the same with Text.Regex:
import Data.Maybe
import Text.Regex

t1 = filter (isJust . matchRegex (mkRegex "sheep")) meadow
-- ["sheeps are ovine"]

t2 = filter (isJust . matchRegex (mkRegex "\\<ovines?")) meadow
-- ["sheeps are ovine","Sheeps are ovine"]

ignoreCase s = mkRegexWithOpts s True False
t3 = filter (isJust . matchRegex (ignoreCase "sheep")) meadow
-- ["sheeps are ovine","Sheeps are ovine"]

-- note that Data.ByteString.Char8 has a few interesting functions,
-- but usually regexps are better
import qualified Data.ByteString.Char8 as B

t = filter is_sheep meadow
    where is_sheep s = B.isInfixOf (B.pack "sheep") (B.pack s)
-- ["sheeps are ovine"]


s1 = subRegex (mkRegex "o+") "good food" "e"
-- ged fed 

-- the following helper functions are useful
subRegexWith re new s =
    case matchRegexAll re s of
      Nothing -> s
      Just (before, matched, after, _) -> before ++ new matched ++ (subRegexWith re new after)
subRegexOnceWith re new s = 
    case matchRegexAll re s of
      Nothing -> s
      Just (before, matched, after, _) -> before ++ new matched ++ after

s2 = subRegexOnceWith (mkRegex "o+") (\_ -> "e") "good food" 
-- ged food


digits = "123456789";
l = digits =~ "..." :: [String] -- nb: in List context, calls matchAll
-- ["123","456","789"]

-- for overlapping matches, one must do it by hand:
allOverlappingMatches re s = 
    case matchRegexAll re s of
      Nothing -> []
      Just (before, matched, after, _) -> matched : allOverlappingMatches re (tail matched ++ after)

l = allOverlappingMatches (mkRegex "...") digits
-- ["123","234","345","456","567","678","789"]


string = "And little lambs eat ivy"
(before, matched, after) = string =~ "l[^s]*s" :: (String, String, String)
s = printf "(%s) (%s) (%s)" before matched after :: String
-- "(And ) (little lambs) ( eat ivy)"

-- @@PLEAC@@_6.1
-- copying is the default in pure languages :)
import Text.Regex
import System.Environment
import System.FilePath.Posix

prog_ = subRegex (mkRegex ".*/") "/etc/foo" ""
-- "foo"
-- but it's much nicer to use takeFileName
prog__ = takeFileName "/etc/foo"
-- "foo"

-- as for the program basename, it is:
prog = getProgName


manpage = "/usr/man/man3/foo.1"
catpage = subRegexMatchesWith (mkRegex "man([0-9])") (\[n] -> "cat" ++ n) manpage
-- /usr/man/cat3/foo.1

subRegexMatchesWith re new s = 
    case matchRegexAll re s of
      Nothing -> s
      Just (before, matched, after, l) -> before ++ new l ++ after

bindirs = [ "/usr/bin", "/bin", "/usr/local/bin" ]
libdirs = map (\s -> subRegex (mkRegex "bin$") s "lib") bindirs

-- @@PLEAC@@_6.2
is_pure_alphabetic = isJust . matchRegex (mkRegex "^[A-Za-z]+$")
is_pure_alphabetic_ = all isLetter -- this handles unicode (accents...)

some_words = [ "silly", "random!stuff#here", "façade", "tschÃŒÃ", "niño", "ÑÑÑÑкОй" ]
l1 = filter is_pure_alphabetic some_words -- ["silly"]
l2 = filter is_pure_alphabetic_ some_words -- all except "random!stuff#here"

-- @@PLEAC@@_6.3
-- "\\S+"         -- as many non-whitespace bytes as possible
-- "[A-Za-z'-]+"  -- as many letters, apostrophes, and hyphens
-- 
-- "\\<([A-Za-z]+)\\>" -- usually best
-- "\\s([A-Za-z]+)\\s" -- fails at ends or w/ punctuation

-- words is similar to splitting on "\s+"
l = words "A text   with some\tseparator\n"
-- ["A","text","with","some","separator"]
--
-- (nb: spaces at beginning or end do not change the result)

-- @@PLEAC@@_6.4
-- Commenting Regular Expressions
-- since regexps are simple strings, you can concatenate regexp pieces:
re = "#"          -- a pound sign
     ++ "(\\w+)"  -- the variable name
     ++ "#"       -- another pound sign
expand_macro vals = subAllRegexMatchesWith (mkRegex re) get_val
    where get_val [s] = fromJust (lookup s vals)
s = expand_macro [ ("foo", "42") ] "blah #foo# blah"
-- "blah 42 blah"

subAllRegexMatchesWith re new s = 
    case matchRegexAll re s of
      Nothing -> s
      Just (before, matched, after, l) -> before ++ new l ++ (subAllRegexMatchesWith re new after)

-- @@PLEAC@@_6.5
fishes = "One fish two fish red fish blue fish"

-- first some very simple and useful functions:
ignoreCase s = mkRegexWithOpts s True False
allMatches re s =
    case matchRegexAll re s of
      Nothing -> []
      Just (_before, _, after, matches) -> matches : allMatches re after

-- here we go:
colors = map head $ allMatches (ignoreCase "(\\w+)\\s+fish\\>") fishes
-- ["One","two","red","blue"]
s1 = printf "The third fish is a %s one." (colors !! 2) :: String
-- The third fish is a red one.

-- another solution to skip the first 2 matches using an ad'hoc regexp:
s2 = printf "The third fish is a %s one." color :: String
    where Just (_, _, _, [_, color]) = 
              matchRegexAll (ignoreCase "(\\w+\\s+fish\\s+){2}(\\w+)\\s+fish") fishes
-- The third fish is a red one.

-- a modified subRegex to be able to handle differently the 4th match
subRegexWithEnv re f env s =
    case matchRegexAll re s of
      Nothing -> s
      Just (before, matched, after, matches) -> before ++ matched' ++ (subRegexWithEnv re f env' after)
          where (env', matched') = f env (matched:matches)

s3 = subRegexWithEnv (ignoreCase "(\\w+)(\\s+fish\\>)") f 1 fishes
    where f n [verbatim, _, fish] = (n+1, if n == 4 then "sushi" ++ fish else verbatim)
-- One fish two fish red fish sushi fish

-- note that we can easily specialize subRegexWithEnv into subRegexWithCount:
withCountEnv f g s = f (\n val -> (n+1, g n val)) 1 s
subRegexWithCount re = withCountEnv (subRegexWithEnv re)

s3' = subRegexWithCount (ignoreCase "(\\w+)(\\s+fish\\>)") f fishes
    where f n [verbatim, _, fish] = if n == 4 then "sushi" ++ fish else verbatim
-- One fish two fish red fish sushi fish

-- adding .* at beginning of the regexp will ensure we get the last hit.
-- but we must be carefull ".*(\\w+)\\s+fish\\>" is wrong since .* will match
-- most the color, so \\< is also needed
s4 = case matchRegex (ignoreCase ".*\\<(\\w+)\\s+fish\\>") fishes of
     Just [last_fish_color] -> printf "Last fish is %s." last_fish_color
     _ -> "Failed!"
-- Last fish is blue.

-- @@PLEAC@@_6.6
-- for multi line regexps, use (mkRegexWithOpts _ False _)
#!/usr/bin/runghc

-- killtags - very bad html tag killer
import System
import Text.Regex

main = do s <- getArgs >>= any_input
          putStr (strip_tags s)

any_input [] = getContents
any_input (f:_) = readFile f

strip_tags s = subRegex (mkRegexWithOpts "<[^>]*>" False True) s ""

-- testing it:
test = "<a>after a\n\
       \<bbb\n\
       \bbb>after b <c>after c\n"
s = strip_tags test
-- "after a\n<bbb\nbbb>after b after c\n"

-- @@PLEAC@@_6.7
-- Read the whole file and split
l1 = fmap lines (readFile filename) -- on line ends
l2 = fmap words (readFile filename) -- on whitespace
l3 = fmap (splitRegex $ mkRegex "PATTERN") (readFile filename) -- on regexp

-- .Ch, .Se and .Ss divide chunks
f = splitRegex (mkRegex "^\\.(Ch|Se|Ss)$")

-- note that in the example above, you loose the matched line,
-- if you need it, you must create your own splitRegex:
splitRegex' :: Regex -> String -> [(String, String)]
splitRegex' re s = 
    case unfoldr' f s of
      (l, "") -> l
      (l, s) -> l ++ [(s, "")]
    where 
      -- small helper functions which tranform matchRegexAll output
      -- to the one wanted by unfoldr'
      f = fmap f' . matchRegexAll re
      f' (before, matched, after, _) = ((before, matched), after)

-- it uses this special unfoldr
-- (unfoldr only returns [b] whereas we also need the rest)
-- nb: with Control.Arrow.first which is \f (a,b) -> (f a, b)
--     we could write:  Arrow.first (a :) (unfoldr' f b')
unfoldr' :: (a -> Maybe (b, a)) -> a -> ([b], a)
unfoldr' f b  =
  case f b of
   Just (a, b') -> let (l, b'') = unfoldr' f b' in (a:l, b'')
   Nothing      -> ([], b)

unwords_keeping_spaces = splitRegex' (mkRegex "\\s+")
t = unwords_keeping_spaces "a b  c \t d"
-- [("a"," "),("b","  "),("c"," \t "),("d","")]

-- @@PLEAC@@_6.8
import Text.Regex
import Text.Regex.Posix
import Data.Maybe
import Text.Printf
import System

any_input [] = getContents
any_input (f:_) = readFile f

-- command-line to print lines 15 through 17 inclusive (see below)
main = do s <- getArgs >>= any_input
          putStr (unlines $ take 3 $ drop 14 $ lines s)

filterByRange :: (a -> Bool) -> (a -> Bool) -> [a] -> [[a]]
filterByRange _ _ [] = []
filterByRange f_beg f_end l = 
    let l' = dropWhile (not . f_beg) l in
    case break f_end l' of
      (before, match : l'') -> (before ++ [match]) : filterByRange f_beg f_end l''
      _ -> []

extract_XMP :: [String] -> [[String]]
extract_XMP = filterByRange (=~ "<XMP>") (=~ "</XMP>")

t1 = do s <- readFile "foo.html"
        return $ extract_XMP $ lines s

ignoreCase s = mkRegexWithOpts s True False
extract_XMP' = filterByRange (match "<XMP>") (match "</XMP>")
    where match re = isJust . matchRegex (ignoreCase re)

t1' = do s <- readFile "foo.html"
         return $ extract_XMP' $ lines s

mbox_to_headers = filterByRange (match "^From:?\\s") (match "^$")
    where match re = isJust . matchRegex (ignoreCase re)
mail_adresses = concatMap search_in_line
    where search_in_line line = (line =~ "[^<>(),;[:space:]]+@[^<>(),;[:space:]]+") :: [String]
          -- nb: in List context, calls matchAll.
          -- nb: \s can not be used, so instead of [^<>\s], one must do [^<>[:space:]]

t2 = do s <- readFile "mbox"
        return $ mail_adresses $ concat $ mbox_to_headers $ lines s

-- @@PLEAC@@_6.9
import List
import Data.Maybe

special_chars = ".()+*?[]\\{}$^|"

regex_escape_char :: Char -> String
regex_escape_char c = if elem c special_chars 
               then ['\\', c] 
               else [c]
regex_escape :: String -> String
regex_escape = concatMap regex_escape_char

glob_list = 
    [ ('*', ".*")
    , ('?', ".")
    , ('[', "[")
    , (']', "]") ]
glob_char_to_regexp c = fromMaybe (regex_escape_char c) (lookup c glob_list)
-- nb: fromMaybe will try (lookup ...) first, then fallback on (regex_escape_char c)

glob2pat = concatMap glob_char_to_regexp

pat = glob2pat "*File.*"
-- ".*File\\..*"

-- @@PLEAC@@_6.10
import Data.List
import Data.Maybe
import Text.Regex

popstates = ["CO","ON","MI","WI","MN"]

is_state = isJust . matchRegex re
    where re = mkRegex $ "\\<(" ++ intercalate "|" popstates ++ ")\\>"

l = filter is_state [ "xxx", "xx CO xx", "WI", "WIxx" ]
-- ["xx CO xx","WI"]

-- this is less efficient:
is_state' line = any (\re -> isJust $ matchRegex re line) (map to_regexp popstates)
    where to_regexp c = mkRegex $ "\\<" ++ c ++ "\\>"

-- @@PLEAC@@_6.11
import Data.Maybe
import Data.List
import Text.Regex.Base
import Text.Regex.Posix

-- one can not catch invalid patterns using Text.Regex

-- recent versions of Text.Regex.Base have makeRegexM:
is_valid_pattern :: String -> Bool
is_valid_pattern re = isJust (makeRegexM re :: Maybe Regex)
-- nb: the type Regex must be specified since matchRegexM uses abstract
--     classes and haskell can't guess which instance to use

-- or can use compile from Text.Regex.Posix.String:
t = let regexp = "(" in
    compile defaultCompOpt defaultExecOpt regexp >>=
      (\re -> case re of
                Left (_, err) -> error ("bad regexp \"" ++ regexp ++ "\": " ++ err)
                Right re -> return re)
-- *** Exception: bad regexp "(": Unmatched ( or \(

-- @@PLEAC@@_6.12
-- Text.Regex and Text.Regex.Posix do not handle unicode (accents...)

-- @@PLEAC@@_6.13
import Data.List

-- Calculates the Levenshtein, or edit distance, between two strings.
-- here is a basic/slow version:
levenshtein_basic s t = distance (length s) (length t)
    where distance i 0 = i
          distance 0 j = j
          distance i j = minimum [ distance (i-1) j + 1
                                 , distance i (j-1) + 1
                                 , distance (i-1) (j-1) + (if s!!(i-1)==t!!(j-1) then 0 else 1) ]

-- a fast version based on the previous one, adding memoization
-- (note the recursive use of "d")
levenshtein s t = d
    where d = [ [distance m n | n<-[0..length t]] | m<-[0..length s] ]
          distance i 0 = i
          distance 0 j = j
          distance i j = minimum [ d!!(i-1)!!j + 1
                                 , d!!i!!(j-1) + 1
                                 , d!!(i-1)!!(j-1) + (if s!!(i-1)==t!!(j-1) then 0 else 1) ]

-- a more efficient/cryptic version.
-- (it computes the "distance" figures for each "sb" chars,
--  ie it memoizes only what is needed)
levenshtein' sa sb = foldl transform [0..length sa] sb 
    where 
      transform xs@(x:xs') c = scanl compute (x+1) (zip3 sa xs xs')
          where
            compute z (c', x, y) = minimum [y+1, z+1, x + if c == c' then 0 else 1]

-- Determines if two strings are an approximate match.
amatch percentage s t = levenshtein s t * 100 <= percentage * length s

main = do s <- readFile "/usr/share/dict/words"
          print $ filter (amatch 20 "balast") (lines s)
-- ["balant","balas","balat","ballast","belast","blast"]

-- @@PLEAC@@_6.14
import List
import Text.Regex
import Text.Regex.Posix
import Text.Printf

allMatches re = unfoldr (fmap adjust . matchRegexAll re)
    where adjust (_before, _, after, matches) = (matches, after)

-- special unfoldr (unfoldr only returns [b] whereas we also need the rest)
unfoldr' :: (a -> Maybe (b, a)) -> a -> ([b], a)
unfoldr' f b  =
  case f b of
   Just (a, b') -> let (l, b'') = unfoldr' f b' in (a:l, b'')
   Nothing      -> ([], b)

allMatches' re = unfoldr' (fmap adjust . matchRegexAll re)
    where adjust (_before, _, after, matches) = (matches, after)

t = allMatches (mkRegex "([0-9]+)") "3,4,5,9,120"
-- [["3"],["4"],["5"],["9"],["120"]]
-- (note that \d is not special, one must use [0-9] or [[:digit:]])

mystr = "The year 1752 lost 10 days on the 3rd of September"
t2 = 
    let (l, remain) = allMatches' (mkRegex "([0-9]+)") mystr in
    let after = remain =~ "\\S+" :: String in
    putStr $ unlines $ map (\[n] -> "Found number " ++ n) l ++ 
                        [ printf "Found %s after the last number" after ]
    
-- Found number 1752
-- Found number 10
-- Found number 3
-- Found rd after the last number


-- @@PLEAC@@_APPENDIX
import List
import Data.Ord (comparing)
import qualified Data.Array as Array
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Control.Arrow as Arrow

listToArray :: [a] -> Array Int a
listToArray l = Array.listArray (0, length l - 1) l

indexOf subst s = findIndex (subst `isPrefixOf`) (tails s)

groupNelem :: Int -> [a] -> [[a]]
groupNelem n = unfoldr f
    where f [] = Nothing
          f s = Just (splitAt n s)

sortByKey f = map snd . sortBy (comparing fst) . map (\x -> (f x, x))
sortByKeyRev f = map snd . sortBy (flip $ comparing fst) . map (\x -> (f x, x))

-- alike python's fileinput.input() or perl's <>
-- typical usage: getArgs >>= any_input
any_input :: [FilePath] -> IO String
any_input [] = getContents
any_input (f:_) = readFile f

-- subRegex only allow a fixed string
-- subRegexWith below takes a (String -> String) function to compute a result
subRegexWith re new s =
    case matchRegexAll re s of
      Nothing -> s
      Just (before, matched, after, _) -> before ++ new matched ++ (subRegexWith re new after)

subRegexOnceWith re new s = 
    case matchRegexAll re s of
      Nothing -> s
      Just (before, matched, after, _) -> before ++ new matched ++ after

rand :: Int -> Int -> IO Int
rand low high = getStdRandom (randomR (low, high))

dirname_basename file = (if dir == "" then "." else (reverse $ tail dir), reverse base)
    where (base, dir) = break (== '/') $ reverse file

dirname = fst . dirname_basename
basename = snd . dirname_basename


-- from http://www.cse.unsw.edu.au/~dons/code/newpopen
-- modified to handle stderr
import System.Process
import System.Exit
import System.IO

import Control.Monad
import Control.Concurrent
import qualified Control.Exception as C

readProcess :: FilePath                     -- ^ command to run
            -> [String]                     -- ^ any arguments
            -> String                       -- ^ standard input
            -> IO (Either ExitCode String)  -- ^ either the stdout, or an exitcode

readProcess cmd args input = C.handle (return . handler) $ do

    (inh,outh,errh,pid) <- runInteractiveProcess cmd args Nothing Nothing

    -- fork off a thread to start consuming the output
    output  <- hGetContents outh
    outMVar <- newEmptyMVar
    forkIO $ C.evaluate (length output) >> putMVar outMVar ()

    -- fork off a thread to start consuming the output
    errput  <- hGetContents errh
    errMVar <- newEmptyMVar
    forkIO $ C.evaluate (length errput) >> putMVar errMVar ()

    -- now write and flush any input
    when (not (null input)) $ hPutStr inh input
    hClose inh          -- done with stdin

    -- wait on the output
    takeMVar outMVar
    hClose outh

    -- wait on the errput
    takeMVar errMVar
    hClose errh

    hPutStr stderr errput

    -- wait on the process
    ex <- C.catch (waitForProcess pid) (\_ -> return ExitSuccess)

    return $ case ex of
        ExitSuccess   -> Right output
        ExitFailure _ -> Left ex

  where
    handler (C.ExitException e) = Left e
    handler e                   = Left (ExitFailure 1)