#!/usr/bin/python
# text2html - trivial html encoding of normal text
import sys
import re
# Precompiled regular expressions (module level so they are built once).
re_quoted = re.compile(r"(?m)^(>.*?)$")   # a line of quoted text: "> ..."
re_url = re.compile(r"<URL:([^>]+)>")     # embedded URL: <URL:http://...>
re_http = re.compile(r"(https?:\S+)")     # guessed URL: bare http(s):...
re_strong = re.compile(r"\*(\S+)\*")      # this is *bold* here
re_em = re.compile(r"\b_(\S+)_\b")        # this is _italic_ here

# Embedded and guessed URLs are linked in ONE pass.  Running re_url and then
# re_http back to back would let re_http match the URL inside the anchor that
# re_url just produced and wrap it a second time.
# Group 1 is the embedded form, group 2 the guessed form.
re_link = re.compile("%s|%s" % (re_url.pattern, re_http.pattern))


def _link(match):
    """Return the anchor element for a URL matched by ``re_link``."""
    url = match.group(1) or match.group(2)
    return '<a href="%s">%s</a>' % (url, url)


def paragraph_to_html(para):
    """Return the HTML encoding of a single plain-text paragraph.

    A paragraph that starts with a space is treated as preformatted text and
    wrapped in ``<pre>`` unchanged.  Any other paragraph gets line breaks
    after quoted lines, links for URLs, ``<b>``/``<i>`` for ``*bold*`` and
    ``_italic_`` words, and is wrapped in ``<p>``.
    """
    # TODO encode entities: don't encode "<>" but do encode "&"
    if para.startswith(" "):
        return "<pre>\n%s\n</pre>" % para
    para = re_quoted.sub(r"\1<br />", para)   # quoted text
    para = re_link.sub(_link, para)           # embedded or guessed URL
    para = re_strong.sub(r"<b>\1</b>", para)  # this is *bold* here
    para = re_em.sub(r"<i>\1</i>", para)      # this is _italic_ here
    return "<p>\n%s\n</p>" % para             # add paragraph tags


def text2html(text):
    """Return the HTML for *text*, whose paragraphs are separated by blank lines.

    Whitespace-only paragraphs (for example from an empty input) are skipped
    so that no empty ``<p>`` elements are produced.  The converted paragraphs
    are joined with newlines; the result has no trailing newline.
    """
    return "\n".join(
        paragraph_to_html(para)
        for para in text.split("\n\n")
        if para.strip()
    )


def main(argv=None):
    """Convert the file named by ``argv[1]`` and print the HTML to stdout.

    *argv* defaults to ``sys.argv``.  Returns the process exit status:
    0 on success, 2 when no file name was given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else "text2html"
        sys.stderr.write("usage: %s FILE\n" % prog)
        return 2
    # Close the file deterministically instead of leaking the handle.
    with open(argv[1]) as handle:
        html = text2html(handle.read())
    if html:
        # Single-argument call form behaves the same on Python 2 and 3.
        print(html)
    return 0


if __name__ == "__main__":
    sys.exit(main())