#!/usr/bin/python
"""text2html - trivial html encoding of normal text.

Usage: text2html FILE

The input is split into paragraphs on blank lines.  A paragraph that starts
with a space is emitted verbatim as preformatted text; any other paragraph
gets light inline markup (quoted lines, URLs, *bold*, _italic_) and is
wrapped in paragraph tags.  The result is written to standard output.
"""

import sys
import re

# precompile regular expressions
re_quoted = re.compile(r"(?m)^(>.*?)$")   # a line starting with ">"
re_url = re.compile(r"<URL:(.*?)>")       # embedded URL: <URL:...>
re_http = re.compile(r"(http:\S+)")       # guessed URL: bare http:...
re_strong = re.compile(r"\*(\S+)\*")      # this is *bold* here
re_em = re.compile(r"\b_(\S+)_\b")        # this is _italic_ here

# Both URL notations combined so they are handled in ONE pass.  Running
# re_url and then re_http separately re-matches the "http:..." text that the
# first substitution just placed inside the href attribute and nests a second
# anchor inside the first.
re_link = re.compile("%s|%s" % (re_url.pattern, re_http.pattern))

# NOTE(review): the concrete tags used below (<pre>, <p>, <br />, <a>,
# <strong>, <em>) follow the inline comments of the script; confirm they are
# the markup the consumers of this output expect.


def _anchor(match):
    """Return an HTML anchor for whichever URL alternative matched."""
    # Exactly one of the two groups participates in a match; lastindex names
    # it (and, unlike "group(1) or group(2)", copes with an empty <URL:>).
    url = match.group(match.lastindex)
    return '<a href="%s">%s</a>' % (url, url)


def convert_paragraph(para):
    """Return the HTML for a single paragraph of plain text.

    A paragraph starting with a space is wrapped in <pre> untouched.
    Otherwise URLs, *bold* and _italic_ spans are marked up, lines starting
    with ">" get a trailing line break, and the result is wrapped in <p>.
    """
    # TODO encode entities: dont encode "<>" but do "&"
    if para.startswith(" "):
        return "<pre>\n%s\n</pre>" % para

    para = re_link.sub(_anchor, para)                    # embedded/guessed URL
    para = re_strong.sub(r"<strong>\1</strong>", para)   # this is *bold* here
    para = re_em.sub(r"<em>\1</em>", para)               # this is _italic_ here
    # Quoted text is handled last: if the break tag were appended first, a
    # URL ending a quoted line would swallow it through the \S+ in re_http.
    para = re_quoted.sub(r"\1<br />", para)              # quoted text
    return "<p>\n%s\n</p>" % para                        # add paragraph tags


def text2html(text):
    """Convert a whole plain-text document to HTML and return it as a string.

    Paragraphs are separated by blank lines ("\\n\\n").  Paragraphs that
    contain only whitespace are skipped so that empty input or runs of blank
    lines do not produce empty elements.  Each converted paragraph is
    followed by a newline.
    """
    return "".join(
        convert_paragraph(para) + "\n"
        for para in text.split("\n\n")      # split paragraphs
        if para.strip()
    )


def main(argv=None):
    """Convert the file named by argv[1] and write the HTML to stdout.

    argv defaults to sys.argv.  Returns the process exit status: 0 on
    success, 2 when no input file was named.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: text2html FILE\n")
        return 2

    # "with" guarantees the input file is closed, even if reading fails.
    with open(argv[1]) as source:
        text = source.read()

    sys.stdout.write(text2html(text))
    return 0


if __name__ == "__main__":
    sys.exit(main())