#!/usr/bin/python
# htitlebytes - get html title from URL
#
import sys, urllib2, HTMLParser
if len(sys.argv)<=1:
    print "usage: %s url ..." % sys.argv[0]
    sys.exit(1)
# simple but pedantic html parser: tpj.com breaks it.
class html(HTMLParser.HTMLParser):
    """Minimal HTML parser that remembers element text by tag name.

    After feeding a document, the text found directly inside an element is
    available as an attribute named after the tag, e.g. ``parser.title``.
    A tag that never received any text reads as the empty string.  When a
    tag occurs more than once, the text of the last occurrence that
    contained text wins.
    """

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        # tag name -> text collected for the most recent element of that tag
        self._data = {}
        # stack of currently open tag names, innermost last
        self._open_tags = []
        # parallel to _open_tags: True once that element has received text,
        # so further fragments are appended instead of replacing the value
        self._has_text = []

    def handle_starttag(self, tag, attrs):
        """Push a newly opened element; its text starts out empty."""
        self._open_tags.append(tag)
        self._has_text.append(False)

    def handle_endtag(self, tag):
        """Close ``tag`` together with any elements still open inside it.

        Void or unclosed elements (``<meta>``, ``<br>``, ...) never get an
        end tag of their own, so everything above the matching entry is
        discarded too.  An end tag with no matching open element is ignored.
        """
        if tag not in self._open_tags:
            return
        # The membership test above guarantees this loop terminates.
        while True:
            self._has_text.pop()
            if self._open_tags.pop() == tag:
                break

    def handle_data(self, data):
        """Attribute ``data`` to the innermost open element.

        The parser may deliver one run of text in several pieces (for
        example split at an entity reference); pieces belonging to the same
        element are concatenated.  Text outside any element is dropped.
        """
        if not self._open_tags:
            return
        tag = self._open_tags[-1]
        if self._has_text[-1]:
            self._data[tag] += data
        else:
            self._data[tag] = data
            self._has_text[-1] = True

    def handle_entityref(self, name):
        """Keep a named reference such as ``&amp;`` verbatim (not decoded)."""
        self.handle_data("&%s;" % name)

    def handle_charref(self, name):
        """Keep a numeric reference such as ``&#38;`` verbatim (not decoded)."""
        self.handle_data("&#%s;" % name)

    def __getattr__(self, attr):
        """Return the text collected for tag ``attr``, or "" if none.

        Only invoked when normal attribute lookup fails.  Names starting
        with an underscore are never tag names: raising AttributeError for
        them lets private-state and special-method lookups fail normally
        instead of yielding "" (or recursing while ``_data`` is not set).
        """
        if attr.startswith("_"):
            raise AttributeError(attr)
        return self._data.get(attr, "")

    def error(self, msg):
        # ignore all errors
        pass
for url in sys.argv[1:]:
    print "%s: " % url,
    # TODO fake user agent "Schmozilla/v9.17 Platinum"
    # TODO referer "http://wizard.yellowbrick.oz"
    # as we only do http httplib would do also
    try:
        response = urllib2.urlopen(url)
    except:
        print " %s" % sys.exc_info()[1]
        sys.exit(1)
    # title is not in response
    parser = html()
    parser.feed(response.read())
    parser.close()  # force processing all data
    print parser.title