#-----------------------------
# Escape sequences are processed in ordinary literals, not in raw ones.
mystr = "\n"     # one character: a newline
mystr = r"\n"    # two characters: a backslash and an "n"
#-----------------------------
# Choosing the other quote style avoids escaping altogether.
mystr = "Jon 'Maddog' Orwant"  # single quotes inside a double-quoted literal
mystr = 'Jon "Maddog" Orwant'  # double quotes inside a single-quoted literal
#-----------------------------
# The same values, written with backslash escapes instead.
mystr = 'Jon \'Maddog\' Orwant'  # escaped single quote
mystr = "Jon \"Maddog\" Orwant"  # escaped double quote
#-----------------------------
# Triple-quoted literals may span several lines; the newlines are kept.
mystr = """
This is a multiline string literal
enclosed in triple double quotes.
"""
mystr = '''
And this is a multiline string literal
enclosed in triple single quotes.
'''
#-----------------------------
#-----------------------------
# get a 5-char string, skip 3, then grab 2 8-char strings, then the rest
# Note that struct.unpack cannot use * for an unknown length.
# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65224
import struct
(lead, s1, s2), tail = struct.unpack("5s 3x 8s 8s", data[:24]), data[24:]

# split at five-char boundaries
# struct.unpack needs the buffer length to match the format exactly, so any
# trailing partial group is trimmed off first.
whole = (len(data) // 5) * 5
fivers = struct.unpack("5s" * (whole // 5), data[:whole])
# the same split using plain slicing (gives a list instead of a tuple)
fivers = [data[i:i + 5] for i in range(0, whole, 5)]

# chop string into individual characters
chars = list(data)
#-----------------------------
mystr = "This is what you have"
#       +012345678901234567890  Indexing forwards  (left to right)
#        109876543210987654321- Indexing backwards (right to left)
#         note that 0 means 10 or 20, etc. above

first = mystr[0]       # "T"
start = mystr[5:7]     # "is"
rest = mystr[13:]      # "you have"
last = mystr[-1]       # "e"
end = mystr[-4:]       # "have"
piece = mystr[-8:-5]   # "you"
#-----------------------------
# Python strings are immutable.
# In general, you should just do piecemeal reallocation:
mystr = "This is what you have"
mystr = mystr[:5] + "wasn't" + mystr[7:]

# Or replace and reallocate
mystr = "This is what you have"
mystr = mystr.replace(" is ", " wasn't ")

# DON'T DO THIS: In-place modification could be done using character arrays
# (the "c" typecode exists in Python 2 only)
import array
mystr = array.array("c", "This is what you have")
mystr[5:7] = array.array("c", "wasn't")
# mystr is now array('c', "This wasn't what you have")

# DON'T DO THIS: It could also be done using MutableString
# (the UserString.MutableString class exists in Python 2 only)
from UserString import MutableString
mystr = MutableString("This is what you have")
mystr[-12:] = "ondrous"
# mystr is now "This is wondrous"
#-----------------------------
# you can test simple substrings with "in" (for regex matching see ch.6):
if txt in mystr[-10:]:
    print("'%s' found in last 10 characters" % txt)

# Or use the startswith() and endswith() string methods:
if mystr.startswith(txt):
    print("%s starts with %s." % (mystr, txt))
if mystr.endswith(txt):
    print("%s ends with %s." % (mystr, txt))
#----------------------------- |
#-----------------------------
# Introductory Note: quite a bit of this section is not terribly Pythonic
# as names must be set before being used. For instance, unless myvar has
# been previously defined, these next lines will all raise NameError:
myvar = myvar or some_default
myvar2 = myvar or some_default
myvar |= some_default  # bitwise-or, not logical-or - for demo

# The standard way of setting a default is often to assign it up front
# and let later code override it:
myvar = default_value
if some_condition:
    pass  # code which may set myvar to something else

# If the value comes back from a function and may be empty/None, fall back
# to the default when it is false:
myvar = somefunc() or default_value

# If you want a default value that can be overridden by the person calling
# your code, you can often wrap it in a function with a named parameter:
def myfunc(myvar="a"):
    """Return myvar with "b" appended; myvar defaults to "a"."""
    return myvar + "b"

print("%s %s" % (myfunc(), myfunc("c")))
#=> ab cb

# Note, though, that this won't work for mutable objects such as lists or
# dicts that are mutated in the function as the object is only created once
# and repeated calls to the same function will return the same object. This
# can be desired behaviour however - see section 10.3, for instance.
def myfunc(myvar=[]):
    """Append "x" to myvar and return it (the default list is shared!)."""
    myvar.append("x")
    return myvar

# Each result is converted to text before the next call mutates the shared
# list, which is what makes the two printed values differ.
print("%s %s" % (str(myfunc()), str(myfunc())))
#=> ['x'] ['x', 'x']

# You need to do:
def myfunc(myvar=None):
    """Append "x" to myvar (a fresh list when omitted) and return it."""
    if myvar is None:
        myvar = []
    myvar.append("x")
    return myvar

print("%s %s" % (str(myfunc()), str(myfunc())))
#=> ['x'] ['x']

#=== Perl Equivalencies start here
# use b if b is true, otherwise use c
a = b or c

# as that is a little tricksy, the following may be preferred:
if b:
    a = b
else:
    a = c

# set x to y unless x is already true
if not x:
    x = y
#-----------------------------
# use b if b is defined, else c
try:
    a = b
except NameError:
    a = c
#-----------------------------
foo = bar or "DEFAULT VALUE"
#-----------------------------
# To get a user (for both UNIX and Windows), use:
import getpass
user = getpass.getuser()

# DON'T DO THIS: find the user name on Unix systems
import os
user = os.environ.get("USER", os.environ.get("LOGNAME"))
#-----------------------------
if not starting_point:
    starting_point = "Greenwich"
#-----------------------------
if not a:      # copy only if empty
    a = b

if b:          # assign b if nonempty, else c
    a = b
else:
    a = c
#-----------------------------
#-----------------------------
# Tuple unpacking exchanges two values without a temporary.
v1, v2 = v2, v1
#-----------------------------
# DON'T DO THIS: the manual three-step swap
temp = a
a = b
b = temp
#-----------------------------
a, b = "alpha", "omega"
a, b = b, a  # the first shall be last -- and versa vice
#-----------------------------
# The same trick rotates any number of names at once.
alpha, beta, production = "January March August".split()
alpha, beta, production = beta, production, alpha
#-----------------------------
#-----------------------------
# ord() maps a one-character string to its code; chr() goes back.
num = ord(char)
char = chr(num)
#-----------------------------
# The %c format also turns a number into its character.
char = "%c" % num
print("Number %d is character %c" % (num, num))
print("Number %(n)d is character %(n)c" % {"n": num})
print("Number %(num)d is character %(num)c" % locals())
#=> Number 101 is character e
#-----------------------------
ascii_character_numbers = list(map(ord, "sample"))
print(ascii_character_numbers)
#=> [115, 97, 109, 112, 108, 101]

word = "".join(map(chr, ascii_character_numbers))
word = "".join(map(chr, [115, 97, 109, 112, 108, 101]))
print(word)
#=> sample
#-----------------------------
hal = "HAL"
# add one to each ASCII value
ibm = "".join(map(chr, [ord(letter) + 1 for letter in hal]))
print(ibm)
#=> IBM
#-----------------------------
#-----------------------------
mylist = list(mystr)
#-----------------------------
for char in mystr:
    pass  # do something with char
#-----------------------------
mystr = "an apple a day"
uniq = sorted(set(mystr))
print("unique chars are: '%s'" % "".join(uniq))
#=> unique chars are: ' adelnpy'
#-----------------------------
ascvals = [ord(c) for c in mystr]
print("total is %s for '%s'." % (sum(ascvals), mystr))
#=> total is 1248 for 'an apple a day'.
#-----------------------------
# sysv checksum
def checksum(myfile):
    """Return the sum of all character codes in myfile, modulo 2**16 - 1.

    myfile is any iterable of strings (an open file, fileinput.input(),
    a list of lines, or a plain string, which iterates one character at
    a time).
    """
    total = sum([ord(c) for line in myfile for c in line])
    # The parentheses matter: "%" binds tighter than "-", so without them
    # the result would be (total % 65536) - 1, i.e. -1 for empty input.
    return total % (2**16 - 1)

import fileinput
print(checksum(fileinput.input()))      # data from sys.stdin

# Using a function means any iterable can be checksummed:
print(checksum(open("C:/test.txt")))    # data from file
print(checksum("sometext"))             # data from string
#-----------------------------
#!/usr/bin/python
# slowcat - emulate a   s l o w  line printer
# usage: slowcat [- DELAY] [files ...]
import sys, select
import re
import fileinput
DELAY = 1
# Only look at argv[1] when it exists; "-N" selects a delay factor of N.
if len(sys.argv) > 1 and re.match(r"^-\d+$", sys.argv[1]):
    DELAY = -int(sys.argv[1])
    del sys.argv[1]
for ln in fileinput.input():
    for c in ln:
        sys.stdout.write(c)
        sys.stdout.flush()
        # select() with no descriptors is used purely as a sub-second sleep
        select.select([], [], [], 0.005 * DELAY)
#-----------------------------
#-----------------------------
# 2.3+ only
revchars = mystr[::-1]  # extended slice - step is -1
revwords = " ".join(mystr.split(" ")[::-1])

# pre 2.3 version:
chars_backwards = list(mystr)
chars_backwards.reverse()
revbytes = "".join(chars_backwards)

mylist = mystr.split()
mylist.reverse()
revwords = ' '.join(mylist)

# Alternative version using reversed():
revchars = "".join(reversed(mystr))
revwords = " ".join(reversed(mystr.split(" ")))

# reversed() makes an iterator, which means that the reversal
# happens as it is consumed. This means that "print reversed(mystr)" is not
# the same as mystr[::-1]. Standard usage is:
for char in reversed(mystr):
    pass  # ... do something
#-----------------------------
# 2.3+ only
word = "reviver"
is_palindrome = (word == word[::-1])
#-----------------------------
# Generator version
def get_palindromes(fname):
    """Yield each line of file fname that is a palindrome longer than 5 chars.

    Trailing whitespace (including the newline) is removed before testing.
    """
    for raw in open(fname):
        candidate = raw.rstrip()
        if len(candidate) <= 5:
            continue
        if candidate == candidate[::-1]:
            yield candidate

long_palindromes = list(get_palindromes("/usr/share/dict/words"))

# Simpler old-style version using 2.2 string reversal
def rev_string(mystr):
    """Return mystr with its characters in reverse order."""
    pieces = list(mystr)
    pieces.reverse()
    return "".join(pieces)

long_palindromes = []
for line in open("/usr/share/dict/words"):
    word = line.rstrip()
    if len(word) <= 5:
        continue
    if word == rev_string(word):
        long_palindromes.append(word)
print(long_palindromes)
#-----------------------------
#-----------------------------
# expandtabs() returns a new string with tabs replaced by spaces; called
# bare like this the result is simply discarded (interactive-style demo).
mystr.expandtabs()   # default tab size
mystr.expandtabs(4)  # a tab stop every 4 columns
#-----------------------------
#-----------------------------
# %(name)s picks values out of a mapping (the original literal closed the
# dict with ")" instead of "}", which is a syntax error).
text = "I am %(rows)s high and %(cols)s long" % {"rows": 24, "cols": 80}
print(text)
#=> I am 24 high and 80 long

rows, cols = 24, 80
text = "I am %(rows)s high and %(cols)s long" % locals()
print(text)
#=> I am 24 high and 80 long
#-----------------------------
import re
# The replacement may be a function that receives the match object.
print(re.sub(r"\d+", lambda i: str(2 * int(i.group(0))), "I am 17 years old"))
#=> I am 34 years old
#-----------------------------
# expand variables in text, but put an error message in
# if the variable isn't defined
class SafeDict(dict):
    """dict whose lookups never fail.

    A missing key yields the placeholder "[No Variable: <key>]" instead of
    raising KeyError, so it can be used on the right-hand side of "%".
    """

    def __getitem__(self, key):
        return self.get(key, "[No Variable: %s]" % key)

hi = "Hello"
text = "%(hi)s and %(bye)s!" % SafeDict(locals())
print(text)
#=> Hello and [No Variable: bye]!

# If you don't need a particular error message, just use the Template class:
from string import Template
x = Template("$hi and $bye!")
hi = "Hello"
print(x.safe_substitute(locals()))
#=> Hello and $bye!

# substitute() is the strict variant: it raises KeyError for $bye. The error
# is caught here so that the rest of the file still runs.
try:
    print(x.substitute(locals()))
except KeyError:
    print("substitute() raised KeyError: no value for $bye")
#-----------------------------
#-----------------------------
mystr = "bo peep".upper()    # BO PEEP
mystr = mystr.lower()        # bo peep
mystr = mystr.capitalize()   # Bo peep
#-----------------------------
beast = "python"
caprest = beast.capitalize().swapcase()  # pYTHON
#-----------------------------
print("thIS is a loNG liNE".title())
#=> This Is A Long Line
#-----------------------------
if a.upper() == b.upper():
    print("a and b are the same")
#-----------------------------
import random

def randcase_one(letter):
    """Return letter lower-cased 4 times out of 5, upper-cased otherwise."""
    # randint() includes both end points, so (0, 4) yields 0 one time in
    # five; the previous (0, 5) upper-cased only one letter in six.
    if random.randint(0, 4):  # True on 1, 2, 3, 4
        return letter.lower()
    else:
        return letter.upper()

def randcase(myfile):
    """Yield each line of myfile, newline removed, with randomized case."""
    for line in myfile:
        # rstrip("\n") instead of [:-1]: a final line without a trailing
        # newline must not lose its last character.
        yield "".join(randcase_one(letter) for letter in line.rstrip("\n"))

for line in randcase(myfile):
    print(line)
#-----------------------------
#-----------------------------
# Expressions are evaluated first and only then formatted into the string.
"I have %d guanacos." % (n + 1)
print("I have %s guanacos." % (n + 1))
#-----------------------------
# Python templates disallow in-string calculations (see PEP 292), so every
# value is computed beforehand and passed in by name.
from string import Template

email_template = Template("""\
To: $address
From: Your Bank
CC: $cc_number
Date: $date

Dear $name,

Today you bounced check number $checknum to us.
Your account is now closed.

Sincerely,
the management
""")

import random
import datetime

person = dict(
    address="Joe@somewhere.com",
    name="Joe",
    cc_number=1234567890,
    checknum=500 + random.randint(0, 99),
)

# Keyword arguments are merged with the mapping when substituting.
print(email_template.substitute(person, date=datetime.date.today()))
#-----------------------------
#-----------------------------
# indenting here documents
#
# in python multiline strings can be used as here documents
var = """
    your text
    goes here
    """

# using regular expressions
import re
# With MULTILINE, "^" anchors at the start of every line, so the leading
# whitespace of each line is removed; [:-1] then drops the final newline.
re_leading_blanks = re.compile(r"^\s+", re.MULTILINE)
unindented = re_leading_blanks.sub("", var)
var1 = unindented[:-1]

# using string methods
# split into lines, use every line except first and last, left strip and rejoin.
inner_lines = var.split("\n")[1:-1]
var2 = "\n".join([text.lstrip() for text in inner_lines])

poem = """
    Here's your poem:
    Now far ahead the Road has gone,
        And I must follow, if I can,
    Pursuing it with eager feet,
        Until it joins some larger way
    Where many paths and errand meet.
        And whither then? I cannot say.
            --Bilbo in /usr/src/perl/pp_ctl.c
    """

import textwrap
# dedent() strips the indentation common to all lines; the slice removes
# the newline that opens the literal and the one that closes it.
print(textwrap.dedent(poem)[1:-1])
#-----------------------------
#-----------------------------
from textwrap import wrap
# (a comma was missing between the two keyword arguments)
output = wrap(para, initial_indent=leadtab, subsequent_indent=nexttab)
#-----------------------------
#!/usr/bin/env python
# wrapdemo - show how textwrap works
txt = """\
Folding and splicing is the work of an editor,
not a mere collection of silicon and
mobile electrons!
"""
from textwrap import TextWrapper
wrapper = TextWrapper(width=20,
                      initial_indent=" " * 4,
                      subsequent_indent=" " * 2)
print("0123456789" * 2)
print(wrapper.fill(txt))
#-----------------------------
"""Expected result:

01234567890123456789
    Folding and
  splicing is the
  work of an editor,
  not a mere
  collection of
  silicon and mobile
  electrons!
"""
#-----------------------------
# merge multiple lines into one, then wrap one long line
from textwrap import fill
import fileinput
print(fill("".join(fileinput.input())))
#-----------------------------
# Term::ReadKey::GetTerminalSize() isn't in the Perl standard library.
# It isn't in the Python standard library either. Michael Hudson's
# recipe from python-list #530228 is shown here.
# (http://aspn.activestate.com/ASPN/Mail/Message/python-list/530228)
# Be aware that this will work on Unix but not on Windows.
from textwrap import fill
import struct, fcntl, termios

def getheightwidth():
    """Return (height, width) of the terminal attached to file descriptor 0.

    Issues the TIOCGWINSZ ioctl with an 8-byte buffer, unpacks the reply
    as four shorts and keeps the first two.
    """
    height, width = struct.unpack(
        "hhhh", fcntl.ioctl(0, termios.TIOCGWINSZ, "\000" * 8))[0:2]
    return height, width

# PERL <>, $/, $\ emulation
import fileinput
import re

_, width = getheightwidth()
# paragraphs are separated by one or more blank lines
for para in re.split(r"\n{2,}", "".join(fileinput.input())):
    print(fill(para, width))
#-----------------------------
import re

mystr = '''Mom said, "Don't do that."'''  #"

# Put a backslash in front of selected characters. The results are bound to
# names so they can be used afterwards instead of being thrown away.
escaped_quotes = re.sub("['\"]", lambda i: "\\" + i.group(0), mystr)
escaped_caps = re.sub("[A-Z]", lambda i: "\\" + i.group(0), mystr)
escaped_nonword = re.sub(r"\W", lambda i: "\\" + i.group(0), "is a test!")

# re.escape() is the counterpart of Perl's quotemeta: it escapes the
# characters that are special in a regular expression (the exact set of
# characters it escapes differs between Python versions).
escaped_for_regex = re.escape("is a test!")
#-----------------------------
# Each method returns a new string with whitespace removed from one or
# both ends; the original string is never modified in place.
mystr = mystr.lstrip()  # leading (left) end only
mystr = mystr.rstrip()  # trailing (right) end only
mystr = mystr.strip()   # both ends
#-----------------------------
import csv

def parse_csv(line):
    """Split one line of CSV text into a list of fields.

    A backslash is accepted as the escape character inside quoted fields.
    """
    # Indexing the row list works on every Python version, whereas
    # reader.next() exists in Python 2 only.
    return list(csv.reader([line], escapechar='\\'))[0]

line = '''XYZZY,"","O'Reilly, Inc","Wall, Larry","a \\"glug\\" bit,",5,"Error, Core Dumped,",'''  #"
fields = parse_csv(line)
for i, field in enumerate(fields):
    print("%d : %s" % (i, field))

# pre-2.3 version of parse_csv
import re

def parse_csv(text):
    """Split one line of CSV text into a list of fields using a regex.

    Backslash escapes inside quoted fields are recognised but left in the
    returned field text.
    """
    # Alternatives: a quoted field (group 1), an unquoted field (group 2),
    # or a lone comma standing for an empty field.
    pattern = re.compile(r'''"([^"\\]*(?:\\.[^"\\]*)*)",?|([^,]+),?|,''')
    mylist = ["".join(elem) for elem in re.findall(pattern, text)]
    # a trailing comma means there is one more, empty, field
    if text[-1] == ",":
        mylist += ['']
    return mylist

# csv.reader is meant to work for many lines, something like:
# (NB: in Python default, quotechar is *not* escaped by backslash,
#      but doubled instead. That's what Excel does.)
# "lines" and the dialect name "some" are placeholders to be supplied by
# the caller; the module name was misspelt "cvs" here before.
for fields in csv.reader(lines, dialect="some"):
    for num, field in enumerate(fields):
        print("%s : %s" % (num, field))
#-----------------------------
#-----------------------------
def soundex(name, len=4):
    """Return the soundex code of name, padded/truncated to len characters.

    soundex module conforming to Knuth's algorithm
    implementation 2000-12-24 by Gregory Jorgensen
    public domain

    Non-alphabetic characters in name are ignored. A name without any
    letters yields a code made of len zeros.
    """
    # digits holds the soundex values for the alphabet
    digits = '01230120022455012623010202'
    sndx = ''
    fc = ''

    # translate alpha chars in name to soundex digits
    for c in name.upper():
        if c.isalpha():
            if not fc:
                fc = c  # remember first letter
            d = digits[ord(c) - ord('A')]
            # duplicate consecutive soundex digits are skipped
            if not sndx or (d != sndx[-1]):
                sndx += d

    # replace first digit with first alpha character
    sndx = fc + sndx[1:]

    # remove all 0s from the soundex code
    sndx = sndx.replace('0', '')

    # return soundex code padded to len characters
    return (sndx + (len * '0'))[:len]


# Interactive lookup; guarded so that importing this code for its soundex()
# function neither prompts the user nor reads /etc/passwd.
if __name__ == "__main__":
    user = raw_input("Lookup user: ")
    if user == "":
        raise SystemExit

    name_code = soundex(user)
    for line in open("/etc/passwd"):
        line = line.split(":")
        if len(line) < 5:
            continue  # not a complete passwd entry; field 4 is missing
        for piece in line[4].split():
            if name_code == soundex(piece):
                # both values must be passed to "%" as one tuple
                print("%s: %s\n" % (line[0], line[4]))
#-----------------------------
#-----------------------------
# Rewrite the named files in place (keeping a ".orig" backup), replacing
# each word on the left of "=>" below with the spelling on its right.
import sys, fileinput, re

data = """\
analysed       => analyzed
built-in       => builtin
chastized      => chastised
commandline    => command-line
de-allocate    => deallocate
dropin         => drop-in
hardcode       => hard-code
meta-data      => metadata
multicharacter => multi-character
multiway       => multi-way
non-empty      => nonempty
non-profit     => nonprofit
non-trappable  => nontrappable
pre-define     => predefine
preextend      => pre-extend
re-compiling   => recompiling
reenter        => re-enter
turnkey        => turn-key
"""

# Build the old -> new table, ignoring blank lines.
mydict = {}
for entry in data.split("\n"):
    if entry.strip():
        old, new = [part.strip() for part in entry.split("=>")]
        mydict[old] = new

# One alternation of all (escaped) old spellings, captured as group 1.
pattern_text = "(%s)" % "|".join(map(re.escape, mydict.keys()))
pattern = re.compile(pattern_text)

# An optional leading "-v" switches on the verbose flag.
args = sys.argv[1:]
verbose = int(args[:1] == ["-v"])
if verbose:
    args = args[1:]

if not args:
    sys.stderr.write("%s: Reading from stdin\n" % sys.argv[0])

# With inplace=1, whatever is written to sys.stdout replaces the file.
for line in fileinput.input(args, inplace=1, backup=".orig"):
    sys.stdout.write(pattern.sub(lambda found: mydict[found.group(1)], line))
#-----------------------------
#-----------------------------
#!/usr/bin/python
# psgrep - print selected lines of ps output by
#          compiling user queries into code.
#
# examples :
# psgrep "uid<10"
import sys, os, re

class PsLineMatch:
    """Hold one line of "ps wwaxl" output and test it against a query.

    set_query() compiles the criteria once; new_line() loads a line and
    is_desirable() reports whether that line satisfies every criterion.
    """

    # each field from the PS header
    fieldnames = ("flags", "uid", "pid", "ppid", "pri", "nice", "size",
                  "rss", "wchan", "stat", "tty", "time", "command")
    # fields converted to int so that they compare numerically
    numeric_fields = ("flags", "uid", "pid", "ppid", "pri", "nice", "size", "rss")

    def __init__(self):
        self._fields = {}

    def new_line(self, ln):
        """Load one line of ps output and split it into named fields."""
        self._ln = ln.rstrip()
        # ps header for option "wwaxl" (different than in the perl code)
        #   F   UID   PID  PPID PRI  NI   VSZ  RSS WCHAN  STAT TTY   TIME COMMAND
        # 004     0     1     0  15   0   448  236 schedu S    ?     0:07 init
        #
        # because only the last entry might contain blanks, splitting
        # is safe
        data = self._ln.split(None, 12)
        for fn, elem in zip(self.fieldnames, data):
            if fn in self.numeric_fields:  # make numbers integer
                self._fields[fn] = int(elem)
            else:
                self._fields[fn] = elem

    def set_query(self, args):
        """Compile the criteria in args into one expression.

        Each criterion looks like "uid==500" or "command==init": a field
        name, a comparison built from the characters = < >, and a value.
        Prints a message and raises SystemExit for anything else.
        """
        conds = []
        m = re.compile(r"(\w+)([=<>]+)(.+)")
        for a in args:
            found = m.match(a)
            if found is None:
                print("can't understand query \"%s\"" % (a))
                raise SystemExit
            field, op, val = found.groups()
            if field in self.numeric_fields:
                conds.append(a)
            else:
                # Non-numeric values are turned into a string literal; %r
                # also copes with values that contain quote characters.
                # (This used to pass two arguments to append(): TypeError.)
                conds.append("%s%s%r" % (field, op, val))
        # NOTE: criteria come from the command line and are later run by
        # eval(); only use this with trusted input.
        self._desirable = compile("((" + ")and(".join(conds) + "))",
                                  "<string>", "eval")

    def is_desirable(self):
        """Return the result of the compiled query for the current line."""
        # the field values act as the local variables of the expression
        return eval(self._desirable, {}, self._fields)

    def __str__(self):
        # to allow "print".
        return self._ln


# Script body; guarded so the class can be imported without running ps.
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        print("""usage: %s criterion ...
    Each criterion is a Python expression involving:
    %s
    All criteria must be met for a line to be printed."""
              % (sys.argv[0], " ".join(PsLineMatch().fieldnames)))
        raise SystemExit

    psln = PsLineMatch()
    psln.set_query(sys.argv[1:])
    p = os.popen("ps wwaxl")
    print(p.readline()[:-1])  # emit header line
    for ln in p.readlines():
        psln.new_line(ln)
        if psln.is_desirable():
            print(psln)
    p.close()

# alternatively one could consider every argument being a string and
# support wildcards: "uid==500" "command~^wm" by means of re, but this
# does not show dynamic python code generation, although re.compile
# also precompiles.
#-----------------------------