//----------------------------------------------------------------------------------
// Introductory examples: iterating over lines, reading a whole file, writing
// text to a file, and positioning within a file through NIO channels.
datafile = new File('Pleac/data/pleac8_0.txt') // change on your system

// print the length of each line, then read the file as a list and as one String
datafile.eachLine{ line -> print line.size() }
lines = datafile.readLines()
wholeTextFile = datafile.text

// on command line Groovy use -a auto split pattern instead of record separator
// default pattern is /\s/
// groovy -a -e 'println "First word is ${split[0][1]}"'

// (additional examples to original cookbook to illustrate -a)
// Print processes owned by root:
// ps aux|groovy -ane "if(split[0][1] =~ 'root')println split[0][10..-1]"

// Print all logins from /etc/passwd that are not commented:
// groovy -a':' -ne "if(!(split[0][1] =~ /^#/))println split[0][1]" /etc/passwd

// Add the first and the penultimate column of a file:
// groovy -ape "split[0][1].toInteger()+split[0][-2].toInteger()" accounts.txt

// no BEGIN and END in Groovy (has been proposed, may be added soon)

// A writer is needed here: print() called on a plain OutputStream resolves to
// Groovy's generic Object.print, which sends the text to stdout, not the file.
datafile.withPrintWriter{ writer ->
    writer.print "one" + "two" + "three"  // "onetwothree" -> file
    println "Baa baa black sheep."        // sent to $stdout
}

// use streams or channels for advanced file handling
int size = datafile.size()
buffer = ByteBuffer.allocate(size) // for large files, use some block size, e.g. 4096
channel = new FileInputStream(datafile).channel
try {
    println "Number of bytes read was: ${channel.read(buffer)}" // -1 = EOF
} finally {
    channel.close() // release the file handle even if the read fails
}

channel = new FileOutputStream(File.createTempFile("pleac8", ".junk")).channel
try {
    size = channel.size()
    channel.truncate(size) // shrinks file (in our case to same size)
    pos = channel.position()
    println "I'm $pos bytes from the start of datafile"
    channel.position(pos)  // move to pos (in our case unchanged)
    channel.position(0)    // move to start of file
    channel.position(size) // move to end of file
} finally {
    channel.close()
}

// no sysread and syswrite are available but dataInput/output streams
// can be used to achieve similar functionality, see 8.15.
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// Joins physical lines that end in a backslash into single logical lines.
testfile = new File('Pleac/data/pleac8_1.txt') // change on your system
// contents of testfile:
// DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
//         $(TEXINFOS) $(INFOS) $(MANS) $(DATA)
// DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
//         $(TEXINFOS) $(INFO_DEPS) $(MANS) $(DATA) \
//         $(EXTRA_DIST)

lines = []
continuing = false
regex = /\\$/
testfile.eachLine{ text ->
    // drop the trailing backslash (if any) before storing the text
    stripped = text.replaceAll(regex, '')
    if (!continuing) {
        // previous line was complete: start a new logical line
        lines = lines + stripped
    } else {
        // previous line ended in a backslash: glue this one onto it
        lines[-1] = lines[-1] + stripped
    }
    // remember whether this physical line asks for a continuation
    continuing = (text =~ regex)
}
println lines.join('\n')
// =>
// DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(INFOS) $(MANS) $(DATA)
// DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(INFO_DEPS) $(MANS) $(DATA) $(EXTRA_DIST)

// to remove hidden spaces after the slash (but keep the slash):
/**
 * Strips whitespace that follows a trailing backslash.
 *
 * @param line  the text to clean up
 * @return the text with any whitespace after a final backslash removed;
 *         text without such a backslash is returned unchanged
 */
def trimtail(line) {
    return line.replaceAll(/(?<=\\)\s*$/, '')
}
b = /\\/ // backslash
assert "abc $b" == trimtail("abc $b")
assert "abc " == trimtail("abc ")
assert "abc $b" == trimtail("abc $b ")
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// Counting the lines (and paragraphs) of testfile in several ways.

// unixScript:
// String.execute() starts the program directly, without a shell, so the '<'
// redirection must be handed to sh explicitly for wc to read the file on stdin.
println(['sh', '-c', "wc -l < '${testfile.path}'"].execute().text)

// for small files which fit in memory
println testfile.readLines().size()

// streaming approach (lines and paras)
lines = 0; paras = 1
testfile.eachLine{ lines++; if (it =~ /^$/) paras++ }
println "Found $lines lines and $paras paras."
// note: counts blank line at end as start of next empty para

// with a StreamTokenizer
// withReader closes the reader once the tokenizer has consumed the file
testfile.withReader{ reader ->
    st = new StreamTokenizer(reader)
    while (st.nextToken() != StreamTokenizer.TT_EOF) {}
    println st.lineno()
}
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// general pattern
/**
 * Splits every line of a file on runs of non-word characters and passes each
 * non-empty token to a callback.
 *
 * @param file         the File whose words are processed
 * @param processWord  closure called once per word, with the word as argument
 */
def processWordsInFile(file, processWord) {
    // read the file argument, not the script-level testfile variable
    file.splitEachLine(/\W+/) { matched ->
        // splitting can yield empty tokens (e.g. a line starting with
        // punctuation); skip those
        matched.each{ w -> if (w) processWord(w) }
    }
}

testfile = new File('Pleac/src/pleac8.groovy') // change path on your system

// count words
count = 0
processWordsInFile(testfile){ count++ }
println count

// (variation to Perl example)
// with a StreamTokenizer (counting words and numbers in Pleac chapter 8 source file)
words = 0; numbers = 0
// withReader closes the reader when tokenizing is finished
testfile.withReader{ reader ->
    st = new StreamTokenizer(reader)
    st.slashSlashComments(true) // ignore words and numbers in comments
    while (st.nextToken() != StreamTokenizer.TT_EOF) {
        if (st.ttype == StreamTokenizer.TT_WORD) words++
        else if (st.ttype == StreamTokenizer.TT_NUMBER) numbers++
    }
}
println "Found $words words and $numbers numbers."

// word frequency count
seen = [:]
processWordsInFile(testfile) {
    w = it.toLowerCase()
    if (seen.containsKey(w)) seen[w] += 1
    else seen[w] = 1
}
// output map in a descending numeric sort of its values
seen.entrySet().sort { a,b -> b.value <=> a.value }.each{ e ->
    printf("%5d %s\n", [e.value, e.key] )
}
// =>
// 25 pleac
// 22 line
// 20 file
// 19 println
// 19 lines
// 13 testfile
// ...
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// Printing the lines of testfile in reverse order.
testfile.readLines().reverseEach{ println it }

lines = testfile.readLines()
// normally one would use the reverseEach, but you can use
// a numerical index if you want
// (a counting loop is used because the range (size-1)..0 turns into the
// ascending range -1..0 for an empty file and would index lines[-1])
for (int idx = lines.size() - 1; idx >= 0; idx--) {
    println lines[idx]
}

// Paragraph-based processing could be done as in 8.2.

// A streaming-based solution could use random file access
// and have a sliding buffer working from the back of the
// file to the front.
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// Follows a growing log file: polls its length at a fixed interval and prints
// every newly appended line prefixed with '##'. Runs until interrupted.
logfile = new File('Pleac/data/sampleLog.txt')
// logTailingScript:
sampleInterval = 2000 // 2000 millis = 2 secs
file = new RandomAccessFile( logfile, "r" )
filePointer = 0 // set to logfile.size() to begin tailing from the end of the file
while( true ) {
    // Compare the length of the file to the file pointer
    long fileLength = logfile.size()
    if( fileLength < filePointer ) {
        // Log file must have been rotated or deleted;
        System.err.println "${new Date()}: Reopening $logfile"
        // release the handle on the old file before opening the new one
        file.close()
        file = new RandomAccessFile( logfile, "r" )
        filePointer = 0
    }
    if( fileLength > filePointer ) {
        // There is data to read
        file.seek( filePointer )
        while( (line = file.readLine()) != null ) {
            println '##' + line
        }
        // remember how far we got so the next pass only sees new data
        filePointer = file.filePointer
    }
    // Sleep for the specified interval
    Thread.sleep( sampleInterval )
}
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// Picking one line of testfile at random.
//testfile = newFile('/usr/share/fortune/humorists')

// small files: load everything, then index with a random position
random = new Random()
lines = testfile.readLines()
def pick = random.nextInt(lines.size())
println lines[pick]

// streamed alternative: keep the current line whenever a draw from
// 0..<count comes up 0, so no list of lines has to be held in memory
count = 0
def adage
testfile.eachLine{ candidate ->
    count += 1
    def keep = random.nextInt(count) == 0
    if (keep) {
        adage = candidate
    }
}
println adage
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// non-streamed solution (like Perl and Ruby):
// read all lines, shuffle the list in place, print it newline-separated
lines = testfile.readLines()
Collections.shuffle(lines)
def shuffledText = lines.join('\n')
println(shuffledText)
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// Printing one particular line (1-based) of testfile.
desiredLine = 235
// for small files
lines = testfile.readLines()
println "Line $desiredLine: ${lines[desiredLine-1]}"

// streaming solution
reader = testfile.newReader()
count = 0
def line
try {
    // stop reading as soon as the wanted line is reached; line stays null
    // when the file has fewer than desiredLine lines
    while ((line = reader.readLine()) != null) {
        if (++count == desiredLine) break
    }
} finally {
    reader.close() // the reader is no longer needed once the line is found
}
println "Line $desiredLine: $line"
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// Counts the pieces testfile falls into when split on the section marker.
// Groovy slashy strings take no trailing /i modifier; the embedded (?i) flag
// at the start of the pattern makes the match case-insensitive instead.
println testfile.text.split(/(?i)@@pleac@@_8./).size()
// => 23 (21 sections .0 .. .20 plus before .0 plus line above)
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// Removes the last line of logfile by truncating the file at the offset
// where that line begins.
file = new RandomAccessFile( logfile, "rw" )
try {
    long previous = 0, lastpos = 0
    boolean sawLine = false
    while( (line = file.readLine()) != null ) {
        sawLine = true
        previous = lastpos           // start offset of the line just read
        lastpos = file.filePointer   // start offset of the next line
    }
    // Offset 0 is a legitimate start for the last line (single-line file),
    // so test whether any line was read rather than whether previous is non-zero.
    if (sawLine) file.setLength(previous)
} finally {
    file.close()
}
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// Java's streams are binary at the lowest level if not processed with
// higher level stream mechanisms or readers/writers. Some additions
// to the Perl cookbook which illustrate the basics.

// Print first ten bytes of a binary file:
/**
 * Prints the first ten bytes of a file as space-separated numbers followed
 * by a newline. read() yields -1 for positions past the end of the file.
 *
 * @param filename  path of the file to inspect
 */
def dumpStart(filename) {
    // withInputStream closes the stream when the closure returns
    new File(filename).withInputStream{ input ->
        10.times{ print input.read() + ' ' }
    }
    println()
}
// NOTE(review): the rt.jar path below depends on the JDK layout - confirm it
// exists on the target Java version.
dumpStart(System.getProperty('java.home')+'/lib/rt.jar')
// => 80 75 3 4 10 0 0 0 0 0 (note first two bytes = PK - you might recognize this
// as the starting sequence of a zip file)
dumpStart('Pleac/classes/pleac8.class') // after running groovyc compiler in src directory
// => 202 254 186 190 0 0 0 47 2 20 (starting bytes in HEX: CAFEBABE)

// write the byte values 0..19 to a scratch file and echo them back
binfile = new File('Pleac/data/temp.bin')
binfile.withOutputStream{ stream -> (0..<20).each{ stream.write(it) }}
binfile.eachByte{ print it + ' ' }; println()
// => 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// lets treat binfile as having 5 records of size 4, let's print out the 3rd record
recsize = 4
recno = 2 // index starts at 0
address = recno * recsize // byte offset of the wanted record
randomaccess = new RandomAccessFile(binfile, 'r')
randomaccess.seek(address)
// read and print the record one byte at a time
for (i in 0..<recsize) {
    print randomaccess.read() + ' '
}
println()
// => 8 9 10 11
randomaccess.close()
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// let's take the example from 8.12 but replace the 3rd record with
// 90 - the original value in the file
// this is an alternative example to the Perl cookbook which is cross platform
// see chapter 1 regarding un/pack which could be combined with below
// to achieve the full functionality of the original 8.13
recsize = 4
recno = 2 // index starts at 0
address = recno * recsize
randomaccess = new RandomAccessFile(binfile, 'rw')

// pass 1: collect the current bytes of the record
randomaccess.seek(address)
bytes = (0..<recsize).collect{ randomaccess.read() }

// pass 2: go back to the record start and overwrite each byte
randomaccess.seek(address)
for (b in bytes) {
    randomaccess.write(90 - b)
}
randomaccess.close()

binfile.eachByte{ value -> print value + ' ' }
println()
// => 0 1 2 3 4 5 6 7 82 81 80 79 12 13 14 15 16 17 18 19
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// reading a String would involve looping and collecting the read bytes

// simple bgets
// this is similar to the revised 8.13 but would look for the terminating 0

// simplistic strings functionality:
// print only the bytes whose value lies in 32..126, then end the line
binfile.eachByte{ b ->
    int code = (int) b
    if (code >= 32 && code <= 126) {
        print((char) b)
    }
}
println()
// => RQPO
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// You could combine the byte-level reading/writing mechanisms shown
// in 8.11 - 8.12 and combine that with the un/pack functionality from
// Chapter 1 to achieve the desired functionality. A more Java and Groovy
// friendly way to do this would be to use the Scattering and Gathering
// stream operations of channels for byte-oriented record fields or
// data-oriented records. Alternatively, the dataInput/output stream
// capabilities for data-oriented records. Finally, the
// objectInput/output stream capabilities could be used for object types.
// Note, these examples mix reading and writing even though the original
// Perl example was just about reading.

// fixed-length byte-oriented records using channels
// typical approach used with low-level protocols or file formats
import java.nio.*

// start from scratch
binfile.delete()
binfile.createNewFile()
buf1 = ByteBuffer.wrap([10,11,12,13] as byte[]) // simulate 4 byte field
buf2 = ByteBuffer.wrap([44,45] as byte[])       // 2 byte field
buf3 = ByteBuffer.wrap('Hello'.bytes)           // String
records = [buf1, buf2, buf3] as ByteBuffer[]
channel = new FileOutputStream(binfile).channel
channel.write(records) // gathering byte records
channel.close()
binfile.eachByte{ value -> print value + ' ' }
println()
// => 10 11 12 13 44 45 72 101 108 108 111
// ScatteringInputStream would convert this back into an array of byte[]

// data-oriented streams using channels
// start from scratch
binfile.delete()
binfile.createNewFile()
buf = ByteBuffer.allocate(24) // 3 + 8 + 5 + 8 bytes for the four fields below
now = System.currentTimeMillis()
buf.put('PI='.bytes)
buf.putDouble(Math.PI)
buf.put('Date='.bytes)
buf.putLong(now)
buf.flip() // readies for writing: set length and point back to start
channel = new FileOutputStream(binfile).channel
channel.write(buf)
channel.close()

// now read it back in
channel = new FileInputStream(binfile).channel
buf = ByteBuffer.allocate(24)
channel.read(buf)
buf.flip()
// the 3-character label, then the double it introduces
for (i in 0..<3) { print ((char)buf.get()) }
println (buf.getDouble())
// the 5-character label, then the timestamp it introduces
for (i in 0..<5) { print ((char)buf.get()) }
println (new Date(buf.getLong()))
channel.close()
// =>
// PI=3.141592653589793
// Date=Sat Jan 13 00:14:50 EST 2007

// object-oriented streams
// start from scratch
binfile.delete()
binfile.createNewFile()

// Minimal serializable value object used for the round trip below.
class Person implements Serializable {
    def name
    def age
}

// write three objects of different types one after another
binfile.withObjectOutputStream{ output ->
    output.writeObject(new Person(name:'Bernie',age:16))
    output.writeObject([1:'a', 2:'b'])
    output.writeObject(new Date())
}

// now read it back in, in the same order
binfile.withObjectInputStream{ input ->
    person = input.readObject()
    println "$person.name is $person.age"
    2.times{ println input.readObject() }
}
// =>
// Bernie is 16
// [1:"a", 2:"b"]
// Sat Jan 13 00:22:13 EST 2007
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// use built-in Java property class
// suppose you have the following file:
// # set your database settings here
// server=localhost
// url=jdbc:derby:derbyDB;create=true
// user.name=me
// user.password=secret
props = new Properties()
propsfile=new File('Pleac/data/plain.properties')
// withInputStream closes the stream after the properties have been loaded
propsfile.withInputStream{ input -> props.load(input) }
props.list(System.out)
// =>
// -- listing properties --
// user.name=me
// user.password=secret
// url=jdbc:derby:derbyDB;create=true
// server=localhost

// There are also provisions for writing properties file.

// (additional example to Perl)
// You can also read and write xml properties files.
new File('Pleac/data/props.xml').withOutputStream{ os ->
    props.storeToXML(os, "Database Settings")
}
// =>
// <?xml version="1.0" encoding="UTF-8"?>
// <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
// <properties>
// <comment>Database Settings</comment>
// <entry key="user.password">secret</entry>
// <entry key="user.name">me</entry>
// <entry key="url">jdbc:derby:derbyDB;create=true</entry>
// <entry key="server">localhost</entry>
// </properties>
//----------------------------------------------------------------------------------
//---------------------------------------------------------------------------------- // The File class provides canRead(), canWrite() and canExecute() (JDK6) methods // for finding out about security information specific to the user. JSR 203 // (expected in Java 7) provides access to additional security related attributes. // Another useful package to use when wondering about the trustworthiness of a // file is the java.security package. It contains many classes. Just one is // MessageDigest. This would allow you to create a strong checksum of a file. // Your program could refuse to operate if a file it was accessing didn't have the // checksum it was expecting - an indication that it may have been tampered with. // (additional info) // While getting file-based security permissions correct is important, it isn't the // only mechanism to use for security when using Java based systems. Java provides // policy files and an authorization and authentication API which lets you secure // any resources (not just files) at various levels of granularity with various // security mechanisms. // Security policies may be universal, apply to a particular codebase, or // using JAAS apply to individuals. Some indicative policy statements: // grant { // permission java.net.SocketPermission "*", "connect"; // permission java.io.FilePermission "C:\\users\\cathy\\foo.bat", "read"; // }; // grant codebase "file:./*", Principal ExamplePrincipal "Secret" { // permission java.io.FilePermission "dummy.txt", "read"; // }; //---------------------------------------------------------------------------------- |
//----------------------------------------------------------------------------------
// general purpose utility methods

/**
 * Reads size bytes from a buffer and returns them as a trimmed String.
 *
 * @param buf   ByteBuffer positioned at the start of the field
 * @param size  number of bytes the field occupies
 * @return the bytes decoded with the platform default charset, with leading
 *         and trailing whitespace/control characters (String.trim) removed
 */
def getString(buf,size){
    // bulk get into a local array instead of growing a list byte by byte
    byte[] raw = new byte[size]
    buf.get(raw)
    new String(raw).trim()
}

/**
 * Reads size bytes from a buffer and combines them into one number, treating
 * the first byte as the least significant one. Each byte is taken as an
 * unsigned value, so the result is never negative for sizes below 8.
 *
 * @param buf   ByteBuffer positioned at the start of the field
 * @param size  number of bytes to consume
 * @return the assembled value as a long
 */
def getInt(buf,size) {
    // normally in Java we would just use methods like getLong()
    // to read a long but wish to ignore platform issues here
    long val = 0
    for (n in 0..<size) {
        // widen to long before shifting: an int shift overflows into the sign
        // bit at byte index 3 and wraps around for any index above that
        val += ((long)(buf.get() & 0xFF)) << (n * 8)
    }
    return val
}

/**
 * Reads a 4-byte seconds value from the buffer and converts it to a Date.
 *
 * @param buf  ByteBuffer positioned at the start of the timestamp
 * @return the corresponding java.util.Date
 */
def getDate(buf) {
    return new Date(getInt(buf,4) * 1000) // Java uses millis
}

// specific utility method (wtmp file from ubuntu 6.10)
/**
 * Prints every complete record found in file from origpos onwards.
 *
 * @param file     the wtmp File to read
 * @param origpos  byte offset at which to start reading
 * @return the offset just past the last complete record that was printed
 *         (origpos when no complete record was available)
 */
def processWtmpRecords(file, origpos) {
    // local variables keep this method from overwriting script-level
    // variables of the same name (channel, buf, count, recsize)
    def recsize = 4 + 4 + 32 + 4 + 32 + 256 + 8 + 4 + 40
    def newpos = origpos
    def channel = new RandomAccessFile(file, 'r').channel
    try {
        channel.position(origpos)
        def buf = ByteBuffer.allocate(recsize)
        def count
        while ((count = channel.read(buf)) != -1) {
            // a short read means the record is not complete yet; leave it
            // for the next call
            if (count != recsize) break
            buf.flip()
            print getInt(buf,4) + ' '         // type
            print getInt(buf,4) + ' '         // pid
            print getString(buf,32) + ' '     // line
            print getString(buf,4) + ' '      // inittab
            print getString(buf,32) + ' '     // user
            print getString(buf,256) + ' '    // hostname
            buf.position(buf.position() + 8)  // skip
            println "${getDate(buf)} "        // time
            buf.clear() // the remaining bytes of the record are not used
            newpos = channel.position()
        }
    } finally {
        // this method is called repeatedly by the loop below, so the file
        // handle must be released on every call
        channel.close()
    }
    return newpos
}

wtmp = new File('Pleac/data/wtmp')
// wtmpTailingScript:
sampleInterval = 2000 // 2000 millis = 2 secs
filePointer = wtmp.size() // begin tailing from the end of the file
while(true) {
    // Compare the length of the file to the file pointer
    long fileLength = wtmp.size()
    if( fileLength > filePointer ) {
        // There is data to read
        filePointer = processWtmpRecords(wtmp, filePointer)
    }
    // Sleep for the specified interval
    Thread.sleep( sampleInterval )
}
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// contains most of the functionality of the original (not guaranteed to be perfect)
// -i ignores errors, e.g. if one target is write protected, the others will work
// -u writes files in unbuffered mode (ignore for '|')
// -n not to stdout
// -a all files are in append mode
// '>>file1' turn on append for individual file
// '|wc' or '|grep x' etc sends output to forked process (only one at any time)

/**
 * Fans each line out to a list of targets. A target is anything offering
 * write(bytes) and close(); null entries are skipped.
 */
class MultiStream {
    private targets
    private ignoreErrors

    /**
     * @param targets  the targets every line is written to
     * @param ignore   when true, a target that fails on write is dropped and
     *                 closed; when false the exception is rethrown
     */
    MultiStream(List targets, ignore) {
        this.targets = targets
        ignoreErrors = ignore
    }

    /** Writes content followed by the platform line separator to every target. */
    def println(String content) {
        // lines arrive without their terminator, so it is added back here;
        // otherwise all output would run together on a single line
        def bytes = (content + System.getProperty('line.separator')).bytes
        targets.each{
            try {
                it?.write(bytes)
            } catch (Exception ex) {
                if (!ignoreErrors) throw ex
                targets -= it
                it?.close()
            }
        }
    }

    /** Closes every remaining target. */
    def close() { targets.each{ it?.close() } }
}

/**
 * One output destination: a file (optionally appended to), a forked process
 * fed through a pipe, or an already open OutputStream.
 */
class TeeTarget {
    private filename
    private stream
    private p

    /**
     * @param name      file name, '>>file' to force append, or '|command'
     * @param append    open plain file names in append mode
     * @param buffered  wrap file streams in a BufferedOutputStream
     * @param ignore    when true, a file that cannot be opened is skipped
     */
    TeeTarget(String name, append, buffered, ignore) {
        if (name.startsWith('>>')) {
            createFileStream(name[2..-1],true,buffered,ignore)
        } else if (name.startsWith('|')) {
            createProcessReader(name[1..-1])
        } else {
            createFileStream(name,append,buffered,ignore)
        }
    }

    /** Wraps an existing stream (used for stdout). */
    TeeTarget(OutputStream stream) { this.stream = stream }

    def write(bytes) { stream?.write(bytes) }
    def close() { stream?.close() }

    // Opens name for writing and stores the resulting stream.
    private createFileStream(name, append, buffered, ignore) {
        filename = name
        def fos
        try {
            fos = new FileOutputStream(name, append)
        } catch (Exception ex) {
            // only swallow the failure when errors are to be ignored;
            // stream then stays null and writes to this target are no-ops
            if (!ignore) throw ex
            return
        }
        if (!buffered) stream = fos
        else stream = new BufferedOutputStream(fos)
    }

    private createWriter(os) {new PrintWriter(new BufferedOutputStream(os))}
    private createReader(is) {new BufferedReader(new InputStreamReader(is))}

    // Starts a thread copying lines from br to pw until br is exhausted.
    private createPiperThread(br, pw) {
        Thread.start{
            def next
            while((next = br.readLine())!=null) {
                pw.println(next)
            }
            pw.flush(); pw.close()
        }
    }

    // Starts the command and wires two copier threads: bytes written to this
    // target go to the process' input, the process' output goes to System.out.
    private createProcessReader(name) {
        def readFromStream = new PipedInputStream()
        def r1 = createReader(readFromStream)
        stream = new BufferedOutputStream(new PipedOutputStream(readFromStream))
        p = Runtime.runtime.exec(name)
        def w1 = createWriter(p.outputStream)
        createPiperThread(r1, w1)
        def w2 = createWriter(System.out)
        def r2 = createReader(p.inputStream)
        createPiperThread(r2, w2)
    }
}

// command line handling: options start with '-', everything else is a target
targets = []
append = false; ignore = false; includeStdout = true; buffer = true
(0..<args.size()).each{
    arg = args[it]
    if (arg.startsWith('-')) {
        switch (arg) {
            case '-a': append = true; break
            case '-i': ignore = true; break
            case '-n': includeStdout = false; break
            case '-u': buffer = false; break
            default:
                println "usage: tee [-ainu] [filenames] ..."
                System.exit(1)
        }
    } else targets += arg
}
targets = targets.collect{ new TeeTarget(it, append, buffer, ignore) }
if (includeStdout) targets += new TeeTarget(System.out)
def tee = new MultiStream(targets, ignore)
// compare against null explicitly: an empty line is a falsy String and would
// otherwise end the loop at the first blank line of input
def stdin = new BufferedReader(new InputStreamReader(System.in))
while ((line = stdin.readLine()) != null) {
    tee.println(line)
}
tee.close()
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
// most of the functionality - uses an explicit uid - ran on ubuntu 6.10 on intel
// Reads the fixed-size record at index uid from the lastlog file and prints
// its time, line and host fields.
lastlog = new File('Pleac/data/lastlog')
channel = new RandomAccessFile(lastlog, 'r').channel
uid = 1000
recsize = 4 + 32 + 256
buf = ByteBuffer.allocate(recsize)
try {
    channel.position(uid * recsize) // records are indexed by uid
    channel.read(buf)
} finally {
    channel.close() // only one record is needed, so release the file right away
}
buf.flip()
date = getDate(buf)
line = getString(buf,32)
host = getString(buf,256)
println "User with uid $uid last logged on $date from ${host?host:'unknown'} on $line"
// => User with uid 1000 last logged on Sat Jan 13 09:09:35 EST 2007 from unknown on :0
//----------------------------------------------------------------------------------