9. Directories

Introduction

//----------------------------------------------------------------------------------
// Groovy builds on Java's file and io classes which provide an operating
// system independent abstraction of a file system. The actual File class
// is the main class of interest. It represents a potential file or
// directory - which may or may not (yet) exist. In versions of Java up to
// and including Java 6, the File class was missing some of the functionality
// required to implement some of the examples in the Chapter (workarounds
// and alternatives are noted below). In Java 7, (also known as "Dolphin")
// new File abstraction facilities are being worked on but haven't yet been
// publically released. These new features are known as JSR 203 and are
// referred to when relevant to some of the examples. Thanks to Alan Bateman
// from Sun for clarification regarding various aspects of JSR 203. Apologies
// if I misunderstood any aspects relayed to me and also usual disclaimers
// apply regarding features which may change or be dropped before release.

// path='/usr/bin'; file='vi' // linux/mac os?
path='C:/windows'; file='explorer.exe' // windows
entry = new File("$path")
assert entry.isDirectory()
entry = new File("$path/$file")
assert entry.isFile()

println File.separator
// => \ (on Windows)
// => / (on Unix)
// however if you just stick to backslashes Java converts for you
// in most situations

// File modification time (no exact equivalent of ctime - but you can
// call stat() using JNI or use exec() of dir or ls to get this kind of info)
// JSR 203 also plans to provide such info in Java 7.
println new Date(entry.lastModified())
// => Wed Aug 04 07:00:00 EST 2004

// file size
println entry.size()
// => 1032192

// check if we have permission to read the file
assert entry.canRead()

// check if file is binary or text?
// There is no functionality for this at the file level.
// Java has the Java Activation Framework (jaf) which is used to
// associate files (and streams) with MIME Types and subsequently
// binary data streams or character encodings for (potentially
// multilanguage) text files. JSR-203 provides a method to determine
// the MIME type of a file. Depending on the platform the file type may
// be determined based on a file attribute, file name "extension", the
// bytes of the files (byte sniffing) or other means. It is service
// provider based so developers can plug in their own file type detection
// mechanisms as required. "Out of the box" it will ship with file type
// detectors that are appropriate for the platform (integrates with GNOME,
// Windows registry, etc.).

// Groovy uses File for directories and files
// displayAllFilesInUsrBin:
new File('/usr/bin').eachFile{ file ->
  println "Inside /usr/bin is something called $file.name"
}
//----------------------------------------------------------------------------------

Getting and Setting Timestamps

//----------------------------------------------------------------------------------
file = new File("filename")
file << 'hi'
timeModified = file.lastModified()
println new Date(timeModified)
// => Sun Jan 07 11:49:02 EST 2007

MILLIS_PER_WEEK = 60 * 60 * 24 * 1000 * 7
file.setLastModified(timeModified - MILLIS_PER_WEEK)
println new Date(file.lastModified())
// => Sun Dec 31 11:49:02 EST 2006

// Java currently doesn't provide access to other timestamps but
// there are things that can be done:
// (1) You can use JNI to call to C, e.g. stat()
// (2) Use exec() and call another program, e.g. dir, ls, ... to get the value you are after
// (3) Here is a Windows specific patch to get lastAccessedTime and creationTime
//     http://forum.java.sun.com/thread.jspa?forumID=31&start=0&threadID=409921&range=100#1800193
// (4) There is an informal patch for Java 5/6 which gives lastAccessedTime on Windows and Linux
//     and creationTime on windows:
//     http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6314708
// (5) JSR 203 (currently targetted for Java 7) aims to provide
//     "bulk access to file attributes, change notification, escape to filesystem-specific APIs"
//     this is supposed to include creationTime and lastAccessedTime along with many
//     security-related file attributes

// viFileWithoutChangingModificationTimeScript:
#!/usr/bin/groovy
// uvi - vi a file without changing it's last modified time
if (args.size() != 1)
  println "usage: uvi filename"
  System.exit(1)
}
file = args[0]
origTime = new File(file).lastModified()
"vi $file".execute()
new File(file).setLastModified(origTime)
//----------------------------------------------------------------------------------

Deleting a File

//----------------------------------------------------------------------------------
println new File('/doesnotexist').exists()  // => false
println new File('/doesnotexist').delete()  // => false

new File('/createme') << 'Hi there'
println new File('/createme').exists()  // => true
println new File('/createme').delete()  // => true

names = ['file1','file2','file3']
files = names.collect{ new File(it) }
// create 2 of the files
files[0..1].each{ f -> f << f.name }

def deleteFiles(files) {
    def problemFileNames = []
    files.each{ f ->
        if (!f.delete())
            problemFileNames += f.name
    }
    def delCnt = files.size() - problemFileNames.size()
    println "Successfully deleted $delCnt of ${files.size()} file(s)"
    if (problemFileNames)
        println "Problems file(s): " + problemFileNames.join(', ')
}

deleteFiles(files)
// =>
// Successfully deleted 2 of 3 file(s)
// Problems file(s): file3

// we can also set files for deletion on exit
tempFile = new File('/xxx')
assert !tempFile.exists()
tempFile << 'junk'
assert tempFile.exists()
tempFile.deleteOnExit()
assert tempFile.exists()
// To confirm this is working, run these steps multiple times in a row.

// Discussion:
// Be careful with deleteOnExit() as there is no way to cancel it.
// There are also mechanisms specifically for creating unqiuely named temp files.
// On completion of JSR 203, there will be additional methods available for
// deleting which throw exceptions with detailed error messages rather than
// just return booleans.
//----------------------------------------------------------------------------------

Copying or Moving a File

//----------------------------------------------------------------------------------
// (1) Copy examples

//shared setup
dummyContent = 'some content' + System.getProperty('line.separator')
setUpFromFile()
setUpToFile()

// built-in copy via memory (text files only)
to << from.text
checkSuccessfulCopyAndDelete()

// built-in as a stream (text or binary) with optional encoding
to << from.asWritable('US-ASCII')
checkSuccessfulCopyAndDelete()

// built-in using AntBuilder
// for options, see: http://ant.apache.org/manual/CoreTasks/copy.html
new AntBuilder().copy( file: from.canonicalPath, tofile: to.canonicalPath )
checkSuccessfulCopyAndDelete()
// =>
//     [copy] Copying 1 file to D:\


// use Apache Jakarta Commons IO (jakarta.apache.org)
import org.apache.commons.io.FileUtils
// Copies a file to a new location preserving the lastModified date.
FileUtils.copyFile(from, to)
checkSuccessfulCopyAndDelete()

// using execute()
// "cp $from.canonicalPath $to.canonicalPath".execute()      // unix
println "cmd /c \"copy $from.canonicalPath $to.canonicalPath\"".execute().text    // dos vms
checkSuccessfulCopyAndDelete()
// =>
//        1 file(s) copied.

// (2) Move examples
// You can just do copy followed by delete but many OS's can just 'rename' in place
// so you can additionally do using Java's functionality:
assert from.renameTo(to)
assert !from.exists()
checkSuccessfulCopyAndDelete()
// whether renameTo succeeds if from and to are on different platforms
// or if to pre-exists is OS dependent, so you should check the return boolean

// alternatively, Ant has a move task:
// http://ant.apache.org/manual/CoreTasks/move.html

//helper methods
def checkSuccessfulCopyAndDelete() {
    assert to.text == dummyContent
    assert to.delete()
    assert !to.exists()
}
def setUpFromFile() {
    from = new File('/from.txt') // just a name
    from << dummyContent         // now its a real file with content
    from.deleteOnExit()          // that will be deleted on exit
}
def setUpToFile() {
    to = new File('C:/to.txt')     // target name
    to.delete() // ensure not left from previous aborted run
    assert !to.exists()          // double check
}
//----------------------------------------------------------------------------------

Recognizing Two Names for the Same File

//----------------------------------------------------------------------------------
// Groovy (because of its Java heritage) doesn't have an exact
// equivalent of stat - as per 9.2 there are numerous mechanisms
// to achieve the equivalent, in particular, JSR203 (still in draft)
// has specific SymLink support including a FileId class in the
// java.nio.filesystems package. This will allow (depending on the
// operating system capabilities) files to be uniquely identified.
// If you work on Unix or Linux then you'll recognize this as it device/inode.

// If you are not interested in the above workarounds/future features
// and you are on a unix system, you can compare the absolutePath and
// canonicalPath attributes for a file. If they are different it is
// a symbolic link. On other operating systems, this difference is not
// to be relied upon and even on *nix systems, this will only get you
// so far and will also be relatively expensive resource and timewise.

// process only unique files
seen = []
def myProcessing(file) {
    def path = file.canonicalPath
    if (!seen.contains(path)) {
        seen << path
        // do something with file because we haven't seen it before
    }
}

// find linked files
seen = [:]
filenames = ['/dummyfile1.txt','/test.lnk','/dummyfile2.txt']
filenames.each{ filename ->
    def file = new File(filename)
    def cpath = file.canonicalPath
    if (!seen.containsKey(cpath)) {
        seen[cpath] = []
    }
    seen[cpath] += file.absolutePath
}

println 'Files with links:'
println seen.findAll{ k,v -> v.size() > 1 }
//---------------------------------------------------------------------------------

Processing All Files in a Directory

//----------------------------------------------------------------------------------
// general pattern is:
// new File('dirname').eachFile{ /* do something ... */ }

// setup (change this on your system)
basedir = 'Pleac/src'

// process all files printing out full name (. and .. auto excluded)
new File(basedir).eachFile{ f->
    if (f.isFile()) println f.canonicalPath
}
// also remove dot files such as '.svn' and '.cvs' etc.
new File(basedir).eachFileMatch(~'^[^.].*'){ f->
    if (f.isFile()) println f.canonicalPath
}
//----------------------------------------------------------------------------------

Globbing, or Getting a List of Filenames Matching a Pattern

//----------------------------------------------------------------------------------
// Globbing via Apache Jakarta ORO
import org.apache.oro.io.GlobFilenameFilter
dir = new File(basedir)
namelist = dir.list(new GlobFilenameFilter('*.c'))
filelist = dir.listFiles(new GlobFilenameFilter('*.h') as FilenameFilter)

// Built-in matching using regex's
files = []
new File(basedir).eachFileMatch(~/\.[ch]$/){ f->
    if (f.isFile()) files += f
}

// Using Ant's FileScanner (supports arbitrary nested levels using **)
// For more details about Ant FileSets, see here:
// http://ant.apache.org/manual/CoreTypes/fileset.html
scanner = new AntBuilder().fileScanner {
    fileset(dir:basedir) {
        include(name:'**/pleac*.groovy')
        include(name:'Slowcat.*y')
        exclude(name:'**/pleac??.groovy') // chaps 10 and above
        exclude(name:'**/*Test*', unless:'testMode')
    }
}
for (f in scanner) {
    println "Found file $f"
}

// find and sort directories with numeric names
candidateFiles = new File(basedir).listFiles()
allDigits = { it.name =~ /^\d+$/ }
isDir = { it.isDirectory() }
dirs = candidateFiles.findAll(isDir).findAll(allDigits)*.canonicalPath.sort()
println dirs
//----------------------------------------------------------------------------------

Processing All Files in a Directory Recursively

//----------------------------------------------------------------------------------
// find all files recursively
dir = new File(basedir)
files = []
dir.eachFileRecurse{ files += it }

// find total size
sum = files.sum{ it.size() }
println "$basedir contains $sum bytes"
// => Pleac/src contains 365676 bytes

// find biggest
biggest = files.max{ it.size() }
println "Biggest file is $biggest.name with ${biggest.size()} bytes"
// => Biggest file is pleac6.groovy with 42415 bytes

// find most recently modified
youngest = files.max{ it.lastModified() }
println "Most recently modified is $youngest.name, changed ${new Date(youngest.lastModified())}"
// => Most recently modified is pleac9.groovy, changed Tue Jan 09 07:35:39 EST 2007

// find all directories
dir.eachDir{ println 'Found: ' + it.name}

// find all directories recursively
dir.eachFileRecurse{ f -> if (f.isDirectory()) println 'Found: ' + f.canonicalPath}
//----------------------------------------------------------------------------------

Removing a Directory and Its Contents

//----------------------------------------------------------------------------------
base = new File('path_to_somewhere_to_delete')

// delete using Jakarta Apache Commons IO
FileUtils.deleteDirectory(base)

// delete using Ant, for various options see:
// http://ant.apache.org/manual/CoreTasks/delete.html
ant = new AntBuilder()
ant.delete(dir: base)
//----------------------------------------------------------------------------------

Renaming Files

//----------------------------------------------------------------------------------
names = ['Pleac/src/abc.java', 'Pleac/src/def.groovy']
names.each{ name -> new File(name).renameTo(new File(name + '.bak')) }

// The Groovy way of doing rename using an expr would be to use a closure
// for the expr:
// groovySimpleRenameScript:
#!/usr/bin/groovy
// usage rename closure_expr filenames
op = args[0]
println op
files = args[1..-1]
shell = new GroovyShell(binding)
files.each{ f ->
    newname = shell.evaluate("$op('$f')")
    new File(f).renameTo(new File(newname))
}

// this would allow processing such as:
//% rename "{n -> 'FILE_' + n.toUpperCase()}" files
// with param pleac9.groovy => FILE_PLEAC9.GROOVY
//% rename "{n -> n.replaceAll(/9/,'nine') }" files
// with param pleac9.groovy => pleacnine.groovy
// The script could also be modified to take the list of
// files from stdin if no args were present (not shown).

// The above lets you type any Groovy code, but instead you might
// decide to provide the user with some DSL-like additions, e.g.
// adding the following lines into the script:
sep = File.separator
ext = { '.' + it.tokenize('.')[-1] }
base = { new File(it).name - ext(it) }
parent = { new File(it).parent }
lastModified = { new Date(new File(it).lastModified()) }
// would then allow the following more succinct expressions:
//% rename "{ n -> parent(n) + sep + base(n).reverse() + ext(n) }" files
// with param Pleac/src/pleac9.groovy => Pleac\src\9caelp.groovy
//% rename "{ n -> base(n) + '_' + lastModified(n).year + ext(n) }" files
// with param pleac9.groovy => pleac9_07.groovy

// As a different alternative, you could hook into Ant's mapper mechanism.
// You wouldn't normally type in this from the command-line but it could
// be part of a script, here is an example (excludes the actual rename part)
ant = new AntBuilder()
ant.pathconvert(property:'result',targetos:'windows'){
    path(){ fileset(dir:'Pleac/src', includes:'pleac?.groovy') }
    compositemapper{
        globmapper(from:'*1.groovy', to:'*1.groovy.bak')
        regexpmapper(from:/^(.*C2)\.(.*)$/, to:/\1_beta.\2/, casesensitive:'no')
        chainedmapper{
            packagemapper(from:'*pleac3.groovy', to:'*3.xml')
            filtermapper(){ replacestring(from:'C:.', to:'') }
        }
        chainedmapper{
            regexpmapper(from:/^(.*)4\.(.*)$/, to:/\1_4.\2/)
            flattenmapper()
            filtermapper(){ replacestring(from:'4', to:'four') }
        }
    }
}
println ant.antProject.getProperty('result').replaceAll(';','\n')
// =>
// C:\Projects\GroovyExamples\Pleac\src\pleac1.groovy.bak
// C:\Projects\GroovyExamples\Pleac\src\pleac2_beta.groovy
// Projects.GroovyExamples.Pleac.src.3.xml
// pleac_four.groovy
//----------------------------------------------------------------------------------

Splitting a Filename into Its Component Parts

//----------------------------------------------------------------------------------
// Splitting a Filename into Its Component Parts
path = new File('Pleac/src/pleac9.groovy')
assert path.parent == 'Pleac' + File.separator + 'src'
assert path.name == 'pleac9.groovy'
ext = path.name.tokenize('.')[-1]
assert ext == 'groovy'

// No fileparse_set_fstype() equivalent in Groovy/Java. Java's File constructor
// automatically performs such a parse and does so appropriately of the operating
// system it is running on. In addition, 3rd party libraries allow platform
// specific operations ot be performed. As an example, many Ant tasks are OS
// aware, e.g. the pathconvert task (callable from an AntBuilder instance) has
// a 'targetos' parameter which can be one of 'unix', 'windows', 'netware',
// 'tandem' or 'os/2'.
//----------------------------------------------------------------------------------

Program: symirror

//----------------------------------------------------------------------------------
// Given the previous discussion regarding the lack of support for symlinks
// in Java's File class without exec'ing to the operating system or doing
// a JNI call (at least until JSR 203 arrives), I have modified this example
// to perform an actual replica forest of actual file copies rather than
// a shadow forest full of symlinks pointing back at the real files.
// Use Apache Jakarta Commons IO
srcdir = new File('Pleac/src') // path to src
destdir = new File('C:/temp') // path to dest
preserveFileStamps = true
FileUtils.copyDirectory(srcdir, destdir, preserveFileStamps)
//----------------------------------------------------------------------------------

Program: lst

//----------------------------------------------------------------------------------
#!/usr/bin/groovy
// lst - list sorted directory contents (depth first)
// Given the previous discussion around Java's more limited Date
// information available via the File class, this will be a reduced
// functionality version of ls
LONG_OPTION = 'l'
REVERSE_OPTION = 'r'
MODIFY_OPTION = 'm'
SIZE_OPTION = 's'
HELP_OPTION = 'help'

op = new joptsimple.OptionParser()
op.accepts( LONG_OPTION, 'long listing' )
op.accepts( REVERSE_OPTION, 'reverse listing' )
op.accepts( MODIFY_OPTION, 'sort based on modification time' )
op.accepts( SIZE_OPTION, 'sort based on size' )
op.accepts( HELP_OPTION, 'display this message' )

options = op.parse(args)
if (options.wasDetected( HELP_OPTION )) {
    op.printHelpOn( System.out )
} else {
    sort = {}
    params = options.nonOptionArguments()
    longFormat = options.wasDetected( LONG_OPTION )
    reversed = options.wasDetected( REVERSE_OPTION )
    if (options.wasDetected( SIZE_OPTION )) {
        sort = {a,b -> a.size()<=>b.size()}
    } else if (options.wasDetected( MODIFY_OPTION )) {
        sort = {a,b -> a.lastModified()<=>b.lastModified()}
    }
    displayFiles(params, longFormat, reversed, sort)
}

def displayFiles(params, longFormat, reversed, sort) {
    files = []
    params.each{ name -> new File(name).eachFileRecurse{ files += it } }
    files.sort(sort)
    if (reversed) files = files.reverse()
    files.each { file ->
        if (longFormat) {
            print (file.directory ? 'd' : '-' )
            print (file.canRead() ? 'r' : '-' )
            print (file.canWrite() ? 'w ' : '- ' )
            //print (file.canExecute() ? 'x' : '-' ) // Java 6
            print file.size().toString().padLeft(12) + ' '
            print new Date(file.lastModified()).toString().padRight(22)
            println '  ' + file
        } else {
            println file
        }
    }
}

// =>
// % lst -help
// Option Description
// ------ -------------------------------
// --help display this message
// -l     long listing
// -m     sort based on modification time
// -r     reverse listing
// -s     sort based on size
//
// % lst -l -m Pleac/src Pleac/lib
// ...
// drw            0 Mon Jan 08 22:33:00 EST 2007  Pleac\lib\.svn
// -rw        18988 Mon Jan 08 22:33:41 EST 2007  Pleac\src\pleac9.groovy
// -rw         2159 Mon Jan 08 23:15:41 EST 2007  Pleac\src\lst.groovy
//
// % -l -s -r Pleac/src Pleac/lib
// -rw      1034049 Sun Jan 07 19:24:41 EST 2007  Pleac\lib\ant.jar
// -r-      1034049 Sun Jan 07 19:40:27 EST 2007  Pleac\lib\.svn\text-base\ant.jar.svn-base
// -rw       421008 Thu Jun 02 15:15:34 EST 2005  Pleac\lib\ant-nodeps.jar
// -rw       294436 Sat Jan 06 21:19:58 EST 2007  Pleac\lib\geronimo-javamail_1.3.1_mail-1.0.jar
// ...
//----------------------------------------------------------------------------------