View Javadoc

1   /* FileUtils
2    *
3    * $Id: FileUtils.java 5863 2008-07-10 21:38:48Z gojomo $
4    *
5    * Created on Feb 2, 2004
6    *
7    * Copyright (C) 2004 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.util;
26  
27  import java.io.BufferedReader;
28  import java.io.File;
29  import java.io.FileFilter;
30  import java.io.FileInputStream;
31  import java.io.FileNotFoundException;
32  import java.io.FileOutputStream;
33  import java.io.FilenameFilter;
34  import java.io.IOException;
35  import java.io.InputStreamReader;
36  import java.nio.channels.FileChannel;
37  import java.util.Arrays;
38  import java.util.HashSet;
39  import java.util.Iterator;
40  import java.util.LinkedList;
41  import java.util.List;
42  import java.util.Set;
43  import java.util.logging.Level;
44  import java.util.logging.Logger;
45  import java.util.regex.Pattern;
46  
47  
/**
 * Utility methods for manipulating files and directories.
 *
 * <p>All methods are static; the class cannot be instantiated.
 *
 * @author John Erik Halse
 */
public class FileUtils {
    private static final Logger LOGGER =
        Logger.getLogger(FileUtils.class.getName());

    /**
     * Directory named by the <code>java.io.tmpdir</code> system property,
     * or <code>/tmp</code> if that property is not set.
     */
    public static final File TMPDIR =
        new File(System.getProperty("java.io.tmpdir", "/tmp"));

    /** Overwrite policy of the copyFile variants that take no such flag. */
    private static final boolean DEFAULT_OVERWRITE = true;

    /** IOException message that triggers the stream-based fallback copy. */
    private static final String INVALID_ARGUMENT = "Invalid argument";

    /** Buffer size, in bytes, used by the stream-based fallback copy. */
    private static final int COPY_BUFFER_SIZE = 4096;

    /** Number of pending deletes above which we force gc and complain. */
    private static final int PENDING_DELETES_ALARM = 50;

    /** Files whose deletion failed and is to be retried on later calls. */
    static LinkedList<File> pendingDeletes = new LinkedList<File>();

    /**
     * Constructor made private because all methods of this class are static.
     */
    private FileUtils() {
        super();
    }

    /**
     * Copy the named children of one directory into another.
     *
     * @param srcDir Directory the names are resolved against.
     * @param srcFile Set of file names (Strings) to copy.
     * @param dest Directory to copy into.
     * @return Number of names processed.
     * @throws IOException If any individual copy fails.
     */
    public static int copyFiles(final File srcDir, Set srcFile,
            final File dest)
    throws IOException {
        int count = 0;
        for (Iterator i = srcFile.iterator(); i.hasNext();) {
            String name = (String) i.next();
            File src = new File(srcDir, name);
            File tgt = new File(dest, name);
            logExistence("Before ", src, tgt);
            copyFiles(src, tgt);
            logExistence("After ", src, tgt);
            count++;
        }
        return count;
    }

    /** Log, at FINE, whether the source and target of a copy exist. */
    private static void logExistence(final String label, final File src,
            final File tgt) {
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine(label + src.getAbsolutePath() + " " +
                src.exists() + ", " + tgt.getAbsolutePath() + " " +
                tgt.exists());
        }
    }

    /**
     * Recursively copy all files from one directory to another.
     *
     * @param src file or directory to copy from.
     * @param dest file or directory to copy to.
     * @throws IOException If a copy fails.
     */
    public static void copyFiles(File src, File dest)
    throws IOException {
        copyFiles(src, null, dest, false, true, null);
    }

    /**
     * List the names in a directory in natural sort order.
     *
     * @param src Directory of files to fetch.
     * @param filter Filter to apply to filenames. May be null.
     * @return Sorted names, or null if <code>src</code> does not exist, is
     * not a directory, or cannot be listed.
     */
    public static String [] getSortedDirContent(final File src,
            final FilenameFilter filter) {
        if (!src.exists()) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine(src.getAbsolutePath() + " does not exist");
            }
            return null;
        }
        if (!src.isDirectory()) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine(src.getAbsolutePath() + " is not directory.");
            }
            return null;
        }
        String [] names = (filter == null)? src.list(): src.list(filter);
        if (names != null) {
            Arrays.sort(names);
        }
        return names;
    }

    /**
     * Recursively copy all files from one directory to another.
     *
     * @param src File or directory to copy from.
     * @param filter Filename filter to apply to src. May be null if no
     * filtering wanted.
     * @param dest File or directory to copy to.
     * @param inSortedOrder Copy in order of natural sort.
     * @param overwrite If target file already exists, and this parameter is
     * true, overwrite target file (We do this by first deleting the target
     * file before we begin the copy).
     * @throws IOException If a copy fails.
     */
    public static void copyFiles(final File src, final FilenameFilter filter,
        final File dest, final boolean inSortedOrder, final boolean overwrite)
    throws IOException {
        copyFiles(src, filter, dest, inSortedOrder, overwrite, null);
    }

    /**
     * Recursively copy all files from one directory to another.
     *
     * <p>A <code>src</code> that does not exist is silently ignored.
     *
     * @param src File or directory to copy from.
     * @param filter Filename filter to apply to src. May be null if no
     * filtering wanted.
     * @param dest File or directory to copy to.
     * @param inSortedOrder Copy in order of natural sort.
     * @param overwrite If target file already exists, and this parameter is
     * true, overwrite target file (We do this by first deleting the target
     * file before we begin the copy).
     * @param exceptions if non-null, add any individual-file IOExceptions
     * to this List rather than throwing, and proceed with the deep copy
     * @throws IOException If a step fails and <code>exceptions</code> is
     * null.
     */
    public static void copyFiles(final File src, final FilenameFilter filter,
        final File dest, final boolean inSortedOrder, final boolean overwrite,
        List<IOException> exceptions)
    throws IOException {
        if (!src.exists()) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine(src.getAbsolutePath() + " does not exist");
            }
            return;
        }

        if (!src.isDirectory()) {
            try {
                copyFile(src, dest, overwrite);
            } catch (IOException ioe) {
                report(ioe, exceptions);
            }
            return;
        }

        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine(src.getAbsolutePath() + " is a directory.");
        }
        // Create destination directory. The trailing isDirectory() check
        // tolerates somebody else creating it concurrently.
        if (!dest.exists() && !dest.mkdirs() && !dest.isDirectory()) {
            report(new IOException("Unable to create directory " +
                dest.getAbsolutePath()), exceptions);
            return;
        }
        // File.list() returns null on I/O error or if src stopped being a
        // directory since the check above.
        String [] names = (filter == null)? src.list(): src.list(filter);
        if (names == null) {
            report(new IOException("Unable to list directory " +
                src.getAbsolutePath()), exceptions);
            return;
        }
        if (inSortedOrder) {
            Arrays.sort(names);
        }
        for (int i = 0; i < names.length; i++) {
            copyFiles(new File(src, names[i]), filter,
                new File(dest, names[i]), inSortedOrder, overwrite,
                exceptions);
        }
    }

    /** Add the exception to the collector if there is one, else throw it. */
    private static void report(final IOException ioe,
            final List<IOException> exceptions)
    throws IOException {
        if (exceptions == null) {
            throw ioe;
        }
        exceptions.add(ioe);
    }

    /**
     * Copy the src file to the destination, overwriting any existing file.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @return True if the copy was performed and completed.
     * @throws FileNotFoundException If a file cannot be opened.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest)
    throws FileNotFoundException, IOException {
        return copyFile(src, dest, -1, DEFAULT_OVERWRITE);
    }

    /**
     * Copy the src file to the destination.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @param overwrite If target file already exists, and this parameter is
     * true, overwrite target file (We do this by first deleting the target
     * file before we begin the copy).
     * @return True if the copy was performed and completed; false if the
     * target existed and <code>overwrite</code> was false.
     * @throws FileNotFoundException If a file cannot be opened.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest,
        final boolean overwrite)
    throws FileNotFoundException, IOException {
        return copyFile(src, dest, -1, overwrite);
    }

    /**
     * Copy up to extent bytes of the source file to the destination,
     * overwriting any existing file.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @param extent Maximum number of bytes to copy; negative means all.
     * @return True if the copy was performed and completed.
     * @throws FileNotFoundException If a file cannot be opened.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest,
        long extent)
    throws FileNotFoundException, IOException {
        return copyFile(src, dest, extent, DEFAULT_OVERWRITE);
    }

    /**
     * Copy up to extent bytes of the source file to the destination.
     *
     * <p>If the channel transfer fails with an "Invalid argument"
     * IOException, the whole file is copied with plain streams instead
     * (the extent is not applied on that path).
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @param extent Maximum number of bytes to copy; negative means all.
     * @param overwrite If target file already exists, and this parameter is
     * true, overwrite target file (We do this by first deleting the target
     * file before we begin the copy).
     * @return True if the copy was performed and every requested byte
     * (bounded by the size of the source) was transferred; false if the
     * target existed and <code>overwrite</code> was false, or if the
     * transfer stopped short.
     * @throws FileNotFoundException If a file cannot be opened.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest,
        long extent, final boolean overwrite)
    throws FileNotFoundException, IOException {
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Copying file " + src + " to " + dest + " extent " +
                extent + " exists " + dest.exists());
        }
        if (dest.exists()) {
            if (!overwrite) {
                // Already in place and we're not to overwrite.  Return.
                return false;
            }
            if (dest.delete()) {
                LOGGER.finer(dest.getAbsolutePath() + " removed before copy.");
            } else {
                // Opening the output stream below truncates it anyway.
                LOGGER.fine(dest.getAbsolutePath() +
                    " could not be removed before copy.");
            }
        }

        boolean result = false;
        FileInputStream fis = null;
        FileOutputStream fos = null;
        FileChannel fcin = null;
        FileChannel fcout = null;
        try {
            fis = new FileInputStream(src);
            fos = new FileOutputStream(dest);
            fcin = fis.getChannel();
            fcout = fos.getChannel();
            final long size = fcin.size();
            if (extent < 0) {
                extent = size;
            }

            // transferTo may move fewer bytes than asked for, so keep going
            // until everything is across or no further progress is made.
            final long toCopy = Math.min(extent, size);
            long copied = 0;
            while (copied < toCopy) {
                long n = fcin.transferTo(copied, toCopy - copied, fcout);
                if (n <= 0) {
                    break;
                }
                copied += n;
            }
            result = (copied == toCopy);
        } catch (IOException e) {
            // Add more info to the exception. Preserve old stacktrace.
            // We get 'Invalid argument' on some file copies. See
            // http://intellij.net/forums/thread.jsp?forum=13&thread=63027&message=853123
            // for related issue.
            String message = "Copying " + src.getAbsolutePath() + " to " +
                dest.getAbsolutePath() + " with extent " + extent +
                " got IOE: " + e.getMessage();
            // Constant first: the exception message may be null.
            if (INVALID_ARGUMENT.equals(e.getMessage())) {
                LOGGER.severe("Failed copy, trying workaround: " + message);
                // Let go of our handles before the files are reopened.
                try {
                    closeAll(fcin, fcout, fis, fos);
                } catch (IOException ignored) {
                    // The failed attempt is being abandoned regardless.
                }
                workaroundCopyFile(src, dest);
                result = true;
            } else {
                LOGGER.log(Level.SEVERE,message,e);
                // rethrow
                throw e;
            }
        } finally {
            // finish up; closing an already closed resource is a no-op
            closeAll(fcin, fcout, fis, fos);
        }
        return result;
    }

    /**
     * Close every non-null resource, even if an earlier close fails.
     *
     * @throws IOException The first failure encountered, if any.
     */
    private static void closeAll(final java.io.Closeable... resources)
    throws IOException {
        IOException first = null;
        for (java.io.Closeable resource : resources) {
            if (resource == null) {
                continue;
            }
            try {
                resource.close();
            } catch (IOException e) {
                if (first == null) {
                    first = e;
                }
            }
        }
        if (first != null) {
            throw first;
        }
    }

    /**
     * Copy a whole file using plain streams rather than channels.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @throws IOException If opening, reading or writing fails. Failures
     * while closing are logged, not thrown.
     */
    protected static void workaroundCopyFile(final File src,
            final File dest)
    throws IOException {
        FileInputStream from = null;
        FileOutputStream to = null;
        try {
            from = new FileInputStream(src);
            to = new FileOutputStream(dest);
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            int bytesRead;
            while ((bytesRead = from.read(buffer)) != -1) {
                to.write(buffer, 0, bytesRead);
            }
        } finally {
            if (from != null) {
                try {
                    from.close();
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "Failed closing " + src, e);
                }
            }
            if (to != null) {
                try {
                    to.close();
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "Failed closing " + dest, e);
                }
            }
        }
    }

    /**
     * Deletes dir along with all files and subdirectories under it.
     *
     * @param dir File or directory to delete.
     * @return true if all deletions were successful. If a deletion fails, the
     *          method stops attempting to delete and returns false.
     */
    public static boolean deleteDir(File dir) {
        if (dir.isDirectory()) {
            // A null listing means the directory could not be read; the
            // delete below then reports the failure.
            String[] children = dir.list();
            if (children != null) {
                for (int i = 0; i < children.length; i++) {
                    if (!deleteDir(new File(dir, children[i]))) {
                        return false;
                    }
                }
            }
        }
        // The directory is now empty so delete it
        return dir.delete();
    }

    /**
     * Utility method to read an entire file as a String.
     *
     * <p>The file is decoded with the platform default charset. Every line
     * of the result, including the last, is terminated with a single
     * <code>\n</code> whatever the terminators in the file were.
     *
     * @param file File to read.
     * @return File as String.
     * @throws IOException If the file cannot be opened or read.
     */
    public static String readFileAsString(File file) throws IOException {
        long length = file.length();
        // Only presize when the length survives the cast to int.
        StringBuilder sb = (length > 0 && length <= Integer.MAX_VALUE)?
            new StringBuilder((int) length): new StringBuilder();
        BufferedReader br = new BufferedReader(new InputStreamReader(
            new FileInputStream(file)));
        try {
            for (String line = br.readLine(); line != null;
                    line = br.readLine()) {
                sb.append(line);
                sb.append("\n");
            }
        } finally {
            br.close();
        }
        return sb.toString();
    }

    /**
     * Get a list of all files in directory that have passed prefix.
     *
     * @param dir Dir to look in.
     * @param prefix Basename of files to look for. Compare is case insensitive.
     *
     * @return List of files in dir that start w/ passed basename.
     */
    public static File [] getFilesWithPrefix(File dir, final String prefix) {
        return dir.listFiles(new FileFilter() {
            public boolean accept(File pathname) {
                String name = pathname.getName().toLowerCase();
                return name.startsWith(prefix.toLowerCase());
            }
        });
    }

    /**
     * Get a {@link java.io.FileFilter} that filters files based on a regular
     * expression.
     *
     * @param regexp the regular expression the whole file name must match.
     * @return the newly created filter.
     */
    public static FileFilter getRegexpFileFilter(String regexp) {
        // Compile once, up front, so a bad expression fails here.
        final Pattern pattern = Pattern.compile(regexp);
        return new FileFilter() {
            public boolean accept(File pathname) {
                return pattern.matcher(pathname.getName()).matches();
            }
        };
    }

    /**
     * Use for case where files are being added to src.  Will break off copy
     * when tgt is same as src.
     *
     * @param src Source directory to copy from.
     * @param filter Filter to apply to files to copy.
     * @param tgt Target to copy to.
     * @throws IOException If src cannot be listed or a copy fails.
     */
    public static void syncDirectories(final File src,
            final FilenameFilter filter, final File tgt)
    throws IOException {
        while (true) {
            String [] srcNames = src.list(filter);
            if (srcNames == null) {
                throw new IOException(src.getAbsolutePath() +
                    " cannot be listed.");
            }
            Set<String> missing =
                new HashSet<String>(Arrays.asList(srcNames));
            // A target that cannot be listed has nothing in it yet.
            String [] tgtNames = tgt.list(filter);
            if (tgtNames != null) {
                missing.removeAll(Arrays.asList(tgtNames));
            }
            if (missing.isEmpty()) {
                return;
            }
            int count = FileUtils.copyFiles(src, missing, tgt);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("Copied " + count);
            }
        }
    }

    /**
     * Test file exists and is readable.
     *
     * @param f File to test.
     * @return The passed file.
     * @exception IOException If file does not exist or is not readable.
     */
    public static File isReadable(final File f) throws IOException {
        if (!f.exists()) {
            throw new FileNotFoundException(f.getAbsolutePath() +
                " does not exist.");
        }

        if (!f.canRead()) {
            throw new FileNotFoundException(f.getAbsolutePath() +
                " is not readable.");
        }

        return f;
    }

    /**
     * @param f File to test.
     * @param uncompressedExtension Suffix the lowercased file name must
     * end with.
     * @param magic String the file must start with. Compare is case
     * insensitive.
     * @return True if file is readable, has uncompressed extension,
     * and magic string at file start.
     * @exception IOException If file does not exist or is not readable.
     */
    public static boolean isReadableWithExtensionAndMagic(final File f,
            final String uncompressedExtension, final String magic)
    throws IOException {
        FileUtils.isReadable(f);
        if (!f.getName().toLowerCase().endsWith(uncompressedExtension)) {
            return false;
        }
        final int wanted = magic.length();
        byte [] b = new byte[wanted];
        int read = 0;
        FileInputStream fis = new FileInputStream(f);
        try {
            // A single read may come back short; keep going until the
            // buffer is full or the file ends.
            while (read < wanted) {
                int n = fis.read(b, read, wanted - read);
                if (n < 0) {
                    break;
                }
                read += n;
            }
        } finally {
            fis.close();
        }
        if (read != wanted) {
            return false;
        }
        StringBuilder beginStr = new StringBuilder(wanted);
        for (int i = 0; i < wanted; i++) {
            beginStr.append((char)b[i]);
        }
        return beginStr.toString().equalsIgnoreCase(magic);
    }

    /**
     * Turn path into a File, relative to context (which may be ignored
     * if path is absolute).
     *
     * @param context File context if path is relative
     * @param path String path to make into a File
     * @return File created
     */
    public static File maybeRelative(File context, String path) {
        File f = new File(path);
        return f.isAbsolute()? f: new File(context, path);
    }

    /**
     * Delete the file now -- but in the event of failure, keep trying
     * in the future.
     *
     * VERY IMPORTANT: Do not use with any file whose name/path may be
     * reused, because the lagged delete could then wind up deleting the
     * newer file. Essentially, only to be used with uniquely-named temp
     * files.
     *
     * Necessary because some platforms (looking at you,
     * JVM-on-Windows) will have deletes fail because of things like
     * file-mapped buffers remaining, and there's no explicit way to
     * unmap a buffer. (See 6-year-old Sun-stumping Java bug
     * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4724038 )
     * We just have to wait and retry.
     *
     * (Why not just File.deleteOnExit? There could be an arbitrary,
     * unbounded number of files in such a situation, that are only
     * deletable a few seconds or minutes after our first attempt.
     * Waiting for JVM exit could mean disk exhaustion. It's also
     * unclear if the native FS class implementations of deleteOnExit
     * use RAM per pending file.)
     *
     * @param fileToDelete File to delete now, or on a later call.
     */
    public static synchronized void deleteSoonerOrLater(File fileToDelete) {
        pendingDeletes.add(fileToDelete);
        // if things are getting out of hand, force gc/finalization
        if (pendingDeletes.size() > PENDING_DELETES_ALARM) {
            LOGGER.warning(">50 pending Files to delete; forcing gc/finalization");
            System.gc();
            System.runFinalization();
        }
        // try all pendingDeletes, this call's file included
        Iterator<File> iter = pendingDeletes.listIterator();
        while (iter.hasNext()) {
            if (iter.next().delete()) {
                iter.remove();
            }
        }
        // if things are still out of hand, complain loudly
        if (pendingDeletes.size() > PENDING_DELETES_ALARM) {
            LOGGER.severe(">50 pending Files to delete even after gc/finalization");
        }
    }
}