View Javadoc

1   /* ImageWaitEvaluator
2    * 
3    * $Id: ImageWaitEvaluator.java 4667 2006-09-26 20:38:48Z paul_jack $
4    * 
5    * Created on 1.4.2005
6    *
7    * Copyright (C) 2005 Internet Archive.
8    * 
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   * 
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   * 
16   * Heritrix is distributed in the hope that it will be useful, 
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   * 
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.crawler.postprocessor;
26  
27  /***
28   * A specialized ContentBasedWaitEvaluator. Comes preset with a regular 
29   * expression that matches text documents. <code>^image/.*$</code>
30   *
31   * @author Kristinn Sigurdsson
32   * 
33   * @see org.archive.crawler.postprocessor.ContentBasedWaitEvaluator
34   */
35  public class ImageWaitEvaluator extends ContentBasedWaitEvaluator {
36  
37      private static final long serialVersionUID = -2762377129860398333L;
38  
39      protected final static Long DEFAULT_INITIAL_WAIT_INTERVAL =
40          new Long(172800); // 2 days
41  
42      protected final static String DEFAULT_CONTENT_REGEXPR = "^image/.*$"; //Text
43  
44      /***
45       * Constructor
46       * 
47       * @param name The name of the module
48       */
49      public ImageWaitEvaluator(String name) {
50          super(name,"Evaluates how long to wait before fetching a URI again. " +
51                  "Only handles CrawlURIs whose content type indicates a " +
52                  "image document (^image/.*$). " +
53                  "Typically, this processor should be in the post processing " +
54                  "chain. It will pass if another wait evaluator has already " +
55                  "processed the CrawlURI.", 
56                  DEFAULT_CONTENT_REGEXPR,
57                  DEFAULT_INITIAL_WAIT_INTERVAL,
58                  DEFAULT_MAX_WAIT_INTERVAL,
59                  DEFAULT_MIN_WAIT_INTERVAL,
60                  DEFAULT_UNCHANGED_FACTOR,
61                  DEFAULT_CHANGED_FACTOR);
62      }
63  
64  
65  }