View Javadoc

1   /* PrerequisiteAcceptDecideRule
2   *
3   * $Id: PrerequisiteAcceptDecideRule.java 4649 2006-09-25 17:16:55Z paul_jack $
4   *
5   * Created on Mar 3, 2005
6   *
7   * Copyright (C) 2005 Internet Archive.
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */
25  package org.archive.crawler.deciderules;
26  
27  import org.archive.crawler.datamodel.CandidateURI;
28  import org.archive.crawler.extractor.Link;
29  
30  
31  /***
32   * Rule which ACCEPTs all 'prerequisite' URIs (those with a 'P' in
33   * the last hopsPath position). Good in a late position to ensure
34   * other scope settings don't lock out necessary prerequisites.
35   *
36   * @author gojomo
37   */
38  public class PrerequisiteAcceptDecideRule extends AcceptDecideRule {
39  
40      private static final long serialVersionUID = 2762042167111186142L;
41  
42      public PrerequisiteAcceptDecideRule(String name) {
43          super(name);
44          setDescription("PrerequisiteAcceptDecideRule. ACCEPTs " +
45                  "all CrawlURIs discovered via a prerequisite " +
46                  "'link'.");
47      }
48  
49      public Object decisionFor(Object object) {        
50          try {
51              String hopsPath = ((CandidateURI)object).getPathFromSeed();
52              if (hopsPath != null && hopsPath.length() > 0 &&
53                      hopsPath.charAt(hopsPath.length()-1) == Link.PREREQ_HOP) {
54                  return ACCEPT;
55              }
56          } catch (ClassCastException e) {
57             // Do nothing
58          }
59          return PASS;
60      }
61  }