1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.deciderules;
26
27 import org.archive.crawler.datamodel.CandidateURI;
28 import org.archive.crawler.extractor.Link;
29
30
31 /***
32 * Rule which ACCEPTs all 'prerequisite' URIs (those with a 'P' in
33 * the last hopsPath position). Good in a late position to ensure
34 * other scope settings don't lock out necessary prerequisites.
35 *
36 * @author gojomo
37 */
38 public class PrerequisiteAcceptDecideRule extends AcceptDecideRule {
39
40 private static final long serialVersionUID = 2762042167111186142L;
41
42 public PrerequisiteAcceptDecideRule(String name) {
43 super(name);
44 setDescription("PrerequisiteAcceptDecideRule. ACCEPTs " +
45 "all CrawlURIs discovered via a prerequisite " +
46 "'link'.");
47 }
48
49 public Object decisionFor(Object object) {
50 try {
51 String hopsPath = ((CandidateURI)object).getPathFromSeed();
52 if (hopsPath != null && hopsPath.length() > 0 &&
53 hopsPath.charAt(hopsPath.length()-1) == Link.PREREQ_HOP) {
54 return ACCEPT;
55 }
56 } catch (ClassCastException e) {
57
58 }
59 return PASS;
60 }
61 }