1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.deciderules.recrawl;
26
27 import org.archive.crawler.datamodel.CoreAttributeConstants;
28 import org.archive.crawler.datamodel.CrawlURI;
29 import org.archive.crawler.deciderules.PredicatedDecideRule;
30 import org.archive.crawler.settings.SimpleType;
31 import org.archive.crawler.settings.Type;
32
33 import st.ata.util.AList;
34
35 /***
36 * Rule applies configured decision to any CrawlURIs whose prior-history
37 * content-digest matches the latest fetch.
38 *
39 * @author gojomo
40 */
41 public class IdenticalDigestDecideRule extends PredicatedDecideRule
42 implements CoreAttributeConstants {
43 private static final long serialVersionUID = 4275993790856626949L;
44
45 /***
46 * Usual constructor.
47 * @param name
48 */
49 public IdenticalDigestDecideRule(String name) {
50 super(name);
51 setDescription("IdenticalDigestDecideRule. Applies configured " +
52 "decision to any CrawlURIs whose prior-history " +
53 "content-digest matches the latest fetch.");
54
55 Type type = addElementToDefinition(new SimpleType(ATTR_DECISION,
56 "Decision to be applied", REJECT, ALLOWED_TYPES));
57 }
58
59 /***
60 * Evaluate whether given CrawlURI's content-digest exactly
61 * matches that of preceding fetch.
62 *
63 * @param object should be CrawlURI
64 * @return true if current-fetch content-digest matches previous
65 */
66 protected boolean evaluate(Object object) {
67 CrawlURI curi = (CrawlURI)object;
68 return hasIdenticalDigest(curi);
69 }
70
71 /***
72 * Utility method for testing if a CrawlURI's last two history
73 * entiries (one being the most recent fetch) have identical
74 * content-digest information.
75 *
76 * @param curi CrawlURI to test
77 * @return true if last two history entries have identical digests,
78 * otherwise false
79 */
80 public static boolean hasIdenticalDigest(CrawlURI curi) {
81 if(curi.getAList().containsKey(A_FETCH_HISTORY)) {
82 AList[] history = curi.getAList().getAListArray(A_FETCH_HISTORY);
83 return history[0] != null
84 && history[0].containsKey(CoreAttributeConstants.A_CONTENT_DIGEST)
85 && history[1] != null
86 && history[1].containsKey(CoreAttributeConstants.A_CONTENT_DIGEST)
87 && history[0].getString(CoreAttributeConstants.A_CONTENT_DIGEST).equals(
88 history[1].getString(CoreAttributeConstants.A_CONTENT_DIGEST));
89 } else {
90 return false;
91 }
92 }
93 }