1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.filter;
26
27 import java.io.File;
28 import java.io.FileReader;
29 import java.io.IOException;
30
31 import org.archive.crawler.deciderules.DecideRule;
32 import org.archive.crawler.deciderules.DecidingFilter;
33 import org.archive.crawler.framework.Filter;
34 import org.archive.crawler.settings.SimpleType;
35 import org.archive.util.SURT;
36 import org.archive.util.SurtPrefixSet;
37 /***
38 * A filter which tests a URI against a set of SURT
39 * prefixes, and if the URI's prefix is in the set,
40 * returns the chosen true/false accepts value.
41 *
42 * @author gojomo
43 * @deprecated As of release 1.10.0. Replaced by {@link DecidingFilter} and
44 * equivalent {@link DecideRule}.
45 */
46 public class SurtPrefixFilter extends Filter {
47
48 private static final long serialVersionUID = -6933592892325852022L;
49
50 public static final String ATTR_SURTS_SOURCE_FILE = "surts-source-file";
51 public static final String ATTR_MATCH_RETURN_VALUE = "if-match-return";
52
53 SurtPrefixSet surtPrefixes = null;
54
55 /***
56 * @param name
57 */
58 public SurtPrefixFilter(String name) {
59 super(name, "SURT prefix filter *Deprecated* Use" +
60 "DecidingFilter and equivalent DecideRule instead.");
61 addElementToDefinition(
62 new SimpleType(ATTR_MATCH_RETURN_VALUE, "What to return when " +
63 "a prefix matches.\n", new Boolean(true)));
64 addElementToDefinition(
65 new SimpleType(ATTR_SURTS_SOURCE_FILE,
66 "Source file from which to infer SURT prefixes. Any URLs " +
67 "in file will be converted to the implied SURT prefix, and " +
68 "literal SURT prefixes may be listed on lines beginning " +
69 "with a '+' character.",
70 ""));
71 }
72
73
74
75
76 protected synchronized boolean innerAccepts(Object o) {
77 if (surtPrefixes == null) {
78 readPrefixes();
79 }
80 String s = SURT.fromURI(o.toString());
81
82 if(s.startsWith("https:")) {
83 s = "http:"+s.substring(6);
84 }
85
86 return surtPrefixes.containsPrefixOf(s);
87 }
88
89 private void readPrefixes() {
90 surtPrefixes = new SurtPrefixSet();
91 String sourcePath = (String) getUncheckedAttribute(null,
92 ATTR_SURTS_SOURCE_FILE);
93 File source = new File(sourcePath);
94 if (!source.isAbsolute()) {
95 source = new File(getSettingsHandler().getOrder()
96 .getController().getDisk(), sourcePath);
97 }
98 FileReader fr = null;
99 try {
100 fr = new FileReader(source);
101 try {
102 surtPrefixes.importFromMixed(fr,true);
103 } finally {
104 fr.close();
105 }
106 } catch (IOException e) {
107 e.printStackTrace();
108 throw new RuntimeException(e);
109 }
110 }
111
112 /***
113 * Re-read prefixes after a settings update.
114 *
115 */
116 public synchronized void kickUpdate() {
117 super.kickUpdate();
118
119
120 readPrefixes();
121 }
122 }