1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.archive.crawler.filter;
25
26 import java.util.logging.Level;
27 import java.util.logging.Logger;
28
29 import javax.management.AttributeNotFoundException;
30
31 import org.archive.crawler.datamodel.CrawlURI;
32 import org.archive.crawler.deciderules.DecideRule;
33 import org.archive.crawler.deciderules.DecidingFilter;
34 import org.archive.crawler.framework.Filter;
35 import org.archive.crawler.settings.SimpleType;
36 import org.archive.util.TextUtils;
37
38
39 /***
40 * Compares passed object -- a CrawlURI, UURI, or String --
41 * against a regular expression, accepting matches.
42 *
43 * @author Gordon Mohr
44 * @deprecated As of release 1.10.0. Replaced by {@link DecidingFilter} and
45 * equivalent {@link DecideRule}.
46 */
47 public class URIRegExpFilter
48 extends Filter {
49
50 private static final long serialVersionUID = 1878356276332865537L;
51
52 private static final Logger logger =
53 Logger.getLogger(URIRegExpFilter.class.getName());
54 public static final String ATTR_REGEXP = "regexp";
55 public static final String ATTR_MATCH_RETURN_VALUE = "if-match-return";
56
57 /***
58 * @param name Filter name.
59 */
60 public URIRegExpFilter(String name) {
61 this(name, "URI regexp filter *Deprecated* Use DecidingFilter and " +
62 "equivalent DecideRule instead. ", "");
63 addElementToDefinition(
64 new SimpleType(ATTR_MATCH_RETURN_VALUE, "What to return when" +
65 " regular expression matches. \n", new Boolean(true)));
66 addElementToDefinition(
67 new SimpleType(ATTR_REGEXP, "Java regular expression.", ""));
68 }
69
70 public URIRegExpFilter(String name, String regexp) {
71 this(name, "URI regexp filter.", regexp);
72 }
73
74 protected URIRegExpFilter(String name, String description, String regexp) {
75 super(name, description);
76 addElementToDefinition(new SimpleType(ATTR_MATCH_RETURN_VALUE,
77 "What to return when" + " regular expression matches. \n",
78 new Boolean(true)));
79 addElementToDefinition(new SimpleType(ATTR_REGEXP,
80 "Java regular expression.", regexp));
81 }
82
83 protected boolean innerAccepts(Object o) {
84 String regexp = getRegexp(o);
85 String str = o.toString();
86 boolean result = (regexp == null)?
87 false: TextUtils.matches(regexp, str);
88 if (logger.isLoggable(Level.FINE)) {
89 logger.fine("Tested '" + str + "' match with regex '" +
90 getRegexp(o) + " and result was " + result);
91 }
92 return result;
93 }
94
95 /***
96 * Get the regular expression string to match the URI against.
97 *
98 * @param o the object for which the regular expression should be
99 * matched against.
100 * @return the regular expression to match against.
101 */
102 protected String getRegexp(Object o) {
103 try {
104 return (String) getAttribute(o, ATTR_REGEXP);
105 } catch (AttributeNotFoundException e) {
106 logger.severe(e.getMessage());
107
108
109 return null;
110 }
111 }
112
113 protected boolean returnTrueIfMatches(CrawlURI curi) {
114 try {
115 return ((Boolean)getAttribute(ATTR_MATCH_RETURN_VALUE, curi)).
116 booleanValue();
117 } catch (AttributeNotFoundException e) {
118 logger.severe(e.getMessage());
119 return true;
120 }
121 }
122 }