1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.archive.crawler.framework;
25
26 import java.util.logging.Level;
27 import java.util.logging.Logger;
28
29 import javax.management.AttributeNotFoundException;
30
31 import org.archive.crawler.datamodel.CrawlURI;
32 import org.archive.crawler.settings.ComplexType;
33 import org.archive.crawler.settings.MapType;
34 import org.archive.crawler.settings.ModuleType;
35 import org.archive.crawler.settings.SimpleType;
36
37 /***
38 * Base class for filter classes.
39 * <p>
40 * Several classes allow 'filters' to be applied to them. Filters are classes
41 * that, based on an arbitrary object passed to them, return a boolean stating
42 * if if passes the filter. Thus applying filters can affect the behavior of
43 * those classes. This class provides the basic framework for filters. All
44 * detailed implementation of filters inherit from it and it is considered to
45 * be a 'null' filter (always returns true).
46 *
47 * @author Gordon Mohr
48 *
49 * @see org.archive.crawler.framework.Processor
50 */
51 public class Filter extends ModuleType {
52
53 private static final long serialVersionUID = -356718306794776802L;
54
55 private static Logger logger =
56 Logger.getLogger("org.archive.crawler.framework.Filter");
57
58 public static final String ATTR_ENABLED = "enabled";
59
60 /***
61 * Creates a new 'null' filter.
62 * @param name the name of the filter.
63 * @param description an description of the filter suitable for showing in
64 * the user interface.
65 */
66 public Filter(String name, String description) {
67 super(name, description);
68 addElementToDefinition(
69 new SimpleType(ATTR_ENABLED,
70 "Filter is enabled.", new Boolean(true)));
71 }
72
73 /***
74 * Creates a new 'null' filter.
75 * @param name the name of the filter.
76 */
77 public Filter(String name) {
78 this(name, "Null filter - accepts everything.");
79 }
80
81 public boolean accepts(Object o) {
82 CrawlURI curi = (o instanceof CrawlURI) ? (CrawlURI) o : null;
83
84
85 try {
86 if (!((Boolean)getAttribute(ATTR_ENABLED, curi)).booleanValue()) {
87 return getFilterOffPosition(curi);
88 }
89 } catch (AttributeNotFoundException e) {
90 logger.severe(e.getMessage());
91 }
92
93 boolean accept = returnTrueIfMatches(curi) == innerAccepts(o);
94 if (accept && logger.isLoggable(Level.FINEST)) {
95
96 ComplexType p = this.getParent();
97 if (p instanceof MapType) {
98 p = p.getParent();
99 }
100 String msg = this.toString() + " belonging to " + p.toString()
101 + " accepted " + o.toString();
102 logger.finest(msg);
103 }
104 return accept;
105 }
106
107 /***
108 * If the filter is disabled, the value returned by this method is
109 * what filters return as their disabled setting.
110 * Default is that we return 'true', continue processing, but some
111 * filters -- the exclude filters for example -- will want to return
112 * false if disabled so processing can continue.
113 * @param curi CrawlURI to use as context. Passed curi can be null.
114 * @return This filters 'off' position.
115 */
116 protected boolean getFilterOffPosition(CrawlURI curi) {
117 return true;
118 }
119
120 /***
121 * Checks to see if filter functionality should be inverted for this
122 * curi.<p>
123 *
124 * All filters will by default return true if curi is accepted by the
125 * filter. If this method returns false, then the filter will return true
126 * if doesn't match.<p>
127 *
128 * Classes extending this class should override this method with
129 * appropriate code.
130 *
131 * @param curi Current CrawlURI
132 * @return true for default behaviour, false otherwise.
133 */
134 protected boolean returnTrueIfMatches(CrawlURI curi){
135 return true;
136 }
137
138 /***
139 * Classes subclassing this one should override this method to perfrom
140 * their custom determination of whether or not the object given to it.
141 *
142 * @param o The object
143 * @return True if it passes the filter.
144 */
145 protected boolean innerAccepts(Object o) {
146 return true;
147 }
148
149 public String toString() {
150 return "Filter<" + getName() + ">";
151 }
152
153 public void kickUpdate() {
154
155 }
156 }