1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.archive.crawler.filter;
25
26 import java.util.logging.Logger;
27
28 import javax.management.AttributeNotFoundException;
29
30 import org.apache.commons.httpclient.URIException;
31 import org.archive.crawler.datamodel.CandidateURI;
32 import org.archive.crawler.datamodel.CrawlURI;
33 import org.archive.crawler.deciderules.DecideRule;
34 import org.archive.crawler.deciderules.DecidingFilter;
35 import org.archive.crawler.framework.Filter;
36 import org.archive.crawler.settings.SimpleType;
37 import org.archive.net.UURI;
38
39 /***
40 * Accepts all urls passed in with a path depth
41 * less or equal than the max-path-depth
42 * value.
43 *
44 * @author Igor Ranitovic
45 * @deprecated As of release 1.10.0. Replaced by {@link DecidingFilter} and
46 * equivalent {@link DecideRule}.
47 */
48 public class PathDepthFilter extends Filter {
49
50 private static final long serialVersionUID = 1626115117327154205L;
51
52 private static final Logger logger =
53 Logger.getLogger(PathDepthFilter.class.getName());
54 public static final String ATTR_MATCH_RETURN_VALUE =
55 "path-less-or-equal-return";
56 public static final String ATTR_MAX_PATH_DEPTH = "max-path-depth";
57 Integer maxPathDepth = new Integer(Integer.MAX_VALUE);
58 final static char slash = '/';
59
60 /***
61 * @param name
62 */
63 public PathDepthFilter(String name) {
64 super(name, "Path depth less or equal filter *Deprecated* Use" +
65 "DecidingFilter and equivalent DecideRule instead.");
66 addElementToDefinition(new SimpleType(ATTR_MAX_PATH_DEPTH, "Max path" +
67 " depth for which this filter will match", maxPathDepth));
68 addElementToDefinition(new SimpleType(ATTR_MATCH_RETURN_VALUE,
69 "What to return when path depth is less or equal to max path" +
70 " depth. \n", new Boolean(true)));
71 }
72
73 protected boolean innerAccepts(Object o) {
74 String path = null;
75 if (o == null) {
76 return false;
77 }
78
79 if (o instanceof CandidateURI) {
80 try {
81 if (((CandidateURI)o).getUURI() != null) {
82 path = ((CandidateURI)o).getUURI().getPath();
83 }
84 }
85 catch (URIException e) {
86 logger.severe("Failed getpath for " +
87 ((CandidateURI)o).getUURI());
88 }
89 } else if (o instanceof UURI) {
90 try {
91 path = ((UURI)o).getPath();
92 }
93 catch (URIException e) {
94 logger.severe("Failed getpath for " + o);
95 }
96 }
97
98 if (path == null) {
99 return true;
100 }
101
102 int count = 0;
103 for (int i = path.indexOf(slash); i != -1;
104 i = path.indexOf(slash, i + 1)) {
105 count++;
106 }
107
108 if (o instanceof CrawlURI) {
109 try {
110 this.maxPathDepth = (Integer) getAttribute(
111 ATTR_MAX_PATH_DEPTH, (CrawlURI) o);
112 } catch (AttributeNotFoundException e) {
113 logger.severe(e.getMessage());
114 }
115 }
116
117 return (this.maxPathDepth != null) ?
118 count <= this.maxPathDepth.intValue():
119 false;
120 }
121
122 protected boolean returnTrueIfMatches(CrawlURI curi) {
123 try {
124 return ((Boolean) getAttribute(ATTR_MATCH_RETURN_VALUE, curi)).
125 booleanValue();
126 } catch (AttributeNotFoundException e) {
127 logger.severe(e.getMessage());
128 return true;
129 }
130 }
131
132 protected boolean getFilterOffPosition(CrawlURI curi) {
133 return returnTrueIfMatches(curi);
134 }
135 }