1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.framework;
24
25 import java.util.logging.Handler;
26 import java.util.logging.Level;
27 import java.util.logging.Logger;
28
29 import javax.management.AttributeNotFoundException;
30
31 import org.archive.crawler.datamodel.CandidateURI;
32 import org.archive.crawler.settings.SimpleType;
33 import org.archive.crawler.settings.Type;
34 import org.archive.crawler.util.LogUtils;
35
36 /***
37 * Base class for Scopers.
38 * Scopers test CandidateURIs against a scope.
39 * Scopers allow logging of rejected CandidateURIs.
40 * @author stack
41 * @version $Date: 2010-05-11 22:15:04 +0000 (Tue, 11 May 2010) $, $Revision: 6867 $
42 */
43 public abstract class Scoper extends Processor {
44 private static Logger LOGGER =
45 Logger.getLogger(Scoper.class.getName());
46
47 /***
48 * Protected so avaiilable to subclasses.
49 */
50 protected static final String ATTR_OVERRIDE_LOGGER_ENABLED =
51 "override-logger";
52
53 /***
54 * Constructor.
55 * @param name
56 * @param description
57 */
58 public Scoper(String name, String description) {
59 super(name, description);
60 Type t = addElementToDefinition(
61 new SimpleType(ATTR_OVERRIDE_LOGGER_ENABLED,
62 "If enabled, override default logger for this class (Default " +
63 "logger writes the console). Override " +
64 "logger will instead send all logging to a file named for this " +
65 "class in the job log directory. Set the logging level and " +
66 "other " +
67 "characteristics of the override logger such as rotation size, " +
68 "suffix pattern, etc. in heritrix.properties. This attribute " +
69 "is only checked once, on startup of a job.",
70 new Boolean(false)));
71 t.setExpertSetting(true);
72 }
73
74 protected void initialTasks() {
75 super.initialTasks();
76 if (!isOverrideLogger(null)) {
77 return;
78 }
79
80
81 LogUtils.createFileLogger(getController().getLogsDir(),
82 this.getClass().getName(),
83 Logger.getLogger(this.getClass().getName()));
84 }
85
86 @Override
87 protected void finalTasks() {
88 super.finalTasks();
89 if (isOverrideLogger(null)) {
90 Logger logger = Logger.getLogger(this.getClass().getName());
91 logger.setUseParentHandlers(true);
92 for (Handler handler: logger.getHandlers()) {
93
94
95 logger.removeHandler(handler);
96 }
97 }
98 }
99
100 /***
101 * @param context Context to use looking up attribute.
102 * @return True if we are to override default logger (default logs
103 * to console) with a logger that writes all loggings to a file
104 * named for this class.
105 */
106 protected boolean isOverrideLogger(Object context) {
107 boolean result = true;
108 try {
109 Boolean b = (Boolean)getAttribute(context,
110 ATTR_OVERRIDE_LOGGER_ENABLED);
111 if (b != null) {
112 result = b.booleanValue();
113 }
114 } catch (AttributeNotFoundException e) {
115 LOGGER.warning("Failed get of 'enabled' attribute.");
116 }
117
118 return result;
119 }
120
121 /***
122 * Schedule the given {@link CandidateURI CandidateURI} with the Frontier.
123 * @param caUri The CandidateURI to be scheduled.
124 * @return true if CandidateURI was accepted by crawl scope, false
125 * otherwise.
126 */
127 protected boolean isInScope(CandidateURI caUri) {
128 boolean result = false;
129 if (getController().getScope().accepts(caUri)) {
130 result = true;
131 if (LOGGER.isLoggable(Level.FINER)) {
132 LOGGER.finer("Accepted: " + caUri);
133 }
134 } else {
135 outOfScope(caUri);
136 }
137 return result;
138 }
139
140 /***
141 * Called when a CandidateUri is ruled out of scope.
142 * Override if you don't want logs as coming from this class.
143 * @param caUri CandidateURI that is out of scope.
144 */
145 protected void outOfScope(CandidateURI caUri) {
146 if (!LOGGER.isLoggable(Level.INFO)) {
147 return;
148 }
149 LOGGER.info(caUri.getUURI().toString());
150 }
151 }