1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.settings;
26
27 import java.util.ArrayList;
28 import java.util.Date;
29 import java.util.HashMap;
30 import java.util.Iterator;
31 import java.util.List;
32 import java.util.ListIterator;
33 import java.util.Map;
34
35 import org.archive.crawler.settings.refinements.Refinement;
36 import org.archive.net.UURI;
37
38 /***
39 * Class representing a settings file.
40 *
41 * More precisely it represents a collection of settings valid in a particular
42 * scope. The scope is either the global settings, or the settings to be used
43 * for a particular domain or host. For scopes other than global, the instance
44 * will only contain those settings that are different from the global.
45 *
46 * In the default implementation this is a one to one mapping from a file to
47 * an instance of this class, but in other implementations the information in
48 * an instance of this class might be stored in a different way (for example
49 * in a RDBMS).
50 *
51 * @author John Erik Halse
52 */
53 public class CrawlerSettings {
54 /*** Registry of DataContainers for ComplexTypes in this settings object
55 * indexed on absolute name */
56 private final Map<String,DataContainer> localComplexTypes
57 = new HashMap<String,DataContainer>();
58
59 /*** Registry of top level ModuleTypes in this settings object indexed on
60 * module name. These are modules that doesn't have parents in this
61 * settings object
62 */
63 private final Map<String,ModuleType> topLevelModules
64 = new HashMap<String,ModuleType>();
65
66 /*** Registry of all ModuleTypes in this settings object indexed on
67 * module name.
68 */
69 private final Map<String,ComplexType> localModules
70 = new HashMap<String,ComplexType>();
71
72 /*** Reference to the settings handler this settings object belongs to */
73 private final SettingsHandler settingsHandler;
74
75 /*** Scope for this collection of settings (hostname) */
76 private final String scope;
77
78 /*** List of refinements applied to this settings object */
79 private List<Refinement> refinements;
80
81 /*** True if this settings object is a refinement */
82 private boolean isRefinement = false;
83
84 /*** Name of this collection of settings */
85 private String name = "";
86
87 /*** Description of this collection of settings */
88 private String description = "";
89
90 /***
91 * Operator of this crawl job.
92 */
93 private String operator = "Admin";
94
95 /***
96 * Organization running this crawl job.
97 */
98 private String organization = "";
99
100 /***
101 * Audience/recipient/customer on whose behalf this crawl is being run.
102 */
103 private String audience = "";
104
105
106 /*** Time when this collection was last saved to persistent storage */
107 private Date lastSaved = null;
108
109 /***
110 * Constructs a new CrawlerSettings object.
111 *
112 * Application code should not call the constructor directly, but use the
113 * methods in SettingsHandler instead.
114 *
115 * @param handler The SettingsHandler this object belongs to.
116 * @param scope The scope of this settings object (ie. host or domain).
117 *
118 * @see SettingsHandler#getSettings(String)
119 * @see SettingsHandler#getSettingsObject(String)
120 */
121 public CrawlerSettings(SettingsHandler handler, String scope) {
122 this.settingsHandler = handler;
123 this.scope = scope;
124 }
125
126 /***
127 * Constructs a new CrawlerSettings object which is a refinement of another
128 * settings object.
129 *
130 * Application code should not call the constructor directly, but use the
131 * methods in SettingsHandler instead.
132 *
133 * @param handler The SettingsHandler this object belongs to.
134 * @param scope The scope of this settings object (ie. host or domain).
135 * @param refinement the name or reference to the refinement.
136 *
137 * @see SettingsHandler#getSettings(String)
138 * @see SettingsHandler#getSettingsObject(String)
139 */
140 public CrawlerSettings(SettingsHandler handler, String scope,
141 String refinement) {
142 this(handler, scope);
143 if (refinement != null && !refinement.equals("")) {
144 this.isRefinement = true;
145 this.name = refinement;
146 }
147 }
148
149 /*** Get the description of this CrawlerSettings object.
150 *
151 * @return the description of this CrawlerSettings object.
152 */
153 public String getDescription() {
154 return description;
155 }
156
157 /*** Get the name of this CrawlerSettings object.
158 *
159 * @return the name of this CrawlerSettings object.
160 */
161 public String getName() {
162 return name;
163 }
164
165 /***
166 * Get the name of operator of this crawl from this CrawlerSettings object.
167 *
168 * @return the name of this CrawlerSettings object.
169 */
170 public String getOperator() {
171 return operator;
172 }
173
174 /***
175 * Get the name of the organization running this crawl from this
176 * CrawlerSettings object.
177 *
178 * @return the name of the organization running this crawl.
179 */
180 public String getOrganization() {
181 return organization;
182 }
183
184 /***
185 * Get the audience/customer/recipient of the crawl job product from
186 * this CrawlerSettings object.
187 *
188 * @return the audience/customer/recipient of the crawl job product.
189 */
190 public String getAudience() {
191 return audience;
192 }
193
194 /*** Get the scope of this CrawlerSettings object.
195 *
196 * @return the scope of this CrawlerSettings object.
197 */
198 public String getScope() {
199 return scope;
200 }
201
202 /*** Set the description of this CrawlerSettings object.
203 *
204 * @param string the description to be set for this CrawlerSettings object.
205 */
206 public void setDescription(String string) {
207 description = string;
208 }
209
210 /***
211 * Set the operator of this crawl job.
212 * @param name Operator running this crawl.
213 */
214 public void setOperator(String name) {
215 this.operator = name;
216 }
217
218 /***
219 * Set the name of the organization who is running this crawl.
220 * @param name Name of organization running this crawl.
221 */
222 public void setOrganization(String name) {
223 this.organization = name;
224 }
225
226 /***
227 * Set the recipient/customer for the crawl job product.
228 * @param name Recipient of crawl job product.
229 */
230 public void setAudience(String name) {
231 this.audience = name;
232 }
233
234 /*** Set the name of this CrawlerSettings object.
235 *
236 * @param string the name to be set for this CrawlerSettings object.
237 */
238 public void setName(String string) {
239 name = string;
240 }
241
242 /***
243 * Get the time when this CrawlerSettings was last saved to persistent
244 * storage.
245 *
246 * @return the time when this CrawlerSettings was last saved to persistent
247 * storage. Null if it has not been saved.
248 */
249 public Date getLastSavedTime() {
250 return lastSaved;
251 }
252
253 /***
254 * Set the time when this CrawlerSettings was last saved to persistent
255 * storage.
256 *
257 * @param lastSaved the time when this CrawlerSettings was last saved to
258 * persistent storage.
259 */
260 protected void setLastSavedTime(Date lastSaved) {
261 this.lastSaved = lastSaved;
262 }
263
264 protected void addTopLevelModule(ModuleType module) {
265
266
267
268
269 topLevelModules.put(module.getName(), module);
270
271 }
272
273 protected DataContainer addComplexType(ComplexType type) {
274 DataContainer data = new DataContainer(this, type);
275 localComplexTypes.put(type.getAbsoluteName(), data);
276 if (type instanceof ModuleType) {
277 localModules.put(type.getName(), type);
278 }
279 return data;
280 }
281
282 protected DataContainer getData(ComplexType complex) {
283 return getData(complex.getAbsoluteName());
284 }
285
286 protected DataContainer getData(String absoluteName) {
287 return (DataContainer) localComplexTypes.get(absoluteName);
288 }
289
290 protected ModuleType getTopLevelModule(String name) {
291 return (ModuleType) topLevelModules.get(name);
292 }
293
294 public ModuleType getModule(String name) {
295 return (ModuleType) localModules.get(name);
296 }
297
298 protected Iterator topLevelModules() {
299 return topLevelModules.values().iterator();
300 }
301
302 /*** Get the parent of this CrawlerSettings object.
303 *
304 * @return the parent of this CrawlerSettings object.
305 */
306 public CrawlerSettings getParent() {
307 return getParent(null);
308 }
309
310 /***
311 * Get the parent of this CrawlerSettings object.
312 * This method passes around a URI so that refinements could be checked.
313 *
314 * @param uri The uri for which parents of this object shoul be found.
315 * @return the parent of this CrawlerSettings object.
316 */
317 public CrawlerSettings getParent(UURI uri) {
318 return (isRefinement())?
319 settingsHandler.getSettingsForHost(scope):
320 (scope == null || scope.equals(""))?
321 null:
322 settingsHandler.
323 getSettings(settingsHandler.getParentScope(scope), uri);
324 }
325
326 /*** Get the SettingHandler this CrawlerSettings object belongs to.
327 *
328 * @return the SettingHandler this CrawlerSettings object belongs to.
329 */
330 public SettingsHandler getSettingsHandler() {
331 return settingsHandler;
332 }
333
334 /***
335 * Get an <code>ListIterator</code> over the refinements for this
336 * settings object.
337 *
338 * @return Returns an iterator over the refinements.
339 */
340 public ListIterator refinementsIterator() {
341 if (refinements == null) {
342 refinements = new ArrayList<Refinement>();
343 }
344 return refinements.listIterator();
345 }
346
347 /***
348 * Add a refinement to this settings object.
349 *
350 * @param refinement The refinements to set.
351 */
352 public void addRefinement(Refinement refinement) {
353 if (refinements == null) {
354 refinements = new ArrayList<Refinement>();
355 }
356 this.refinements.remove(refinement);
357 this.refinements.add(refinement);
358 }
359
360 /***
361 * Remove a refinement from this settings object.
362 *
363 * @param reference the reference (name) to the refinement to be removed.
364 * @return true if something was removed, false if the refinement was not
365 * found.
366 */
367 public boolean removeRefinement(String reference) {
368 if (hasRefinements()) {
369 for(Iterator it = refinements.iterator(); it.hasNext();) {
370 if (((Refinement) it.next()).getReference().equals(reference)) {
371 it.remove();
372 return true;
373 }
374 }
375 }
376 return false;
377 }
378
379 /***
380 * Get a refinement with a given reference.
381 *
382 * @param reference the reference (name) to the refinement to get.
383 * @return the refinement having the specified reference or null if no
384 * refinement matches it.
385 */
386 public Refinement getRefinement(String reference) {
387 if (hasRefinements()) {
388 for(Iterator it = refinements.iterator(); it.hasNext();) {
389 Refinement tmp = (Refinement) it.next();
390 if (tmp.getReference().equals(reference)) {
391 return tmp;
392 }
393 }
394 }
395 return null;
396 }
397
398 /***
399 * Returns true if this settings object has refinements attached to it.
400 *
401 * @return true if this settings object has refinements attached to it.
402 */
403 public boolean hasRefinements() {
404 return refinements != null && !refinements.isEmpty();
405 }
406
407 /***
408 * Returns true if this settings object is a refinement.
409 *
410 * @return true if this settings object is a refinement.
411 */
412 public boolean isRefinement() {
413 return isRefinement;
414 }
415
416 /***
417 * Mark this settings object as an refinement.
418 *
419 * @param isRefinement Set this to true if this settings object is a
420 * refinement.
421 */
422 public void setRefinement(boolean isRefinement) {
423 this.isRefinement = isRefinement;
424 }
425 }