1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.framework;
26
27 import java.util.ArrayList;
28 import java.util.HashMap;
29 import java.util.Iterator;
30 import java.util.List;
31 import java.util.Map;
32
33 import javax.management.AttributeNotFoundException;
34 import javax.management.MBeanException;
35 import javax.management.ReflectionException;
36
37 import org.archive.crawler.datamodel.CrawlOrder;
38 import org.archive.crawler.framework.exceptions.FatalConfigurationException;
39 import org.archive.crawler.settings.MapType;
40
41
42 /*** A list of all the ProcessorChains.
43 *
44 * @author John Erik Halse
45 */
46 public class ProcessorChainList {
47 private List<ProcessorChain> chainList = new ArrayList<ProcessorChain>();
48 private Map<String,ProcessorChain> chainMap
49 = new HashMap<String,ProcessorChain>();
50
51 /*** Constructs a new ProcessorChainList.
52 *
53 * @param order the Crawl Order to get configuration from.
54 *
55 * @throws FatalConfigurationException is thrown if chains could not be
56 * set up properly.
57 */
58 public ProcessorChainList(CrawlOrder order)
59 throws FatalConfigurationException {
60 try {
61 addProcessorMap(CrawlOrder.ATTR_PRE_FETCH_PROCESSORS,
62 (MapType) order.
63 getAttribute(CrawlOrder.ATTR_PRE_FETCH_PROCESSORS));
64 addProcessorMap(CrawlOrder.ATTR_FETCH_PROCESSORS, (MapType) order
65 .getAttribute(CrawlOrder.ATTR_FETCH_PROCESSORS));
66 addProcessorMap(CrawlOrder.ATTR_EXTRACT_PROCESSORS, (MapType) order
67 .getAttribute(CrawlOrder.ATTR_EXTRACT_PROCESSORS));
68 addProcessorMap(CrawlOrder.ATTR_WRITE_PROCESSORS, (MapType) order
69 .getAttribute(CrawlOrder.ATTR_WRITE_PROCESSORS));
70 addProcessorMap(CrawlOrder.ATTR_POST_PROCESSORS, (MapType) order
71 .getAttribute(CrawlOrder.ATTR_POST_PROCESSORS));
72 } catch (AttributeNotFoundException e) {
73 throw new FatalConfigurationException("Could not get processors" +
74 " from crawl order: " + e.getMessage());
75 } catch (MBeanException e) {
76 throw new FatalConfigurationException("Could not get processors" +
77 " from crawl order: " + e.getMessage());
78 } catch (ReflectionException e) {
79 throw new FatalConfigurationException("Could not get processors" +
80 " from crawl order: " + e.getMessage());
81 }
82
83 if (processorCount() == 0) { throw new FatalConfigurationException(
84 "No processors defined"); }
85 }
86
87 /*** Add a new chain of processors to the chain list.
88 *
89 * This method takes a map of processors and wraps it in a ProcessorChain
90 * object and adds it to the list of chains.
91 *
92 * @param processorMap the processor map to be added.
93 */
94 public void addProcessorMap(String name, MapType processorMap) {
95 ProcessorChain processorChain = new ProcessorChain(processorMap);
96 ProcessorChain previousChain = getLastChain();
97 if (previousChain != null) {
98 previousChain.setNextChain(processorChain);
99 }
100 chainList.add(processorChain);
101 chainMap.put(name, processorChain);
102 }
103
104 /*** Get the first processor chain.
105 *
106 * @return the first processor chain.
107 */
108 public ProcessorChain getFirstChain() {
109 return (ProcessorChain) chainList.get(0);
110 }
111
112 /*** Get the last processor chain.
113 *
114 * The last processor chain should contain processors that should always
115 * be run for a URI that has started its way through the processors.
116 *
117 * @return the last processor chain.
118 */
119 public ProcessorChain getLastChain() {
120 if (size() == 0) {
121 return null;
122 } else {
123 return (ProcessorChain) chainList.get(size() - 1);
124 }
125 }
126
127 /*** Get the total number of all processors in all the chains.
128 *
129 * @return the total number of all processors in all the chains.
130 */
131 public int processorCount() {
132 int processorCount = 0;
133 for (Iterator it = iterator(); it.hasNext();) {
134 processorCount += ((ProcessorChain) it.next()).size();
135 }
136 return processorCount;
137 }
138
139 /*** Get an iterator over the processor chains.
140 *
141 * @return an iterator over the processor chains.
142 */
143 public Iterator iterator() {
144 return chainList.iterator();
145 }
146
147 /*** Get the number of processor chains.
148 *
149 * @return the number of processor chains.
150 */
151 public int size() {
152 return chainList.size();
153 }
154
155 /*** Get a processor chain by its index in the list of chains.
156 *
157 * @param index the chains index in the list of chains.
158 * @return the requested processor chain.
159 */
160 public ProcessorChain getProcessorChain(int index) {
161 return (ProcessorChain) chainList.get(index);
162 }
163
164 /*** Get a processor chain by its name.
165 *
166 * @param name name of the processor chain to get.
167 * @return the requested processor chain.
168 */
169 public ProcessorChain getProcessorChain(String name) {
170 return (ProcessorChain) chainMap.get(name);
171 }
172
173 public void kickUpdate() {
174 for (ProcessorChain chain : chainList) {
175 chain.kickUpdate();
176 }
177 }
178
179 }