View Javadoc

1   /* ProcessorChainList
2    *
3    * $Id: ProcessorChainList.java 4664 2006-09-25 23:59:43Z paul_jack $
4    *
5    * Created on Mar 3, 2004
6    *
7    * Copyright (C) 2004 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.crawler.framework;
26  
27  import java.util.ArrayList;
28  import java.util.HashMap;
29  import java.util.Iterator;
30  import java.util.List;
31  import java.util.Map;
32  
33  import javax.management.AttributeNotFoundException;
34  import javax.management.MBeanException;
35  import javax.management.ReflectionException;
36  
37  import org.archive.crawler.datamodel.CrawlOrder;
38  import org.archive.crawler.framework.exceptions.FatalConfigurationException;
39  import org.archive.crawler.settings.MapType;
40  
41  
42  /*** A list of all the ProcessorChains.
43   *
44   * @author John Erik Halse
45   */
46  public class ProcessorChainList {
47      private List<ProcessorChain> chainList = new ArrayList<ProcessorChain>();
48      private Map<String,ProcessorChain> chainMap
49       = new HashMap<String,ProcessorChain>();
50  
51      /*** Constructs a new ProcessorChainList.
52       *
53       * @param order the Crawl Order to get configuration from.
54       *
55       * @throws FatalConfigurationException is thrown if chains could not be
56       *         set up properly.
57       */
58      public ProcessorChainList(CrawlOrder order)
59              throws FatalConfigurationException {
60          try {
61              addProcessorMap(CrawlOrder.ATTR_PRE_FETCH_PROCESSORS,
62                  (MapType) order.
63                      getAttribute(CrawlOrder.ATTR_PRE_FETCH_PROCESSORS));
64              addProcessorMap(CrawlOrder.ATTR_FETCH_PROCESSORS, (MapType) order
65                      .getAttribute(CrawlOrder.ATTR_FETCH_PROCESSORS));
66              addProcessorMap(CrawlOrder.ATTR_EXTRACT_PROCESSORS, (MapType) order
67                      .getAttribute(CrawlOrder.ATTR_EXTRACT_PROCESSORS));
68              addProcessorMap(CrawlOrder.ATTR_WRITE_PROCESSORS, (MapType) order
69                      .getAttribute(CrawlOrder.ATTR_WRITE_PROCESSORS));
70              addProcessorMap(CrawlOrder.ATTR_POST_PROCESSORS, (MapType) order
71                      .getAttribute(CrawlOrder.ATTR_POST_PROCESSORS));
72          } catch (AttributeNotFoundException e) {
73              throw new FatalConfigurationException("Could not get processors" +
74                  " from crawl order: " + e.getMessage());
75          } catch (MBeanException e) {
76              throw new FatalConfigurationException("Could not get processors" +
77                  " from crawl order: " + e.getMessage());
78          } catch (ReflectionException e) {
79              throw new FatalConfigurationException("Could not get processors" +
80                  " from crawl order: " + e.getMessage());
81          }
82  
83          if (processorCount() == 0) { throw new FatalConfigurationException(
84                  "No processors defined"); }
85      }
86  
87      /*** Add a new chain of processors to the chain list.
88       *
89       * This method takes a map of processors and wraps it in a ProcessorChain
90       * object and adds it to the list of chains.
91       *
92       * @param processorMap the processor map to be added.
93       */
94      public void addProcessorMap(String name, MapType processorMap) {
95          ProcessorChain processorChain = new ProcessorChain(processorMap);
96          ProcessorChain previousChain = getLastChain();
97          if (previousChain != null) {
98              previousChain.setNextChain(processorChain);
99          }
100         chainList.add(processorChain);
101         chainMap.put(name, processorChain);
102     }
103 
104     /*** Get the first processor chain.
105      *
106      * @return the first processor chain.
107      */
108     public ProcessorChain getFirstChain() {
109         return (ProcessorChain) chainList.get(0);
110     }
111 
112     /*** Get the last processor chain.
113      *
114      * The last processor chain should contain processors that should always
115      * be run for a URI that has started its way through the processors.
116      *
117      * @return the last processor chain.
118      */
119     public ProcessorChain getLastChain() {
120         if (size() == 0) {
121             return null;
122         } else {
123             return (ProcessorChain) chainList.get(size() - 1);
124         }
125     }
126 
127     /*** Get the total number of all processors in all the chains.
128      *
129      * @return the total number of all processors in all the chains.
130      */
131     public int processorCount() {
132         int processorCount = 0;
133         for (Iterator it = iterator(); it.hasNext();) {
134             processorCount += ((ProcessorChain) it.next()).size();
135         }
136         return processorCount;
137     }
138 
139     /*** Get an iterator over the processor chains.
140      *
141      * @return an iterator over the processor chains.
142      */
143     public Iterator iterator() {
144         return chainList.iterator();
145     }
146 
147     /*** Get the number of processor chains.
148      *
149      * @return the number of processor chains.
150      */
151     public int size() {
152         return chainList.size();
153     }
154 
155     /*** Get a processor chain by its index in the list of chains.
156      *
157      * @param index the chains index in the list of chains.
158      * @return the requested processor chain.
159      */
160     public ProcessorChain getProcessorChain(int index) {
161         return (ProcessorChain) chainList.get(index);
162     }
163 
164     /*** Get a processor chain by its name.
165      *
166      * @param name name of the processor chain to get.
167      * @return the requested processor chain.
168      */
169     public ProcessorChain getProcessorChain(String name) {
170         return (ProcessorChain) chainMap.get(name);
171     }
172 
173     public void kickUpdate() {
174         for (ProcessorChain chain : chainList) {
175             chain.kickUpdate();
176         }
177     }
178 
179 }