View Javadoc

1   /* Copyright (C) 2003 Internet Archive.
2    *
3    * This file is part of the Heritrix web crawler (crawler.archive.org).
4    *
5    * Heritrix is free software; you can redistribute it and/or modify
6    * it under the terms of the GNU Lesser Public License as published by
7    * the Free Software Foundation; either version 2.1 of the License, or
8    * any later version.
9    *
10   * Heritrix is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU Lesser Public License for more details.
14   *
15   * You should have received a copy of the GNU Lesser Public License
16   * along with Heritrix; if not, write to the Free Software
17   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18   *
19   * ToePool.java
20   * Created on Oct 1, 2003
21   *
22   * $Header$
23   */
24  package org.archive.crawler.framework;
25  
26  import java.io.PrintWriter;
27  import java.util.Date;
28  import java.util.Iterator;
29  import java.util.TreeSet;
30  
31  import org.archive.util.ArchiveUtils;
32  import org.archive.util.Histotable;
33  import org.archive.util.Reporter;
34  
35  /***
36   * A collection of ToeThreads. The class manages the ToeThreads currently
37   * running. Including increasing and decreasing their number, keeping track
38   * of their state and it can be used to kill hung threads.
39   *
40   * @author Gordon Mohr
41   * @author Kristinn Sigurdsson
42   *
43   * @see org.archive.crawler.framework.ToeThread
44   */
45  public class ToePool extends ThreadGroup implements Reporter {
46      /*** run worker thread slightly lower than usual */
47      public static int DEFAULT_TOE_PRIORITY = Thread.NORM_PRIORITY - 1;
48      
49      protected CrawlController controller;
50      protected int nextSerialNumber = 1;
51      protected int targetSize = 0; 
52  
53      /***
54       * Constructor. Creates a pool of ToeThreads. 
55       *
56       * @param c A reference to the CrawlController for the current crawl.
57       */
58      public ToePool(CrawlController c) {
59          super("ToeThreads");
60          this.controller = c;
61          setDaemon(true);
62      }
63      
64      public void cleanup() {
65          this.controller = null;
66      }
67  
68      /***
69       * @return The number of ToeThreads that are not available (Approximation).
70       */
71      public int getActiveToeCount() {
72          Thread[] toes = getToes();
73          int count = 0;
74          for (int i = 0; i < toes.length; i++) {
75              if((toes[i] instanceof ToeThread) &&
76                      ((ToeThread)toes[i]).isActive()) {
77                  count++;
78              }
79          }
80          return count; 
81      }
82  
83      /***
84       * @return The number of ToeThreads. This may include killed ToeThreads
85       *         that were not replaced.
86       */
87      public int getToeCount() {
88          Thread[] toes = getToes();
89          int count = 0;
90          for (int i = 0; i<toes.length; i++) {
91              if((toes[i] instanceof ToeThread)) {
92                  count++;
93              }
94          }
95          return count; 
96      }
97      
98      private Thread[] getToes() {
99          Thread[] toes = new Thread[activeCount()+10];
100         this.enumerate(toes);
101         return toes;
102     }
103 
104     /***
105      * Change the number of ToeThreads.
106      *
107      * @param newsize The new number of ToeThreads.
108      */
109     public void setSize(int newsize)
110     {
111         targetSize = newsize;
112         int difference = newsize - getToeCount(); 
113         if (difference > 0) {
114             // must create threads
115             for(int i = 1; i <= difference; i++) {
116                 startNewThread();
117             }
118         } else {
119             // must retire extra threads
120             int retainedToes = targetSize; 
121             Thread[] toes = this.getToes();
122             for (int i = 0; i < toes.length ; i++) {
123                 if(!(toes[i] instanceof ToeThread)) {
124                     continue;
125                 }
126                 retainedToes--;
127                 if (retainedToes>=0) {
128                     continue; // this toe is spared
129                 }
130                 // otherwise:
131                 ToeThread tt = (ToeThread)toes[i];
132                 tt.retire();
133             }
134         }
135     }
136 
137     /***
138      * Kills specified thread. Killed thread can be optionally replaced with a
139      * new thread.
140      *
141      * <p><b>WARNING:</b> This operation should be used with great care. It may
142      * destabilize the crawler.
143      *
144      * @param threadNumber Thread to kill
145      * @param replace If true then a new thread will be created to take the
146      *           killed threads place. Otherwise the total number of threads
147      *           will decrease by one.
148      */
149     public void killThread(int threadNumber, boolean replace){
150 
151         Thread[] toes = getToes();
152         for (int i = 0; i< toes.length; i++) {
153             if(! (toes[i] instanceof ToeThread)) {
154                 continue;
155             }
156             ToeThread toe = (ToeThread) toes[i];
157             if(toe.getSerialNumber()==threadNumber) {
158                 toe.kill();
159             }
160         }
161 
162         if(replace){
163             // Create a new toe thread to take its place. Replace toe
164             startNewThread();
165         }
166     }
167 
168     private synchronized void startNewThread() {
169         ToeThread newThread = new ToeThread(this, nextSerialNumber++);
170         newThread.setPriority(DEFAULT_TOE_PRIORITY);
171         newThread.start();
172     }
173 
174     /***
175      * @return Instance of CrawlController.
176      */
177     public CrawlController getController() {
178         return controller;
179     }
180     
181     //
182     // Reporter implementation
183     //
184     
185     public static String STANDARD_REPORT = "standard";
186     public static String COMPACT_REPORT = "compact";
187     protected static String[] REPORTS = {STANDARD_REPORT,COMPACT_REPORT};
188 
189     public String[] getReports() {
190         return REPORTS;
191     }
192 
193     public void reportTo(String name, PrintWriter writer) {
194         if(COMPACT_REPORT.equals(name)) {
195             compactReportTo(writer);
196             return;
197         }
198         if(name!=null && !STANDARD_REPORT.equals(name)) {
199             writer.print(name);
200             writer.print(" not recognized: giving standard report/n");
201         }
202         standardReportTo(writer);
203     }      
204             
205     /* (non-Javadoc)
206      * @see org.archive.util.Reporter#reportTo(java.io.Writer)
207      */
208     protected void standardReportTo(PrintWriter writer) {
209         writer.print("Toe threads report - "
210                 + ArchiveUtils.get12DigitDate() + "\n");
211         writer.print(" Job being crawled: "
212                 + this.controller.getOrder().getCrawlOrderName() + "\n");
213         writer.print(" Number of toe threads in pool: " + getToeCount() + " ("
214                 + getActiveToeCount() + " active)\n");
215 
216         Thread[] toes = this.getToes();
217         synchronized (toes) {
218             for (int i = 0; i < toes.length; i++) {
219                 if (!(toes[i] instanceof ToeThread)) {
220                     continue;
221                 }
222                 ToeThread tt = (ToeThread) toes[i];
223                 if (tt != null) {
224                     writer
225                             .print("   ToeThread #" + tt.getSerialNumber()
226                                     + "\n");
227                     tt.reportTo(writer);
228                 }
229             }
230         }
231     }
232     
233     /* (non-Javadoc)
234      * @see org.archive.util.Reporter#reportTo(java.io.Writer)
235      */
236     protected void compactReportTo(PrintWriter writer) {
237         writer.print(getToeCount() + " threads (" + getActiveToeCount()
238                 + " active)\n");
239 
240         Thread[] toes = this.getToes();
241         boolean legendWritten = false; 
242         // TODO: sort by activity: those with curi the longest at front
243         synchronized (toes) {
244             for (int i = 0; i < toes.length; i++) {
245                 if (!(toes[i] instanceof ToeThread)) {
246                     continue;
247                 }
248                 ToeThread tt = (ToeThread) toes[i];
249                 if (tt != null) {
250                     if(!legendWritten) {
251                         writer.println(tt.singleLineLegend());
252                         legendWritten = true;
253                     }
254                     tt.singleLineReportTo(writer);
255                 }
256             }
257         }
258     }
259 
260     public void singleLineReportTo(PrintWriter w) {
261         Histotable<Object> ht = new Histotable<Object>();
262         Thread[] toes = getToes();
263         for (int i = 0; i < toes.length; i++) {
264 
265             if(!(toes[i] instanceof ToeThread)) {
266                 continue;
267             }
268             ToeThread tt = (ToeThread)toes[i];
269             if(tt!=null) {
270                 ht.tally(tt.getStep());
271             }
272         }
273         TreeSet sorted = ht.getSortedByCounts();
274         w.print(getToeCount());
275         w.print(" threads: ");        
276         w.print(Histotable.entryString(sorted.first()));
277         if(sorted.size()>1) {
278             Iterator iter = sorted.iterator();
279             iter.next();
280             w.print("; ");
281             w.print(Histotable.entryString(iter.next()));
282         }
283         if(sorted.size()>2) {
284             w.print("; etc...");
285         }
286     }
287 
288     /* (non-Javadoc)
289      * @see org.archive.util.Reporter#singleLineLegend()
290      */
291     public String singleLineLegend() {
292         return "total: mostCommonStateTotal secondMostCommonStateTotal";
293     }
294     
295     public String singleLineReport() {
296         return ArchiveUtils.singleLineReport(this);
297     }
298 
299     public void reportTo(PrintWriter writer) {
300         reportTo(null,writer);
301     }
302 }