View Javadoc

1   /* Copyright (C) 2003 Internet Archive.
2    *
3    * This file is part of the Heritrix web crawler (crawler.archive.org).
4    *
5    * Heritrix is free software; you can redistribute it and/or modify
6    * it under the terms of the GNU Lesser Public License as published by
7    * the Free Software Foundation; either version 2.1 of the License, or
8    * any later version.
9    *
10   * Heritrix is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU Lesser Public License for more details.
14   *
15   * You should have received a copy of the GNU Lesser Public License
16   * along with Heritrix; if not, write to the Free Software
17   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18   *
19   * SeedCachingScope.java
20   * Created on Mar 25, 2005
21   *
22   * $Header$
23   */
24  package org.archive.crawler.scope;
25  
26  import java.util.ArrayList;
27  import java.util.Iterator;
28  import java.util.List;
29  
30  import org.archive.crawler.datamodel.CrawlURI;
31  import org.archive.net.UURI;
32  
33  /***
34   * A CrawlScope that caches its seed list for the
35   * convenience of scope-tests that are based on the 
36   * seeds. 
37   *
38   * @author gojomo
39   *
40   */
41  public class SeedCachingScope extends ClassicScope {
42  
43      private static final long serialVersionUID = 300230673616424926L;
44  
45      //private static final Logger logger =
46      //    Logger.getLogger(SeedCachingScope.class.getName());
47      List<UURI> seeds;
48  
49      public SeedCachingScope(String name) {
50          super(name);
51      }
52  
53      /* (non-Javadoc)
54       * @see org.archive.crawler.framework.CrawlScope#addSeed(org.archive.crawler.datamodel.UURI)
55       */
56      public boolean addSeed(CrawlURI curi) {
57          if (super.addSeed(curi) == false) {
58              // failed
59              return false;
60          }
61          // FIXME: This is not thread-safe.
62          List<UURI> newSeeds = new ArrayList<UURI>(seeds);
63          newSeeds.add(curi.getUURI());
64          seeds = newSeeds;
65          return true;
66      }
67      
68      /* (non-Javadoc)
69       * @see org.archive.crawler.framework.CrawlScope#refreshSeeds()
70       */
71      public synchronized void refreshSeeds() {
72          super.refreshSeeds();
73          seeds = null;
74          fillSeedsCache();
75      }
76      
77      /* (non-Javadoc)
78       * @see org.archive.crawler.framework.CrawlScope#seedsIterator()
79       */
80      public Iterator<UURI> seedsIterator() {
81          fillSeedsCache();
82          return seeds.iterator();
83      }
84  
85      /***
86       * Ensure seeds cache is created/filled
87       */
88      protected synchronized void fillSeedsCache() {
89          if (seeds==null) {
90              seeds = new ArrayList<UURI>();
91              Iterator<UURI> iter = super.seedsIterator();
92              while(iter.hasNext()) {
93                  seeds.add(iter.next());
94              }
95          }
96      }
97  }