View Javadoc

1   /* SettingsCache
2    *
3    * $Id: SettingsCache.java 4448 2006-08-07 06:29:13Z gojomo $
4    *
5    * Created on Mar 17, 2004
6    *
7    * Copyright (C) 2004 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.crawler.settings;
26  
27  
28  /*** This class keeps a map of host names to settings objects.
29   *
30   * It is implemented with soft references which implies that the elements can
31   * be garbage collected when there's no strong references to the elements.
32   * Even if there's no strong references left elements will not be garbage
33   * collected unless the memory is needed.
34   *
35   * @author John Erik Halse
36   *
37   */
38  public class SettingsCache {
39      /*** Cached CrawlerSettings objects */
40      private final SoftSettingsHash settingsCache = new SoftSettingsHash(16);
41  
42      /*** Maps hostname to effective settings object */
43      private final SoftSettingsHash hostToSettings = new SoftSettingsHash(4000);
44  
45      private final CrawlerSettings globalSettings;
46  
47      /*** Creates a new instance of the settings cache
48       */
49      public SettingsCache(CrawlerSettings globalSettings) {
50          this.globalSettings = globalSettings;
51      }
52  
53      /*** Get the effective settings for a host.
54       *
55       * @param host the host to get settings for.
56       * @return the settings or null if not in cache.
57       */
58      public CrawlerSettings getSettings(String host, String refinement) {
59          String key = computeKey(host, refinement);
60          return (key == "")? this.globalSettings: hostToSettings.get(key);
61      }
62  
63      /*** Get a settings object.
64       *
65       * @param scope the scope of the settings object to get.
66       * @return the settings object or null if not in cache.
67       */
68      public CrawlerSettings getSettingsObject(String scope, String refinement) {
69          String key = computeKey(scope, refinement);
70          return (key == "")? this.globalSettings: settingsCache.get(key);
71      }
72  
73      /*** Add a settings object to the cache.
74       *
75       * @param host the host for which the settings object is valid.
76       * @param settings the settings object.
77       */
78      public synchronized void putSettings(String host, CrawlerSettings settings) {
79          String refinement = settings.isRefinement() ? settings.getName() : null;
80          String key = computeKey(host, refinement);
81          hostToSettings.put(key, settings);
82          key = computeKey(settings.getScope(), refinement);
83          settingsCache.put(key, settings);
84      }
85  
86      /*** Delete a settings object from the cache.
87       *
88       * @param settings the settings object to remove.
89       */
90      public synchronized void deleteSettingsObject(CrawlerSettings settings) {
91          String refinement = settings.isRefinement() ? settings.getName() : null;
92          settingsCache.remove(computeKey(settings.getScope(), refinement));
93  
94          // Find all references to this settings object in the hostToSettings
95          // cache and remove them.
96          for (SoftSettingsHash.EntryIterator it = hostToSettings.iterator(); it.hasNext();) {
97              if (it.nextEntry().getValue().equals(settings)) {
98                  it.remove();
99              }
100         }
101     }
102 
103     /*** Make sure that no host strings points to wrong settings.
104      *
105      * This method clears most of the host to settings mappings. Because of the
106      * performance penalty this should only used when really needed.
107      */
108     public synchronized void refreshHostToSettings() {
109         hostToSettings.clear();
110         SoftSettingsHash.EntryIterator it = settingsCache.iterator();
111         while (it.hasNext()) {
112             SoftSettingsHash.SettingsEntry entry = it.nextEntry();
113             hostToSettings.put(entry);
114         }
115     }
116     
117     /***
118      * Clear all cached settings.
119      */
120     public void clear() {
121         hostToSettings.clear();
122         settingsCache.clear();
123     }
124 
125     public CrawlerSettings getGlobalSettings() {
126         return globalSettings;
127     }
128 
129     private String computeKey(String host, String refinement) {
130         host = host == null ? "" : host;
131         return (refinement == null) || refinement.equals("") ? host : host
132                 + '#' + refinement;
133     }
134 }