1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.settings;
26
27
28 /*** This class keeps a map of host names to settings objects.
29 *
30 * It is implemented with soft references which implies that the elements can
31 * be garbage collected when there's no strong references to the elements.
32 * Even if there's no strong references left elements will not be garbage
33 * collected unless the memory is needed.
34 *
35 * @author John Erik Halse
36 *
37 */
38 public class SettingsCache {
39 /*** Cached CrawlerSettings objects */
40 private final SoftSettingsHash settingsCache = new SoftSettingsHash(16);
41
42 /*** Maps hostname to effective settings object */
43 private final SoftSettingsHash hostToSettings = new SoftSettingsHash(4000);
44
45 private final CrawlerSettings globalSettings;
46
47 /*** Creates a new instance of the settings cache
48 */
49 public SettingsCache(CrawlerSettings globalSettings) {
50 this.globalSettings = globalSettings;
51 }
52
53 /*** Get the effective settings for a host.
54 *
55 * @param host the host to get settings for.
56 * @return the settings or null if not in cache.
57 */
58 public CrawlerSettings getSettings(String host, String refinement) {
59 String key = computeKey(host, refinement);
60 return (key == "")? this.globalSettings: hostToSettings.get(key);
61 }
62
63 /*** Get a settings object.
64 *
65 * @param scope the scope of the settings object to get.
66 * @return the settings object or null if not in cache.
67 */
68 public CrawlerSettings getSettingsObject(String scope, String refinement) {
69 String key = computeKey(scope, refinement);
70 return (key == "")? this.globalSettings: settingsCache.get(key);
71 }
72
73 /*** Add a settings object to the cache.
74 *
75 * @param host the host for which the settings object is valid.
76 * @param settings the settings object.
77 */
78 public synchronized void putSettings(String host, CrawlerSettings settings) {
79 String refinement = settings.isRefinement() ? settings.getName() : null;
80 String key = computeKey(host, refinement);
81 hostToSettings.put(key, settings);
82 key = computeKey(settings.getScope(), refinement);
83 settingsCache.put(key, settings);
84 }
85
86 /*** Delete a settings object from the cache.
87 *
88 * @param settings the settings object to remove.
89 */
90 public synchronized void deleteSettingsObject(CrawlerSettings settings) {
91 String refinement = settings.isRefinement() ? settings.getName() : null;
92 settingsCache.remove(computeKey(settings.getScope(), refinement));
93
94
95
96 for (SoftSettingsHash.EntryIterator it = hostToSettings.iterator(); it.hasNext();) {
97 if (it.nextEntry().getValue().equals(settings)) {
98 it.remove();
99 }
100 }
101 }
102
103 /*** Make sure that no host strings points to wrong settings.
104 *
105 * This method clears most of the host to settings mappings. Because of the
106 * performance penalty this should only used when really needed.
107 */
108 public synchronized void refreshHostToSettings() {
109 hostToSettings.clear();
110 SoftSettingsHash.EntryIterator it = settingsCache.iterator();
111 while (it.hasNext()) {
112 SoftSettingsHash.SettingsEntry entry = it.nextEntry();
113 hostToSettings.put(entry);
114 }
115 }
116
117 /***
118 * Clear all cached settings.
119 */
120 public void clear() {
121 hostToSettings.clear();
122 settingsCache.clear();
123 }
124
125 public CrawlerSettings getGlobalSettings() {
126 return globalSettings;
127 }
128
129 private String computeKey(String host, String refinement) {
130 host = host == null ? "" : host;
131 return (refinement == null) || refinement.equals("") ? host : host
132 + '#' + refinement;
133 }
134 }