1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.util;
26
27 import java.io.Serializable;
28 import java.util.logging.Logger;
29
30 import org.archive.util.fingerprint.LongFPSet;
31
32 /***
33 * Shell of functionality for a Set of primitive long fingerprints, held
34 * in an array of possibly-empty slots.
35 *
36 * The implementation of that holding array is delegated to subclasses.
37 *
38 * <p>Capacity is always a power of 2.
39 *
40 * <p>Fingerprints are already assumed to be well-distributed, so the
41 * hashed position for a value is just its high-order bits.
42 *
43 * @author gojomo
44 * @version $Date: 2005-05-06 02:49:04 +0000 (Fri, 06 May 2005) $, $Revision: 3437 $
45 */
46 public abstract class AbstractLongFPSet implements LongFPSet, Serializable {
47 private static Logger logger =
48 Logger.getLogger("org.archive.util.AbstractLongFPSet");
49
50 /***
51 * A constant used to indicate that a slot in the set storage is empty.
52 * A zero or positive value means slot is filled
53 */
54 protected static byte EMPTY = -1;
55
56 /*** the capacity of this set, specified as the exponent of a power of 2 */
57 protected int capacityPowerOfTwo;
58
59 /*** The load factor, as a fraction. This gives the amount of free space
60 * to keep in the Set. */
61 protected float loadFactor;
62
63 /*** The current number of elements in the set */
64 protected long count;
65
66 /***
67 * To support serialization
68 * TODO: verify needed?
69 */
70 public AbstractLongFPSet() {
71 super();
72 }
73
74 /***
75 * Create a new AbstractLongFPSet with a given capacity and load Factor
76 *
77 * @param capacityPowerOfTwo The capacity as the exponent of a power of 2.
78 * e.g if the capacity is <code>4</code> this means <code>2^^4</code>
79 * entries
80 * @param loadFactor The load factor as a fraction. This gives the amount
81 * of free space to keep in the Set.
82 */
83 public AbstractLongFPSet(final int capacityPowerOfTwo, float loadFactor) {
84 this.capacityPowerOfTwo = capacityPowerOfTwo;
85 this.loadFactor = loadFactor;
86 this.count = 0;
87 }
88
89 /***
90 * Does this set contain the given value?
91 *
92 * @see org.archive.util.fingerprint.LongFPSet#contains(long)
93 */
94 public boolean contains(long val) {
95 long i = indexFor(val);
96 if (slotHasData(i)) {
97 noteAccess(i);
98 return true;
99 }
100 return false;
101 }
102
103 /***
104 * Check the state of a slot in the storage.
105 *
106 * @param i the index of the slot to check
107 * @return -1 if slot is filled; nonegative if full.
108 */
109 protected abstract int getSlotState(long i);
110
111 /***
112 * Note access (hook for subclass cache-replacement strategies)
113 *
114 * @param index The index of the slot to check.
115 */
116 private void noteAccess(long index) {
117
118
119 }
120
121 /***
122 * Return the number of entries in this set.
123 *
124 * @see org.archive.util.fingerprint.LongFPSet#count()
125 */
126 public long count() {
127 return count;
128 }
129
130 /***
131 * Add the given value to this set
132 *
133 * @see org.archive.util.fingerprint.LongFPSet#add(long)
134 */
135 public boolean add(long val) {
136 logger.finest("Adding " + val);
137 long i = indexFor(val);
138 if (slotHasData(i)) {
139
140 return false;
141 }
142
143
144
145 if ((count + 1) > (loadFactor * (1 << capacityPowerOfTwo))) {
146 makeSpace();
147
148 i = indexFor(val);
149 assert i < 0 : "slot should be empty";
150 }
151
152 i = asDataSlot(i);
153 setAt(i, val);
154 count++;
155 noteAccess(i);
156 return true;
157 }
158
159 /***
160 * Make additional space to keep the load under the target
161 * loadFactor level.
162 *
163 * Subclasses may grow or discard entries to satisfy.
164 */
165 protected abstract void makeSpace();
166
167 /***
168 * Set the stored value at the given slot.
169 *
170 * @param i the slot index
171 * @param l the value to set
172 */
173 protected abstract void setAt(long i, long l);
174
175 /***
176 * Get the stored value at the given slot.
177 *
178 * @param i the slot index
179 * @return The stored value at the given slot.
180 */
181 protected abstract long getAt(long i);
182
183 /***
184 * Given a value, check the store for its existence. If it exists, it
185 * will return the index where the value resides. Otherwise it return
186 * an encoded index, which is a possible storage location for the value.
187 *
188 * <p>Note, if we have a loading factor less than 1.0, there should always
189 * be an empty location where we can store the value
190 *
191 * @param val the fingerprint value to check for
192 * @return The (positive) index where the value already resides,
193 * or an empty index where it could be inserted (encoded as a
194 * negative number).
195 */
196 private long indexFor(long val) {
197 long candidateIndex = startIndexFor(val);
198 while (true) {
199 if (getSlotState(candidateIndex) < 0) {
200
201 return asEmptySlot(candidateIndex);
202 }
203 if (getAt(candidateIndex) == val) {
204
205 return candidateIndex;
206 }
207 candidateIndex++;
208 if (candidateIndex == 1 << capacityPowerOfTwo) {
209 candidateIndex = 0;
210 }
211 }
212 }
213
214 /***
215 * Return the recommended storage index for the given value.
216 * Assumes values are already well-distributed; merely uses
217 * high-order bits.
218 *
219 * @param val
220 * @return The recommended storage index for the given value.
221 */
222 private long startIndexFor(long val) {
223 return (val >>> (64 - capacityPowerOfTwo));
224 }
225
226 public boolean remove(long l) {
227 long i = indexFor(l);
228 if (!slotHasData(i)) {
229
230 return false;
231 }
232 removeAt(i);
233 return true;
234 }
235
236 /***
237 * Remove the value at the given index, relocating its
238 * successors as necessary.
239 *
240 * @param index
241 */
242 protected void removeAt(long index) {
243 count--;
244 clearAt(index);
245 long probeIndex = index + 1;
246 while (true) {
247 if (probeIndex == 1 << capacityPowerOfTwo) {
248 probeIndex = 0;
249 }
250 if (getSlotState(probeIndex) < 0) {
251
252 break;
253 }
254 long val = getAt(probeIndex);
255 long newIndex = indexFor(val);
256 if (newIndex != probeIndex) {
257
258 newIndex = asDataSlot(newIndex);
259 relocate(val, probeIndex, newIndex);
260 }
261 probeIndex++;
262 }
263 }
264
265 protected abstract void clearAt(long index);
266
267 protected abstract void relocate(long value, long fromIndex, long toIndex);
268
269 /***
270 * Low-cost, non-definitive (except when true) contains
271 * test. Default answer of false is acceptable.
272 *
273 * @see org.archive.util.fingerprint.LongFPSet#quickContains(long)
274 */
275 public boolean quickContains(long fp) {
276 return false;
277 }
278
279 /***
280 * given a slot index, which could or could not be empty, return it as
281 * a slot index indicating an non-empty slot
282 *
283 * @param index the slot index to convert
284 * @return the index, converted to represent an slot with data
285 */
286 private long asDataSlot(final long index) {
287 if (slotHasData(index)) {
288 return index;
289 }
290 return - (index + 1);
291 }
292
293 /***
294 * Given a slot index, which could or could not be empty, return it as
295 * a slot index indicating an empty slot
296 * @param index the slot index to convert
297 * @return the index, converted to represent an empty slot
298 */
299 private long asEmptySlot(final long index) {
300 if (!slotHasData(index)) {
301 return index;
302 }
303 return -index - 1;
304 }
305
306 /***
307 * Does this index represent a slot with data?
308 *
309 * @param index the index to check
310 * @return <code>true</code> if the slot has data
311 */
312 private boolean slotHasData(final long index) {
313 return index >= 0;
314 }
315 }