1 /* PrefixSet.java
2 *
3 * $Id: PrefixSet.java 4947 2007-03-01 04:47:24Z gojomo $
4 *
5 * Created April 29, 2008
6 *
7 * Copyright (C) 2008 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26 package org.archive.util;
27
import java.util.Collection;
import java.util.SortedSet;
import java.util.TreeSet;
30
/**
 * Utility class for maintaining a sorted set of string prefixes.
 *
 * <p>Redundant prefixes are coalesced into the shorter prefix: whenever one
 * entry is a prefix of another, only the shorter entry is kept. This
 * invariant is enforced by {@link #add(String)} and
 * {@link #addAll(Collection)}, and {@link #containsPrefixOf(String)} relies
 * on it. Elements inserted by other routes (for example through a subset
 * view such as {@code headSet}) are not coalesced.
 */
public class PrefixSet extends TreeSet<String> {
    private static final long serialVersionUID = -6054697706348411992L;

    /** Creates an empty prefix set using natural String ordering. */
    public PrefixSet() {
        super();
    }

    /**
     * Tests whether the given String is prefixed by one of this set's
     * entries. An entry equal to {@code s} counts as a prefix.
     *
     * @param s the string to test
     * @return true if some entry of this set is a prefix of (or equal to)
     *         {@code s}
     */
    public boolean containsPrefixOf(String s) {
        SortedSet<String> shorter = headSet(s);
        // Because redundant prefixes have been eliminated, any prefix of s
        // that sorts before s must be the greatest element below s, so only
        // the last item of the head set needs testing.
        if (!shorter.isEmpty() && s.startsWith(shorter.last())) {
            return true;
        }
        // headSet excludes its boundary, so an exact match is checked
        // separately.
        return contains(s);
    }

    /**
     * Adds a prefix while maintaining the additional invariant: if one
     * entry is a prefix of another, keep only the prefix.
     *
     * <p>If an existing entry is already a prefix of {@code s}, nothing is
     * added. Otherwise {@code s} is inserted and every existing entry that
     * strictly extends {@code s} is removed.
     *
     * @param s the prefix to add
     * @return true if {@code s} itself was newly inserted into the set
     * @see java.util.Collection#add(java.lang.Object)
     */
    @Override
    public boolean add(String s) {
        SortedSet<String> shorter = headSet(s);
        if (!shorter.isEmpty() && s.startsWith(shorter.last())) {
            // no need to add; a covering prefix is already present
            return false;
        }
        boolean added = super.add(s);
        // s + "\0" is the smallest string that strictly extends s, so this
        // tail view begins at the first possible redundant entry; all
        // entries starting with s are contiguous from there.
        SortedSet<String> longer = tailSet(s + "\0");
        while (!longer.isEmpty() && longer.first().startsWith(s)) {
            // remove redundant entries now covered by s
            longer.remove(longer.first());
        }
        return added;
    }

    /**
     * Adds every element of the given collection through
     * {@link #add(String)} so that the prefix-coalescing invariant holds.
     *
     * <p>{@code TreeSet.addAll} bulk-loads a sorted source directly into an
     * empty set without calling {@code add}, which would let redundant
     * entries in; this override avoids that path.
     *
     * @param c the prefixes to add
     * @return true if at least one element was newly inserted
     * @see java.util.Collection#addAll(java.util.Collection)
     */
    @Override
    public boolean addAll(Collection<? extends String> c) {
        boolean changed = false;
        for (String s : c) {
            if (add(s)) {
                changed = true;
            }
        }
        return changed;
    }

}