View Javadoc

1   /* PrefixSet.java
2    *
3    * $Id: PrefixSet.java 4947 2007-03-01 04:47:24Z gojomo $
4    *
5    * Created April 29, 2008
6    *
7    * Copyright (C) 2008 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  
26  package org.archive.util;
27  
28  import java.util.SortedSet;
29  import java.util.TreeSet;
30  
/**
 * Utility class for maintaining a sorted set of string prefixes.
 *
 * <p>Redundant prefixes are coalesced into the shorter prefix: the set never
 * holds two entries where one is a prefix of the other, provided entries are
 * inserted through {@link #add(String)} or
 * {@link #addAll(java.util.Collection)}.
 *
 * <p>Entries are ordered by the natural (lexicographic) ordering of
 * {@link String}; the prefix lookups below rely on that ordering.
 */
public class PrefixSet extends TreeSet<String> {
    private static final long serialVersionUID = -6054697706348411992L;

    /** Creates an empty prefix set using natural String ordering. */
    public PrefixSet() {
        super();
    }

    /**
     * Tests whether the given String is prefixed by one of this set's
     * entries. An entry equal to {@code s} counts as a prefix of it.
     *
     * @param s the string to test
     * @return {@code true} if some entry of this set is a prefix of
     *         (or equal to) {@code s}
     * @throws NullPointerException if {@code s} is null
     */
    public boolean containsPrefixOf(String s) {
        SortedSet<String> shorter = headSet(s);
        // Because redundant prefixes have been eliminated, any entry that is
        // a proper prefix of s must be the greatest entry sorting before s,
        // so only the last item of the head set needs testing.
        if (!shorter.isEmpty() && s.startsWith(shorter.last())) {
            return true; // a proper prefix exists
        }
        // headSet excludes its boundary, so s itself may still be an entry.
        return contains(s);
    }

    /**
     * Adds a prefix while maintaining the additional invariant: if one entry
     * is a prefix of another, only the (shorter) prefix is kept.
     *
     * <p>If an existing entry is already a prefix of {@code s}, nothing is
     * added. Otherwise {@code s} is inserted and every existing entry that
     * has {@code s} as a proper prefix is removed.
     *
     * @param s the prefix to add
     * @return {@code true} if {@code s} was inserted; {@code false} if it was
     *         already present or is covered by an existing shorter prefix
     * @throws NullPointerException if {@code s} is null
     * @see java.util.Collection#add(java.lang.Object)
     */
    @Override
    public boolean add(String s) {
        SortedSet<String> shorter = headSet(s);
        if (!shorter.isEmpty() && s.startsWith(shorter.last())) {
            // no need to add; a prefix of s is already present
            return false;
        }
        boolean added = super.add(s);
        // s + "\0" is the smallest string strictly greater than s, so this
        // tail view starts at the first candidate that could extend s.
        // Entries extending s sort contiguously from there.
        SortedSet<String> longer = tailSet(s + "\0");
        while (!longer.isEmpty() && longer.first().startsWith(s)) {
            // remove entries made redundant by s (the view writes through)
            longer.remove(longer.first());
        }
        return added;
    }

    /**
     * Adds every element of the given collection through
     * {@link #add(String)} so that the prefix-coalescing invariant holds for
     * bulk insertions too.
     *
     * <p>Without this override, the inherited {@code TreeSet.addAll} may
     * bulk-load a {@link SortedSet} argument into an empty set without
     * calling {@code add} (as the OpenJDK implementation does), which would
     * let redundant entries in.
     *
     * @param c the prefixes to add
     * @return {@code true} if this set changed as a result of the call
     * @throws NullPointerException if {@code c} or any of its elements is null
     */
    @Override
    public boolean addAll(java.util.Collection<? extends String> c) {
        boolean changed = false;
        for (String candidate : c) {
            if (add(candidate)) {
                changed = true;
            }
        }
        return changed;
    }

}