View Javadoc

1   /*
2    * Histotable.java
3    * 
4    * Created on Aug 5, 2004
5    *
6    * $Id: Histotable.java 5047 2007-04-10 01:47:42Z gojomo $
7    *
8    * Copyright (C) 2007 Internet Archive.
9    *
10   * This file is part of the Heritrix web crawler (crawler.archive.org).
11   *
12   * Heritrix is free software; you can redistribute it and/or modify
13   * it under the terms of the GNU Lesser Public License as published by
14   * the Free Software Foundation; either version 2.1 of the License, or
15   * any later version.
16   *
17   * Heritrix is distributed in the hope that it will be useful,
18   * but WITHOUT ANY WARRANTY; without even the implied warranty of
19   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20   * GNU Lesser Public License for more details.
21   *
22   * You should have received a copy of the GNU Lesser Public License
23   * along with Heritrix; if not, write to the Free Software
24   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25   */
26   
27  
28  package org.archive.util;
29  
30  import java.util.Comparator;
31  import java.util.HashMap;
32  import java.util.Map;
33  import java.util.TreeSet;
34  
35  
36  /***
37   * Collect and report frequency information. 
38   * 
39   * Assumes external synchornization.
40   * 
41   * @author gojomo
42   */
43  public class Histotable<K> extends HashMap<K,Long> {    
44      private static final long serialVersionUID = 310306238032568623L;
45      
46      /***
47       * Record one more occurence of the given object key.
48       * 
49       * @param key Object key.
50       */
51      public void tally(K key) {
52          tally(key,1L);
53      }
54      
55      /***
56       * Record <i>count</i> more occurence(s) of the given object key.
57       * 
58       * @param key Object key.
59       */
60      public void tally(K key,long count) {
61          if (containsKey(key)) {
62              put(key,get(key) + count);
63          } else {
64              // if we didn't find this key add it
65              put(key, count);
66          }
67      }
68      
69      /***
70       * @return Return an up-to-date sorted version of the totalled info.
71       */
72      public TreeSet getSortedByCounts() {
73          // sorted by count
74          TreeSet<Map.Entry<K,Long>> sorted = 
75            new TreeSet<Map.Entry<K,Long>>(
76             new Comparator<Map.Entry<K,Long>>() {
77              public int compare(Map.Entry<K,Long> e1, 
78                      Map.Entry<K,Long> e2) {
79                  long firstVal = e1.getValue();
80                  long secondVal = e2.getValue();
81                  if (firstVal < secondVal) { return 1; }
82                  if (secondVal < firstVal) { return -1; }
83                  // If the values are the same, sort by keys.
84                  String firstKey = (String) ((Map.Entry) e1).getKey();
85                  String secondKey = (String) ((Map.Entry) e2).getKey();
86                  return firstKey.compareTo(secondKey);
87              }
88          });
89          
90          sorted.addAll(entrySet());
91          return sorted;
92      }
93      
94      /***
95       * @return Return an up-to-date sorted version of the totalled info.
96       */
97      public TreeSet getSortedByKeys() {
98          TreeSet<Map.Entry<K,Long>> sorted = 
99            new TreeSet<Map.Entry<K,Long>>(
100            new Comparator<Map.Entry<K,Long>>() {
101             @SuppressWarnings("unchecked")
102             public int compare(Map.Entry<K,Long> e1, 
103                     Map.Entry<K,Long> e2) {
104                 K firstKey = e1.getKey();
105                 K secondKey = e2.getKey();
106                 // If the values are the same, sort by keys.
107                 return ((Comparable<K>)firstKey).compareTo(secondKey);
108             }
109         });   
110         sorted.addAll(entrySet());
111         return sorted;
112     }
113     
114     /***
115      * Return the largest value of any key that is larger than 0. If no 
116      * values or no value larger than zero, return zero. 
117      * 
118      * @return long largest value or zero if none larger than zero
119      */
120     public long getLargestValue() {
121         long largest = 0; 
122         for (Long el : values()) {
123             if (el > largest) {
124                 largest = el;
125             }
126         }
127         return largest;
128     }
129     
130     /***
131      * Return the total of all tallies. 
132      * 
133      * @return long total of all tallies
134      */
135     public long getTotal() {
136         long total = 0; 
137         for (Long el : values()) {
138             total += el; 
139         }
140         return total;
141     }
142     
143     /***
144      * Utility method to convert a key-&gt;Long into
145      * the string "count key".
146      * 
147      * @param e Map key.
148      * @return String 'count key'.
149      */
150     public static String entryString(Object e) {
151         Map.Entry entry = (Map.Entry) e;
152         return entry.getValue() + " " + entry.getKey();
153     }
154     
155     public long add(Histotable<K> ht) {
156         long net = 0;
157         for (K key : ht.keySet()) {
158             long change = ht.get(key);
159             net += change;
160             tally(key,change);
161         }
162         return net;
163     }
164     public long subtract(Histotable<K> ht) {
165         long net = 0; 
166         for (K key : ht.keySet()) {
167             long change = ht.get(key);
168             net -= change;
169             tally(key,-change);
170         }
171         return net;
172     }
173 }