View Javadoc

1   package org.archive.crawler.util;
2   
3   import org.apache.commons.httpclient.HttpStatus;
4   import org.archive.crawler.datamodel.CoreAttributeConstants;
5   import org.archive.crawler.datamodel.CrawlURI;
6   import org.archive.crawler.deciderules.recrawl.IdenticalDigestDecideRule;
7   import org.archive.util.Accumulator;
8   import org.archive.util.ArchiveUtils;
9   import org.archive.util.Histotable;
10  
11  public class CrawledBytesHistotable extends Histotable<String> 
12  implements Accumulator<CrawlURI>, CoreAttributeConstants {
13      private static final long serialVersionUID = 7923431123239026213L;
14      
15      public static final String NOTMODIFIED = "not-modified";
16      public static final String DUPLICATE = "dup-by-hash";
17      public static final String NOVEL = "novel";
18  
19      
20      public CrawledBytesHistotable() {
21          super();
22          tally(NOVEL,0);
23      }
24  
25      public void accumulate(CrawlURI curi) {
26          if(curi.getFetchStatus()==HttpStatus.SC_NOT_MODIFIED) {
27              tally(NOTMODIFIED, curi.getContentSize());
28          } else if (IdenticalDigestDecideRule.hasIdenticalDigest(curi)) {
29              tally(DUPLICATE,curi.getContentSize());
30          } else {
31              tally(NOVEL,curi.getContentSize());
32          }
33      }
34      
35      public String summary() {
36          StringBuilder sb = new StringBuilder();
37          sb.append(ArchiveUtils.formatBytesForDisplay(getTotal()));
38          sb.append(" crawled (");
39          sb.append(ArchiveUtils.formatBytesForDisplay(get(NOVEL)));
40          sb.append(" novel");
41          if(get(DUPLICATE)!=null) {
42              sb.append(", ");
43              sb.append(ArchiveUtils.formatBytesForDisplay(get(DUPLICATE)));
44              sb.append(" ");
45              sb.append(DUPLICATE);
46          }
47          if(get(NOTMODIFIED)!=null) {
48              sb.append(", ");
49              sb.append(ArchiveUtils.formatBytesForDisplay(get(NOTMODIFIED)));
50              sb.append(" ");
51              sb.append(NOTMODIFIED);
52          }
53          sb.append(")");
54          return sb.toString();
55      }
56  }