View Javadoc

1   /* BenchmarkBlooms
2   *
3   * $Id: BenchmarkBlooms.java 6891 2010-06-15 22:22:25Z gojomo $
4   *
5   * Created on Jun 30, 2005
6   *
7   * Copyright (C) 2005 Internet Archive
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */
25  package org.archive.util;
26  
27  /***
28   * Simple benchmarking of different BloomFilter
29   * implementations.
30   * 
31   * Take care when interpreting results; the effect of GC,
32   * dynamic compilation, and any other activity on test 
33   * machine may affect relative time tallies in unpredictable
34   * ways.
35   * 
36   * @author Gordon Mohr
37   */
38  public class BenchmarkBlooms {
39  
40  	public static void main(String[] args) {
41  		(new BenchmarkBlooms()).instanceMain(args);
42  	}
43  	
44  	public void instanceMain(String[] args) {
45  		int reps = 
46  			(args.length > 0) ? Integer.parseInt(args[0]) : 3;
47  		int n_expected = 
48  			(args.length > 1) ? Integer.parseInt(args[1]) : 10000000;
49  		int d_hashes = 
50  			(args.length > 2) ? Integer.parseInt(args[2]) : 22;
51  		int adds = 
52  		    	(args.length > 3) ? Integer.parseInt(args[3]) : 10000000;
53  		int contains = 
54  		        (args.length > 4) ? Integer.parseInt(args[4]) : 8000000;
55  	    String prefix = 
56  	    	(args.length > 5) ? args[5] : "http://www.archive.org/";
57  	    
58  	    System.out.println(
59  	    		"reps="+reps+" n_expected="+n_expected+
60  				" d_hashes="+d_hashes+" adds="+adds+
61  				" contains="+contains+" prefix="+prefix);
62  	    
63  	    BloomFilter64bit bloom64;
64  //	    BloomFilter bloom32;
65  //      BloomFilter bloom32split;
66  		for (int r=0;r<reps;r++) {
67  //			bloom32 = new BloomFilter32bit(n_expected,d_hashes);
68  //			testBloom(bloom32,adds,contains,prefix);
69  //			bloom32=null;
70  //            bloom32split = new BloomFilter32bitSplit(n_expected,d_hashes);
71  //            testBloom(bloom32split,adds,contains,prefix);
72  //            bloom32split=null;
73              bloom64 = new BloomFilter64bit(n_expected,d_hashes);
74              testBloom(null, bloom64,adds,contains,prefix);
75              bloom64=null;
76              // rounded up to power-of-2 bits size
77              bloom64 = new BloomFilter64bit(n_expected,d_hashes,true);
78              testBloom("bitsize rounded up",bloom64,adds,contains,prefix);
79              bloom64=null;  
80  		}
81  	}
82  	
83  	/***
84  	 * @param bloom
85  	 * @param prefix
86  	 * @param adds
87  	 * @param d_hashes
88  	 */
89  	private void testBloom(String note, BloomFilter bloom, int adds, int contains, String prefix) {
90  		System.gc();
91  		long startTime = System.currentTimeMillis();
92  		long falsePositivesAdds = 0;
93  		int i = 0; 
94  		for(; i<adds; i++) {
95  			if(!bloom.add(prefix+Integer.toString(i))) {
96  				falsePositivesAdds++;
97  			}
98  		}
99  		long falsPositivesContains = 0; 
100         for(; i<(adds+contains); i++) {
101             if(bloom.contains(prefix+Integer.toString(i))) {
102                 falsPositivesContains++;
103             }
104         }
105 		long finishTime = System.currentTimeMillis();
106 		System.out.println(bloom.getClass().getName()
107 		        +((note!=null)?" ("+note+")" : "")
108 		        +":\n "
109 				+(finishTime-startTime)+"ms "
110 				+bloom.getSizeBytes()+"bytes "
111 				+falsePositivesAdds+" falseDuringAdds "
112 				+falsPositivesContains+" falseDuringContains ");
113 	}
114 }