1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.util;
26 import java.io.BufferedReader;
27 import java.io.BufferedWriter;
28 import java.io.File;
29 import java.io.FileReader;
30 import java.io.FileWriter;
31 import java.io.IOException;
32
33 import org.archive.crawler.datamodel.CandidateURI;
34 import org.archive.crawler.datamodel.UriUniqFilter;
35 import org.archive.util.fingerprint.MemLongFPSet;
36
37
38 /***
39 * BenchmarkUriUniqFilters
40 *
41 * @author gojomo
42 */
43 public class BenchmarkUriUniqFilters implements UriUniqFilter.HasUriReceiver {
44
45
46
47 private BufferedWriter out;
48 String current;
49
50 /***
51 * Test the UriUniqFilter implementation (MemUriUniqFilter,
52 * BloomUriUniqFilter, or BdbUriUniqFilter) named in first
53 * argument against the file of one-per-line URIs named
54 * in the second argument.
55 *
56 * @param args from cmd-line
57 * @throws IOException
58 */
59 public static void main(String[] args) throws IOException {
60 (new BenchmarkUriUniqFilters()).instanceMain(args);
61 }
62
63 public void instanceMain(String[] args) throws IOException {
64 String testClass = args[0];
65 String inputFilename = args[1];
66 long start = System.currentTimeMillis();
67 UriUniqFilter uniq = createUriUniqFilter(testClass);
68 long created = System.currentTimeMillis();
69 BufferedReader br = new BufferedReader(new FileReader(inputFilename));
70 if(args.length>2) {
71 String outputFilename = args[2];
72 out = new BufferedWriter(new FileWriter(outputFilename));
73 }
74 int added = 0;
75 while((current=br.readLine())!=null) {
76 added++;
77 uniq.add(current,null);
78 }
79 uniq.close();
80 long finished = System.currentTimeMillis();
81 if(out!=null) {
82 out.close();
83 }
84 System.out.println(added+" adds");
85 System.out.println(uniq.count()+" retained");
86 System.out.println((created-start)+"ms to setup UUF");
87 System.out.println((finished-created)+"ms to perform all adds");
88 }
89
90 private UriUniqFilter createUriUniqFilter(String testClass) throws IOException {
91 UriUniqFilter uniq = null;
92 if(BdbUriUniqFilter.class.getName().endsWith(testClass)) {;
93
94 File tmpDir = File.createTempFile("uuf","benchmark");
95 tmpDir.delete();
96 tmpDir.mkdir();
97 uniq = new BdbUriUniqFilter(tmpDir, 50);
98 } else if(BloomUriUniqFilter.class.getName().endsWith(testClass)) {
99
100 uniq = new BloomUriUniqFilter();
101 } else if(MemUriUniqFilter.class.getName().endsWith(testClass)) {
102
103 uniq = new MemUriUniqFilter();
104 } else if (FPUriUniqFilter.class.getName().endsWith(testClass)) {
105
106 uniq = new FPUriUniqFilter(new MemLongFPSet(21,0.75f));
107 }
108 uniq.setDestination(this);
109 return uniq;
110 }
111
112
113
114
115 public void receive(CandidateURI item) {
116 if(out!=null) {
117 try {
118
119
120
121 out.write(current);
122 out.write("\n");
123 } catch (IOException e) {
124
125 e.printStackTrace();
126 }
127 }
128 }
129 }