1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.archive.crawler.event;
20
21 import org.archive.crawler.datamodel.CrawlURI;
22
23 /***
24 * An interface for objects that want to be notified
25 * of a CrawlURI disposition (happens each time a
26 * curi has been through the processors).
27 * Classes implementing this interface can register with
28 * the CrawlController to receive these events.
29 * <p>
30 * This interface is to facilitate the gathering of
31 * statistics on a running crawl.
32 * <p>
33 * <b>WARNING:</b> One of these methods <i>will</i> be
34 * called for <b>each</b> CrawlURI that is processed.
35 * It is therefor imperative that the methods execute
36 * quickly!
37 * <p>
38 * Also note that the object implementing this interface
39 * must under <b>no circumstances</b> maintain a reference
40 * to the CrawlURI beyond the scope of the relevant method
41 * body!
42 *
43 * @author Kristinn Sigurdsson
44 *
45 * @see org.archive.crawler.framework.CrawlController
46 */
47 public interface CrawlURIDispositionListener
48 {
49 /***
50 * Notification of a successfully crawled URI
51 *
52 * @param curi The relevant CrawlURI
53 */
54 public void crawledURISuccessful(CrawlURI curi);
55
56 /***
57 * Notification of a failed crawl of a URI that
58 * will be retried (failure due to possible transient
59 * problems).
60 *
61 * @param curi The relevant CrawlURI
62 */
63 public void crawledURINeedRetry(CrawlURI curi);
64
65 /***
66 * Notification of a crawled URI that is to be disregarded.
67 * Usually this means that the robots.txt file for the
68 * relevant site forbids this from being crawled and we are
69 * therefor not going to keep it. Other reasons may apply.
70 * In all cases this means that it <i>was</i> successfully
71 * downloaded but will not be stored.
72 *
73 * @param curi The relevant CrawlURI
74 */
75 public void crawledURIDisregard(CrawlURI curi);
76
77 /***
78 * Notification of a failed crawling of a URI. The failure
79 * is of a type that precludes retries (either by it's very
80 * nature or because it has been retried to many times)
81 *
82 * @param curi The relevant CrawlURI
83 */
84 public void crawledURIFailure(CrawlURI curi);
85
86 }