1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.archive.crawler.datamodel;
25
26 /***
27 * Constant flag codes to be used, in lieu of per-protocol
28 * codes (like HTTP's 200, 404, etc.), when network/internal/
29 * out-of-band conditions occur.
30 *
31 * The URISelector may use such codes, along with user-configured
32 * options, to determine whether, when, and how many times
33 * a CrawlURI might be reattempted.
34 *
35 * @author gojomo
36 *
37 */
38 public interface FetchStatusCodes {
39 /*** fetch never tried (perhaps protocol unsupported or illegal URI) */
40 public static final int S_UNATTEMPTED = 0;
41 /*** DNS lookup failed */
42 public static final int S_DOMAIN_UNRESOLVABLE = -1;
43 /*** HTTP connect failed */
44 public static final int S_CONNECT_FAILED = -2;
45 /*** HTTP connect broken */
46 public static final int S_CONNECT_LOST = -3;
47 /*** HTTP timeout (before any meaningful response received) */
48 public static final int S_TIMEOUT = -4;
49 /*** Unexpected runtime exception; see runtime-errors.log */
50 public static final int S_RUNTIME_EXCEPTION = -5;
51 /*** DNS prerequisite failed, precluding attempt */
52 public static final int S_DOMAIN_PREREQUISITE_FAILURE = -6;
53 /*** URI recognized as unsupported or illegal) */
54 public static final int S_UNFETCHABLE_URI = -7;
55 /*** multiple retries all failed */
56 public static final int S_TOO_MANY_RETRIES = -8;
57
58 /*** temporary status assigned URIs awaiting preconditions; appearance in
59 * logs is a bug */
60 public static final int S_DEFERRED = -50;
61 /*** URI could not be queued in Frontier; when URIs are properly
62 * filtered for format, should never occur */
63 public static final int S_UNQUEUEABLE = -60;
64
65 /*** Robots prerequisite failed, precluding attempt */
66 public static final int S_ROBOTS_PREREQUISITE_FAILURE = -61;
67 /*** DNS prerequisite failed, precluding attempt */
68 public static final int S_OTHER_PREREQUISITE_FAILURE = -62;
69 /*** DNS prerequisite failed, precluding attempt */
70 public static final int S_PREREQUISITE_UNSCHEDULABLE_FAILURE = -63;
71
72 /*** synthetic status, used when some other status (such as connection-lost)
73 * is considered by policy the same as a document-not-found */
74 public static final int S_DEEMED_NOT_FOUND = -404;
75
76 /*** severe java 'Error' conditions (OutOfMemoryError, StackOverflowError,
77 * etc.) during URI processing */
78 public static final int S_SERIOUS_ERROR = -3000;
79
80 /*** 'chaff' detection of traps/content of negligible value applied */
81 public static final int S_DEEMED_CHAFF = -4000;
82 /*** overstepped link hops */
83 public static final int S_TOO_MANY_LINK_HOPS = -4001;
84 /*** overstepped embed/trans hops */
85 public static final int S_TOO_MANY_EMBED_HOPS = -4002;
86 /*** out-of-scope upoin reexamination (only when scope changes during
87 * crawl) */
88 public static final int S_OUT_OF_SCOPE = -5000;
89 /*** blocked from fetch by user setting. */
90 public static final int S_BLOCKED_BY_USER = -5001;
91 /***
92 * Blocked by custom prefetcher processor.
93 * A check against scope or against filters in a custom prefetch
94 * processor rules CrawlURI should not be crawled.
95 * TODO: Add to documentation and help page.
96 */
97 public static final int S_BLOCKED_BY_CUSTOM_PROCESSOR = -5002;
98 /***
99 * Blocked due to exceeding an established quota.
100 * TODO: Add to documentation and help page.
101 */
102 public static final int S_BLOCKED_BY_QUOTA = -5003;
103 /***
104 * Blocked due to exceeding an established runtime.
105 * TODO: Add to documentation and help page.
106 */
107 public static final int S_BLOCKED_BY_RUNTIME_LIMIT = -5004;
108 /*** deleted from frontier by user */
109 public static final int S_DELETED_BY_USER = -6000;
110
111 /*** Processing thread was killed */
112 public static final int S_PROCESSING_THREAD_KILLED = -7000;
113
114 /*** robots rules precluded fetch */
115 public static final int S_ROBOTS_PRECLUDED = -9998;
116
117 /*** DNS success */
118 public static final int S_DNS_SUCCESS = 1;
119 /*** InetAddress.getByName success */
120 public static final int S_GETBYNAME_SUCCESS = 1001;
121 }
122
123