View Javadoc

1   /* Copyright (C) 2003 Internet Archive.
2    *
3    * This file is part of the Heritrix web crawler (crawler.archive.org).
4    *
5    * Heritrix is free software; you can redistribute it and/or modify
6    * it under the terms of the GNU Lesser Public License as published by
7    * the Free Software Foundation; either version 2.1 of the License, or
8    * any later version.
9    *
10   * Heritrix is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU Lesser Public License for more details.
14   *
15   * You should have received a copy of the GNU Lesser Public License
16   * along with Heritrix; if not, write to the Free Software
17   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18   *
19   * FetchStatusCodes.java
20   * Created on Jun 19, 2003
21   *
22   * $Header$
23   */
24  package org.archive.crawler.datamodel;
25  
/**
 * Constant flag codes to be used, in lieu of per-protocol
 * codes (like HTTP's 200, 404, etc.), when network/internal/
 * out-of-band conditions occur.
 *
 * <p>The URISelector may use such codes, along with user-configured
 * options, to determine whether, when, and how many times
 * a CrawlURI might be reattempted.
 *
 * <p>Fields declared in an interface are implicitly
 * {@code public static final}, so the modifiers are not repeated on
 * each constant.
 *
 * @author gojomo
 */
public interface FetchStatusCodes {
    /** Fetch never tried (perhaps protocol unsupported or illegal URI). */
    int S_UNATTEMPTED = 0;
    /** DNS lookup failed. */
    int S_DOMAIN_UNRESOLVABLE = -1;
    /** HTTP connect failed. */
    int S_CONNECT_FAILED = -2;
    /** HTTP connect broken. */
    int S_CONNECT_LOST = -3;
    /** HTTP timeout (before any meaningful response received). */
    int S_TIMEOUT = -4;
    /** Unexpected runtime exception; see runtime-errors.log. */
    int S_RUNTIME_EXCEPTION = -5;
    /** DNS prerequisite failed, precluding attempt. */
    int S_DOMAIN_PREREQUISITE_FAILURE = -6;
    /** URI recognized as unsupported or illegal. */
    int S_UNFETCHABLE_URI = -7;
    /** Multiple retries all failed. */
    int S_TOO_MANY_RETRIES = -8;

    /**
     * Temporary status assigned URIs awaiting preconditions; appearance in
     * logs is a bug.
     */
    int S_DEFERRED = -50;
    /**
     * URI could not be queued in Frontier; when URIs are properly
     * filtered for format, should never occur.
     */
    int S_UNQUEUEABLE = -60;

    /** Robots prerequisite failed, precluding attempt. */
    int S_ROBOTS_PREREQUISITE_FAILURE = -61;
    /** Some other (non-DNS, non-robots) prerequisite failed, precluding attempt. */
    int S_OTHER_PREREQUISITE_FAILURE = -62;
    /** A prerequisite could not be scheduled, precluding attempt. */
    int S_PREREQUISITE_UNSCHEDULABLE_FAILURE = -63;

    /**
     * Synthetic status, used when some other status (such as connection-lost)
     * is considered by policy the same as a document-not-found.
     */
    int S_DEEMED_NOT_FOUND = -404;

    /**
     * Severe java 'Error' conditions (OutOfMemoryError, StackOverflowError,
     * etc.) during URI processing.
     */
    int S_SERIOUS_ERROR = -3000;

    /** 'Chaff' detection of traps/content of negligible value applied. */
    int S_DEEMED_CHAFF = -4000;
    /** Overstepped link hops. */
    int S_TOO_MANY_LINK_HOPS = -4001;
    /** Overstepped embed/trans hops. */
    int S_TOO_MANY_EMBED_HOPS = -4002;
    /**
     * Out-of-scope upon reexamination (only when scope changes during
     * crawl).
     */
    int S_OUT_OF_SCOPE = -5000;
    /** Blocked from fetch by user setting. */
    int S_BLOCKED_BY_USER = -5001;
    /**
     * Blocked by custom prefetcher processor.
     * A check against scope or against filters in a custom prefetch
     * processor rules CrawlURI should not be crawled.
     * TODO: Add to documentation and help page.
     */
    int S_BLOCKED_BY_CUSTOM_PROCESSOR = -5002;
    /**
     * Blocked due to exceeding an established quota.
     * TODO: Add to documentation and help page.
     */
    int S_BLOCKED_BY_QUOTA = -5003;
    /**
     * Blocked due to exceeding an established runtime.
     * TODO: Add to documentation and help page.
     */
    int S_BLOCKED_BY_RUNTIME_LIMIT = -5004;
    /** Deleted from frontier by user. */
    int S_DELETED_BY_USER = -6000;

    /** Processing thread was killed. */
    int S_PROCESSING_THREAD_KILLED = -7000;

    /** Robots rules precluded fetch. */
    int S_ROBOTS_PRECLUDED = -9998;

    /** DNS success. */
    int S_DNS_SUCCESS = 1;
    /** InetAddress.getByName success. */
    int S_GETBYNAME_SUCCESS = 1001;
}
122 
123