1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 package org.archive.crawler.admin;
27
28 import java.io.Serializable;
29 import java.util.logging.Logger;
30
31 import org.archive.crawler.datamodel.CandidateURI;
32 import org.archive.crawler.datamodel.CoreAttributeConstants;
33 import org.archive.crawler.datamodel.CrawlURI;
34
35 /***
36 * Record of all interesting info about the most-recent
37 * processing of a specific seed.
38 *
39 * @author gojomo
40 */
41 public class SeedRecord implements CoreAttributeConstants, Serializable {
42 private static final long serialVersionUID = -8455358640509744478L;
43 private static Logger logger =
44 Logger.getLogger(SeedRecord.class.getName());
45 private final String uri;
46 private int statusCode;
47 private String disposition;
48 private String redirectUri;
49
50 /***
51 * Create a record from the given CrawlURI and disposition string
52 *
53 * @param curi CrawlURI, already processed as reported to StatisticsTracker
54 * @param disposition descriptive disposition string
55 *
56 */
57 public SeedRecord(CrawlURI curi, String disposition) {
58 super();
59 this.uri = curi.toString();
60 this.statusCode = curi.getFetchStatus();
61 this.disposition = disposition;
62 if (statusCode==301 || statusCode == 302) {
63 for (CandidateURI cauri: curi.getOutCandidates()) {
64 if("location:".equalsIgnoreCase(cauri.getViaContext().
65 toString())) {
66 redirectUri = cauri.toString();
67 }
68 }
69 }
70 }
71
72 /***
73 * Constructor for when a CrawlURI is unavailable; such
74 * as when considering seeds not yet passed through as
75 * CrawlURIs.
76 *
77 * @param uri
78 * @param disposition
79 */
80 public SeedRecord(String uri, String disposition) {
81 this(uri, disposition, -1, null);
82 }
83
84 /***
85 * Create a record from the given URI, disposition, HTTP status code,
86 * and redirect URI.
87 * @param uri
88 * @param disposition
89 * @param statusCode
90 * @param redirectUri
91 */
92 public SeedRecord(String uri, String disposition, int statusCode,
93 String redirectUri) {
94 super();
95 this.uri = uri;
96 this.statusCode = statusCode;
97 this.disposition = disposition;
98 this.redirectUri = redirectUri;
99 }
100
101
102 /***
103 * A later/repeat report of the same seed has arrived; update with
104 * latest. Should be rare/never?
105 *
106 * @param curi
107 */
108 public void updateWith(CrawlURI curi,String disposition) {
109 if(!this.uri.equals(curi.toString())) {
110 logger.warning("SeedRecord URI changed: "+uri+"->"+curi.toString());
111 }
112 this.statusCode = curi.getFetchStatus();
113 this.disposition = disposition;
114 if (statusCode==301 || statusCode == 302) {
115 for (CandidateURI cauri: curi.getOutCandidates()) {
116 if("location:".equalsIgnoreCase(cauri.getViaContext().
117 toString())) {
118 redirectUri = cauri.toString();
119 }
120 }
121 } else {
122 redirectUri = null;
123 }
124 }
125
126 /***
127 * @return Returns the disposition.
128 */
129 public String getDisposition() {
130 return disposition;
131 }
132 /***
133 * @return Returns the redirectUri.
134 */
135 public String getRedirectUri() {
136 return redirectUri;
137 }
138 /***
139 * @return Returns the statusCode.
140 */
141 public int getStatusCode() {
142 return statusCode;
143 }
144 /***
145 * @return Returns the uri.
146 */
147 public String getUri() {
148 return uri;
149 }
150 }