1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.util;
26
27 import java.io.BufferedInputStream;
28 import java.io.File;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.InputStreamReader;
32 import java.io.OutputStream;
33 import java.nio.charset.Charset;
34 import java.util.logging.Level;
35 import java.util.logging.Logger;
36
37 import org.archive.io.RecordingInputStream;
38 import org.archive.io.RecordingOutputStream;
39 import org.archive.io.ReplayCharSequence;
40 import org.archive.io.ReplayInputStream;
41
42 import com.google.common.base.Charsets;
43
44
45 /***
46 * Pairs together a RecordingInputStream and RecordingOutputStream
47 * to capture exactly a single HTTP transaction.
48 *
49 * Initially only supports HTTP/1.0 (one request, one response per stream)
50 *
51 * Call {@link #markContentBegin()} to demarc the transition between HTTP
52 * header and body.
53 *
54 * @author gojomo
55 */
56 public class HttpRecorder {
57 protected static Logger logger =
58 Logger.getLogger("org.archive.util.HttpRecorder");
59
60 private static final int DEFAULT_OUTPUT_BUFFER_SIZE = 4096;
61 private static final int DEFAULT_INPUT_BUFFER_SIZE = 65536;
62
63 private RecordingInputStream ris = null;
64 private RecordingOutputStream ros = null;
65
66 /***
67 * Backing file basename.
68 *
69 * Keep it around so can clean up backing files left on disk.
70 */
71 private String backingFileBasename = null;
72
73 /***
74 * Backing file output stream suffix.
75 */
76 private static final String RECORDING_OUTPUT_STREAM_SUFFIX = ".ros";
77
78 /***
79 * Backing file input stream suffix.
80 */
81 private static final String RECORDING_INPUT_STREAM_SUFFIX = ".ris";
82
83 /***
84 * Response character encoding.
85 */
86 private String characterEncoding = null;
87
88 /***
89 * Constructor with limited access.
90 * Used internally for case where we're wrapping an already
91 * downloaded stream with a HttpRecorder.
92 */
93 protected HttpRecorder() {
94 super();
95 }
96
97 /***
98 * Create an HttpRecorder.
99 *
100 * @param tempDir Directory into which we drop backing files for
101 * recorded input and output.
102 * @param backingFilenameBase Backing filename base to which we'll append
103 * suffices <code>ris</code> for recorded input stream and
104 * <code>ros</code> for recorded output stream.
105 * @param outBufferSize Size of output buffer to use.
106 * @param inBufferSize Size of input buffer to use.
107 */
108 public HttpRecorder(File tempDir, String backingFilenameBase,
109 int outBufferSize, int inBufferSize) {
110 super();
111 tempDir.mkdirs();
112 this.backingFileBasename =
113 (new File(tempDir.getPath(), backingFilenameBase))
114 .getAbsolutePath();
115 this.ris = new RecordingInputStream(inBufferSize,
116 this.backingFileBasename + RECORDING_INPUT_STREAM_SUFFIX);
117 this.ros = new RecordingOutputStream(outBufferSize,
118 this.backingFileBasename + RECORDING_OUTPUT_STREAM_SUFFIX);
119 }
120
121 /***
122 * Create an HttpRecorder.
123 *
124 * @param tempDir
125 * Directory into which we drop backing files for recorded input
126 * and output.
127 * @param backingFilenameBase
128 * Backing filename base to which we'll append suffices
129 * <code>ris</code> for recorded input stream and
130 * <code>ros</code> for recorded output stream.
131 */
132 public HttpRecorder(File tempDir, String backingFilenameBase) {
133 this(tempDir, backingFilenameBase, DEFAULT_INPUT_BUFFER_SIZE,
134 DEFAULT_OUTPUT_BUFFER_SIZE);
135 }
136
137 /***
138 * Wrap the provided stream with the internal RecordingInputStream
139 *
140 * open() throws an exception if RecordingInputStream is already open.
141 *
142 * @param is InputStream to wrap.
143 *
144 * @return The input stream wrapper which itself is an input stream.
145 * Pass this in place of the passed stream so input can be recorded.
146 *
147 * @throws IOException
148 */
149 public InputStream inputWrap(InputStream is)
150 throws IOException {
151 logger.fine(Thread.currentThread().getName() + " wrapping input");
152 this.ris.open(is);
153 return this.ris;
154 }
155
156 /***
157 * Wrap the provided stream with the internal RecordingOutputStream
158 *
159 * open() throws an exception if RecordingOutputStream is already open.
160 *
161 * @param os The output stream to wrap.
162 *
163 * @return The output stream wrapper which is itself an output stream.
164 * Pass this in place of the passed stream so output can be recorded.
165 *
166 * @throws IOException
167 */
168 public OutputStream outputWrap(OutputStream os)
169 throws IOException {
170 this.ros.open(os);
171 return this.ros;
172 }
173
174 /***
175 * Close all streams.
176 */
177 public void close() {
178 logger.fine(Thread.currentThread().getName() + " closing");
179 try {
180 this.ris.close();
181 } catch (IOException e) {
182
183
184 DevUtils.logger.log(Level.SEVERE, "close() ris" +
185 DevUtils.extraInfo(), e);
186 }
187 try {
188 this.ros.close();
189 } catch (IOException e) {
190 DevUtils.logger.log(Level.SEVERE, "close() ros" +
191 DevUtils.extraInfo(), e);
192 }
193 }
194
195 /***
196 * Return the internal RecordingInputStream
197 *
198 * @return A RIS.
199 */
200 public RecordingInputStream getRecordedInput() {
201 return this.ris;
202 }
203
204 /***
205 * @return The RecordingOutputStream.
206 */
207 public RecordingOutputStream getRecordedOutput() {
208 return this.ros;
209 }
210
211 /***
212 * Mark current position as the point where the HTTP headers end.
213 */
214 public void markContentBegin() {
215 this.ris.markContentBegin();
216 }
217
218 public long getResponseContentLength() {
219 return this.ris.getResponseContentLength();
220 }
221
222 /***
223 * Close both input and output recorders.
224 *
225 * Recorders are the output streams to which we are recording.
226 * {@link #close()} closes the stream that is being recorded and the
227 * recorder. This method explicitly closes the recorder only.
228 */
229 public void closeRecorders() {
230 try {
231 this.ris.closeRecorder();
232 this.ros.closeRecorder();
233 } catch (IOException e) {
234 DevUtils.warnHandle(e, "Convert to runtime exception?");
235 }
236 }
237
238 /***
239 * Cleanup backing files.
240 *
241 * Call when completely done w/ recorder. Removes any backing files that
242 * may have been dropped.
243 */
244 public void cleanup() {
245 this.close();
246 this.delete(this.backingFileBasename + RECORDING_OUTPUT_STREAM_SUFFIX);
247 this.delete(this.backingFileBasename + RECORDING_INPUT_STREAM_SUFFIX);
248 }
249
250 /***
251 * Delete file if exists.
252 *
253 * @param name Filename to delete.
254 */
255 private void delete(String name) {
256 File f = new File(name);
257 if (f.exists()) {
258 f.delete();
259 }
260 }
261
262 /***
263 * Get the current threads' HttpRecorder.
264 *
265 * @return This threads' HttpRecorder. Returns null if can't find a
266 * HttpRecorder in current instance.
267 */
268 public static HttpRecorder getHttpRecorder() {
269 HttpRecorder recorder = null;
270 Thread thread = Thread.currentThread();
271 if (thread instanceof HttpRecorderMarker) {
272 recorder = ((HttpRecorderMarker)thread).getHttpRecorder();
273 }
274 return recorder;
275 }
276
277 /***
278 * @param characterEncoding Character encoding of recording.
279 */
280 public void setCharacterEncoding(String characterEncoding) {
281 this.characterEncoding = characterEncoding;
282 }
283
284 /***
285 * @return Returns the characterEncoding.
286 */
287 public String getCharacterEncoding() {
288 return this.characterEncoding;
289 }
290
291 /***
292 * @return A ReplayCharSequence. Call close on the RCS when done w/ it.
293 * Will return indeterminate results if the underlying recording streams
294 * have not been closed first.
295 * @throws IOException
296 * @throws IOException
297 */
298 public ReplayCharSequence getReplayCharSequence() throws IOException {
299 return getRecordedInput().
300 getReplayCharSequence(this.characterEncoding);
301 }
302
303 /***
304 * @return A replay input stream.
305 * @throws IOException
306 */
307 public ReplayInputStream getReplayInputStream() throws IOException {
308 return getRecordedInput().getReplayInputStream();
309 }
310
311 /***
312 * Return a short prefix of the presumed-textual content as a String.
313 *
314 * @param size max length of String to return
315 * @return String prefix, or empty String (with logged exception) on any error
316 */
317 public String getContentReplayPrefixString(int size) {
318 Charset charset = Charsets.ISO_8859_1;
319 if (characterEncoding != null) {
320 try {
321 charset = Charset.forName(characterEncoding);
322 } catch (IllegalArgumentException e) {
323
324 }
325 }
326
327 try {
328 InputStreamReader isr = new InputStreamReader(getRecordedInput().getContentReplayInputStream(), charset);
329 char[] chars = new char[size];
330 int count = isr.read(chars);
331 isr.close();
332 return new String(chars,0,count);
333 } catch (IOException e) {
334 logger.log(Level.SEVERE,"unable to get replay prefix string", e);
335 return "";
336 }
337 }
338
339 /***
340 * Record the input stream for later playback by an extractor, etc.
341 * This is convenience method used to setup an artificial HttpRecorder
342 * scenario used in unit tests, etc.
343 * @param dir Directory to write backing file to.
344 * @param basename of what we're recording.
345 * @param in Stream to read.
346 * @param encoding Stream encoding.
347 * @throws IOException
348 * @return An {@link org.archive.util.HttpRecorder}.
349 */
350 public static HttpRecorder wrapInputStreamWithHttpRecord(File dir,
351 String basename, InputStream in, String encoding)
352 throws IOException {
353 HttpRecorder rec = new HttpRecorder(dir, basename);
354 if (encoding != null && encoding.length() > 0) {
355 rec.setCharacterEncoding(encoding);
356 }
357
358
359 InputStream is = rec.inputWrap(new BufferedInputStream(in));
360 rec.markContentBegin();
361
362 final int BUFFER_SIZE = 1024 * 4;
363 byte [] buffer = new byte[BUFFER_SIZE];
364 while(true) {
365
366 int x = is.read(buffer);
367 if (x == -1) {
368 break;
369 }
370 }
371 is.close();
372 return rec;
373 }
374 }