1   /* RepositionableInputStream.java
2    *
3    * $Id: RepositionableInputStream.java 4826 2006-12-22 00:16:27Z stack-sf $
4    *
5    * Created Dec 20, 2005
6    *
7    * Copyright (C) 2005 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.io;
26  
27  import it.unimi.dsi.fastutil.io.RepositionableStream;
28  
29  import java.io.BufferedInputStream;
30  import java.io.IOException;
31  import java.io.InputStream;
32  
33  /***
34   * Wrapper around an {@link InputStream} to make a primitive Repositionable
35   * stream. Uses a {@link BufferedInputStream}.  Calls mark on every read so
36   * we'll remember at least the last thing read (You can only backup on the
37   * last thing read -- not last 2 or 3 things read).  Used by
38   * {@link GzippedInputStream} when reading streams over a network.  Wraps a
39   * HTTP, etc., stream so we can back it up if needs be after the
40   * GZIP inflater has done a fill of its full buffer though it only needed
41   * the first few bytes to finish decompressing the current GZIP member.
42   * 
43   * <p>TODO: More robust implementation.  Tried to use the it.unimi.dsi.io
44   * FastBufferdInputStream but relies on FileChannel ByteBuffers and if not
45   * present -- as would be the case reading from a network stream, the main
46   * application for this instance -- then it expects the underlying stream 
47   * implements RepositionableStream interface so chicken or egg problem.
48   * @author stack
49   */
50  public class RepositionableInputStream extends BufferedInputStream implements
51          RepositionableStream {
52      private long position = 0;
53      private long markPosition = -1;
54      
55      public RepositionableInputStream(InputStream in) {
56          super(in);
57      }
58      
59      public RepositionableInputStream(InputStream in, int size) {
60          super(in, size);
61      }
62  
63      public int read(byte[] b) throws IOException {
64          int read = super.read(b);
65          if (read != -1) {
66              position += read;
67          }
68          return read;
69      }
70      
71      public synchronized int read(byte[] b, int offset, int ct)
72      throws IOException {
73          // Mark the underlying stream so that we'll remember what we are about
74      	// to read unless a mark has been set in this RepositionableStream
75      	// (We have two levels of mark).  In this latter case we want the
76      	// underlying stream to preserve its mark position so aligns with
77      	// this RS when eset is called.
78      	if (!isMarked()) {
79      		super.mark((ct > offset)? ct - offset: ct);
80      	}
81          int read = super.read(b, offset, ct);
82          if (read != -1) {
83              position += read;
84          }
85          return read;
86      }
87      
88      public int read() throws IOException {
89          // Mark the underlying stream so that we'll remember what we are about
90      	// to read unless a mark has been set in this RepositionableStream
91      	// (We have two levels of mark).  In this latter case we want the
92      	// underlying stream to preserve its mark position so aligns with
93      	// this RS when eset is called.
94      	if (!isMarked()) {
95      		super.mark(1);
96      	}
97          int c = super.read();
98          if (c != -1) {
99              position++;
100         }
101         return c;
102     }
103 
104     public void position(final long offset) {
105         if (this.position == offset) {
106             return;
107         }
108         int diff =  (int)(offset - this.position);
109         long lowerBound = this.position - this.pos;
110         long upperBound = lowerBound + this.count;
111         if (offset < lowerBound || offset >= upperBound) {
112             throw new IllegalAccessError("Offset goes outside " +
113                 "current this.buf (TODO: Do buffer fills if positive)");
114         }
115         this.position = offset;
116         this.pos += diff;
117         // Clear any mark.
118         this.markPosition = -1;
119     }
120 
121     public void mark(int readlimit) {
122         this.markPosition = this.position;
123         super.mark(readlimit);
124     }
125 
126     public void reset() throws IOException {
127         super.reset();
128         this.position = this.markPosition;
129         this.markPosition = -1;
130     }
131     
132     protected boolean isMarked() {
133     	return this.markPosition != -1;
134     }
135 
136     public long position() {
137         return this.position;
138     }
139 }