|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.archive.io.ArchiveReader
public abstract class ArchiveReader
Reader for an Archive file of ArchiveRecords.
Nested Class Summary | |
---|---|
protected class |
ArchiveReader.ArchiveRecordIterator
Inner ArchiveRecord Iterator class. |
protected class |
ArchiveReader.RandomAccessBufferedInputStream
Add buffering to RandomAccessInputStream. |
Field Summary | |
---|---|
static int |
MAX_ALLOWED_RECOVERABLES
Maximum number of recoverable exceptions in a row. |
Fields inherited from interface org.archive.io.ArchiveFileConstants |
---|
ABSOLUTE_OFFSET_KEY, CDX, CDX_FILE, CDX_LINE_BUFFER_SIZE, COMPRESSED_FILE_EXTENSION, CRLF, DATE_FIELD_KEY, DEFAULT_DIGEST_METHOD, DOT_COMPRESSED_FILE_EXTENSION, DUMP, GZIP_DUMP, HEADER, INVALID_SUFFIX, LENGTH_FIELD_KEY, MIMETYPE_FIELD_KEY, NOHEAD, OCCUPIED_SUFFIX, READER_IDENTIFIER_FIELD_KEY, RECORD_IDENTIFIER_FIELD_KEY, SINGLE_SPACE, TYPE_FIELD_KEY, URL_FIELD_KEY, VERSION_FIELD_KEY |
Constructor Summary | |
---|---|
protected |
ArchiveReader()
|
Method Summary | |
---|---|
protected void |
cdxOutput(boolean toFile)
|
protected void |
cleanupCurrentRecord()
Clean out the current record if there is one. |
void |
close()
|
protected abstract ArchiveRecord |
createArchiveRecord(java.io.InputStream is,
long offset)
Return an Archive Record homed on offset into is. |
protected ArchiveRecord |
currentRecord(ArchiveRecord currentRecord)
|
abstract void |
dump(boolean compress)
Dump this file on STDOUT. |
ArchiveRecord |
get()
|
ArchiveRecord |
get(long offset)
Get record at passed offset. |
protected ArchiveRecord |
getCurrentRecord()
|
abstract ArchiveReader |
getDeleteFileOnCloseReader(java.io.File f)
|
abstract java.lang.String |
getDotFileExtension()
|
abstract java.lang.String |
getFileExtension()
|
java.lang.String |
getFileName()
|
protected java.io.InputStream |
getIn()
|
protected java.io.InputStream |
getInputStream()
|
protected java.io.InputStream |
getInputStream(java.io.File f,
long offset)
Convenience method for constructors. |
protected java.util.logging.Logger |
getLogger()
|
protected static org.apache.commons.cli.Options |
getOptions()
|
java.lang.String |
getReaderIdentifier()
|
java.lang.String |
getStrippedFileName()
|
static java.lang.String |
getStrippedFileName(java.lang.String name,
java.lang.String dotFileExtension)
|
protected static boolean |
getTrueOrFalse(java.lang.String value)
|
java.lang.String |
getVersion()
|
protected abstract void |
gotoEOR(ArchiveRecord record)
Skip over any trailing new lines at end of the record so we're lined up ready to read the next. |
protected void |
initialize(java.lang.String i)
Convenience method used by subclass constructors. |
boolean |
isCompressed()
|
boolean |
isDigest()
|
boolean |
isStrict()
|
boolean |
isValid()
Test Archive file is valid. |
java.util.Iterator<ArchiveRecord> |
iterator()
Returns an ArchiveRecord iterator. |
void |
logStdErr(java.util.logging.Level level,
java.lang.String message)
Log on stderr. |
protected boolean |
output(java.lang.String format)
|
protected static void |
outputRecord(ArchiveReader r,
java.lang.String format)
Output passed record using passed format specifier. |
boolean |
outputRecord(java.lang.String format)
Output passed record using passed format specifier. |
protected void |
rewind()
Rewinds stream to start of the Archive file. |
protected void |
setCompressed(boolean compressed)
|
void |
setDigest(boolean d)
|
protected void |
setIn(java.io.InputStream in)
|
protected void |
setReaderIdentifier(java.lang.String i)
|
void |
setStrict(boolean s)
|
protected void |
setVersion(java.lang.String version)
|
protected static java.lang.String |
stripExtension(java.lang.String name,
java.lang.String ext)
|
java.util.List<ArchiveRecordHeader> |
validate()
Validate the Archive file. |
java.util.List<ArchiveRecordHeader> |
validate(int numRecords)
Validate the Archive file. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final int MAX_ALLOWED_RECOVERABLES
Constructor Detail |
---|
protected ArchiveReader()
Method Detail |
---|
protected void initialize(java.lang.String i)
i
- Identifier for Archive file this reader goes against.
protected java.io.InputStream getInputStream(java.io.File f, long offset) throws java.io.IOException
f
- File to read.
offset
- Offset at which to start reading.
java.io.IOException
- If we fail to open the file or fail to get a memory-mapped
byte buffer on it.
public boolean isCompressed()
public ArchiveRecord get(long offset) throws java.io.IOException
Get record at passed offset.
offset
- Byte index into file at which a record starts.
java.io.IOException
public ArchiveRecord get() throws java.io.IOException
java.io.IOException
public void close() throws java.io.IOException
java.io.IOException
protected void rewind() throws java.io.IOException
java.io.IOException
- If stream is not resettable.
protected void cleanupCurrentRecord() throws java.io.IOException
java.io.IOException
protected abstract ArchiveRecord createArchiveRecord(java.io.InputStream is, long offset) throws java.io.IOException
Return an Archive Record homed on offset into is.
is
- Stream to read Record from.
offset
- Offset to find Record at.
java.io.IOException
protected abstract void gotoEOR(ArchiveRecord record) throws java.io.IOException
record
-
java.io.IOException
public abstract java.lang.String getFileExtension()
public abstract java.lang.String getDotFileExtension()
public java.lang.String getVersion()
public java.util.List<ArchiveRecordHeader> validate() throws java.io.IOException
Assumes the stream is at the start of the file.
java.io.IOException
public java.util.List<ArchiveRecordHeader> validate(int numRecords) throws java.io.IOException
We start validation from wherever we are in the stream.
numRecords
- Number of records expected. Pass -1 if number is
unknown.
java.io.IOException
public boolean isValid()
public boolean isStrict()
public void setStrict(boolean s)
s
- The strict value to set.
public void setDigest(boolean d)
d
- True if we're to digest.
public boolean isDigest()
protected java.util.logging.Logger getLogger()
protected java.io.InputStream getInputStream()
public java.util.Iterator<ArchiveRecord> iterator()
Returns an ArchiveRecord iterator. If strict is not set, this will usually succeed.
protected void setCompressed(boolean compressed)
protected ArchiveRecord getCurrentRecord()
get()
protected ArchiveRecord currentRecord(ArchiveRecord currentRecord)
protected java.io.InputStream getIn()
protected void setIn(java.io.InputStream in)
protected void setVersion(java.lang.String version)
public java.lang.String getReaderIdentifier()
protected void setReaderIdentifier(java.lang.String i)
public void logStdErr(java.util.logging.Level level, java.lang.String message)
level
- Level to log message at.
message
- Message to log.
protected static java.lang.String stripExtension(java.lang.String name, java.lang.String ext)
public java.lang.String getFileName()
public java.lang.String getStrippedFileName()
public static java.lang.String getStrippedFileName(java.lang.String name, java.lang.String dotFileExtension)
name
- Name of ARCFile.
dotFileExtension
- '.arc' or '.warc', etc.
protected static boolean getTrueOrFalse(java.lang.String value)
value
- Value to test.
protected boolean output(java.lang.String format) throws java.io.IOException, java.text.ParseException
format
- Format to use outputting.
java.io.IOException
java.text.ParseException
protected void cdxOutput(boolean toFile) throws java.io.IOException
java.io.IOException
public boolean outputRecord(java.lang.String format) throws java.io.IOException
format
- What format to use outputting.
java.io.IOException
public abstract void dump(boolean compress) throws java.io.IOException, java.text.ParseException
compress
- True if dumped output is compressed.
java.io.IOException
java.text.ParseException
public abstract ArchiveReader getDeleteFileOnCloseReader(java.io.File f)
protected static void outputRecord(ArchiveReader r, java.lang.String format) throws java.io.IOException
r
- ARCReader instance to output.
format
- What format to use outputting.
java.io.IOException
protected static org.apache.commons.cli.Options getOptions()
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |