|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.archive.io.WriterPoolMember org.archive.io.warc.WARCWriter
public class WARCWriter
WARC implementation.
The assumption is that the caller manages access to this WARCWriter, ensuring that only one thread accesses the instance at any one time.
While being written, WARCs have a '.open' suffix appended.
Field Summary | |
---|---|
static java.lang.String |
CONTENT_BYTES
|
static byte[] |
CRLF_BYTES
NEWLINE as bytes. |
static java.lang.String |
NUM_RECORDS
|
static java.lang.String |
SIZE_ON_DISK
|
static java.lang.String |
TOTAL_BYTES
|
static java.lang.String |
TOTALS
|
Fields inherited from class org.archive.io.WriterPoolMember |
---|
DEFAULT_PREFIX, DEFAULT_SUFFIX, HOSTNAME_ADMINPORT_VARIABLE, HOSTNAME_VARIABLE, UTF8 |
Fields inherited from interface org.archive.io.ArchiveFileConstants |
---|
ABSOLUTE_OFFSET_KEY, CDX, CDX_FILE, CDX_LINE_BUFFER_SIZE, COMPRESSED_FILE_EXTENSION, CRLF, DATE_FIELD_KEY, DEFAULT_DIGEST_METHOD, DUMP, GZIP_DUMP, HEADER, INVALID_SUFFIX, LENGTH_FIELD_KEY, MIMETYPE_FIELD_KEY, NOHEAD, OCCUPIED_SUFFIX, READER_IDENTIFIER_FIELD_KEY, RECORD_IDENTIFIER_FIELD_KEY, SINGLE_SPACE, TYPE_FIELD_KEY, URL_FIELD_KEY, VERSION_FIELD_KEY |
Constructor Summary | |
---|---|
WARCWriter()
Shutdown constructor. Has default access so that an instance can be created to test utility methods. |
|
WARCWriter(java.util.concurrent.atomic.AtomicInteger serialNo,
java.util.List<java.io.File> dirs,
java.lang.String prefix,
java.lang.String suffix,
boolean cmprs,
long maxSize,
java.util.List<java.lang.String> warcinfoData)
Constructor. |
|
WARCWriter(java.util.concurrent.atomic.AtomicInteger serialNo,
java.io.OutputStream out,
java.io.File f,
boolean cmprs,
java.lang.String a14DigitDate,
java.util.List<java.lang.String> warcinfoData)
Constructor. |
Method Summary | |
---|---|
protected void |
baseCharacterCheck(char c,
java.lang.String parameter)
|
protected java.lang.String |
checkHeaderLineMimetypeParameter(java.lang.String parameter)
|
protected java.lang.String |
checkHeaderValue(java.lang.String value)
|
protected java.lang.String |
createFile(java.io.File file)
|
protected java.lang.String |
createRecordHeader(java.lang.String type,
java.lang.String url,
java.lang.String create14DigitDate,
java.lang.String mimetype,
java.net.URI recordId,
ANVLRecord xtraHeaders,
long contentLength)
|
protected java.net.URI |
generateRecordId(java.util.Map<java.lang.String,java.lang.String> qualifiers)
|
protected java.net.URI |
generateRecordId(java.lang.String key,
java.lang.String value)
|
static java.net.URI |
getRecordID()
Convenience method for getting Record-Ids. |
static long |
getStat(java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Long>> statz,
java.lang.String key,
java.lang.String subkey)
|
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Long>> |
getStats()
|
void |
resetStats()
|
protected void |
subtally(java.util.Map<java.lang.String,java.lang.Long> substats,
long contentBytes,
long totalBytes,
long sizeOnDisk)
|
protected void |
tally(java.lang.String recordType,
long contentBytes,
long totalBytes,
long sizeOnDisk)
|
void |
writeMetadataRecord(java.lang.String url,
java.lang.String create14DigitDate,
java.lang.String mimetype,
java.net.URI recordId,
ANVLRecord namedFields,
java.io.InputStream metadata,
long metadataLength)
|
protected void |
writeRecord(java.lang.String type,
java.lang.String url,
java.lang.String create14DigitDate,
java.lang.String mimetype,
java.net.URI recordId,
ANVLRecord xtraHeaders,
java.io.InputStream contentStream,
long contentLength)
Deprecated. Use writeRecord(String,String,String,String,URI,ANVLRecord,InputStream,long,boolean) instead |
protected void |
writeRecord(java.lang.String type,
java.lang.String url,
java.lang.String create14DigitDate,
java.lang.String mimetype,
java.net.URI recordId,
ANVLRecord xtraHeaders,
java.io.InputStream contentStream,
long contentLength,
boolean enforceLength)
|
void |
writeRequestRecord(java.lang.String url,
java.lang.String create14DigitDate,
java.lang.String mimetype,
java.net.URI recordId,
ANVLRecord namedFields,
java.io.InputStream request,
long requestLength)
|
void |
writeResourceRecord(java.lang.String url,
java.lang.String create14DigitDate,
java.lang.String mimetype,
ANVLRecord namedFields,
java.io.InputStream response,
long responseLength)
|
void |
writeResourceRecord(java.lang.String url,
java.lang.String create14DigitDate,
java.lang.String mimetype,
java.net.URI recordId,
ANVLRecord namedFields,
java.io.InputStream response,
long responseLength)
|
void |
writeResponseRecord(java.lang.String url,
java.lang.String create14DigitDate,
java.lang.String mimetype,
java.net.URI recordId,
ANVLRecord namedFields,
java.io.InputStream response,
long responseLength)
|
void |
writeRevisitRecord(java.lang.String url,
java.lang.String create14DigitDate,
java.lang.String mimetype,
java.net.URI recordId,
ANVLRecord namedFields,
java.io.InputStream response,
long responseLength)
|
java.net.URI |
writeWarcinfoRecord(java.lang.String filename)
|
java.net.URI |
writeWarcinfoRecord(java.lang.String mimetype,
ANVLRecord namedFields,
java.io.InputStream fileMetadata,
long fileMetadataLength)
Write a warcinfo to current file. |
java.net.URI |
writeWarcinfoRecord(java.lang.String filename,
java.lang.String description)
|
void |
writeWarcinfoRecord(java.lang.String create14DigitDate,
java.lang.String mimetype,
java.net.URI recordId,
ANVLRecord namedFields,
java.io.InputStream fileMetadata,
long fileMetadataLength)
Write a warcinfo to current file. |
Methods inherited from class org.archive.io.WriterPoolMember |
---|
checkSize, checkWriteable, close, copyFrom, createFile, flush, getBaseFilename, getCreateTimestamp, getFile, getNextDirectory, getOutputStream, getPosition, getTimestampSerialNo, getTimestampSerialNo, isCompressed, postWriteRecordTasks, preWriteRecordTasks, readFullyFrom, readToLimitFrom, write, write, write |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final java.lang.String TOTALS
public static final java.lang.String SIZE_ON_DISK
public static final java.lang.String TOTAL_BYTES
public static final java.lang.String CONTENT_BYTES
public static final java.lang.String NUM_RECORDS
public static byte[] CRLF_BYTES
Constructor Detail |
---|
WARCWriter()
public WARCWriter(java.util.concurrent.atomic.AtomicInteger serialNo, java.io.OutputStream out, java.io.File f, boolean cmprs, java.lang.String a14DigitDate, java.util.List<java.lang.String> warcinfoData) throws java.io.IOException
Parameters:
serialNo - used to generate unique file name sequences
out - Where to write.
f - File the out is connected to.
cmprs - Compress the content written.
a14DigitDate - If null, we'll write current time.
Throws:
java.io.IOException
public WARCWriter(java.util.concurrent.atomic.AtomicInteger serialNo, java.util.List<java.io.File> dirs, java.lang.String prefix, java.lang.String suffix, boolean cmprs, long maxSize, java.util.List<java.lang.String> warcinfoData)
Parameters:
dirs - Where to drop files.
prefix - File prefix to use.
cmprs - Compress the records written.
maxSize - Maximum size for WARC files written.
suffix - File tail to use. If null, unused.
warcinfoData - File metadata for warcinfo record.
Method Detail |
---|
protected java.lang.String createFile(java.io.File file) throws java.io.IOException
Overrides:
createFile in class WriterPoolMember
Throws:
java.io.IOException
protected void baseCharacterCheck(char c, java.lang.String parameter) throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
protected java.lang.String checkHeaderValue(java.lang.String value) throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
protected java.lang.String checkHeaderLineMimetypeParameter(java.lang.String parameter) throws java.io.IOException
java.io.IOException
protected java.lang.String createRecordHeader(java.lang.String type, java.lang.String url, java.lang.String create14DigitDate, java.lang.String mimetype, java.net.URI recordId, ANVLRecord xtraHeaders, long contentLength) throws java.io.IOException
java.io.IOException
protected void writeRecord(java.lang.String type, java.lang.String url, java.lang.String create14DigitDate, java.lang.String mimetype, java.net.URI recordId, ANVLRecord xtraHeaders, java.io.InputStream contentStream, long contentLength) throws java.io.IOException
Deprecated. Use writeRecord(String,String,String,String,URI,ANVLRecord,InputStream,long,boolean) instead.
java.io.IOException
protected void writeRecord(java.lang.String type, java.lang.String url, java.lang.String create14DigitDate, java.lang.String mimetype, java.net.URI recordId, ANVLRecord xtraHeaders, java.io.InputStream contentStream, long contentLength, boolean enforceLength) throws java.io.IOException
java.io.IOException
protected void tally(java.lang.String recordType, long contentBytes, long totalBytes, long sizeOnDisk)
protected void subtally(java.util.Map<java.lang.String,java.lang.Long> substats, long contentBytes, long totalBytes, long sizeOnDisk)
protected java.net.URI generateRecordId(java.util.Map<java.lang.String,java.lang.String> qualifiers) throws java.io.IOException
java.io.IOException
protected java.net.URI generateRecordId(java.lang.String key, java.lang.String value) throws java.io.IOException
java.io.IOException
public java.net.URI writeWarcinfoRecord(java.lang.String filename) throws java.io.IOException
java.io.IOException
public java.net.URI writeWarcinfoRecord(java.lang.String filename, java.lang.String description) throws java.io.IOException
java.io.IOException
public java.net.URI writeWarcinfoRecord(java.lang.String mimetype, ANVLRecord namedFields, java.io.InputStream fileMetadata, long fileMetadataLength) throws java.io.IOException
Write a warcinfo to current file.
Parameters:
mimetype - Mimetype of the fileMetadata block.
namedFields - Named fields. Pass null if none.
fileMetadata - Metadata about this WARC as RDF, ANVL, etc.
fileMetadataLength - Length of fileMetadata.
Throws:
java.io.IOException
public void writeWarcinfoRecord(java.lang.String create14DigitDate, java.lang.String mimetype, java.net.URI recordId, ANVLRecord namedFields, java.io.InputStream fileMetadata, long fileMetadataLength) throws java.io.IOException
Write a warcinfo to current file.
The warcinfo type uses its recordId as its URL.
Parameters:
recordId - URI to use for this warcinfo.
create14DigitDate - Record creation date as 14 digit date.
mimetype - Mimetype of the fileMetadata.
namedFields - Named fields.
fileMetadata - Metadata about this WARC as RDF, ANVL, etc.
fileMetadataLength - Length of fileMetadata.
Throws:
java.io.IOException
public void writeRequestRecord(java.lang.String url, java.lang.String create14DigitDate, java.lang.String mimetype, java.net.URI recordId, ANVLRecord namedFields, java.io.InputStream request, long requestLength) throws java.io.IOException
java.io.IOException
public void writeResourceRecord(java.lang.String url, java.lang.String create14DigitDate, java.lang.String mimetype, ANVLRecord namedFields, java.io.InputStream response, long responseLength) throws java.io.IOException
java.io.IOException
public void writeResourceRecord(java.lang.String url, java.lang.String create14DigitDate, java.lang.String mimetype, java.net.URI recordId, ANVLRecord namedFields, java.io.InputStream response, long responseLength) throws java.io.IOException
java.io.IOException
public void writeResponseRecord(java.lang.String url, java.lang.String create14DigitDate, java.lang.String mimetype, java.net.URI recordId, ANVLRecord namedFields, java.io.InputStream response, long responseLength) throws java.io.IOException
java.io.IOException
public void writeRevisitRecord(java.lang.String url, java.lang.String create14DigitDate, java.lang.String mimetype, java.net.URI recordId, ANVLRecord namedFields, java.io.InputStream response, long responseLength) throws java.io.IOException
java.io.IOException
public void writeMetadataRecord(java.lang.String url, java.lang.String create14DigitDate, java.lang.String mimetype, java.net.URI recordId, ANVLRecord namedFields, java.io.InputStream metadata, long metadataLength) throws java.io.IOException
java.io.IOException
public static java.net.URI getRecordID() throws java.io.IOException
java.io.IOException
public void resetStats()
public java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Long>> getStats()
public static long getStat(java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.Long>> statz, java.lang.String key, java.lang.String subkey)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |