Serialized Form


Package org.archive.crawler.admin

Class org.archive.crawler.admin.CrawlJob extends javax.management.NotificationBroadcasterSupport implements Serializable

serialVersionUID: 3411161000452525856L

Serialized Fields

UID

java.lang.String UID

name

java.lang.String name

status

java.lang.String status

isReadOnly

boolean isReadOnly

isNew

boolean isNew

isProfile

boolean isProfile

isRunning

boolean isRunning

priority

int priority

numberOfJournalEntries

int numberOfJournalEntries

statisticsFileSave

java.lang.String statisticsFileSave

errorMessage

java.lang.String errorMessage

jobDir

java.io.File jobDir

Class org.archive.crawler.admin.CrawlJob.MBeanCrawlController extends CrawlController implements Serializable

serialVersionUID: -4608537998168407222L

Serialized Fields

cj

CrawlJob cj

ct

javax.management.openmbean.CompositeType ct

Class org.archive.crawler.admin.InvalidJobFileException extends java.lang.Exception implements Serializable

serialVersionUID: -5162130672800789699L

Class org.archive.crawler.admin.SeedRecord extends java.lang.Object implements Serializable

serialVersionUID: -8455358640509744478L

Serialized Fields

uri

java.lang.String uri

statusCode

int statusCode

disposition

java.lang.String disposition

redirectUri

java.lang.String redirectUri

Class org.archive.crawler.admin.StatisticsTracker extends AbstractTracker implements Serializable

serialVersionUID: 8004878315916392305L

Serialized Fields

lastPagesFetchedCount

long lastPagesFetchedCount

lastProcessedBytesCount

long lastProcessedBytesCount

discoveredUriCount

long discoveredUriCount

queuedUriCount

long queuedUriCount

finishedUriCount

long finishedUriCount

downloadedUriCount

long downloadedUriCount

downloadFailures

long downloadFailures

downloadDisregards

long downloadDisregards

docsPerSecond

double docsPerSecond

currentDocsPerSecond

double currentDocsPerSecond

currentKBPerSec

int currentKBPerSec

totalKBPerSec

long totalKBPerSec

busyThreads

int busyThreads

totalProcessedBytes

long totalProcessedBytes

congestionRatio

float congestionRatio

deepestUri

long deepestUri

averageDepth

long averageDepth

crawledBytes

CrawledBytesHistotable crawledBytes
Tally of sizes: novel, verified (same hash), vouched (not-modified).


notModifiedUriCount

long notModifiedUriCount

dupByHashUriCount

long dupByHashUriCount

novelUriCount

long novelUriCount

mimeTypeDistribution

java.util.concurrent.ConcurrentMap<K,V> mimeTypeDistribution
Keep track of the file types we see (mime type -> count)


mimeTypeBytes

java.util.concurrent.ConcurrentMap<K,V> mimeTypeBytes

statusCodeDistribution

java.util.concurrent.ConcurrentMap<K,V> statusCodeDistribution
Keep track of fetch status codes


seedsCrawled

int seedsCrawled

seedsNotCrawled

int seedsNotCrawled

sExitMessage

java.lang.String sExitMessage

Package org.archive.crawler.datamodel

Class org.archive.crawler.datamodel.CandidateURI extends java.lang.Object implements Serializable

serialVersionUID: -7152937921526560388L

Serialization Methods

readObject

private void readObject(java.io.ObjectInputStream stream)
                 throws java.io.IOException,
                        java.lang.ClassNotFoundException
Custom deserialization to reconstruct UURI instances from more compact Strings.

Throws:
java.io.IOException
java.lang.ClassNotFoundException

writeObject

private void writeObject(java.io.ObjectOutputStream stream)
                  throws java.io.IOException
Custom serialization writing 'uuri' and 'via' as Strings, rather than the bloated full serialization of their object classes, and an empty alist as 'null'. Shrinks serialized form by 50% or more in short tests.

Throws:
java.io.IOException
Serialized Fields

schedulingDirective

int schedulingDirective

isSeed

boolean isSeed
Seed status


forceRevisit

boolean forceRevisit

pathFromSeed

java.lang.String pathFromSeed
String of letters indicating how this URI was reached from a seed.
 P precondition
 R redirection
 E embedded (as frame, src, link, codebase, etc.)
 X speculative embed (as from javascript, some alternate-format extractors)
 L link
For example LLLE (an embedded image on a page 3 links from seed).


viaContext

java.lang.CharSequence viaContext
Context of URI's discovery, as per the 'context' in Link


cachedCandidateURIString

java.lang.String cachedCandidateURIString
Cache of this candidate uuri as a string. Profiling shows us spending about 1-2% of total elapsed time in toString.


classKey

java.lang.String classKey
Frontier/Scheduler lifecycle info. This is an identifier set by the Frontier for its purposes. Usually it's the name of the Frontier queue this URI gets queued to. Values can be host + port or IP, etc.

Class org.archive.crawler.datamodel.Checkpoint extends java.lang.Object implements Serializable

serialVersionUID: 5121498771788002844L

Serialization Methods

readObject

private void readObject(java.io.ObjectInputStream s)
                 throws java.io.IOException,
                        java.lang.ClassNotFoundException
Throws:
java.io.IOException
java.lang.ClassNotFoundException
Serialized Fields

directory

java.io.File directory

Class org.archive.crawler.datamodel.CrawlHost extends java.lang.Object implements Serializable

serialVersionUID: -5494573967890942895L

Serialized Fields

hostname

java.lang.String hostname

countryCode

java.lang.String countryCode

ip

java.net.InetAddress ip

ipFetched

long ipFetched

substats

CrawlSubstats substats

ipTTL

long ipTTL
TTL obtained from the DNS record. From RFC 1035:
 TTL       a 32 bit unsigned integer that specifies the time
           interval (in seconds) that the resource record may be
           cached before it should be discarded.  Zero values are
           interpreted to mean that the RR can only be used for the
           transaction in progress, and should not be cached.
 


earliestNextURIEmitTime

long earliestNextURIEmitTime

Class org.archive.crawler.datamodel.CrawlOrder extends ModuleType implements Serializable

serialVersionUID: -6715840285961511669L

Class org.archive.crawler.datamodel.CrawlServer extends java.lang.Object implements Serializable

serialVersionUID: -989714570750970369L

Serialization Methods

readObject

private void readObject(java.io.ObjectInputStream stream)
                 throws java.io.IOException,
                        java.lang.ClassNotFoundException
Called when object is being deserialized. In addition to the default java deserialization, this method re-establishes the references to settings handler and robots honoring policy.

Throws:
java.io.IOException - if I/O errors occur
java.lang.ClassNotFoundException - If the class for an object being restored cannot be found.
Serialized Fields

server

java.lang.String server

port

int port

robots

RobotsExclusionPolicy robots

robotsFetched

long robotsFetched

validRobots

boolean validRobots

robotstxtChecksum

java.util.zip.Checksum robotstxtChecksum

substats

CrawlSubstats substats

consecutiveConnectionErrors

int consecutiveConnectionErrors

Class org.archive.crawler.datamodel.CrawlSubstats extends java.lang.Object implements Serializable

serialVersionUID: 8624425657056569036L

Serialized Fields

totalScheduled

long totalScheduled

fetchSuccesses

long fetchSuccesses

fetchFailures

long fetchFailures

fetchDisregards

long fetchDisregards

fetchResponses

long fetchResponses

robotsDenials

long robotsDenials

successBytes

long successBytes

totalBytes

long totalBytes

fetchNonResponses

long fetchNonResponses

novelBytes

long novelBytes

novelUrls

long novelUrls

notModifiedBytes

long notModifiedBytes

notModifiedUrls

long notModifiedUrls

dupByHashBytes

long dupByHashBytes

dupByHashUrls

long dupByHashUrls

Class org.archive.crawler.datamodel.CrawlURI extends CandidateURI implements Serializable

serialVersionUID: 7874096757350100472L

Serialization Methods

readObject

private void readObject(java.io.ObjectInputStream stream)
                 throws java.io.IOException,
                        java.lang.ClassNotFoundException
Custom deserialization recreating empty HashSet from null in 'outLinks' slot.

Throws:
java.io.IOException
java.lang.ClassNotFoundException

writeObject

private void writeObject(java.io.ObjectOutputStream stream)
                  throws java.io.IOException
Custom serialization writing an empty 'outLinks' as null. Estimated to save ~20 bytes in serialized form.

Throws:
java.io.IOException
Serialized Fields

fetchStatus

int fetchStatus

deferrals

int deferrals

fetchAttempts

int fetchAttempts

linkHopCount

int linkHopCount
Deprecated. 

embedHopCount

int embedHopCount
Deprecated. 

userAgent

java.lang.String userAgent

contentSize

long contentSize

contentLength

long contentLength

contentType

java.lang.String contentType
Content type of a successfully fetched URI. May be null even on successfully fetched URI.


prerequisite

boolean prerequisite
True if this CrawlURI has been deemed a prerequisite by the PreconditionEnforcer. This flag is used at least inside the precondition enforcer so that subsequent prerequisite tests know to let this CrawlURI through because it's a prerequisite needed by an earlier prerequisite test (e.g. if this is a robots.txt, then the subsequent login credentials prereq test must not throw it out because it's not a login curi).


post

boolean post
Set to true if this curi is to be POST'd rather than GET-d.


ordinal

long ordinal
Monotonically increasing number within a crawl; useful for tending towards breadth-first ordering. Will sometimes be truncated to 48 bits, so behavior over 281 trillion instantiated CrawlURIs may be buggy


cachedCrawlURIString

java.lang.String cachedCrawlURIString
Cache of this candidate uuri as a string. Profiling shows us spending about 1-2% of total elapsed time in toString.


contentDigest

byte[] contentDigest
A digest (hash, usually SHA1) of retrieved content-body.


contentDigestScheme

java.lang.String contentDigestScheme

holderCost

int holderCost
spot for an integer cost to be placed by external facility (frontier). cost is truncated to 8 bits at times, so should not exceed 255

Class org.archive.crawler.datamodel.CredentialStore extends ModuleType implements Serializable

serialVersionUID: -7916979754932063634L

Class org.archive.crawler.datamodel.RobotsDirectives extends java.lang.Object implements Serializable

serialVersionUID: 5386542759286155383L

Serialized Fields

disallows

java.util.concurrent.ConcurrentSkipListSet<E> disallows

allows

java.util.concurrent.ConcurrentSkipListSet<E> allows

crawlDelay

float crawlDelay

Class org.archive.crawler.datamodel.RobotsExclusionPolicy extends java.lang.Object implements Serializable

serialVersionUID: 6323907991237383113L

Serialization Methods

readObject

private void readObject(java.io.ObjectInputStream stream)
                 throws java.io.IOException,
                        java.lang.ClassNotFoundException
If object is DENYALL or ALLOWALL, only the object identity and type is read from the serialization stream.

Throws:
java.io.IOException
java.lang.ClassNotFoundException

writeObject

private void writeObject(java.io.ObjectOutputStream stream)
                  throws java.io.IOException
If object is DENYALL or ALLOWALL, only the object identity and type is written in the serialization stream.

Throws:
java.io.IOException

readResolve

private java.lang.Object readResolve()
If object is DENYALL or ALLOWALL, the object is replaced by constants so that check for object equality works.

Serialized Fields

robotstxt

Robotstxt robotstxt

lastUsedUserAgent

java.lang.String lastUsedUserAgent

userAgentsToTest

java.util.List<E> userAgentsToTest

Class org.archive.crawler.datamodel.RobotsHonoringPolicy extends ModuleType implements Serializable

serialVersionUID: 8850011643923116605L

Class org.archive.crawler.datamodel.Robotstxt extends java.lang.Object implements Serializable

serialVersionUID: 7025386509301303890L

Serialized Fields

userAgents

java.util.LinkedList<E> userAgents

agentsToDirectives

java.util.Map<K,V> agentsToDirectives

hasErrors

boolean hasErrors

Package org.archive.crawler.datamodel.credential

Class org.archive.crawler.datamodel.credential.Credential extends ModuleType implements Serializable

Class org.archive.crawler.datamodel.credential.CredentialAvatar extends java.lang.Object implements Serializable

serialVersionUID: 4489542750898404807L

Serialized Fields

key

java.lang.String key
Key for this credential avatar.


type

java.lang.Class<T> type
Type represented by this avatar.


payload

java.lang.String payload
Data. May be null.

This used to be an Object and I used to store in here the httpclient AuthScheme but AuthScheme is not serializable and so there'd be trouble getting this payload to lie down in a bdb database. Changed it to String. That should be generic enough for credential purposes.

Class org.archive.crawler.datamodel.credential.HtmlFormCredential extends Credential implements Serializable

serialVersionUID: -4732570804435453949L

Class org.archive.crawler.datamodel.credential.Rfc2617Credential extends Credential implements Serializable

serialVersionUID: -1909614285968756188L


Package org.archive.crawler.deciderules

Class org.archive.crawler.deciderules.AcceptDecideRule extends DecideRule implements Serializable

serialVersionUID: 1670683201497583206L

Class org.archive.crawler.deciderules.AddRedirectFromRootServerToScope extends PredicatedDecideRule implements Serializable

serialVersionUID: 2644131585813079064L

Class org.archive.crawler.deciderules.BeanShellDecideRule extends DecideRule implements Serializable

serialVersionUID: -8433859929199308527L

Serialized Fields

threadInterpreter

java.lang.ThreadLocal<T> threadInterpreter

sharedInterpreter

bsh.Interpreter sharedInterpreter

sharedMap

java.util.Map<K,V> sharedMap

initialized

boolean initialized

Class org.archive.crawler.deciderules.ClassKeyMatchesRegExpDecideRule extends MatchesRegExpDecideRule implements Serializable

serialVersionUID: 1178873944436973294L

Class org.archive.crawler.deciderules.ConfiguredDecideRule extends DecideRule implements Serializable

serialVersionUID: -7084695808452312555L

Class org.archive.crawler.deciderules.ContentTypeMatchesRegExpDecideRule extends MatchesRegExpDecideRule implements Serializable

serialVersionUID: -2066930281015155843L

Class org.archive.crawler.deciderules.ContentTypeNotMatchesRegExpDecideRule extends ContentTypeMatchesRegExpDecideRule implements Serializable

serialVersionUID: 4729800377757426137L

Class org.archive.crawler.deciderules.DecideRule extends ModuleType implements Serializable

serialVersionUID: 3437522810581532520L

Class org.archive.crawler.deciderules.DecideRuleSequence extends DecideRule implements Serializable

serialVersionUID: 8918111430698683110L

Class org.archive.crawler.deciderules.DecidingFilter extends Filter implements Serializable

serialVersionUID: -7275555425381445477L

Class org.archive.crawler.deciderules.DecidingScope extends CrawlScope implements Serializable

serialVersionUID: -2942787757512964906L

Class org.archive.crawler.deciderules.ExceedsDocumentLengthTresholdDecideRule extends NotExceedsDocumentLengthTresholdDecideRule implements Serializable

serialVersionUID: -3008503096295212224L

Class org.archive.crawler.deciderules.ExternalGeoLocationDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: -32974116429860725L

Serialized Fields

countryCode

java.lang.String countryCode

implementation

ExternalGeoLookupInterface implementation

Class org.archive.crawler.deciderules.ExternalImplDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: 7727715263469524372L

Serialized Fields

implementation

ExternalImplInterface implementation

Class org.archive.crawler.deciderules.FetchStatusDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: 5820599300395594619L

Class org.archive.crawler.deciderules.FetchStatusMatchesRegExpDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: -3088156729860241312L

Serialized Fields

logger

java.util.logging.Logger logger

Class org.archive.crawler.deciderules.FetchStatusNotMatchesRegExpDecideRule extends FetchStatusMatchesRegExpDecideRule implements Serializable

serialVersionUID: -2220182698344063577L

Class org.archive.crawler.deciderules.FilterDecideRule extends DecideRule implements Serializable

serialVersionUID: -3193099932171335572L

Serialized Fields

filters

MapType filters
Filter(s) to apply. Package protections for testing.

Class org.archive.crawler.deciderules.HasViaDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: 1670292311303097735L

Class org.archive.crawler.deciderules.HopsPathMatchesRegExpDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: -8881013912393934053L

Class org.archive.crawler.deciderules.IsCrossTopmostAssignedSurtHopDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: 1L

Class org.archive.crawler.deciderules.MatchesFilePatternDecideRule extends MatchesRegExpDecideRule implements Serializable

serialVersionUID: -4182743018517062411L

Class org.archive.crawler.deciderules.MatchesListRegExpDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: 3011579758573454930L

Class org.archive.crawler.deciderules.MatchesRegExpDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: 6441410917074319295L

Class org.archive.crawler.deciderules.NotExceedsDocumentLengthTresholdDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: -8774160016195991876L

Class org.archive.crawler.deciderules.NotMatchesFilePatternDecideRule extends MatchesFilePatternDecideRule implements Serializable

serialVersionUID: -8161371026787859554L

Class org.archive.crawler.deciderules.NotMatchesListRegExpDecideRule extends MatchesListRegExpDecideRule implements Serializable

serialVersionUID: 8691360087063555583L

Class org.archive.crawler.deciderules.NotMatchesRegExpDecideRule extends MatchesRegExpDecideRule implements Serializable

serialVersionUID: -2085313401991694306L

Class org.archive.crawler.deciderules.NotOnDomainsDecideRule extends OnDomainsDecideRule implements Serializable

serialVersionUID: -1634035244888724934L

Class org.archive.crawler.deciderules.NotOnHostsDecideRule extends OnHostsDecideRule implements Serializable

serialVersionUID: 1512825197255050412L

Class org.archive.crawler.deciderules.NotSurtPrefixedDecideRule extends SurtPrefixedDecideRule implements Serializable

serialVersionUID: -7491388438128566377L

Class org.archive.crawler.deciderules.OnDomainsDecideRule extends SurtPrefixedDecideRule implements Serializable

serialVersionUID: -3872369060554558805L

Class org.archive.crawler.deciderules.OnHostsDecideRule extends SurtPrefixedDecideRule implements Serializable

serialVersionUID: -7566348189389792625L

Class org.archive.crawler.deciderules.PathologicalPathDecideRule extends MatchesRegExpDecideRule implements Serializable

serialVersionUID: -1803997581321178499L

Serialized Fields

constructedRegexp

java.lang.String constructedRegexp

Class org.archive.crawler.deciderules.PredicatedDecideRule extends ConfiguredDecideRule implements Serializable

Class org.archive.crawler.deciderules.PrerequisiteAcceptDecideRule extends AcceptDecideRule implements Serializable

serialVersionUID: 2762042167111186142L

Class org.archive.crawler.deciderules.QueueOverbudgetDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: 5165201864629344642L

Class org.archive.crawler.deciderules.RejectDecideRule extends DecideRule implements Serializable

serialVersionUID: -6621307860412933732L

Class org.archive.crawler.deciderules.ScopePlusOneDecideRule extends SurtPrefixedDecideRule implements Serializable

serialVersionUID: -6344162369024146340L

Class org.archive.crawler.deciderules.SeedAcceptDecideRule extends AcceptDecideRule implements Serializable

serialVersionUID: 2167939872761313683L

Class org.archive.crawler.deciderules.SurtPrefixedDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: 2075790126085405015L

Serialized Fields

surtPrefixes

SurtPrefixSet surtPrefixes

Class org.archive.crawler.deciderules.TooManyHopsDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: -5429536193865916670L

Class org.archive.crawler.deciderules.TooManyPathSegmentsDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: 147079100367815075L

Class org.archive.crawler.deciderules.TransclusionDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: -3975688876990558918L


Package org.archive.crawler.deciderules.recrawl

Class org.archive.crawler.deciderules.recrawl.IdenticalDigestDecideRule extends PredicatedDecideRule implements Serializable

serialVersionUID: 4275993790856626949L


Package org.archive.crawler.extractor

Class org.archive.crawler.extractor.AggressiveExtractorHTML extends ExtractorHTML implements Serializable

serialVersionUID: 3586060081186247087L

Class org.archive.crawler.extractor.ChangeEvaluator extends Processor implements Serializable

serialVersionUID: 5547590621493534632L

Class org.archive.crawler.extractor.Extractor extends Processor implements Serializable

Class org.archive.crawler.extractor.ExtractorCSS extends Extractor implements Serializable

serialVersionUID: -1540252885329424902L

Serialized Fields

numberOfCURIsHandled

long numberOfCURIsHandled

numberOfLinksExtracted

long numberOfLinksExtracted

Class org.archive.crawler.extractor.ExtractorDOC extends Extractor implements Serializable

serialVersionUID: 1896822554981116303L

Serialized Fields

numberOfCURIsHandled

long numberOfCURIsHandled

numberOfLinksExtracted

long numberOfLinksExtracted

Class org.archive.crawler.extractor.ExtractorHTML extends Extractor implements Serializable

serialVersionUID: 5855731422080471017L

Serialized Fields

numberOfCURIsHandled

long numberOfCURIsHandled

numberOfLinksExtracted

long numberOfLinksExtracted

Class org.archive.crawler.extractor.ExtractorHTTP extends Processor implements Serializable

serialVersionUID: 8499072198570554647L

Serialized Fields

numberOfCURIsHandled

long numberOfCURIsHandled

numberOfLinksExtracted

long numberOfLinksExtracted

Class org.archive.crawler.extractor.ExtractorImpliedURI extends Extractor implements Serializable

serialVersionUID: 8579045413127769497L

Serialized Fields

numberOfCURIsHandled

long numberOfCURIsHandled

numberOfLinksExtracted

long numberOfLinksExtracted

Class org.archive.crawler.extractor.ExtractorJS extends Extractor implements Serializable

serialVersionUID: -2231962381454717720L

Serialized Fields

numberOfCURIsHandled

long numberOfCURIsHandled

Class org.archive.crawler.extractor.ExtractorPDF extends Extractor implements Serializable

serialVersionUID: -6040669467531928494L

Serialized Fields

maxSizeToParse

long maxSizeToParse

numberOfCURIsHandled

long numberOfCURIsHandled

numberOfLinksExtracted

long numberOfLinksExtracted

Class org.archive.crawler.extractor.ExtractorSWF extends Extractor implements Serializable

serialVersionUID: 3627359592408010589L

Serialized Fields

numberOfCURIsHandled

long numberOfCURIsHandled

numberOfLinksExtracted

long numberOfLinksExtracted

Class org.archive.crawler.extractor.ExtractorUniversal extends Extractor implements Serializable

serialVersionUID: -7593380118857156939L

Serialized Fields

numberOfCURIsHandled

long numberOfCURIsHandled

numberOfLinksExtracted

long numberOfLinksExtracted

Class org.archive.crawler.extractor.ExtractorURI extends Extractor implements Serializable

serialVersionUID: -6273897743240970822L

Serialized Fields

numberOfCURIsHandled

long numberOfCURIsHandled

numberOfLinksExtracted

long numberOfLinksExtracted

Class org.archive.crawler.extractor.ExtractorXML extends Extractor implements Serializable

serialVersionUID: 3101230586822401584L

Serialized Fields

numberOfCURIsHandled

long numberOfCURIsHandled

numberOfLinksExtracted

long numberOfLinksExtracted

Class org.archive.crawler.extractor.HTTPContentDigest extends Processor implements Serializable

serialVersionUID: 8055532198737384358L

Class org.archive.crawler.extractor.JerichoExtractorHTML extends ExtractorHTML implements Serializable

serialVersionUID: 1684681316546343615L

Serialized Fields

logger

java.util.logging.Logger logger

numberOfFormsProcessed

long numberOfFormsProcessed

Class org.archive.crawler.extractor.Link extends java.lang.Object implements Serializable

serialVersionUID: 7660959085498739376L

Serialized Fields

source

java.lang.CharSequence source
URI where this Link was discovered


destination

java.lang.CharSequence destination
URI (absolute) where this Link points


context

java.lang.CharSequence context
context of discovery -- will be an XPath-like element[/@attribute] fragment for HTML URIs, a header name with trailing ':' for header values, or one of the stand-in constants when other context is unavailable


hopType

char hopType
hop-type, as character abbreviation

Class org.archive.crawler.extractor.TrapSuppressExtractor extends Extractor implements Serializable

serialVersionUID: -1028783453022579530L

Serialized Fields

numberOfCURIsHandled

long numberOfCURIsHandled

numberOfCURIsSuppressed

long numberOfCURIsSuppressed

Package org.archive.crawler.fetcher

Class org.archive.crawler.fetcher.FetchDNS extends Processor implements Serializable

serialVersionUID: 4686199203459704426L

Serialized Fields

logger

java.util.logging.Logger logger

ClassType

short ClassType

TypeType

short TypeType

serverInetAddr

java.net.InetAddress serverInetAddr

reusableBuffer

byte[] reusableBuffer

Class org.archive.crawler.fetcher.FetchFTP extends Processor implements Serializable

Class org.archive.crawler.fetcher.FetchHTTP extends Processor implements Serializable

Serialization Methods

readObject

private void readObject(java.io.ObjectInputStream stream)
                 throws java.io.IOException,
                        java.lang.ClassNotFoundException
Throws:
java.io.IOException
java.lang.ClassNotFoundException

writeObject

private void writeObject(java.io.ObjectOutputStream stream)
                  throws java.io.IOException
Throws:
java.io.IOException
Serialized Fields

recoveryRetries

int recoveryRetries
How many 'instant retries' of HttpRecoverableExceptions have occurred. Would like it to be 'long', but longs aren't atomic


curisHandled

int curisHandled
Count of crawl uris handled. Would like to be 'long', but longs aren't atomic


cookieDb

com.sleepycat.je.Database cookieDb
Database backing cookie map, if using BDB


sslfactory

javax.net.ssl.SSLSocketFactory sslfactory
Socket factory that has the configurable trust manager installed.


Package org.archive.crawler.filter

Class org.archive.crawler.filter.ContentTypeRegExpFilter extends URIRegExpFilter implements Serializable

serialVersionUID: 206378978342655106L

Class org.archive.crawler.filter.FilePatternFilter extends URIRegExpFilter implements Serializable

serialVersionUID: -4019256104085004651L

Class org.archive.crawler.filter.HopsFilter extends Filter implements Serializable

serialVersionUID: -5943030310651023640L

Serialized Fields

maxLinkHops

int maxLinkHops
Deprecated. 

maxTransHops

int maxTransHops
Deprecated. 

Class org.archive.crawler.filter.HTTPMidFetchUnchangedFilter extends Filter implements Serializable

serialVersionUID: -7416477243375196980L

Class org.archive.crawler.filter.OrFilter extends Filter implements Serializable

serialVersionUID: -6835737313105835112L

Class org.archive.crawler.filter.PathDepthFilter extends Filter implements Serializable

serialVersionUID: 1626115117327154205L

Serialized Fields

maxPathDepth

java.lang.Integer maxPathDepth
Deprecated. 

Class org.archive.crawler.filter.PathologicalPathFilter extends URIRegExpFilter implements Serializable

serialVersionUID: 2797805167250054353L

Serialized Fields

REGEX_PREFIX

java.lang.String REGEX_PREFIX
Deprecated. 

REGEX_SUFFIX

java.lang.String REGEX_SUFFIX
Deprecated. 

Class org.archive.crawler.filter.SurtPrefixFilter extends Filter implements Serializable

serialVersionUID: -6933592892325852022L

Serialized Fields

surtPrefixes

SurtPrefixSet surtPrefixes
Deprecated. 

Class org.archive.crawler.filter.TransclusionFilter extends Filter implements Serializable

serialVersionUID: 4251767672778714051L

Serialized Fields

maxTransHops

int maxTransHops
Deprecated. 

maxSpeculativeHops

int maxSpeculativeHops
Deprecated. 

maxReferralHops

int maxReferralHops
Deprecated. 

maxEmbedHops

int maxEmbedHops
Deprecated. 

Class org.archive.crawler.filter.URIListRegExpFilter extends Filter implements Serializable

serialVersionUID: -2587977969340783677L

Class org.archive.crawler.filter.URIRegExpFilter extends Filter implements Serializable

serialVersionUID: 1878356276332865537L


Package org.archive.crawler.framework

Class org.archive.crawler.framework.AbstractTracker extends ModuleType implements Serializable

Serialized Fields

crawlerStartTime

long crawlerStartTime

crawlerEndTime

long crawlerEndTime

crawlerPauseStarted

long crawlerPauseStarted

crawlerTotalPausedTime

long crawlerTotalPausedTime

lastLogPointTime

long lastLogPointTime
Timestamp of when this logger last wrote something to the log


shouldrun

boolean shouldrun

Class org.archive.crawler.framework.Checkpointer extends java.lang.Object implements Serializable

serialVersionUID: 7610078446694353173L

Serialized Fields

checkpointPrefix

java.lang.String checkpointPrefix
String to prefix any new checkpoint names.


nextCheckpoint

int nextCheckpoint
Next overall series checkpoint number.


predecessorCheckpoints

java.util.List<E> predecessorCheckpoints
All checkpoint names in chain prior to now. May not all still exist on disk.

Class org.archive.crawler.framework.CrawlController extends java.lang.Object implements Serializable

Serialization Methods

readObject

private void readObject(java.io.ObjectInputStream stream)
                 throws java.io.IOException,
                        java.lang.ClassNotFoundException
Throws:
java.io.IOException
java.lang.ClassNotFoundException
Serialized Fields

checkpointer

Checkpointer checkpointer
Checkpointer. Knows whether a checkpoint is in progress and what the name of the checkpoint is. Also runs checkpoints.


maxBytes

long maxBytes

maxDocument

long maxDocument

maxTime

long maxTime

manifest

java.lang.StringBuffer manifest
A manifest of all files used/created during this crawl. Written to file at the end of the crawl (the absolutely last thing done).


statistics

StatisticsTracking statistics

Class org.archive.crawler.framework.CrawlScope extends Filter implements Serializable

serialVersionUID: -3321533224526211277L

Serialized Fields

seedListeners

java.util.Set<E> seedListeners

Class org.archive.crawler.framework.Filter extends ModuleType implements Serializable

serialVersionUID: -356718306794776802L

Class org.archive.crawler.framework.Processor extends ModuleType implements Serializable

serialVersionUID: 6248563827413710226L

Serialized Fields

attrDecideRules

java.lang.String attrDecideRules
local name for decide-rules


defaultNextProcessor

Processor defaultNextProcessor

Class org.archive.crawler.framework.Scoper extends Processor implements Serializable

Class org.archive.crawler.framework.WriterPoolProcessor extends Processor implements Serializable

serialVersionUID: 1L

Serialization Methods

readObject

private void readObject(java.io.ObjectInputStream stream)
                 throws java.io.IOException,
                        java.lang.ClassNotFoundException
Throws:
java.io.IOException
java.lang.ClassNotFoundException
Serialized Fields

logger

java.util.logging.Logger logger

totalBytesWritten

long totalBytesWritten
Total number of bytes written to disc.


Package org.archive.crawler.framework.exceptions

Class org.archive.crawler.framework.exceptions.ConfigurationException extends InitializationException implements Serializable

serialVersionUID: -9078913414698851380L

Serialized Fields

file

java.lang.String file

element

java.lang.String element

Class org.archive.crawler.framework.exceptions.EndedException extends java.lang.Exception implements Serializable

serialVersionUID: -4638427249822262643L

Class org.archive.crawler.framework.exceptions.FatalConfigurationException extends ConfigurationException implements Serializable

serialVersionUID: -7653248745873511949L

Class org.archive.crawler.framework.exceptions.InitializationException extends java.lang.Exception implements Serializable

serialVersionUID: -3482635476140606185L

Class org.archive.crawler.framework.exceptions.InvalidFrontierMarkerException extends java.lang.Exception implements Serializable

serialVersionUID: -26552188686310984L


Package org.archive.crawler.frontier

Class org.archive.crawler.frontier.AbstractFrontier extends ModuleType implements Serializable

serialVersionUID: -4766504935003203930L

Serialization Methods

readObject

private void readObject(java.io.ObjectInputStream in)
                 throws java.io.IOException,
                        java.lang.ClassNotFoundException
Throws:
java.io.IOException
java.lang.ClassNotFoundException

writeObject

private void writeObject(java.io.ObjectOutputStream out)
                  throws java.io.IOException
Throws:
java.io.IOException
Serialized Fields

nextOrdinal

java.util.concurrent.atomic.AtomicLong nextOrdinal
ordinal numbers to assign to created CrawlURIs


shouldPause

boolean shouldPause
should the frontier hold any threads asking for URIs?


queuedUriCount

long queuedUriCount

succeededFetchCount

long succeededFetchCount

failedFetchCount

long failedFetchCount

disregardedUriCount

long disregardedUriCount

totalProcessedBytes

long totalProcessedBytes
Used when bandwidth constraints are used.


processedBytesAfterLastEmittedURI

long processedBytesAfterLastEmittedURI

lastMaxBandwidthKB

int lastMaxBandwidthKB

Class org.archive.crawler.frontier.AdaptiveRevisitFrontier extends ModuleType implements Serializable

serialVersionUID: -8666872690438543671L

Serialized Fields

controller

CrawlController controller

hostQueues

AdaptiveRevisitQueueList hostQueues

alreadyIncluded

UriUniqFilter alreadyIncluded

threadWaiting

org.archive.crawler.frontier.AdaptiveRevisitFrontier.ThreadLocalQueue threadWaiting

queueAssignmentPolicy

QueueAssignmentPolicy queueAssignmentPolicy
Policy for assigning CrawlURIs to named queues


succeededFetchCount

long succeededFetchCount

failedFetchCount

long failedFetchCount

disregardedUriCount

long disregardedUriCount

totalProcessedBytes

long totalProcessedBytes

shouldPause

boolean shouldPause

shouldTerminate

boolean shouldTerminate

Class org.archive.crawler.frontier.BdbFrontier extends WorkQueueFrontier implements Serializable

Serialized Fields

AVAILABLE_INCLUDED_OPTIONS

java.lang.String[] AVAILABLE_INCLUDED_OPTIONS
all URI-already-included options available to be chosen

Class org.archive.crawler.frontier.BdbWorkQueue extends WorkQueue implements Serializable

Serialized Fields

origin

byte[] origin
All items in this queue have this same 'origin' prefix to their keys.

Class org.archive.crawler.frontier.DomainSensitiveFrontier extends BdbFrontier implements Serializable

serialVersionUID: -3330190056282726202L

Serialized Fields

hostCounters

java.util.Hashtable<K,V> hostCounters
Deprecated. 

countPerOverride

boolean countPerOverride
Deprecated. 

counterMode

java.lang.String counterMode
Deprecated. 

Class org.archive.crawler.frontier.WorkQueue extends java.lang.Object implements Serializable

serialVersionUID: -1939168792663316048L

Serialized Fields

classKey

java.lang.String classKey
The classKey


active

boolean active

count

long count
Total number of stored items


enqueueCount

long enqueueCount
Total number of items ever enqueued


isHeld

boolean isHeld
Whether queue is already in lifecycle stage


wakeTime

long wakeTime
Time to wake, if snoozed


sessionBalance

int sessionBalance
Running 'budget' indicating whether queue should stay active


lastCost

int lastCost
Cost of the last item to be charged against queue


costCount

long costCount
Total number of items charged against queue; with totalExpenditure can be used to calculate 'average cost'.


totalExpenditure

long totalExpenditure
Running tally of total expenditures out of this queue; tallied only as a URI is finished or retried


pendingExpenditure

long pendingExpenditure
Net cost of all currently-enqueued URIs.


totalBudget

long totalBudget
Total to spend on this queue over its lifetime


peekItem

CrawlURI peekItem
The next item to be returned


lastQueued

java.lang.String lastQueued
Last URI enqueued


lastPeeked

java.lang.String lastPeeked
Last URI peeked


lastDequeueTime

long lastDequeueTime
time of last dequeue (disposition of some URI)


errorCount

long errorCount
count of errors encountered


substats

CrawlSubstats substats
Substats for all CrawlURIs in this group


retired

boolean retired

Class org.archive.crawler.frontier.WorkQueueFrontier extends AbstractFrontier implements Serializable

serialVersionUID: 570384305871965843L

Serialized Fields

readyClassQueues

java.util.concurrent.BlockingQueue<E> readyClassQueues
All per-class queues whose first item may be handed out. Linked-list of keys for the queues.


targetSizeForReadyQueues

int targetSizeForReadyQueues
Target (minimum) size to keep readyClassQueues


inactiveQueues

java.util.Queue<E> inactiveQueues
All 'inactive' queues, not yet in active rotation. Linked-list of keys for the queues.


retiredQueues

java.util.Queue<E> retiredQueues
'retired' queues, no longer considered for activation. Linked-list of keys for queues.


inProcessQueues

org.apache.commons.collections.Bag inProcessQueues
all per-class queues from which a URI is outstanding


snoozedClassQueues

java.util.SortedSet<E> snoozedClassQueues
All per-class queues held in snoozed state, sorted by wake time.


longestActiveQueue

WorkQueue longestActiveQueue

AVAILABLE_COST_POLICIES

java.lang.String[] AVAILABLE_COST_POLICIES
all policies available to be chosen


Package org.archive.crawler.postprocessor

Class org.archive.crawler.postprocessor.AcceptRevisitProcessor extends Processor implements Serializable

serialVersionUID: 4310432303089418844L

Class org.archive.crawler.postprocessor.ContentBasedWaitEvaluator extends WaitEvaluator implements Serializable

serialVersionUID: 1623347208782997347L

Class org.archive.crawler.postprocessor.CrawlStateUpdater extends Processor implements Serializable

serialVersionUID: -1072728147960180091L

Class org.archive.crawler.postprocessor.FrontierScheduler extends Processor implements Serializable

serialVersionUID: -5178775477602250542L

Class org.archive.crawler.postprocessor.ImageWaitEvaluator extends ContentBasedWaitEvaluator implements Serializable

serialVersionUID: -2762377129860398333L

Class org.archive.crawler.postprocessor.LinksScoper extends Scoper implements Serializable

serialVersionUID: -4074442117992496793L

Class org.archive.crawler.postprocessor.LowDiskPauseProcessor extends Processor implements Serializable

serialVersionUID: 3338337700768396302L

Serialized Fields

contentSinceCheck

int contentSinceCheck

Class org.archive.crawler.postprocessor.RejectRevisitProcessor extends Processor implements Serializable

serialVersionUID: 4310432303089418844L

Class org.archive.crawler.postprocessor.SupplementaryLinksScoper extends Scoper implements Serializable

serialVersionUID: -775819977752790418L

Class org.archive.crawler.postprocessor.TextWaitEvaluator extends ContentBasedWaitEvaluator implements Serializable

serialVersionUID: -328402266684681632L

Class org.archive.crawler.postprocessor.WaitEvaluator extends Processor implements Serializable

serialVersionUID: 7452762726125458413L

Serialized Fields

logger

java.util.logging.Logger logger

Package org.archive.crawler.prefetch

Class org.archive.crawler.prefetch.PreconditionEnforcer extends Processor implements Serializable

serialVersionUID: 4636474153589079615L

Class org.archive.crawler.prefetch.Preselector extends Scoper implements Serializable

serialVersionUID: 3738560264369561017L

Class org.archive.crawler.prefetch.QuotaEnforcer extends Processor implements Serializable

serialVersionUID: 6091720623469404595L

Serialized Fields

LOGGER

java.util.logging.Logger LOGGER

Class org.archive.crawler.prefetch.RuntimeLimitEnforcer extends Processor implements Serializable

serialVersionUID: 1L

Serialized Fields

logger

java.util.logging.Logger logger

Package org.archive.crawler.processor

Class org.archive.crawler.processor.BeanShellProcessor extends Processor implements Serializable

serialVersionUID: 6926589944337050754L

Serialized Fields

threadInterpreter

java.lang.ThreadLocal<T> threadInterpreter

sharedInterpreter

bsh.Interpreter sharedInterpreter

sharedMap

java.util.Map<K,V> sharedMap

Class org.archive.crawler.processor.CrawlMapper extends Processor implements Serializable

Serialized Fields

diversionLogs

java.util.HashMap<K,V> diversionLogs
Mapping of target crawlers to logs (PrintWriters)


logGeneration

java.lang.String logGeneration
Truncated timestamp prefix for diversion logs; when current time doesn't match, it's time to close all current logs.


localName

java.lang.String localName
name of the enclosing crawler (URIs mapped here stay put)


cache

ArrayLongFPCache cache

Class org.archive.crawler.processor.HashCrawlMapper extends CrawlMapper implements Serializable

serialVersionUID: 1L

Serialized Fields

bucketCount

long bucketCount

reducePattern

java.lang.String reducePattern

Class org.archive.crawler.processor.LexicalCrawlMapper extends CrawlMapper implements Serializable

serialVersionUID: 1L

Serialized Fields

map

java.util.TreeMap<K,V> map
Mapping of classKey ranges (as represented by their start) to crawlers (by abstract name/filename)


Package org.archive.crawler.processor.recrawl

Class org.archive.crawler.processor.recrawl.FetchHistoryProcessor extends Processor implements Serializable

serialVersionUID: 8476621038669163983L

Class org.archive.crawler.processor.recrawl.PersistLoadProcessor extends PersistOnlineProcessor implements Serializable

serialVersionUID: -1917169316015093131L

Class org.archive.crawler.processor.recrawl.PersistLogProcessor extends PersistProcessor implements Serializable

serialVersionUID: 1678691994065439346L

Serialized Fields

log

CrawlerJournal log

Class org.archive.crawler.processor.recrawl.PersistOnlineProcessor extends PersistProcessor implements Serializable

serialVersionUID: -666479480942267268L

Serialized Fields

store

com.sleepycat.collections.StoredSortedMap<K,V> store

historyDb

com.sleepycat.je.Database historyDb

Class org.archive.crawler.processor.recrawl.PersistProcessor extends Processor implements Serializable

serialVersionUID: 1L

Class org.archive.crawler.processor.recrawl.PersistStoreProcessor extends PersistOnlineProcessor implements Serializable

serialVersionUID: -8308356194337303758L


Package org.archive.crawler.scope

Class org.archive.crawler.scope.BroadScope extends ClassicScope implements Serializable

serialVersionUID: -2354234238454865888L

Class org.archive.crawler.scope.ClassicScope extends CrawlScope implements Serializable

serialVersionUID: 4494905304855590002L

Serialized Fields

excludeFilter

OrFilter excludeFilter

Class org.archive.crawler.scope.DomainScope extends SeedCachingScope implements Serializable

serialVersionUID: 648062105277258820L

Serialized Fields

additionalFocusFilter

Filter additionalFocusFilter
Deprecated. 

transitiveFilter

Filter transitiveFilter
Deprecated. 

Class org.archive.crawler.scope.HostScope extends SeedCachingScope implements Serializable

serialVersionUID: -6257664892667267266L

Serialized Fields

additionalFocusFilter

Filter additionalFocusFilter
Deprecated. 

transitiveFilter

Filter transitiveFilter
Deprecated. 

Class org.archive.crawler.scope.PathScope extends SeedCachingScope implements Serializable

serialVersionUID: -2217024073240277527L

Serialized Fields

additionalFocusFilter

Filter additionalFocusFilter
Deprecated. 

transitiveFilter

Filter transitiveFilter
Deprecated. 

Class org.archive.crawler.scope.RefinedScope extends ClassicScope implements Serializable

Serialized Fields

additionalFocusFilter

Filter additionalFocusFilter

transitiveFilter

Filter transitiveFilter

Class org.archive.crawler.scope.SeedCachingScope extends ClassicScope implements Serializable

serialVersionUID: 300230673616424926L

Serialized Fields

seeds

java.util.List<E> seeds

Class org.archive.crawler.scope.SurtPrefixScope extends RefinedScope implements Serializable

serialVersionUID: 2652008287322770123L

Serialized Fields

surtPrefixes

SurtPrefixSet surtPrefixes
Deprecated. 

Package org.archive.crawler.settings

Class org.archive.crawler.settings.ComplexType extends Type implements Serializable

Serialized Fields

description

java.lang.String description

absoluteName

java.lang.String absoluteName

definition

java.util.List<E> definition

definitionMap

java.util.Map<K,V> definitionMap

initialized

boolean initialized

preservedFields

java.lang.String[] preservedFields

Class org.archive.crawler.settings.Constraint extends java.lang.Object implements Serializable

serialVersionUID: -646814290764700497L

Serialized Fields

severity

java.util.logging.Level severity

msg

java.lang.String msg

Class org.archive.crawler.settings.DataContainer extends java.util.HashMap<java.lang.String,java.lang.Object> implements Serializable

serialVersionUID: 2089160108643429282L

Serialized Fields

complexType

ComplexType complexType
The ComplexType for which this DataContainer keeps data


settings

java.lang.ref.Reference<T> settings
The Settings object for which this data is valid


attributes

java.util.List<E> attributes
The attributes defined for this DataContainer's combination of ComplexType and CrawlerSettings.


attributeNames

java.util.Map<K,V> attributeNames
All attributes that have their value set for this DataContainer's combination of ComplexType and CrawlerSettings. This includes overrides.

Class org.archive.crawler.settings.DoubleList extends ListType<java.lang.Double> implements Serializable

serialVersionUID: -5793937164778552546L

Class org.archive.crawler.settings.FloatList extends ListType<java.lang.Float> implements Serializable

serialVersionUID: -8836233200837205447L

Class org.archive.crawler.settings.IntegerList extends ListType<java.lang.Integer> implements Serializable

serialVersionUID: -637584927948877976L

Class org.archive.crawler.settings.LegalValueListConstraint extends Constraint implements Serializable

serialVersionUID: -4293290799574408033L

Class org.archive.crawler.settings.LegalValueTypeConstraint extends Constraint implements Serializable

serialVersionUID: 6106774072922858976L

Class org.archive.crawler.settings.ListType extends Type implements Serializable

Serialized Fields

listData

java.util.List<E> listData

description

java.lang.String description

Class org.archive.crawler.settings.LongList extends ListType<java.lang.Long> implements Serializable

serialVersionUID: -7542494945185808903L

Class org.archive.crawler.settings.MapType extends ComplexType implements Serializable

serialVersionUID: -3694800285930202700L

Serialized Fields

definition

Type definition
The content type allowed for this map.

Class org.archive.crawler.settings.ModuleAttributeInfo extends javax.management.MBeanAttributeInfo implements Serializable

serialVersionUID: -4447321338690051514L

Serialized Fields

type

java.lang.String type

isOverrideable

boolean isOverrideable

isTransient

boolean isTransient

defaultValue

java.lang.Object defaultValue

legalValueLists

java.lang.Object[] legalValueLists

complexType

boolean complexType

isExpertSetting

boolean isExpertSetting

Class org.archive.crawler.settings.ModuleType extends ComplexType implements Serializable

serialVersionUID: 3686678928531236811L

Class org.archive.crawler.settings.RegularExpressionConstraint extends Constraint implements Serializable

serialVersionUID: -5916211981136071809L

Serialized Fields

pattern

java.lang.String pattern

Class org.archive.crawler.settings.SimpleType extends Type implements Serializable

serialVersionUID: -5134952907004648419L

Serialized Fields

description

java.lang.String description

legalValues

java.lang.Object[] legalValues

Class org.archive.crawler.settings.StringList extends ListType<java.lang.String> implements Serializable

serialVersionUID: 3181868189684416390L

Class org.archive.crawler.settings.TextField extends java.lang.Object implements Serializable

serialVersionUID: -2853908867414076703L

Serialized Fields

value

java.lang.String value

Class org.archive.crawler.settings.Type extends javax.management.Attribute implements Serializable

Serialized Fields

isTransient

boolean isTransient
Should this Type be serialized to persistent storage


overrideable

boolean overrideable
True if this Type can be overridden


isExpertSetting

boolean isExpertSetting
True if this Type should only show up in expert mode in UI


constraints

java.util.List<E> constraints
List of constraints that apply to the values of this type


legalValueType

java.lang.Class<T> legalValueType
The class the value of this type must be an instance of (or an instance of a subclass).


Package org.archive.crawler.url.canonicalize

Class org.archive.crawler.url.canonicalize.BaseRule extends ModuleType implements Serializable

Class org.archive.crawler.url.canonicalize.FixupQueryStr extends BaseRule implements Serializable

serialVersionUID: 3169526832544474794L

Class org.archive.crawler.url.canonicalize.LowercaseRule extends BaseRule implements Serializable

serialVersionUID: -4732482198714929052L

Class org.archive.crawler.url.canonicalize.RegexRule extends BaseRule implements Serializable

serialVersionUID: -2658094415450237847L

Class org.archive.crawler.url.canonicalize.StripExtraSlashes extends BaseRule implements Serializable

Class org.archive.crawler.url.canonicalize.StripSessionCFIDs extends BaseRule implements Serializable

serialVersionUID: 9122689291157731293L

Class org.archive.crawler.url.canonicalize.StripSessionIDs extends BaseRule implements Serializable

serialVersionUID: -3737115200690525641L

Class org.archive.crawler.url.canonicalize.StripUserinfoRule extends BaseRule implements Serializable

serialVersionUID: -4271062607638914996L

Class org.archive.crawler.url.canonicalize.StripWWWNRule extends BaseRule implements Serializable

serialVersionUID: 3619916990307308590L

Class org.archive.crawler.url.canonicalize.StripWWWRule extends BaseRule implements Serializable

serialVersionUID: -5416391108485746976L


Package org.archive.crawler.util

Class org.archive.crawler.util.BdbUriUniqFilter extends SetBasedUriUniqFilter implements Serializable

serialVersionUID: -8099357538178524011L

Serialization Methods

writeObject

private void writeObject(java.io.ObjectOutputStream oos)
                  throws java.io.IOException
Throws:
java.io.IOException
Serialized Fields

createdEnvironment

boolean createdEnvironment

lastCacheMiss

long lastCacheMiss

lastCacheMissDiff

long lastCacheMissDiff

count

long count

aggregatedLookupTime

long aggregatedLookupTime

Class org.archive.crawler.util.BloomUriUniqFilter extends SetBasedUriUniqFilter implements Serializable

serialVersionUID: 1061526253773091309L

Serialized Fields

bloom

BloomFilter bloom

expected_n

int expected_n

Class org.archive.crawler.util.CrawledBytesHistotable extends Histotable<java.lang.String> implements Serializable

serialVersionUID: 7923431123239026213L

Class org.archive.crawler.util.FPUriUniqFilter extends SetBasedUriUniqFilter implements Serializable

Serialized Fields

fpset

LongFPSet fpset

Class org.archive.crawler.util.SeedUrlNotFoundException extends java.lang.Exception implements Serializable

serialVersionUID: 2515927240634523493L


Package org.archive.crawler.writer

Class org.archive.crawler.writer.ARCWriterProcessor extends WriterPoolProcessor implements Serializable

serialVersionUID: 1957518408532644531L

Serialized Fields

logger

java.util.logging.Logger logger

Class org.archive.crawler.writer.Kw3WriterProcessor extends Processor implements Serializable

serialVersionUID: 7171448068924684594L

Serialized Fields

arcsDir

java.io.File arcsDir

chmod

boolean chmod

chmodValue

java.lang.String chmodValue

maxSize

int maxSize

collection

java.lang.String collection

harvester

java.lang.String harvester

Class org.archive.crawler.writer.MirrorWriterProcessor extends Processor implements Serializable

serialVersionUID: 301407556928389168L

Class org.archive.crawler.writer.WARCWriterProcessor extends WriterPoolProcessor implements Serializable

serialVersionUID: 6182850087635847443L

Serialized Fields

logger

java.util.logging.Logger logger

stats

java.util.HashMap<K,V> stats

urlsWritten

int urlsWritten

Package org.archive.io

Class org.archive.io.NoGzipMagicException extends java.io.IOException implements Serializable

serialVersionUID: 3084169624430655013L

Class org.archive.io.RecorderIOException extends java.io.IOException implements Serializable

serialVersionUID: 5907470275350314277L

Class org.archive.io.RecorderLengthExceededException extends RecorderIOException implements Serializable

serialVersionUID: 6655419033414648444L

Class org.archive.io.RecorderTimeoutException extends RecorderIOException implements Serializable

serialVersionUID: 7433214063765078269L

Class org.archive.io.RecorderTooMuchHeaderException extends RecorderIOException implements Serializable

serialVersionUID: 3528516034898129150L

Class org.archive.io.RecoverableIOException extends java.io.IOException implements Serializable

serialVersionUID: 6194776587381865451L

Serialized Fields

decoratedIOException

java.io.IOException decoratedIOException

Class org.archive.io.SinkHandlerLogRecord extends java.util.logging.LogRecord implements Serializable

serialVersionUID: -7782942650334713560L

Serialized Fields

read

boolean read

delegatee

java.util.logging.LogRecord delegatee

creationTime

java.util.Date creationTime

Package org.archive.net

Class org.archive.net.LaxURI extends org.apache.commons.httpclient.URI implements Serializable

serialVersionUID: 5273922211722239537L

Class org.archive.net.UURI extends LaxURI implements Serializable

serialVersionUID: -1277570889914647093L

Class org.archive.net.UURIFactory extends org.apache.commons.httpclient.URI implements Serializable

serialVersionUID: -6146295130382209042L

Serialized Fields

schemes

java.lang.String[] schemes

ignoredSchemes

java.lang.String[] ignoredSchemes

Package org.archive.queue

Class org.archive.queue.MemQueue extends java.util.LinkedList<T> implements Serializable

serialVersionUID: -9077824759011044247L

Class org.archive.queue.StoredQueue extends java.util.AbstractQueue<E extends java.io.Serializable> implements Serializable

serialVersionUID: 1L

Serialization Methods

writeObject

private void writeObject(java.io.ObjectOutputStream s)
                  throws java.io.IOException
Save the state to a stream (that is, serialize it).

Serial Data:
The capacity is emitted (int), followed by all of its elements (each an Object) in the proper order, followed by a null
Throws:
java.io.IOException
Serialized Fields

tailIndex

java.util.concurrent.atomic.AtomicLong tailIndex

headIndex

java.util.concurrent.atomic.AtomicLong headIndex

Package org.archive.util

Class org.archive.util.AbstractLongFPSet extends java.lang.Object implements Serializable

Serialized Fields

capacityPowerOfTwo

int capacityPowerOfTwo
the capacity of this set, specified as the exponent of a power of 2


loadFactor

float loadFactor
The load factor, as a fraction. This gives the amount of free space to keep in the Set.


count

long count
The current number of elements in the set

Class org.archive.util.BloomFilter64bit extends java.lang.Object implements Serializable

serialVersionUID: 2L

Serialized Fields

m

long m
The number of bits in this filter.


power

int power
if bitfield is an exact power of 2 in length, it is this power


expectedInserts

long expectedInserts
The expected number of inserts; determines calculated size


d

int d
The number of hash functions used by this filter.


bits

long[][] bits
The underlying bit vector


weight

long[][] weight
The random integers used to generate the hash functions.


size

int size
The number of elements currently in the filter. It may be smaller than the actual number of additions of distinct character sequences because of false positives.

Class org.archive.util.CachedBdbMap extends java.util.AbstractMap<K,V> implements Serializable

serialVersionUID: -8655539411367047332L

Serialized Fields

diskMapSize

java.util.concurrent.atomic.AtomicInteger diskMapSize
Deprecated. 
The number of objects in the diskMap StoredMap. (Package access for unit testing.)


cacheHit

java.util.concurrent.atomic.AtomicLong cacheHit
Deprecated. 
Count of times we got an object from in-memory cache.


countOfGets

java.util.concurrent.atomic.AtomicLong countOfGets
Deprecated. 
Count of times the CachedBdbMap.get(java.lang.Object) method was called.


diskHit

java.util.concurrent.atomic.AtomicLong diskHit
Deprecated. 
Count of every time we went to the disk-based map AND we found an object (Doesn't include accesses that came back null).


dbName

java.lang.String dbName
Deprecated. 
Name of bdbje db.


LOG_ERROR_ON_DESIGN_VIOLATING_METHODS

boolean LOG_ERROR_ON_DESIGN_VIOLATING_METHODS
Deprecated. 
Internal behavior characterization flag. Log a warning when operations are performed that could violate the "only one reference" design rule. The problem to avoid is mutating an unmapped value instance that would not be persisted. (Warning is emitted at most once per instance.)

Class org.archive.util.Histotable extends java.util.HashMap<K,java.lang.Long> implements Serializable

serialVersionUID: 310306238032568623L

Class org.archive.util.LRU extends java.util.LinkedHashMap<K,V> implements Serializable

serialVersionUID: 1032420936705267913L

Serialized Fields

max

int max
The maximum number of entries to store in the cache.

Class org.archive.util.ObjectIdentityBdbCache extends java.lang.Object implements Serializable

serialVersionUID: 1L

Serialized Fields

count

java.util.concurrent.atomic.AtomicLong count

cacheHit

java.util.concurrent.atomic.AtomicLong cacheHit
Count of times we got an object from in-memory cache


countOfGets

java.util.concurrent.atomic.AtomicLong countOfGets
Count of times the ObjectIdentityBdbCache.get(java.lang.String) method was called.


diskHit

java.util.concurrent.atomic.AtomicLong diskHit
Count of every time disk-based map provided non-null object


supplierUsed

java.util.concurrent.atomic.AtomicLong supplierUsed
Count of times Supplier was used for new object


expungeStatsDiskPut

java.util.concurrent.atomic.AtomicLong expungeStatsDiskPut
count of expunge put() to BDB (implies disk)


useStatsSyncUsed

java.util.concurrent.atomic.AtomicLong useStatsSyncUsed
count of ObjectIdentityBdbCache.sync() use

Class org.archive.util.PrefixSet extends java.util.TreeSet<java.lang.String> implements Serializable

serialVersionUID: -6054697706348411992L

Class org.archive.util.SubList extends java.util.AbstractList<E> implements Serializable

serialVersionUID: 1L

Serialized Fields

delegate

java.util.List<E> delegate
The list that created this SubList.


start

int start
The starting index of the SubList, inclusive.


end

int end
The ending index of the SubList, exclusive.

Class org.archive.util.SurtPrefixSet extends PrefixSet implements Serializable

serialVersionUID: 2598365040524933110L


Package org.archive.util.anvl

Class org.archive.util.anvl.ANVLRecord extends java.util.ArrayList<Element> implements Serializable

serialVersionUID: -4610638888453052958L

Class org.archive.util.anvl.ANVLRecords extends java.util.ArrayList<ANVLRecord> implements Serializable

serialVersionUID: 5361551920550106113L


Package org.archive.util.fingerprint

Class org.archive.util.fingerprint.LongFPSetCache extends MemLongFPSet implements Serializable

serialVersionUID: -5307436423975825566L

Serialized Fields

sweepHand

long sweepHand

Class org.archive.util.fingerprint.MemLongFPSet extends AbstractLongFPSet implements Serializable

serialVersionUID: -4301879539092625698L

Serialized Fields

slots

byte[] slots

values

long[] values



Copyright © 2003-2011 Internet Archive. All Rights Reserved.