|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.archive.crawler.frontier.WorkQueue
public abstract class WorkQueue
A single queue of related URIs to visit, grouped by a classKey (typically "hostname:port" or similar)
Field Summary | |
---|---|
protected java.lang.String |
classKey
The classKey |
(package private) static long |
serialVersionUID
|
protected CrawlSubstats |
substats
Substats for all CrawlURIs in this group |
Constructor Summary | |
---|---|
WorkQueue(java.lang.String pClassKey)
|
Method Summary | |
---|---|
void |
clearHeld()
Clear isHeld to false |
int |
compareTo(java.lang.Object obj)
|
protected abstract void |
deleteItem(WorkQueueFrontier frontier,
CrawlURI item)
Removes the given item from the queue. |
long |
deleteMatching(WorkQueueFrontier frontier,
java.lang.String match)
Delete URIs matching the given pattern from this queue. |
protected abstract long |
deleteMatchingFromQueue(WorkQueueFrontier frontier,
java.lang.String match)
Delete URIs matching the given pattern from this queue. |
void |
dequeue(WorkQueueFrontier frontier)
Remove the peekItem from the queue and adjust the count. |
void |
enqueue(WorkQueueFrontier frontier,
CrawlURI curi)
Add the given CrawlURI, noting its addition in running count. |
int |
expend(int amount)
Decrease the internal running budget by the given amount. |
java.lang.String |
getClassKey()
|
UURI |
getContextUURI(WorkQueueFrontier wqf)
|
long |
getCount()
|
long |
getPendingExpenditure()
Return the tally of all URI costs currently inside this queue |
java.lang.String[] |
getReports()
Get an array of report names offered by this Reporter. |
int |
getSessionBalance()
Return current session 'activity budget balance' |
CrawlSubstats |
getSubstats()
|
long |
getTotalBudget()
Retrieve the total expenditure level allowed by this queue. |
long |
getTotalExpenditure()
Return the tally of all expenditures from this queue (dequeued items) |
long |
getWakeTime()
|
int |
incrementSessionBalance(int amount)
Increase the internal running budget to be used before deactivating the queue |
protected abstract void |
insertItem(WorkQueueFrontier frontier,
CrawlURI curi,
boolean expectedPresent)
Insert the given curi, whether it is already present or not. |
boolean |
isHeld()
Whether the queue is already in a lifecycle stage -- such as ready, in-progress, snoozed -- and thus should not be redundantly inserted to readyClassQueues |
boolean |
isOverBudget()
Check whether queue has temporarily or permanently exceeded its budget. |
boolean |
isRetired()
|
void |
noteError(int penalty)
Note an error and assess an extra penalty. |
CrawlURI |
peek(WorkQueueFrontier frontier)
Return the topmost queue item -- and remember it, such that even later higher-priority inserts don't change it. |
protected abstract CrawlURI |
peekItem(WorkQueueFrontier frontier)
Returns first item from queue (does not delete) |
int |
refund(int amount)
A URI should not have been charged against queue (e.g. it was disregarded); return the amount expended |
void |
reportTo(java.io.PrintWriter writer)
Make a default report to the passed-in Writer. |
void |
reportTo(java.lang.String name,
java.io.PrintWriter writer)
Make a report of the given name to the passed-in Writer. If null, give the default report. |
protected void |
resume(WorkQueueFrontier frontier)
Resumes this WorkQueue. |
void |
setActive(WorkQueueFrontier frontier,
boolean b)
|
void |
setHeld()
Set isHeld to true |
void |
setRetired(boolean b)
Set the retired status of this queue. |
void |
setSessionBalance(int balance)
Set the session 'activity budget balance' to the given value |
void |
setTotalBudget(long budget)
Set the total expenditure level allowable before queue is considered inherently 'over-budget'. |
void |
setWakeTime(long l)
|
java.lang.String |
singleLineLegend()
Return a legend for the single-line summary report as a String. |
java.lang.String |
singleLineReport()
Return a short single-line summary report as a String. |
void |
singleLineReportTo(java.io.PrintWriter writer)
Make a single-line summary report to the passed-in writer |
protected void |
suspend(WorkQueueFrontier frontier)
Suspends this WorkQueue. |
void |
unpeek()
Forgive the peek, allowing a subsequent peek to return a different item. |
void |
update(WorkQueueFrontier frontier,
CrawlURI curi)
Update the given CrawlURI, which should already be present. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
static final long serialVersionUID
protected final java.lang.String classKey
protected CrawlSubstats substats
Constructor Detail |
---|
public WorkQueue(java.lang.String pClassKey)
Method Detail |
---|
public long deleteMatching(WorkQueueFrontier frontier, java.lang.String match)
frontier
- match
-
public void enqueue(WorkQueueFrontier frontier, CrawlURI curi)
frontier
- Work queues manager.
curi
- CrawlURI to insert.
public CrawlURI peek(WorkQueueFrontier frontier)
frontier
- Work queues manager
public void dequeue(WorkQueueFrontier frontier)
frontier
- Work queues manager.
public void setSessionBalance(int balance)
balance
- to use
public int getSessionBalance()
public void setTotalBudget(long budget)
budget
-
public long getTotalBudget()
public boolean isOverBudget()
public long getTotalExpenditure()
public long getPendingExpenditure()
public int incrementSessionBalance(int amount)
amount
- amount to increment
public int expend(int amount)
amount
- to decrement
public int refund(int amount)
amount
- to return
public void noteError(int penalty)
penalty
- additional amount to deduct
public void setWakeTime(long l)
l
-
public long getWakeTime()
public java.lang.String getClassKey()
public void clearHeld()
public boolean isHeld()
public void setHeld()
public void unpeek()
public final int compareTo(java.lang.Object obj)
compareTo
in interface java.lang.Comparable
public void update(WorkQueueFrontier frontier, CrawlURI curi)
frontier
- Work queues manager.
curi
- CrawlURI to update.
public long getCount()
protected abstract void insertItem(WorkQueueFrontier frontier, CrawlURI curi, boolean expectedPresent) throws java.io.IOException
frontier
- WorkQueueFrontier.
curi
- CrawlURI to insert.
java.io.IOException
- if there was a problem while inserting the item
protected abstract long deleteMatchingFromQueue(WorkQueueFrontier frontier, java.lang.String match) throws java.io.IOException
frontier
- WorkQueues manager.
match
- the pattern to match
java.io.IOException
- if there was a problem while deleting
protected abstract void deleteItem(WorkQueueFrontier frontier, CrawlURI item) throws java.io.IOException
frontier
- Work queues manager.
java.io.IOException
- if there was a problem while deleting the item
protected abstract CrawlURI peekItem(WorkQueueFrontier frontier) throws java.io.IOException
java.io.IOException
- if there was a problem while peeking
protected void suspend(WorkQueueFrontier frontier) throws java.io.IOException
frontier
-
java.io.IOException
protected void resume(WorkQueueFrontier frontier) throws java.io.IOException
frontier
-
java.io.IOException
public void setActive(WorkQueueFrontier frontier, boolean b)
public java.lang.String[] getReports()
Reporter
getReports
in interface Reporter
public void reportTo(java.io.PrintWriter writer)
Reporter
reportTo
in interface Reporter
writer
- to receive report
public void singleLineReportTo(java.io.PrintWriter writer)
Reporter
singleLineReportTo
in interface Reporter
writer
- to receive report
public java.lang.String singleLineLegend()
Reporter
singleLineLegend
in interface Reporter
public java.lang.String singleLineReport()
Reporter
singleLineReport
in interface Reporter
public void reportTo(java.lang.String name, java.io.PrintWriter writer)
Reporter
reportTo
in interface Reporter
writer
-
java.io.IOException
public CrawlSubstats getSubstats()
getSubstats
in interface CrawlSubstats.HasCrawlSubstats
public void setRetired(boolean b)
b
- new value for retired status
public boolean isRetired()
public UURI getContextUURI(WorkQueueFrontier wqf)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |