package org.archive.crawler.postprocessor;

import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.framework.Processor;
import org.archive.crawler.frontier.AdaptiveRevisitAttributeConstants;

/**
 * Set a URI to not be revisited by the ARFrontier. This only makes sense when
 * using the ARFrontier and a decide-rule chain granting only selected access to
 * this processor. This is the opposite of the AcceptRevisitProcessor class.
 *
 * <p>The processor itself makes no decision about which URIs to mark: every
 * non-null {@link CrawlURI} that reaches {@link #innerProcess(CrawlURI)} is
 * flagged with {@code A_DISCARD_REVISIT = Boolean.TRUE}.
 *
 * @author mzsanford
 */
public class RejectRevisitProcessor extends Processor implements
        AdaptiveRevisitAttributeConstants {
    private static final long serialVersionUID = 4310432303089418844L;

    private static final Logger logger = Logger
            .getLogger(RejectRevisitProcessor.class.getName());

    /**
     * Creates the processor.
     *
     * @param name the processor name, passed through to the superclass
     *             constructor together with a fixed description string
     */
    public RejectRevisitProcessor(String name) {
        super(name, "Set a URI to not be revisited by the ARFrontier.");
    }

    /**
     * Registers {@code A_DISCARD_REVISIT} via
     * {@link CrawlURI#addAlistPersistentMember(String)}.
     *
     * <p>NOTE(review): presumably this keeps the flag attached to the URI
     * across processing passes so the frontier can still read it later --
     * confirm against {@code CrawlURI}.
     */
    @Override
    protected void initialTasks() {
        CrawlURI.addAlistPersistentMember(A_DISCARD_REVISIT);
    }

    /**
     * Flags the given URI with {@code A_DISCARD_REVISIT = Boolean.TRUE}.
     *
     * @param curi the URI being processed; a {@code null} value is ignored
     * @throws InterruptedException declared by the overridden method; nothing
     *                              in this body throws it directly
     */
    @Override
    protected void innerProcess(CrawlURI curi) throws InterruptedException {
        if (curi == null) {
            // Nothing to flag.
            return;
        }

        // Guard the message construction so the URI is only stringified when
        // FINE logging is actually enabled.
        if (logger.isLoggable(Level.FINE)) {
            logger.fine("Adding DISCARD_REVISIT=true to Crawl URI: "
                    + curi.getUURI().toString());
        }
        curi.putObject(A_DISCARD_REVISIT, Boolean.TRUE);
    }

}