View Javadoc

1   /* $Id: Crawler.java 4078 2005-12-22 01:18:09Z dbernstein $
2    *
3    * Created on Dec 12, 2005
4    *
5    * Copyright (C) 2005 Internet Archive.
6    *  
7    * This file is part of the Heritrix Cluster Controller (crawler.archive.org).
8    *  
9    * HCC is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   * 
14   * Heritrix is distributed in the hope that it will be useful, 
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   * 
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.hcc.client;
24  
25  import java.util.Collection;
26  
27  /***
28   * An interface representing an instance of a Crawler.
29   * 
30   * @author Daniel Bernstein (dbernstein@archive.org)
31   */
32  public interface Crawler extends Proxy {
33      /***
34       * Starts the pending job queue. Does nothing 
35       * if the queue is already started.
36       */
37      public void startPendingJobQueue();
38  
39      /***
40       * Stops the pending job queue. Does nothing if the 
41       * queue is already stopped.
42       *
43       */
44      public void stopPendingJobQueue();
45  
46      /***
47       * Returns true if the pending job queue is running.
48       * @return
49       */
50      public boolean isPendingJobQueueRunning();
51  
52      /***
53       * Adds a job to the pending job queue. 
54       * @param order
55       * @return
56       */
57      public String addJob(JobOrder order);
58  
59      /***
60       * Terminates the currently running job. Does nothing if 
61       * if no job is currently running.
62       */
63      public void terminateCurrentJob();
64  
65      /***
66       * Returns true if the crawler has a currently running job.
67       * @return
68       */
69      public boolean isCrawling();
70  
71      /***
72       * Returns the Heritrix version.
73       * @return
74       */
75      public String getVersion();
76  
77      /***
78       * Destroys the crawler instance and all dependent objects.
79       *
80       */
81      public void destroy();
82  
83      /***
84       * Deletes a job from the pending queue.
85       * @param job
86       * @return
87       */
88      public boolean deletePendingCrawlJob(PendingCrawlJob job);
89  
90      /***
91       * Deletes a job from the completed list.
92       * @param job
93       * @return
94       * @throws ClusterException
95       */
96      public boolean deleteCompletedCrawlJob(CompletedCrawlJob job) throws ClusterException;
97  
98      /***
99       * Returns a list of pending jobs.
100      * @return
101      */
102     public Collection<PendingCrawlJob> listPendingCrawlJobs();
103 
104     /***
105      * Returns a list of completed jobs.
106      * @return
107      */
108     public Collection<CompletedCrawlJob> listCompletedCrawlJobs();
109 }