View Javadoc

1    /*** 
2    * 
3    * Copyright (C) 2005 Internet Archive.
4    *  
5    * This file is part of the Heritrix Cluster Controller (crawler.archive.org).
6    *  
7    * HCC is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU Lesser Public License as published by
9    * the Free Software Foundation; either version 2.1 of the License, or
10   * any later version.
11   * 
12   * Heritrix is distributed in the hope that it will be useful, 
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU Lesser Public License for more details.
16   * 
17   * You should have received a copy of the GNU Lesser Public License
18   * along with Heritrix; if not, write to the Free Software
19   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20   */
21  package org.archive.hcc.util;
22  
23  import java.io.File;
24  
/**
 * A set of restrictions associated with a host that limit the scope of a crawl.
 *
 * <p>Besides holding the per-host restriction values, an instance derives the
 * relative location of the host's settings file. The host name is split on
 * dots and the labels are laid out in reverse order beneath a
 * <code>settings</code> directory, e.g. <code>www.example.com</code> maps to
 * <code>settings/com/example/www/settings.xml</code> (using the platform's
 * file separator).
 *
 * <p>All restriction values start out as <code>null</code>.
 *
 * @author Daniel Bernstein (dbernstein@archive.org)
 */
public class HostConstraint {

	/** Host name exactly as supplied by the caller. */
	private String host;

	/** Dot-separated labels of {@link #host}, in their original order. */
	protected String[] hostArray;

	// NOTE(review): presumably a pattern restricting which URIs are crawled
	// on this host -- confirm against the code that consumes it.
	private String regex = null;

	// NOTE(review): presumably the maximum number of documents to fetch from
	// this host -- confirm against the code that consumes it.
	private Long documentLimit = null;

	// NOTE(review): presumably marks the host as excluded from the crawl --
	// confirm against the code that consumes it.
	private Boolean block = null;

	// NOTE(review): presumably tells the crawler to disregard robots.txt for
	// this host -- confirm against the code that consumes it.
	private Boolean ignoreRobots = null;


	/**
	 * Creates a constraint for the given host.
	 *
	 * @param host dot-separated host name; must not be <code>null</code>
	 * @throws NullPointerException if <code>host</code> is <code>null</code>
	 */
	public HostConstraint(String host){
		if (host == null) {
			throw new NullPointerException("host must not be null");
		}
		this.host = host;
		this.hostArray = splitHost(host);
	}


	/**
	 * Builds the relative directory that holds this host's settings file:
	 * <code>settings</code> followed by the host's labels in reverse order,
	 * each preceded by {@link File#separator}.
	 *
	 * @return the relative settings directory for the current host
	 */
	public String getSettingsFileDirectory(){
		StringBuilder path = new StringBuilder("settings");
		// Walk the labels from last to first so the top-level domain becomes
		// the outermost directory.
		for (int i = hostArray.length - 1; i >= 0; i--) {
			path.append(File.separator).append(hostArray[i]);
		}
		return path.toString();
	}


	/**
	 * Returns the relative path of this host's settings file, i.e.
	 * <code>settings.xml</code> inside {@link #getSettingsFileDirectory()}.
	 *
	 * @return the relative settings file path for the current host
	 */
	public String getSettingsFilePath(){
		return getSettingsFileDirectory() + File.separator + "settings.xml";
	}


	/**
	 * @return the block flag, or <code>null</code> if it has not been set
	 */
	public Boolean getBlock() {
		return block;
	}


	/**
	 * @param block the new block flag; may be <code>null</code>
	 */
	public void setBlock(Boolean block) {
		this.block = block;
	}


	/**
	 * @return the document limit, or <code>null</code> if it has not been set
	 */
	public Long getDocumentLimit() {
		return documentLimit;
	}


	/**
	 * @param documentLimit the new document limit; may be <code>null</code>
	 */
	public void setDocumentLimit(Long documentLimit) {
		this.documentLimit = documentLimit;
	}


	/**
	 * @return the host name as last supplied to the constructor or
	 *         {@link #setHost(String)}
	 */
	public String getHost() {
		return host;
	}


	/**
	 * Replaces the host name and re-derives the host labels, so that
	 * {@link #getSettingsFileDirectory()} and {@link #getSettingsFilePath()}
	 * reflect the new host rather than the one given at construction time.
	 *
	 * @param host the new dot-separated host name; a <code>null</code> value
	 *             is stored as-is and results in an empty label array
	 */
	public void setHost(String host) {
		this.host = host;
		this.hostArray = splitHost(host);
	}


	/**
	 * @return the ignore-robots flag, or <code>null</code> if it has not been set
	 */
	public Boolean getIgnoreRobots() {
		return ignoreRobots;
	}


	/**
	 * @param ignoreRobots the new ignore-robots flag; may be <code>null</code>
	 */
	public void setIgnoreRobots(Boolean ignoreRobots) {
		this.ignoreRobots = ignoreRobots;
	}


	/**
	 * @return the regex, or <code>null</code> if it has not been set
	 */
	public String getRegex() {
		return regex;
	}


	/**
	 * @param regex the new regex; may be <code>null</code>
	 */
	public void setRegex(String regex) {
		this.regex = regex;
	}


	/**
	 * Splits a host name into its dot-separated labels.
	 *
	 * @param host the host name, possibly <code>null</code>
	 * @return the labels in their original order; an empty array for a
	 *         <code>null</code> host
	 */
	private static String[] splitHost(String host) {
		if (host == null) {
			return new String[0];
		}
		return host.split("[.]");
	}

}