1 /*** 2 * 3 * Copyright (C) 2005 Internet Archive. 4 * 5 * This file is part of the Heritrix Cluster Controller (crawler.archive.org). 6 * 7 * HCC is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU Lesser Public License as published by 9 * the Free Software Foundation; either version 2.1 of the License, or 10 * any later version. 11 * 12 * Heritrix is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU Lesser Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser Public License 18 * along with Heritrix; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 */ 21 package org.archive.hcc.util; 22 23 import java.io.File; 24 25 /*** 26 * A set of restrictions associated with a host that limit the scope of a crawl. 27 * @author Daniel Bernstein (dbernstein@archive.org) 28 * 29 */ 30 public class HostConstraint { 31 32 private String host; 33 protected String[] hostArray; 34 private String regex = null; 35 private Long documentLimit = null; 36 private Boolean block = null; 37 private Boolean ignoreRobots = null; 38 39 40 public HostConstraint(String host){ 41 this.host = host; 42 this.hostArray = host.split("[.]"); 43 } 44 45 46 public String getSettingsFileDirectory(){ 47 StringBuffer b = new StringBuffer(); 48 b.append("settings"); 49 for (int i = hostArray.length-1; i > -1; i--) { 50 b.append(File.separator); 51 b.append(hostArray[i]); 52 } 53 54 return b.toString(); 55 } 56 57 58 59 public String getSettingsFilePath(){ 60 return getSettingsFileDirectory() + File.separator + "settings.xml"; 61 } 62 63 64 public Boolean getBlock() { 65 return block; 66 } 67 68 69 public void setBlock(Boolean block) { 70 this.block = block; 71 } 72 73 74 public Long getDocumentLimit() { 75 return documentLimit; 76 } 77 78 79 public void setDocumentLimit(Long documentLimit) { 80 this.documentLimit = documentLimit; 81 } 82 83 84 public String getHost() { 85 return host; 86 } 87 88 89 public void setHost(String host) { 90 this.host = host; 91 } 92 93 94 public Boolean getIgnoreRobots() { 95 return ignoreRobots; 96 } 97 98 99 public void setIgnoreRobots(Boolean ignoreRobots) { 100 this.ignoreRobots = ignoreRobots; 101 } 102 103 104 public String getRegex() { 105 return regex; 106 } 107 108 109 public void setRegex(String regex) { 110 this.regex = regex; 111 } 112 113 }