View Javadoc

1   /*
2    * TimespanCriteria
3    *
4    * $Id: TimespanCriteria.java 3704 2005-07-18 17:30:21Z stack-sf $
5    *
6    * Created on Apr 8, 2004
7    *
8    * Copyright (C) 2004 Internet Archive.
9    *
10   * This file is part of the Heritrix web crawler (crawler.archive.org).
11   *
12   * Heritrix is free software; you can redistribute it and/or modify it under the
13   * terms of the GNU Lesser Public License as published by the Free Software
14   * Foundation; either version 2.1 of the License, or any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful, but WITHOUT ANY
17   * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
18   * A PARTICULAR PURPOSE. See the GNU Lesser Public License for more details.
19   *
20   * You should have received a copy of the GNU Lesser Public License along with
21   * Heritrix; if not, write to the Free Software Foundation, Inc., 59 Temple
22   * Place, Suite 330, Boston, MA 02111-1307 USA
23   */
24  package org.archive.crawler.settings.refinements;
25  
26  import java.text.DateFormat;
27  import java.text.ParseException;
28  import java.text.SimpleDateFormat;
29  import java.util.Date;
30  import java.util.TimeZone;
31  
32  import org.archive.net.UURI;
33  
34  /***
35   * A refinement criteria that checks if a URI is requested within a specific
36   * time frame. <p/>
37   *
38   * The timeframe's resolution is minutes and always operates in 24h GMT. The
39   * format is <code>hhmm</code>, exmaples:
40   * <p>
41   * <code> 1200</code> for noon GMT <br>
42   * <code> 1805</code> for 5 minutes past six in the afternoon GMT.
43   *
44   * @author John Erik Halse
45   */
46  public class TimespanCriteria implements Criteria {
47  
48      private static DateFormat timeFormat;
49      static {
50          final TimeZone TZ = TimeZone.getTimeZone("GMT");
51          timeFormat = new SimpleDateFormat("HHmm");
52          timeFormat.setTimeZone(TZ);
53      }
54  
55      private Date from;
56  
57      private Date to;
58  
59      /***
60       * Create a new instance of TimespanCriteria.
61       *
62       * @param from start of the time frame (inclusive).
63       * @param to end of the time frame (inclusive).
64       * @throws ParseException
65       */
66      public TimespanCriteria(String from, String to) throws ParseException {
67          setFrom(from);
68          setTo(to);
69      }
70  
71      public boolean isWithinRefinementBounds(UURI uri) {
72          try {
73              Date now = timeFormat.parse(timeFormat.format(new Date()));
74              if (from.before(to)) {
75                  if (now.getTime() >= from.getTime()
76                          && now.getTime() <= to.getTime()) {
77                      return true;
78                  }
79              } else {
80                  if (!(now.getTime() > to.getTime() && now.getTime() < from
81                          .getTime())) {
82                      return true;
83                  }
84              }
85          } catch (ParseException e) {
86              // Should never happen since we are only parsing system time at
87              // this place.
88              e.printStackTrace();
89          }
90  
91          return false;
92      }
93  
94      /***
95       * Get the beginning of the time frame to check against.
96       *
97       * @return Returns the from.
98       */
99      public String getFrom() {
100         return timeFormat.format(from);
101     }
102 
103     /***
104      * Set the beginning of the time frame to check against.
105      *
106      * @param from The from to set.
107      * @throws ParseException
108      */
109     public void setFrom(String from) throws ParseException {
110         this.from = timeFormat.parse(from);
111     }
112 
113     /***
114      * Get the end of the time frame to check against.
115      *
116      * @return Returns the to.
117      */
118     public String getTo() {
119         return timeFormat.format(to);
120     }
121 
122     /***
123      * Set the end of the time frame to check against.
124      *
125      * @param to The to to set.
126      * @throws ParseException
127      */
128     public void setTo(String to) throws ParseException {
129         this.to = timeFormat.parse(to);
130     }
131 
132     public boolean equals(Object o) {
133         if (o instanceof TimespanCriteria) {
134             TimespanCriteria other = (TimespanCriteria) o;
135             if (this.from.equals(other.from) && this.to.equals(other.to)) {
136                 return true;
137             }
138         }
139         return false;
140     }
141 
142     /* (non-Javadoc)
143      * @see org.archive.crawler.settings.refinements.Criteria#getName()
144      */
145     public String getName() {
146         return "Time of day criteria";
147     }
148 
149     /* (non-Javadoc)
150      * @see org.archive.crawler.settings.refinements.Criteria#getDescription()
151      */
152     public String getDescription() {
153         return "Accept any URIs between the hours of " + getFrom() + "(GMT) and "
154             + getTo() + "(GMT) each day.";
155     }
156 }