View Javadoc

1   /* PatternMatcherRecycler
2   *
3   * $Id: PatternMatcherRecycler.java 4644 2006-09-20 22:40:21Z paul_jack $
4   *
5   * Created on Dec 21, 2004
6   *
7   * Copyright (C) 2004 Internet Archive.
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */ 
25  package org.archive.util;
26  
27  import java.util.EmptyStackException;
28  import java.util.Stack;
29  import java.util.regex.Matcher;
30  import java.util.regex.Pattern;
31  
/**
 * Utility class to retain a compiled Pattern and a bounded pool of
 * corresponding Matcher instances for reuse.
 *
 * <p>Callers obtain a Matcher with {@link #getMatcher(CharSequence)} and
 * hand it back with {@link #freeMatcher(Matcher)} once they are done with
 * it. All access to the internal pool is performed while holding the
 * pool's monitor, so borrowing and returning are each atomic.
 *
 * @author gojomo
 */
public class PatternMatcherRecycler {
    /**
     * Upper-bound on the number of pooled Matchers.
     * Profiling has the size of these pools tending upward over
     * the life of a crawl.  TODO: do something better than a
     * coarse upper bound; do something that can get GC'd in
     * low-memory conditions.
     */
    private static final int MAXIMUM_STACK_SIZE = 10;

    /** The compiled Pattern every pooled Matcher belongs to. */
    private final Pattern pattern;

    /** Idle Matchers available for reuse; guarded by its own monitor. */
    private final Stack<Matcher> matchers;

    /**
     * Create a recycler for the given Pattern with an initially empty pool.
     *
     * @param p the compiled Pattern whose Matchers will be recycled
     */
    public PatternMatcherRecycler(Pattern p) {
        this.pattern = p;
        this.matchers = new Stack<Matcher>();
    }

    /**
     * @return the Pattern this recycler was constructed with
     */
    public Pattern getPattern() {
        return this.pattern;
    }

    /**
     * Get a Matcher for the internal Pattern, against the given
     * input sequence. Reuse a pooled Matcher if one is available,
     * otherwise create a new one.
     *
     * @param input CharSequence to match
     * @return Matcher set against the input sequence
     * @throws IllegalArgumentException if {@code input} is null
     */
    public Matcher getMatcher(CharSequence input) {
        if (input == null) {
            throw new IllegalArgumentException("CharSequence 'input' must not be null");
        }
        Matcher recycled = null;
        // Check-and-pop under one lock so an empty pool is detected
        // without relying on EmptyStackException for control flow.
        synchronized (this.matchers) {
            if (!this.matchers.isEmpty()) {
                recycled = this.matchers.pop();
            }
        }
        if (recycled != null) {
            return recycled.reset(input);
        }
        return this.pattern.matcher(input);
    }

    /**
     * Return the given Matcher to the reuse pool, if the pool is
     * not already at its maximum size.
     *
     * <p>A null Matcher, or a Matcher that was not created from this
     * recycler's Pattern, is ignored: pooling either would make a later
     * {@link #getMatcher(CharSequence)} fail or return a Matcher for the
     * wrong Pattern. The Matcher is reset against an empty input before
     * being pooled so that it does not keep its previous input reachable
     * while idle; callers must not use it after freeing it.
     *
     * @param m the Matcher to save for reuse
     */
    public void freeMatcher(Matcher m) {
        if (m == null || m.pattern() != this.pattern) {
            return;
        }
        // While cached, drop the reference to the potentially large prior input.
        m.reset("");
        // Size check and push must be atomic to honour the upper bound.
        synchronized (this.matchers) {
            if (this.matchers.size() < MAXIMUM_STACK_SIZE) {
                this.matchers.push(m);
            }
        }
    }
}
91