/* TextUtilsTest.java
 *
 * $Id: TextUtilsTest.java 5703 2008-01-30 03:04:23Z Gojomo $
 *
 * Created Tue Jan 20 14:17:59 PST 2004
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
25  
26  package org.archive.util;
27  
28  import java.util.regex.Matcher;
29  
30  import junit.framework.Test;
31  import junit.framework.TestCase;
32  import junit.framework.TestSuite;
33  
34  /***
35   * JUnit test suite for TextUtils
36   * 
37   * @author gojomo
38   * @version $ Id$
39   */
40  public class TextUtilsTest extends TestCase {
41      /***
42       * Create a new TextUtilsTest object
43       * 
44       * @param testName
45       *            the name of the test
46       */
47      public TextUtilsTest(final String testName) {
48          super(testName);
49      }
50  
51      /***
52       * run all the tests for TextUtilsTest
53       * 
54       * @param argv
55       *            the command line arguments
56       */
57      public static void main(String argv[]) {
58          junit.textui.TestRunner.run(suite());
59      }
60  
61      /***
62       * return the suite of tests for MemQueueTest
63       * 
64       * @return the suite of test
65       */
66      public static Test suite() {
67          return new TestSuite(TextUtilsTest.class);
68      }
69  
70      public void testMatcherRecycling() {
71          String pattern = "f.*";
72          Matcher m1 = TextUtils.getMatcher(pattern,"foo");
73          assertTrue("matcher against 'foo' problem", m1.matches());
74          TextUtils.recycleMatcher(m1);
75          Matcher m2 = TextUtils.getMatcher(pattern,"");
76          assertFalse("matcher against '' problem", m2.matches());
77          assertTrue("matcher not recycled",m1==m2);
78          // now verify proper behavior without recycling
79          Matcher m3 = TextUtils.getMatcher(pattern,"fuggedaboutit");
80          assertTrue("matcher against 'fuggedaboutit' problem",m3.matches());
81          assertFalse("matcher was recycled",m3==m2);
82      }
83      
84      public void testGetFirstWord() {
85          final String firstWord = "one";
86          String tmpStr = TextUtils.getFirstWord(firstWord + " two three");
87          assertTrue("Failed to get first word 1 " + tmpStr,
88              tmpStr.equals(firstWord));
89          tmpStr = TextUtils.getFirstWord(firstWord);
90          assertTrue("Failed to get first word 2 " + tmpStr,
91              tmpStr.equals(firstWord));       
92      }
93      
94      public void testUnescapeHtml() {
95          final String abc = "abc";
96          CharSequence cs = TextUtils.unescapeHtml("abc");
97          assertEquals(cs, abc);
98          final String backwards = "aaa;lt&aaa";
99          cs = TextUtils.unescapeHtml(backwards);
100         assertEquals(cs, backwards);
101         final String ampersand = "aaa&aaa";
102         cs = TextUtils.unescapeHtml(ampersand);
103         assertEquals(cs, ampersand);
104         final String encodedAmpersand = "aaa&aaa";
105         cs = TextUtils.unescapeHtml(encodedAmpersand);
106         assertEquals(cs, ampersand);
107         final String encodedQuote = "aaa'aaa";
108         cs = TextUtils.unescapeHtml(encodedQuote);
109         assertEquals(cs, "aaa'aaa");
110         final String entityQuote = "aaa"aaa";
111         cs = TextUtils.unescapeHtml(entityQuote);
112         assertEquals(cs, "aaa\"aaa");
113         final String hexencoded = "aaa
aaa";
114         cs = TextUtils.unescapeHtml(hexencoded);
115         assertEquals(cs, "aaa\naaa");
116         final String zeroPos = "&aaa";
117         cs = TextUtils.unescapeHtml(zeroPos);
118         assertEquals(cs, "&aaa");
119     }
120     
121     public void testUnescapeHtmlWithDanglingAmpersand() {
122         final String mixedEncodedAmpersand1 = "aaa&aaa&aaa";
123         CharSequence cs = TextUtils.unescapeHtml(mixedEncodedAmpersand1);
124         assertEquals(cs,"aaa&aaa&aaa");
125         final String mixedEncodedAmpersand2 = "aaa&aaa&aaa&aaa";
126         cs = TextUtils.unescapeHtml(mixedEncodedAmpersand2);
127         assertEquals(cs,"aaa&aaa&aaa&aaa");
128     } 
129 }
130