1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.archive.util;
25
26 import junit.framework.Test;
27 import junit.framework.TestCase;
28 import junit.framework.TestSuite;
29
30 /***
31 * JUnit test suite for UriUtils.
32 *
33 * Several of the tests for the 'legacy' (H1 through at least 1.14.4)
34 * heuristics are disabled by renaming, because those heuristics have known
35 * failures; however, until more experience with the new heuristics is
36 * collected, H1 still uses them for consistency.
37 *
38 * @contributor gojomo
39 * @version $Id: ArchiveUtilsTest.java 5052 2007-04-10 02:26:52Z gojomo $
40 */
41 public class UriUtilsTest extends TestCase {
42
43 public UriUtilsTest(final String testName) {
44 super(testName);
45 }
46
47 /***
48 * run all the tests for ArchiveUtilsTest
49 *
50 * @param argv
51 * the command line arguments
52 */
53 public static void main(String argv[]) {
54 junit.textui.TestRunner.run(suite());
55 }
56
57 public static Test suite() {
58 return new TestSuite(UriUtilsTest.class);
59 }
60
61 /*** image URIs that should be considered likely URIs **/
62 static String[] urisRelativeImages = {
63 "photo.jpg",
64 "./photo.jpg",
65 "../photo.jpg",
66 "images/photo.jpg",
67 "../../images/photo.jpg" };
68
69 /*** check that plausible relative image URIs return true with legacy tests */
70 public void xestLegacySimpleImageRelatives() {
71 legacyTryAll(urisRelativeImages, true);
72 }
73
74 /*** check that plausible relative image URIs return true with new tests */
75 public void testNewSimpleImageRelatives() {
76 tryAll(urisRelativeImages,true);
77 }
78
79 /*** absolute URIs that should be considered likely URIs **/
80 static String[] urisAbsolute = {
81 "http://example.com",
82 "http://example.com/", "http://www.example.com",
83 "http://www.example.com/", "http://www.example.com/about",
84 "http://www.example.com/about/",
85 "http://www.example.com/about/index.html", "https://example.com",
86 "https://example.com/", "https://www.example.com",
87 "https://www.example.com/", "https://www.example.com/about",
88 "https://www.example.com/about/",
89 "https://www.example.com/about/index.html",
90 "ftp://example.com/public/report.pdf",
91
92
93 };
94
95 /*** check that absolute URIs return true with legacy tests */
96 public void testLegacyAbsolutes() {
97 legacyTryAll(urisAbsolute,true);
98 }
99
100 /*** check that absolute URIs return true with new tests */
101 public void testAbsolutes() {
102 tryAll(urisAbsolute,true);
103 }
104
105 /*** path-absolute images URIs that should be considered likely URIs **/
106 static String[] urisPathAbsoluteImages = {
107 "/photo.jpg",
108 "/images/photo.jpg",
109 };
110
111 /*** check that path-absolute image URIs return true with legacy tests*/
112 public void testLegacySimpleImagePathAbsolutes() {
113 legacyTryAll(urisPathAbsoluteImages, true);
114 }
115
116 /*** check that path-absolute image URIs return true with new tests*/
117 public void testSimpleImagePathAbsolutes() {
118 tryAll(urisPathAbsoluteImages, true);
119 }
120
121 /*** URI-like strings risking false positives that should NOT be likely URIs **/
122 static String[] notUrisNaiveFalsePositives = {
123 "0.99",
124 "3.14157",
125 "text/javascript"
126 };
127
128 /*** check that typical false-positives of the naive test are not deemed URIs */
129 public void xestLegacyNaiveFalsePositives() {
130 legacyTryAll(notUrisNaiveFalsePositives, false);
131 }
132
133 /*** check that typical false-positives of the naive test are not deemed URIs */
134 public void testNaiveFalsePositives() {
135 tryAll(notUrisNaiveFalsePositives, false);
136 }
137
138 /*** strings that should not be considered likely URIs **/
139 static String[] notUrisNaive = {
140 "foo bar",
141 "<script>foo=bar</script>",
142 "item\t$0.99\tred",
143 };
144
145 /*** check that strings that fail naive test are not deemed URIs legacy tests*/
146 public void testLegacyNaiveNotUris() {
147 legacyTryAll(notUrisNaive, false);
148 }
149
150 /*** check that strings that fail naive test are not deemed URIs new tests*/
151 public void testNaiveNotUris() {
152 tryAll(notUrisNaive, false);
153 }
154
155
156 /***
157 * Test that all supplied candidates give the expected result, for each of
158 * the 'legacy' (H1) likely-URI-tests
159 *
160 * @param candidates String[] to test
161 * @param expected desired answer
162 */
163 protected void legacyTryAll(String[] candidates, boolean expected) {
164 for (String candidate : candidates) {
165 assertEquals("javascript context: " + candidate,
166 expected,
167 UriUtils.isLikelyUriJavascriptContextLegacy(candidate));
168 assertEquals("html context: " + candidate,
169 expected,
170 UriUtils.isLikelyUriHtmlContextLegacy(candidate));
171 }
172 }
173
174
175
176 /***
177 * Test that all supplied candidates give the expected results, for
178 * the 'new' heuristics now in this class.
179 * @param candidates String[] to test
180 * @param expected desired answer
181 */
182 protected void tryAll(String[] candidates, boolean expected) {
183 for (String candidate : candidates) {
184 assertEquals("new: " + candidate,
185 expected,
186 UriUtils.isLikelyUri(candidate));
187 assertEquals("html context: " + candidate,
188 expected,
189 UriUtils.isLikelyUri(candidate));
190 }
191 }
192 }