View Javadoc

1   /* CrawlURITest
2    * 
3    * Created on Jul 26, 2004
4    *
5    * Copyright (C) 2004 Internet Archive.
6    * 
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    * 
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   * 
14   * Heritrix is distributed in the hope that it will be useful, 
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   * 
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.crawler.datamodel;
24  
25  import java.io.File;
26  import java.io.FileInputStream;
27  import java.io.FileOutputStream;
28  import java.io.IOException;
29  import java.io.ObjectInputStream;
30  import java.io.ObjectOutputStream;
31  
32  import org.apache.commons.httpclient.URIException;
33  import org.archive.net.UURIFactory;
34  import org.archive.util.TmpDirTestCase;
35  
36  /***
37   * @author stack
38   * @version $Revision: 3771 $, $Date: 2005-08-29 21:52:36 +0000 (Mon, 29 Aug 2005) $
39   */
40  public class CrawlURITest extends TmpDirTestCase {
41      
42      CrawlURI seed = null;
43      
44      protected void setUp() throws Exception {
45          super.setUp();
46          final String url = "http://www.dh.gov.uk/Home/fs/en";
47          this.seed = new CrawlURI(UURIFactory.getInstance(url));
48          this.seed.setSchedulingDirective(CandidateURI.MEDIUM);
49          this.seed.setIsSeed(true);
50          // Force caching of string.
51          this.seed.toString();
52          // TODO: should this via really be itself?
53          this.seed.setVia(UURIFactory.getInstance(url));
54      }
55  
56      /***
57       * Test serialization/deserialization works.
58       * 
59       * @throws IOException
60       * @throws ClassNotFoundException
61       */
62      final public void testSerialization()
63      		throws IOException, ClassNotFoundException {
64          File serialize = new File(getTmpDir(), 
65              this.getClass().getName() + ".serialize");
66          try {
67              FileOutputStream fos = new FileOutputStream(serialize);
68              ObjectOutputStream oos = new ObjectOutputStream(fos);
69              oos.writeObject(this.seed);
70              oos.reset();
71              oos.writeObject(this.seed);
72              oos.reset();
73              oos.writeObject(this.seed);
74              oos.close();
75              // Read in the object.
76              FileInputStream fis = new FileInputStream(serialize);
77              ObjectInputStream ois = new ObjectInputStream(fis);
78              CrawlURI deserializedCuri = (CrawlURI)ois.readObject();
79              deserializedCuri = (CrawlURI)ois.readObject();
80              deserializedCuri = (CrawlURI)ois.readObject();
81              assertTrue("Deserialized not equal to original",
82                  this.seed.toString().equals(deserializedCuri.toString()));
83              String host = this.seed.getUURI().getHost();
84              assertTrue("Deserialized host not null",
85                  host != null && host.length() >= 0);
86          } finally {
87              serialize.delete();
88          }
89      }
90      
91      public void testCandidateURIWithLoadedAList()
92      throws URIException {
93          CandidateURI c = CandidateURI.
94              createSeedCandidateURI(UURIFactory.
95                  getInstance("http://www.archive.org"));
96          c.putString("key", "value");
97          CrawlURI curi = new CrawlURI(c, 0);
98          assertTrue("Didn't find AList item",
99              curi.getString("key").equals("value"));
100     }
101     
102 // TODO: move to QueueAssignmentPolicies
103 //    public void testCalculateClassKey() throws URIException {
104 //        final String uri = "http://mprsrv.agri.gov.cn";
105 //        CrawlURI curi = new CrawlURI(UURIFactory.getInstance(uri));
106 //        String key = curi.getClassKey();
107 //        assertTrue("Key1 is bad " + key,
108 //            key.equals(curi.getUURI().getAuthorityMinusUserinfo()));
109 //    	final String baduri = "ftp://pfbuser:pfbuser@mprsrv.agri.gov.cn/clzreceive/";
110 //        curi = new CrawlURI(UURIFactory.getInstance(baduri));
111 //        key = curi.getClassKey();
112 //        assertTrue("Key2 is bad " + key,
113 //            key.equals(curi.getUURI().getAuthorityMinusUserinfo()));
114 //	}
115 }