View Javadoc

1   /* XmlUtils
2    *
3    * Created on Sep 19, 2007
4    *
5    * Copyright (C) 2007 Internet Archive.
6    *
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    *
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   *
14   * Heritrix is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.util;
24  
25  import java.io.File;
26  import java.io.IOException;
27  import java.util.logging.Logger;
28  
29  import javax.xml.parsers.DocumentBuilder;
30  import javax.xml.parsers.DocumentBuilderFactory;
31  import javax.xml.parsers.ParserConfigurationException;
32  import javax.xml.xpath.XPath;
33  import javax.xml.xpath.XPathExpression;
34  import javax.xml.xpath.XPathExpressionException;
35  import javax.xml.xpath.XPathFactory;
36  
37  import org.w3c.dom.Document;
38  import org.xml.sax.SAXException;
39  
40  
/**
 * XML utilities for document/xpath actions.
 *
 * <p>All methods are static; the class holds no state other than its logger.
 *
 * @author gojomo
 * @version $Revision: 4644 $ $Date: 2006-09-20 22:40:21 +0000 (Wed, 20 Sep 2006) $
 */
public class XmlUtils {
    public static Logger logger =
        Logger.getLogger(XmlUtils.class.getName());

    /**
     * Parse a DOM Document from the given XML file using a
     * namespace-aware parser.
     *
     * <p>NOTE(review): the parser is created with the factory's default
     * settings, so DOCTYPE declarations and external entities are not
     * explicitly disabled here. If this is ever used on files from an
     * untrusted source, consider hardening the factory -- TODO confirm
     * whether any caller relies on DTD processing before doing so.
     *
     * @param f File to parse as Document
     * @return Document parsed from the file
     * @throws IOException if the file cannot be read, or if the parser
     *         cannot be configured or the content cannot be parsed; in the
     *         latter two cases the original
     *         {@link ParserConfigurationException} or {@link SAXException}
     *         is available as the cause
     */
    public static Document getDocument(File f) throws IOException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // Without namespace awareness, namespaced documents yield
        // surprising XPath results; always enable it.
        factory.setNamespaceAware(true);
        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            return builder.parse(f);
        } catch (ParserConfigurationException e) {
            throw asIOException(f, e);
        } catch (SAXException e) {
            throw asIOException(f, e);
        }
    }

    /**
     * Wrap a parser failure in an IOException that names the offending
     * file and keeps the original exception as its cause.
     *
     * @param f file that was being parsed
     * @param cause underlying parser exception
     * @return IOException ready to be thrown
     */
    private static IOException asIOException(File f, Exception cause) {
        IOException ioe = new IOException(
            "Unable to parse XML document from " + f + ": "
                + cause.getMessage());
        ioe.initCause(cause);
        return ioe;
    }

    /**
     * Evaluate an XPath against a Document, returning a String.
     *
     * <p>The result is the string value of the expression as computed by
     * {@link XPathExpression#evaluate(Object)}; a path that selects
     * nothing therefore yields the empty string, not null. Null is
     * returned only when the expression cannot be compiled or evaluated,
     * in which case the failure is logged at FINE level.
     *
     * @param doc Document
     * @param xp XPath to evaluate against Document
     * @return String value of the expression, or null if the expression
     *         could not be compiled or evaluated
     */
    public static String xpathOrNull(Document doc, String xp) {
        XPathFactory factory = XPathFactory.newInstance();
        XPath xpath = factory.newXPath();
        try {
            XPathExpression expr = xpath.compile(xp);
            return expr.evaluate(doc);
        } catch (XPathExpressionException e) {
            // Best-effort contract: callers get null, but leave a trace so
            // a mistyped expression is diagnosable.
            logger.log(java.util.logging.Level.FINE,
                "XPath evaluation failed for expression: " + xp, e);
            return null;
        }
    }
}