Validate XML using an XSD (XML Schema) - Tag(s): XML


Consider this XML file howto.xml :
<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- Sample document: a <howto> root holding a list of <topic> entries,
     each made of a <title> followed by a <url>. No schema is referenced
     here; the XSD is supplied externally by the validating code. -->
<howto xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <topic>
      <title>Java</title>
      <url>http://www.rgagnon.com/topics/java-xml.html</url>
  </topic>
  <topic>
      <title>PowerBuilder</title>
      <url>http://www.rgagnon.com/topics/pb-powerscript.htm</url>
  </topic>
  <topic>
        <title>Javascript</title>
        <url>http://www.rgagnon.com/topics/js-language.html</url>
  </topic>
  <topic>
        <title>VBScript</title>
        <url>http://www.rgagnon.com/topics/wsh-vbs.html</url>
  </topic>
</howto>

The external howto.xsd :

<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- Schema for howto.xml: declares the <howto> root element and the
     custom httpURI simple type used by <url>. -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">

  <!-- Root element: a sequence of <topic> elements; maxOccurs is unbounded
       and minOccurs is left at its default. -->
  <xs:element name="howto">
     <xs:complexType>
      <xs:sequence>
        <xs:element name="topic" maxOccurs="unbounded">
          <xs:complexType>
            <!-- Each topic is exactly a <title> followed by a <url>, in that order. -->
            <xs:sequence>
              <xs:element name="title" type="xs:string"/>
              <xs:element name="url" type="httpURI"/>
            </xs:sequence>
          </xs:complexType>
        </xs:element>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
  
  <!-- An xs:anyURI further restricted by a pattern: the value must start
       with "http://". -->
  <xs:simpleType name="httpURI">
      <xs:restriction base="xs:anyURI">
        <xs:pattern value="http://.*" />
      </xs:restriction>
  </xs:simpleType>
 
</xs:schema>

The code (using SAX parser) to validate an XML file using a given external XSD.

import java.io.IOException;

// SAX
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.XMLReader;

//SAX and external XSD
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.SchemaFactory;

import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.InputSource;

public class XMLUtils {

  /** Schema language identifier handed to {@link SchemaFactory#newInstance(String)}. */
  private static final String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema";

  // Utility class: static methods only, never instantiated.
  private XMLUtils() {}

  /**
   * Validates an XML document against an externally supplied XSD using a SAX parser.
   *
   * @param xml system identifier of the XML document to check
   * @param xsd system identifier of the XML Schema to validate against
   * @return {@code true} when the document parses without any reported error;
   *         {@code false} when the schema itself cannot be loaded (a
   *         {@code SCHEMA : ...} line is printed) or when parsing raises a
   *         {@link SAXException}
   * @throws ParserConfigurationException if the SAX parser cannot be created
   * @throws IOException if reading the document fails
   */
  public static boolean validateWithExtXSDUsingSAX(String xml, String xsd)
  throws ParserConfigurationException, IOException
  {
    SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    // DTD validation stays off; validation is driven by the schema set below.
    saxFactory.setValidating(false);
    saxFactory.setNamespaceAware(true);

    SchemaFactory xsdFactory = SchemaFactory.newInstance(W3C_XML_SCHEMA);

    SAXParser saxParser;
    try {
      Source[] schemaSources = { new StreamSource(xsd) };
      saxFactory.setSchema(xsdFactory.newSchema(schemaSources));
      saxParser = saxFactory.newSAXParser();
    }
    catch (SAXException schemaProblem) {
      // problem in the XSD itself
      System.out.println("SCHEMA : " + schemaProblem.getMessage());
      return false;
    }

    try {
      XMLReader xmlReader = saxParser.getXMLReader();
      xmlReader.setErrorHandler(new ConsoleErrorHandler());
      xmlReader.parse(new InputSource(xml));
    }
    catch (SAXException invalidDocument) {
      // Already reported on the console by the handler when it came from there.
      return false;
    }
    return true;
  }

  /**
   * Prints every parser event to standard output. Warnings are only reported;
   * errors and fatal errors are rethrown so that parsing stops.
   */
  private static final class ConsoleErrorHandler implements ErrorHandler {

    @Override
    public void warning(SAXParseException e) throws SAXException {
      System.out.println("WARNING: " + e.getMessage());
    }

    @Override
    public void error(SAXParseException e) throws SAXException {
      System.out.println("ERROR : " + e.getMessage());
      throw e;
    }

    @Override
    public void fatalError(SAXParseException e) throws SAXException {
      System.out.println("FATAL : " + e.getMessage());
      throw e;
    }
  }

  /** Demo entry point: validates a sample document and prints the outcome. */
  public static void main(String[] args) throws Exception {
    boolean valid =
        XMLUtils.validateWithExtXSDUsingSAX("c:/temp/howto.xml", "c:/temp/howto.xsd");
    System.out.println(valid);
    /*
      output :
               true
    */
  }
}

The XML can contain a reference to the XSD to be used.
<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- Same document as before, except that the root element now names its
     schema through xsi:noNamespaceSchemaLocation ("howto.xsd"). -->
<howto xsi:noNamespaceSchemaLocation="howto.xsd"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <topic>
      <title>Java</title>
      <url>http://www.rgagnon.com/topics/java-xml.html</url>
  </topic>
...
</howto>

The code (using DOM parser) to validate an XML file using the referenced XSD :

import java.io.IOException;
// DOM
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.InputSource;

public class XMLUtils {

  /** JAXP attribute name that selects the schema language used for validation. */
  private static final String JAXP_SCHEMA_LANGUAGE =
      "http://java.sun.com/xml/jaxp/properties/schemaLanguage";

  /** Value of {@link #JAXP_SCHEMA_LANGUAGE} selecting W3C XML Schema. */
  private static final String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema";

  // Utility class: static methods only, never instantiated.
  private XMLUtils() {}

  /**
   * Validates an XML document with a DOM parser, relying on the schema that
   * the document itself references.
   *
   * @param xml system identifier of the XML document to check
   * @return {@code true} when the document parses without any reported error,
   *         {@code false} when parsing raises a {@link SAXException}
   * @throws ParserConfigurationException if the DOM builder cannot be created
   * @throws IOException if reading the document fails
   */
  public static boolean validateWithIntXSDUsingDOM(String xml)
  throws ParserConfigurationException, IOException
  {
    DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    domFactory.setValidating(true);
    domFactory.setNamespaceAware(true);
    domFactory.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);

    DocumentBuilder domBuilder = domFactory.newDocumentBuilder();
    domBuilder.setErrorHandler(new ConsoleErrorHandler());

    try {
      domBuilder.parse(new InputSource(xml));
    }
    catch (SAXException invalidDocument) {
      // Already reported on the console by the handler when it came from there.
      return false;
    }
    return true;
  }

  /**
   * Prints every parser event to standard output. Warnings are only reported;
   * errors and fatal errors are rethrown so that parsing stops.
   */
  private static final class ConsoleErrorHandler implements ErrorHandler {

    @Override
    public void warning(SAXParseException e) throws SAXException {
      System.out.println("WARNING: " + e.getMessage());
    }

    @Override
    public void error(SAXParseException e) throws SAXException {
      System.out.println("ERROR: " + e.getMessage());
      throw e;
    }

    @Override
    public void fatalError(SAXParseException e) throws SAXException {
      System.out.println("FATAL: " + e.getMessage());
      throw e;
    }
  }

  /** Demo entry point: validates a sample document and prints the outcome. */
  public static void main(String[] args) throws Exception {
    boolean valid = XMLUtils.validateWithIntXSDUsingDOM("c:/temp/howto.xml");
    System.out.println(valid);
  }
}

The code (using SAX parser) to validate an XML file using the referenced XSD :

import java.io.IOException;
// SAX
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.XMLReader;

import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.InputSource;

public class XMLUtils {

  /** JAXP property name that selects the schema language used for validation. */
  private static final String JAXP_SCHEMA_LANGUAGE =
      "http://java.sun.com/xml/jaxp/properties/schemaLanguage";

  /** Value of {@link #JAXP_SCHEMA_LANGUAGE} selecting W3C XML Schema. */
  private static final String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema";

  // Utility class: static methods only, never instantiated.
  private XMLUtils() {}

  /**
   * Validates an XML document with a SAX parser, relying on the schema that
   * the document itself references.
   *
   * @param xml system identifier of the XML document to check
   * @return {@code true} when the document parses without any reported error,
   *         {@code false} when creating or configuring the parser, or parsing
   *         the document, raises a {@link SAXException}
   * @throws ParserConfigurationException if the SAX parser cannot be created
   * @throws IOException if reading the document fails
   */
  public static boolean validateWithIntXSDWithSAX(String xml)
  throws ParserConfigurationException, IOException
  {
    SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    saxFactory.setValidating(true);
    saxFactory.setNamespaceAware(true);

    try {
      // Parser creation and property setup can both raise SAXException, so
      // they sit inside the same guard as the parse itself.
      SAXParser saxParser = saxFactory.newSAXParser();
      saxParser.setProperty(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);

      XMLReader xmlReader = saxParser.getXMLReader();
      xmlReader.setErrorHandler(new ConsoleErrorHandler());
      xmlReader.parse(new InputSource(xml));
    }
    catch (SAXException invalidDocument) {
      // Already reported on the console by the handler when it came from there.
      return false;
    }
    return true;
  }

  /**
   * Prints every parser event to standard output. Warnings are only reported;
   * errors and fatal errors are rethrown so that parsing stops.
   */
  private static final class ConsoleErrorHandler implements ErrorHandler {

    @Override
    public void warning(SAXParseException e) throws SAXException {
      System.out.println("WARNING: " + e.getMessage());
    }

    @Override
    public void error(SAXParseException e) throws SAXException {
      System.out.println("ERROR: " + e.getMessage());
      throw e;
    }

    @Override
    public void fatalError(SAXParseException e) throws SAXException {
      System.out.println("FATAL: " + e.getMessage());
      throw e;
    }
  }

  /** Demo entry point: validates a sample document and prints the outcome. */
  public static void main(String[] args) throws Exception {
    boolean valid = XMLUtils.validateWithIntXSDWithSAX("c:/temp/howto.xml");
    System.out.println(valid);
  }
}

This HowTo uses the built-in XML parser. You can switch JAXP into debug mode by passing a special switch on the JVM command line.

java -Djaxp.debug=1 ...
or
java -Djaxp.debug ...
JAXP will produce log entries about its activities :
JAXP: found null in $java.home/jaxp.properties
JAXP: no META-INF/services/javax.xml.validation.SchemaFactory file was found
JAXP: attempting to use the platform default XML Schema validator
JAXP: createInstance(com.sun.org.apache.xerces.internal.jaxp.validation.XMLSchemaFactory)
...

NOTES :
  • DOM reads the entire document structure into memory, so its memory consumption is higher than SAX; SAX streams the document and is usually the lighter choice when you only need to validate.
  • See http://en.wikibooks.org/wiki/XML_Schema for more information about XSD.
  • To validate using a DTD, see this HowTo.
    blog comments powered by Disqus