Convert a flat file to XML (SAX)
Tag(s): XML


Consider the following data file (data.txt):
Java|http://www.rgagnon/javahowto.htm
PowerBuilder|http://www.rgagnon/pbhowto.htm
Javascript|http://www.rgagnon/jshowto.htm
VBScript|http://www.rgagnon/vbshowto.htm
We want to convert it to XML (data.xml):
<?xml version="1.0" encoding="ISO-8859-1"?>
<HOWTOS>
    <TOPIC>
        <TITLE>Java</TITLE>
        <URL>http://www.rgagnon/javahowto.htm</URL>
    </TOPIC>
    <TOPIC>
        <TITLE>PowerBuilder</TITLE>
        <URL>http://www.rgagnon/pbhowto.htm</URL>
    </TOPIC>
    <TOPIC>
        <TITLE>Javascript</TITLE>
        <URL>http://www.rgagnon/jshowto.htm</URL>
    </TOPIC>
    <TOPIC>
        <TITLE>VBScript</TITLE>
        <URL>http://www.rgagnon/vbshowto.htm</URL>
    </TOPIC>
</HOWTOS>
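We use JAXP with SAX: a TransformerHandler receives the SAX events we generate for each input line and serializes them as XML.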
import java.io.*;

// SAX classes.
import org.xml.sax.*;
import org.xml.sax.helpers.*;

import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.stream.*;
import javax.xml.transform.sax.*;

/**
 * Converts a pipe-delimited flat file into an XML document by generating
 * SAX events and letting a JAXP {@link TransformerHandler} serialize them.
 *
 * <p>Each input line of the form {@code title|url} becomes one
 * {@code <TOPIC>} element (with {@code <TITLE>} and {@code <URL>} children)
 * inside a single {@code <HOWTOS>} root element.
 */
public class ToXML  {

  /** Source of the flat-file lines; opened and closed by {@link #doit()}. */
  BufferedReader in;
  /** Destination of the serialized XML; must be set before {@link #initXML()}. */
  StreamResult out;

  /** SAX handler that serializes the events it receives to {@link #out}. */
  TransformerHandler th;
  /** Reusable (always empty) attribute list passed to every start tag. */
  AttributesImpl atts;

  public static void main (String args[]) {
      new ToXML().doit();
  }

  /**
   * Reads {@code data.txt} line by line and writes {@code data.xml}.
   * Any failure is reported on standard error; the input reader is closed
   * whether or not the conversion succeeds.
   */
  public void doit () {
    try {
      in = new BufferedReader(new FileReader("data.txt"));
      try {
        out = new StreamResult("data.xml");
        initXML();
        String str;
        while ((str = in.readLine()) != null) {
          process(str);
        }
        closeXML();
      }
      finally {
        // Release the input file even when the conversion fails part-way.
        in.close();
      }
    }
    catch (Exception e) { e.printStackTrace(); }
  }


  /**
   * Creates the serializing handler, configures the output format
   * (ISO-8859-1, indented by 4) and opens the {@code <HOWTOS>} root element.
   * {@link #out} must already be set.
   *
   * @throws ParserConfigurationException declared for backward compatibility
   * @throws TransformerConfigurationException if no SAX-capable transformer
   *         factory is available or the handler cannot be created
   * @throws SAXException if the handler rejects the start events
   */
  public void initXML() throws ParserConfigurationException,
      TransformerConfigurationException, SAXException {
    // JAXP + SAX
    TransformerFactory factory = TransformerFactory.newInstance();
    // Verify SAX support up front instead of failing with a ClassCastException.
    if (!factory.getFeature(SAXTransformerFactory.FEATURE)) {
      throw new TransformerConfigurationException(
          "TransformerFactory does not support SAX: "
          + factory.getClass().getName());
    }
    SAXTransformerFactory tf = (SAXTransformerFactory) factory;

    th = tf.newTransformerHandler();
    Transformer serializer = th.getTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING,"ISO-8859-1");
    // pretty XML output
    serializer.setOutputProperty
        ("{http://xml.apache.org/xslt}indent-amount", "4");
    serializer.setOutputProperty(OutputKeys.INDENT,"yes");
    th.setResult(out);
    th.startDocument();
    atts = new AttributesImpl();
    th.startElement("","","HOWTOS",atts);
  }

  /**
   * Emits one {@code <TOPIC>} element for an input line.
   *
   * <p>The line is split on {@code |}; the first field is the title and the
   * second the URL (further fields are ignored). A {@code null} or blank
   * line is skipped. A line with no second field produces an empty
   * {@code <URL>} element. The line is validated before any event is sent,
   * so a bad line can never leave a half-written {@code <TOPIC>}.
   *
   * @param s one line of the flat file
   * @throws SAXException if the handler rejects an event
   */
  public void process (String s) throws SAXException {
    if (s == null || s.trim().length() == 0) {
      return;  // e.g. a trailing empty line at the end of the file
    }
    // A negative limit keeps trailing empty fields, so "title|" yields two fields.
    String [] elements = s.split("\\|", -1);
    String title = elements[0];
    String url = elements.length > 1 ? elements[1] : "";

    atts.clear();
    th.startElement("","","TOPIC",atts);
    writeTextElement("TITLE", title);
    writeTextElement("URL", url);
    th.endElement("","","TOPIC");
  }

  /** Emits {@code <name>text</name>} as SAX events. */
  private void writeTextElement (String name, String text) throws SAXException {
    th.startElement("","",name,atts);
    th.characters(text.toCharArray(),0,text.length());
    th.endElement("","",name);
  }

  /**
   * Closes the {@code <HOWTOS>} root element and ends the document.
   *
   * @throws SAXException if the handler rejects the end events
   */
  public void closeXML() throws SAXException {
    th.endElement("","","HOWTOS");
    th.endDocument();
  }
}

blog comments powered by Disqus