Remove XML tags from a string to keep only text Tag(s): XML
First we define an XSLT template.
[onlytext.xsl]
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:fo="http://www.w3.org/1999/XSL/Format">
<xsl:output method="text" indent="no"/>
<xsl:template match="//text()[normalize-space(.) = '']">
<xsl:text>
</xsl:text>
</xsl:template>
</xsl:stylesheet>
[howto.xml]
<?xml version="1.0"?>
<howto>
<topic id="1">
<title>Java</title>
<url>http://www.rgagnon.com/topics/java-io.html</url>
</topic>
<topic id="2">
<title>XML</title>
<url>http://www.rgagnon.com/topics/java-xml.html</url>
</topic>
<topic id="3">
<title>Javascript</title>
<url>http://www.rgagnon.com/topics/js-language.html</url>
</topic>
<topic id="4">
<title>VBScript</title>
<url>http://www.rgagnon.com/topics/wsh-vbs.html</url>
</topic>
</howto>
And the Java code to apply the template to the XML.
import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.w3c.dom.Document;
/**
 * Strips the markup from an XML document by running it through an XSLT
 * stylesheet and writing the transformation result to a text file.
 */
public class XMLUtils {
    /** Stylesheet path used when none is given on the command line. */
    private static final String DEFAULT_STYLESHEET = "/temp/onlytext.xsl";
    /** XML input path used when none is given on the command line. */
    private static final String DEFAULT_XML_SOURCE = "/temp/howto.xml";
    /** Text output path used when none is given on the command line. */
    private static final String DEFAULT_TXT_OUTPUT = "/temp/howto.txt";

    /**
     * Applies the stylesheet to the XML source and writes the result to the
     * output file, then prints {@code Done.} on standard output.
     *
     * <p>All arguments are optional and positional; any that are missing fall
     * back to the default paths under {@code /temp}.
     *
     * @param args {@code [stylesheet] [xmlSource] [txtOutput]}
     * @throws Exception if the stylesheet cannot be loaded, the XML cannot be
     *     parsed, or the transformation fails
     */
    public static void main(String[] args) throws Exception {
        File stylesheet = new File(argOrDefault(args, 0, DEFAULT_STYLESHEET));
        File xmlSource = new File(argOrDefault(args, 1, DEFAULT_XML_SOURCE));
        File txtOutput = new File(argOrDefault(args, 2, DEFAULT_TXT_OUTPUT));

        StreamSource stylesource = new StreamSource(stylesheet);
        Transformer transformer = TransformerFactory.newInstance()
                .newTransformer(stylesource);

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // XSLT needs namespace information; the factory is not namespace-aware
        // by default, which can break the transformation of namespaced input.
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document document = builder.parse(xmlSource);

        // To print to the console instead, use new StreamResult(System.out).
        transformer.transform(new DOMSource(document), new StreamResult(txtOutput));
        System.out.println("Done.");
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        if (args != null && index < args.length && args[index] != null) {
            return args[index];
        }
        return fallback;
    }
}
The resulting [howto.txt] contains only the text content:
Java http://www.rgagnon.com/topics/java-io.html XML http://www.rgagnon.com/topics/java-xml.html Javascript http://www.rgagnon.com/topics/js-language.html VBScript http://www.rgagnon.com/topics/wsh-vbs.html
See also : Remove HTML tags from a file to extract only the TEXT.
mail_outline
Send comment, question or suggestion to howto@rgagnon.com
Send comment, question or suggestion to howto@rgagnon.com