import java.io.*;
/**
 * Command-line utility that prints an HTML file with its tags removed.
 *
 * <p>Tags are removed with a single regular expression: everything from a
 * {@code <} up to the nearest following {@code >} is deleted. Nothing else
 * is interpreted, so whatever text lies outside such spans is printed as-is.
 *
 * <p>Usage: {@code java Html2TextWithRegExp [file]}. When no file is given,
 * {@code java-new.html} in the current directory is read.
 */
public class Html2TextWithRegExp {

    /** File read when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT = "java-new.html";

    /**
     * Matches one tag, reluctantly, from {@code <} to the nearest {@code >}.
     * The embedded {@code (?s)} flag lets {@code .} match line terminators,
     * so a tag whose attributes are spread over several lines is still removed.
     */
    private static final String TAG_REGEX = "(?s)<.*?>";

    private Html2TextWithRegExp() {}

    /**
     * Reads the input file, strips the tags and prints the result to
     * standard output. I/O failures are reported as a stack trace on
     * standard error.
     *
     * @param args optional; {@code args[0]} is the path of the HTML file
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        try {
            System.out.println(stripTags(readAll(path)));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Returns {@code html} with every {@code <...>} span deleted. */
    private static String stripTags(String html) {
        return html.replaceAll(TAG_REGEX, "");
    }

    /**
     * Reads the whole file into a string, joining its lines with '\n' so
     * that words on adjacent lines are not glued together. The reader is
     * closed even when reading fails.
     */
    private static String readAll(String path) throws IOException {
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            boolean first = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!first) {
                    content.append('\n');
                }
                content.append(line);
                first = false;
            }
        }
        return content.toString();
    }
}
import java.io.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;
/**
 * Collects the text of an HTML document using the HTML parser that ships
 * with Swing ({@link ParserDelegator}).
 *
 * <p>The class is a parser callback: {@link #parse(Reader)} runs the parser
 * with this object as the callback, each text callback appends its
 * characters to an internal buffer, and {@link #getText()} returns what has
 * been collected. Text chunks are appended back to back with no separator
 * added between them.
 *
 * <p>Usage from the command line: {@code java Html2Text [file]}. When no
 * file is given, {@code java-new.html} in the current directory is read.
 */
public class Html2Text extends HTMLEditorKit.ParserCallback {

    /** File read by {@link #main} when no path is supplied. */
    private static final String DEFAULT_INPUT = "java-new.html";

    /**
     * Text collected by the most recent parse. Initialised eagerly so that
     * {@link #getText()} returns an empty string, rather than failing,
     * when it is called before any parse.
     */
    StringBuffer s = new StringBuffer();

    public Html2Text() {}

    /**
     * Parses the HTML supplied by {@code in}, replacing any text collected
     * by a previous call. The reader is not closed by this method.
     *
     * @param in source of the HTML to convert
     * @throws IOException if the parser fails to read from {@code in}
     */
    public void parse(Reader in) throws IOException {
        // Start from a fresh buffer so one instance can be reused.
        s = new StringBuffer();
        ParserDelegator delegator = new ParserDelegator();
        // the third parameter is TRUE to ignore charset directive
        delegator.parse(in, this, true);
    }

    /**
     * Parser callback: appends the reported characters to the buffer.
     *
     * @param text characters reported by the parser
     * @param pos  position reported by the parser (unused)
     */
    @Override
    public void handleText(char[] text, int pos) {
        s.append(text);
    }

    /**
     * Returns the text collected so far.
     *
     * @return the collected text; empty if nothing has been parsed yet
     */
    public String getText() {
        return s.toString();
    }

    /**
     * Converts one HTML file and prints its text to standard output.
     * Failures are reported as a stack trace on standard error.
     *
     * @param args optional; {@code args[0]} is the path of the HTML file
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        // try-with-resources closes the file even when parsing throws.
        try (FileReader in = new FileReader(path)) {
            Html2Text parser = new Html2Text();
            parser.parse(in);
            System.out.println(parser.getText());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
// Written and compiled by Réal Gagnon ©1998-2008
// [ home ]