//讀取文本
InputStream is = new FileInputStream("d://04.doc");
WordExtractor extractor = new WordExtractor(is);
String text = extractor.getText();//抓取文本,裡面有許多空格和換行
//抓取文本,提取的結果也可能包含了一些其他的crud比特
String textFromPieces = extractor.getTextFromPieces();
// Read the document text through HWPFDocument.
// The stream is named docStream (not "is") so it cannot clash with another
// local of that name in the enclosing scope, and it lives in a
// try-with-resources statement so it is closed on every path, including
// when constructing the document throws. The document, range and extracted
// strings are declared outside the try so later code can still use them.
HWPFDocument hwpfDocument;
Range range;
String text1;
String text2;
try (InputStream docStream = new FileInputStream("d://04.doc")) {
    hwpfDocument = new HWPFDocument(docStream);
    // Grab the text; the result may also include some stray "crud" bits.
    text1 = hwpfDocument.getDocumentText();
    System.err.println(text1);
    range = hwpfDocument.getRange();
    // Grab the text; it contains many spaces and line breaks.
    text2 = range.text();
    System.err.println(text2);
}
/**
 * Prints the text of every paragraph in the given range that is part of a list.
 * Each matching paragraph is written to standard output on its own line,
 * prefixed with "list: "; paragraphs that are not in a list are skipped.
 *
 * @param range the range whose paragraphs are inspected
 */
private void printParagraph(Range range) {
    final int paragraphCount = range.numParagraphs();
    int index = 0;
    while (index < paragraphCount) {
        final Paragraph current = range.getParagraph(index);
        index++;
        // Guard clause: only list paragraphs are printed.
        if (!current.isInList()) {
            continue;
        }
        System.out.println("list: " + current.text());
    }
}
/**
 * Prints the number of sections in the given range, then, for each section
 * in order, its left, right, top and bottom margins, its page height and
 * its text. Every value is written to standard output on its own line.
 *
 * @param range the range whose sections are printed
 */
private void printSection(Range range) {
    final int sectionCount = range.numSections();
    System.out.println(sectionCount);

    int index = 0;
    while (index < sectionCount) {
        final Section current = range.getSection(index);

        // Margins: left, right, top, bottom.
        System.out.println(current.getMarginLeft());
        System.out.println(current.getMarginRight());
        System.out.println(current.getMarginTop());
        System.out.println(current.getMarginBottom());

        // Page height, followed by the section's text.
        System.out.println(current.getPageHeight());
        System.out.println(current.text());

        index++;
    }
}