天天看點

poi操作word 2003/doc:HWPFDocument中的要素、maven引用、通過WordExtractor讀取文本、通過HWPFDocument讀檔案、通過HWPFDocument寫檔案

poi操作word 2003/doc

  • HWPFDocument中的要素
  • maven引用
  • 通過WordExtractor讀取文本
  • 通過HWPFDocument讀檔案
  • 通過HWPFDocument寫檔案

HWPFDocument中的要素

Range 表示一個範圍(内容:Section,Paragraph,CharacterRun)
Section 文檔中的一個小節,文檔由多個小節組成(内容:Paragraph)
Paragraph 段落(内容:CharacterRun)
CharacterRun 具有相同屬性的文本
Table 表格
TableRow 表格對應的行
TableCell 表格對應的單元格
***Section、Paragraph、CharacterRun和Table都繼承自Range***
           

maven引用

<dependency>
	<groupId>org.apache.poi</groupId>
	<artifactId>poi-scratchpad</artifactId>
	<version>3.17</version>
</dependency>
<dependency>
	<groupId>commons-io</groupId>
	<artifactId>commons-io</artifactId>
	<version>2.0</version>
</dependency>
<dependency>
	<groupId>commons-lang</groupId>
	<artifactId>commons-lang</artifactId>
	<version>2.0</version>
</dependency>
           

通過WordExtractor讀取文本

// Read the text of a .doc file through WordExtractor.
// NOTE(review): neither the input stream nor the extractor is closed in this
// snippet — confirm that the surrounding code releases them.
InputStream is = new FileInputStream("d://04.doc");
WordExtractor extractor = new WordExtractor(is);
String text = extractor.getText();// extracted text; it contains many spaces and line breaks
// Extracts the text as well; the result may also include various other bits of crud
String textFromPieces = extractor.getTextFromPieces();
           
/**
 * Prints the bookmark count, then for every bookmark its name and the
 * values returned by {@code getStart()} and {@code getEnd()}.
 *
 * @param bookmarks the bookmark collection to print
 */
private void printBookmarks(Bookmarks bookmarks) {
	final int total = bookmarks.getBookmarksCount();
	System.out.println("書簽數量:" + total);
	int ordinal = 0;
	while (ordinal < total) {
		final Bookmark current = bookmarks.getBookmark(ordinal);
		// Advance first so the printed label is 1-based.
		ordinal++;
		System.out.println("書簽" + ordinal + "的名稱是:" + current.getName());
		System.out.println("開始位置:" + current.getStart());
		System.out.println("結束位置:" + current.getEnd());
	}
}

/**
 * Prints selected fields of the document's SummaryInformation, one per line:
 * author, character count, page count, title and subject.
 *
 * @param info the summary information to print
 */
private void printSummary(SummaryInformation info) {
	// Each value is fetched and printed in turn, in the same order as before.
	final String author = info.getAuthor();
	System.out.println(author);

	final int charCount = info.getCharCount();
	System.out.println(charCount);

	final int pageCount = info.getPageCount();
	System.out.println(pageCount);

	final String title = info.getTitle();
	System.out.println(title);

	final String subject = info.getSubject();
	System.out.println(subject);
}
           

通過HWPFDocument讀檔案

// Read the text of a .doc file through HWPFDocument.
// NOTE(review): neither the input stream nor the document is closed in this
// snippet — confirm that the surrounding code releases them.
InputStream is = new FileInputStream("d://04.doc");
HWPFDocument hwpfDocument = new HWPFDocument(is);
// Extracts the text; the result may also include various other bits of crud
String text1 = hwpfDocument.getDocumentText();
System.err.println(text1);

// The Range returned by getRange() is the object the print helpers in this article take.
Range range = hwpfDocument.getRange();
String text2 = range.text();// extracted text; it contains many spaces and line breaks
System.err.println(text2);
           
/**
 * Prints the text of every paragraph in the range for which
 * {@code isInList()} is true, prefixed with {@code "list: "}.
 * Paragraphs that are not in a list are skipped.
 *
 * @param range the range whose paragraphs are inspected
 */
private void printParagraph(Range range) {
	final int paragraphCount = range.numParagraphs();
	for (int index = 0; index < paragraphCount; index++) {
		final Paragraph paragraph = range.getParagraph(index);
		if (!paragraph.isInList()) {
			continue;
		}
		System.out.println("list: " + paragraph.text());
	}
}
           
/**
 * Prints the number of sections in the range and then, for each section,
 * its left, right, top and bottom margins, its page height and its text.
 *
 * @param range the range whose sections are printed
 */
private void printSection(Range range) {
	final int sectionCount = range.numSections();
	System.out.println(sectionCount);
	int index = 0;
	while (index < sectionCount) {
		final Section current = range.getSection(index);
		index++;
		// Page geometry first, then the section's text.
		System.out.println(current.getMarginLeft());
		System.out.println(current.getMarginRight());
		System.out.println(current.getMarginTop());
		System.out.println(current.getMarginBottom());
		System.out.println(current.getPageHeight());
		System.out.println(current.text());
	}
}
           

通過HWPFDocument寫檔案

/**
 * Demo entry point: fills the template with four replacement values,
 * exports the result, then reads the exported file back and prints its
 * text to standard error.
 *
 * @param args unused
 * @throws IOException if reading the exported document fails
 */
public static void main(String[] args) throws IOException {
	final String templatePath = "d://tmp//module.doc";
	final String exportPath = "d://tmp//導出.doc";

	// Keys are placeholder names; exportWord looks for "$" + key in the template.
	final Map<String, String> replacements = new HashMap<String, String>();
	replacements.put("title1", "标題一");
	replacements.put("content1", "内容一");
	replacements.put("title2", "标題二");
	replacements.put("content2", "内容二");

	final HWPFDocumentUtil exporter = new HWPFDocumentUtil(templatePath, replacements);
	exporter.exportWord(new File(exportPath));

	System.err.println(HWPFDocumentUtil.readWordText(exportPath));
}
           
poi操作word 2003/doc:HWPFDocument中的要素、maven引用、通過WordExtractor讀取文本、通過HWPFDocument讀檔案、通過HWPFDocument寫檔案
/**
 * Template-based export of Word documents in the 2003 binary (.doc) format.
 * <p>
 * The template is loaded as an {@code HWPFDocument}; every occurrence of
 * {@code "$" + key} in the document range is replaced with the value mapped
 * to that key, and the result is written to a target file.
 *
 * @author Administrator
 */
public class HWPFDocumentUtil {
	/** Path of the Word template document. */
	private String wordMouldFile;
	/** Placeholder names (without the leading "$") mapped to their replacement text. */
	private Map<String, String> replaceMap;

	/**
	 * Creates an exporter for the given template.
	 *
	 * @param wordMouldFile path of the .doc template file
	 * @param replaceMap    placeholder names mapped to replacement text
	 */
	public HWPFDocumentUtil(String wordMouldFile, Map<String, String> replaceMap) {
		this.wordMouldFile = wordMouldFile;
		this.replaceMap = replaceMap;
	}

	/**
	 * Reads the text of a .doc file.
	 *
	 * @param filename path of the document to read
	 * @return the text of the document range, trimmed; an empty string when
	 *         {@code filename} is blank
	 * @throws FileNotFoundException if the file does not exist
	 * @throws IOException           if the document cannot be read or closed
	 */
	public static String readWordText(String filename) throws IOException {
		if (StringUtils.isBlank(filename)) {
			return "";
		}
		File file = new File(filename);
		if (!file.exists()) {
			throw new FileNotFoundException(filename);
		}

		InputStream is = null;
		HWPFDocument hwpfDocument = null;
		try {
			is = new FileInputStream(filename);
			hwpfDocument = new HWPFDocument(is);
			Range range = hwpfDocument.getRange();
			return StringUtils.trimToEmpty(range.text());
		} finally {
			IOUtils.closeQuietly(is);
			// The document is still null when opening the stream or parsing
			// failed; closing it unconditionally would replace the original
			// IOException with a NullPointerException.
			if (hwpfDocument != null) {
				hwpfDocument.close();
			}
		}
	}


	/**
	 * Fills the template and writes the resulting document to {@code outFile}.
	 * <p>
	 * For every entry of the replacement map, the text {@code "$" + key} in
	 * the document range is replaced with the entry's value.
	 *
	 * @param outFile the file the filled document is written to
	 * @throws RuntimeException wrapping the {@link IOException} if the
	 *                          template cannot be read or the output cannot
	 *                          be written
	 */
	public void exportWord(File outFile) {
		InputStream is = null;
		HWPFDocument doc = null;
		OutputStream os = null;
		try {
			is = new FileInputStream(wordMouldFile);
			doc = new HWPFDocument(is);
			Range range = doc.getRange();// content range of the document
			for (Map.Entry<String, String> entry : replaceMap.entrySet()) {
				range.replaceText("$" + entry.getKey(), entry.getValue());
			}
			os = new FileOutputStream(outFile);
			// Write the modified document to the output stream.
			doc.write(os);
		} catch (IOException e) {
			throw new RuntimeException(e);
		} finally {
			try {
				// The document is still null when the template could not be
				// opened or parsed; guard so the real failure is not masked.
				if (doc != null) {
					doc.close();
				}
			} catch (IOException e) {
				// Best effort: a failure to close must not hide the outcome
				// of the export itself.
				e.printStackTrace();
			} finally {
				// Always release both streams, whatever closing the document did.
				IOUtils.closeQuietly(is);
				IOUtils.closeQuietly(os);
			}
		}
	}
	// The article omits the getters and setters here; its placeholder is kept as-is.
	...[get set 忽略]
}