package test;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.net.URL;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

- public class TikaTest {
- public static void main(String[] args) {
- String classPath = TikaTest.class.getResource("/").getPath();
- classPath += "doc/test.doc";
- System.out.println(classPath);
- File f = new File(classPath);
- Parser parser = new AutoDetectParser();
- InputStream input = null;
- try {
- input = new FileInputStream(f);
- ContentHandler handler = new BodyContentHandler();
- ParseContext context = new ParseContext();
- context.set(Parser.class, parser);
- Metadata data = new Metadata();
- parser.parse(input, handler, data, context);
- System.out.println(handler.toString());
- } catch (FileNotFoundException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (Exception e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } finally{
- if(input!=null){
- try {
- input.close();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- }
- }
- }