一、後台接口方法:
1.建立索引
/**
 * Adds one knowledge document to the Lucene index stored under
 * {@code indexPath + "/knowledgeIndex"}, creating the directory when needed,
 * and then optimizes the index.
 *
 * @param flag       passed to the {@code IndexWriter} constructor as its third
 *                   argument (the "create" switch); forced to {@code true} when
 *                   the index directory contains no {@code segments.gen} file
 * @param path       path of the file whose text is indexed; when {@code null}
 *                   or empty no document is added and the index is only optimized
 * @param indexPath  parent directory of the {@code knowledgeIndex} directory
 * @param title      value of the stored, analyzed {@code title} field
 * @param createtime date formatted with {@code StringUtil.getDateStringYMD} and
 *                   stored in the {@code createtime} field
 * @param infoType   value of the {@code infoType} field
 * @param knowlegeid value of the {@code knowlegeid} field
 * @throws RuntimeException if the index directory cannot be created, or if
 *                          writing or closing the index fails
 */
public void createKnowledgeIndex(boolean flag, String path, String indexPath, String title, Date createtime, String infoType, String knowlegeid) {
    // Locate (and create if missing) the directory that holds the index files.
    File indexDir = new File(indexPath + "/knowledgeIndex");
    if (!indexDir.exists()) {
        if (!indexDir.mkdirs()) {
            throw new RuntimeException("索引檔案夾建立出錯");
        }
    }
    // Analyzer used to tokenize the analyzed fields.
    Analyzer luceneAnalyzer = new IK_CAnalyzer();
    // No segments.gen file means there is no index yet, so one has to be created.
    File[] file = indexDir.listFiles(new FilenameFilter() {
        public boolean accept(File dir, String name) {
            return name.equals("segments.gen");
        }
    });
    if (file == null || file.length == 0) {
        flag = true;
    }
    IndexWriter indexWriter = null;
    // Tracks whether the try body finished, so a close() failure is only
    // rethrown when it would not hide an earlier, more relevant exception.
    boolean succeeded = false;
    long startTime = System.currentTimeMillis();
    try {
        indexWriter = new IndexWriter(indexDir, luceneAnalyzer, flag);
        // Check the path before building a File from it: new File(null) throws.
        if (path != null && !"".equals(path)) {
            File dataFiles = new File(path);
            String txtReader = postFix(dataFiles, fileParseDomain);
            Document document = new Document();
            document.add(new Field("path", dataFiles.getCanonicalPath(),
                    Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.add(new Field("title", title, Field.Store.YES,
                    Field.Index.ANALYZED,
                    Field.TermVector.WITH_OFFSETS));
            document.add(new Field("createtime", StringUtil.getDateStringYMD(createtime),
                    Field.Store.YES, Field.Index.ANALYZED));
            document.add(new Field("infoType", infoType,
                    Field.Store.YES, Field.Index.ANALYZED));
            // NOTE(review): this field is ANALYZED, while deletion elsewhere looks it
            // up by exact Term; confirm the analyzer keeps ids as a single token.
            document.add(new Field("knowlegeid", knowlegeid,
                    Field.Store.YES, Field.Index.ANALYZED));
            document.add(new Field("contents", txtReader,
                    Field.Store.COMPRESS, Field.Index.ANALYZED,
                    Field.TermVector.WITH_OFFSETS));
            // "all" combines title and body text into one searchable field.
            document.add(new Field("all", title + txtReader,
                    Field.Store.COMPRESS, Field.Index.ANALYZED,
                    Field.TermVector.WITH_OFFSETS));
            indexWriter.addDocument(document);
        }
        indexWriter.optimize();
        succeeded = true;
    } catch (IOException e) {
        log.error("索引建立出錯" + e.getMessage(), e);
        throw new RuntimeException("索引建立出錯" + e.getMessage(), e);
    } finally {
        // The writer is null when its constructor threw; nothing to close then.
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e) {
                // CorruptIndexException is an IOException, so one handler covers both.
                log.error(e.getMessage(), e);
                if (succeeded) {
                    throw new RuntimeException("關閉寫索引流出錯" + e.getMessage(), e);
                }
            }
        }
    }
    long endTime = System.currentTimeMillis();
    log.info("建立索引總時間:" + (endTime - startTime));
}
2.根據id和路徑删除索引:
/**
 * Deletes documents from the index under {@code indexPath + "/knowledgeIndex"}
 * using the terms {@code path:<path>} and {@code knowlegeid:<knowlegeid>},
 * then optimizes the index.
 *
 * <p>NOTE(review): per the Lucene API, {@code deleteDocuments(Term[])} removes
 * documents containing ANY of the given terms, so a document matching only the
 * path or only the id is removed as well; confirm this is intended.
 *
 * @param path       exact value of the {@code path} field to delete by
 * @param knowlegeid exact term of the {@code knowlegeid} field to delete by
 * @param indexPath  parent directory of the {@code knowledgeIndex} directory
 * @throws RuntimeException if the index cannot be opened, modified or closed
 */
public void delKnowledgeIndexByinfoid(String path,String knowlegeid, String indexPath) {
    IndexWriter indexWriter = null;
    // Tracks whether the try body finished, so a close() failure is only
    // rethrown when it would not hide an earlier exception.
    boolean succeeded = false;
    try {
        Directory directory = FSDirectory.getDirectory(indexPath + "/knowledgeIndex");
        Term[] termArr = new Term[2];
        termArr[0] = new Term("path", path);
        termArr[1] = new Term("knowlegeid", knowlegeid);
        Analyzer luceneAnalyzer = new IK_CAnalyzer();
        // create = false: open the existing index instead of replacing it.
        indexWriter = new IndexWriter(directory, luceneAnalyzer, false);
        indexWriter.deleteDocuments(termArr);
        indexWriter.optimize();
        succeeded = true;
    } catch (IOException e) {
        log.debug("索引删除出錯" + e.getMessage(), e);
        throw new RuntimeException("索引删除出錯" + e.getMessage(), e);
    } finally {
        // Always close the writer so a failed delete does not leave it open.
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e) {
                log.debug("索引删除出錯" + e.getMessage(), e);
                if (succeeded) {
                    throw new RuntimeException("索引删除出錯" + e.getMessage(), e);
                }
            }
        }
    }
}
3.查詢索引
/**
 * Searches the knowledge index for documents whose {@code type} field contains
 * the keyword (wildcard match {@code *keyWord*}), sorted by the
 * {@code createtime} field, and returns the page described by {@code sp}.
 *
 * @param type      name of the indexed field to search; also the key under which
 *                  the highlighted fragment is put into each result map
 * @param keyWord   raw keyword; query-syntax characters are escaped first
 * @param indexPath parent directory of the {@code knowledgeIndex} directory
 * @param sp        paging state; its record count is set and {@code init()} is
 *                  called only when there is at least one hit
 * @return a list of maps with keys {@code path}, {@code title},
 *         {@code createtime}, {@code infoType}, {@code knowlegeid} and, when the
 *         field has a stored value and is not {@code title}, {@code type} mapped
 *         to the highlighted fragment; empty when nothing matches
 * @throws RuntimeException if the index directory does not exist, the index
 *                          cannot be read, or the query cannot be parsed
 */
public List searchKnowlegeByKey(String type, String keyWord, String indexPath, SplitPage sp) {
    keyWord = specialStrConvert(keyWord);
    // Location of the index.
    File indexDir = new File(indexPath + "/knowledgeIndex");
    // Verify the index directory exists before anything tries to open it.
    if (!indexDir.exists()) {
        log.debug("索引檔案不存在");
        throw new RuntimeException("索引檔案不存在");
    }
    IndexReader reader = null;
    IndexSearcher searcher = null;
    Hits hits = null;
    List list = null;
    try {
        FSDirectory directory = FSDirectory.getDirectory(indexDir, false);
        reader = IndexReader.open(directory);
        // Search through the same reader that later serves term vectors, so the
        // document ids returned by hits.id(i) refer to this reader's view.
        searcher = new IndexSearcher(reader);
        // Build a wildcard query against the requested field.
        Analyzer luceneAnalyzer = new IK_CAnalyzer();
        QueryParser parser = new QueryParser(type, luceneAnalyzer);
        parser.setAllowLeadingWildcard(true);
        Query query = parser.parse("+(" + type + ":*" + keyWord + "*)");
        // Sort the results by the createtime field (reverse = false).
        Sort sort = new Sort(new SortField[]{new SortField("createtime", false)});
        hits = searcher.search(query, sort);
        list = new ArrayList();
        // Matched terms are wrapped in a bold span with a yellow background.
        SimpleHTMLFormatter sHtmlF = new SimpleHTMLFormatter(
                "<b><span style='background-color:yellow;'>", "</span></b>");
        Highlighter highlighter = new Highlighter(sHtmlF, new QueryScorer(
                query));
        // NOTE(review): sp is left untouched when there are no hits; confirm the
        // caller does not rely on a record count of 0 in that case.
        if (hits != null && hits.length() > 0) {
            int len = hits.length();
            sp.setRecordCount(len);
            sp.init();
            int endRecord = sp.getStartRecord() + sp.getPageSize();
            int con = endRecord > sp.getRecordCount() ? sp.getRecordCount()
                    : endRecord;
            for (int i = sp.getStartRecord(); i < con; i++) {
                Document docTemp = hits.doc(i);
                String value = docTemp.get(type);
                Map m = new HashMap();
                m.put("path", docTemp.get("path"));
                m.put("title", docTemp.get("title"));
                m.put("createtime", docTemp.get("createtime"));
                m.put("infoType", docTemp.get("infoType"));
                m.put("knowlegeid", docTemp.get("knowlegeid"));
                if (value != null && !type.equals("title")) {
                    // Prefer the stored term vector for highlighting; fields indexed
                    // without one yield null here, so re-analyze the stored text then.
                    Object vector = reader.getTermFreqVector(hits.id(i), type);
                    TokenStream tokenStream;
                    if (vector instanceof TermPositionVector) {
                        tokenStream = TokenSources
                                .getTokenStream((TermPositionVector) vector);
                    } else {
                        tokenStream = luceneAnalyzer.tokenStream(type,
                                new java.io.StringReader(value));
                    }
                    String str = highlighter.getBestFragment(tokenStream,
                            value);
                    m.put(type, str);
                }
                list.add(m);
            }
        }
    } catch (IOException e) {
        log.debug(e.getMessage(), e);
        throw new RuntimeException(e.getMessage(), e);
    } catch (ParseException e) {
        log.debug("lucene分詞轉換出錯" + e.getMessage(), e);
        throw new RuntimeException("lucene分詞轉換出錯" + e.getMessage(), e);
    } finally {
        // Release searcher and reader on every path; a failure while closing a
        // read-only resource is logged rather than allowed to mask the outcome.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.debug(e.getMessage(), e);
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                log.debug(e.getMessage(), e);
            }
        }
    }
    return list;
}
4.判定是否存在索引
/**
 * Reports whether the directory {@code path + "/knowledgeIndex"} contains a
 * file named {@code segments.gen}. The directory is created when it does not
 * exist yet.
 *
 * @param path parent directory of the {@code knowledgeIndex} directory
 * @return {@code "ok"} when a {@code segments.gen} entry is present, otherwise
 *         the empty string
 * @throws RuntimeException if the directory is missing and cannot be created
 */
public String isExistsKnowlegeIndex(String path) {
    final File knowledgeDir = new File(path + "/knowledgeIndex");
    if (!knowledgeDir.exists() && !knowledgeDir.mkdirs()) {
        throw new RuntimeException("索引檔案夾建立出錯");
    }
    // list() yields null when the directory cannot be read; treat as "no index".
    final String[] entries = knowledgeDir.list();
    if (entries != null) {
        for (int i = 0; i < entries.length; i++) {
            if ("segments.gen".equals(entries[i])) {
                return "ok";
            }
        }
    }
    return "";
}
5.替換特殊字元
/**
 * Escapes characters that have a meaning in the query syntax by prefixing
 * them with a backslash.
 *
 * <p>Escaped individually: {@code \ + - ! ( ) { } [ ] ^ " ~ * ? :}.
 * The sequences {@code &&} and {@code ||} are escaped as a pair (one backslash
 * in front of the pair); a lone {@code &} or {@code |} is left unchanged.
 *
 * @param str text to escape, may be {@code null}
 * @return the escaped text, or the empty string for {@code null} or empty input
 */
private static String specialStrConvert(String str) {
    if (str == null || str.length() == 0) {
        return "";
    }
    // Characters that each receive their own leading backslash.
    final String singles = "\\+-!(){}[]^\"~*?:";
    final int n = str.length();
    final StringBuilder escaped = new StringBuilder(n + 8);
    int i = 0;
    while (i < n) {
        final char c = str.charAt(i);
        // A doubled '&' or '|' is consumed as one unit, scanning left to right.
        final boolean doubled = (c == '&' || c == '|')
                && i + 1 < n && str.charAt(i + 1) == c;
        if (doubled) {
            escaped.append('\\').append(c).append(c);
            i += 2;
            continue;
        }
        if (singles.indexOf(c) >= 0) {
            escaped.append('\\');
        }
        escaped.append(c);
        i++;
    }
    return escaped.toString();
}
6.讀取不同檔案(方法在Java基礎中工具類中實作)
/**
 * Extracts the text of a file by dispatching on its extension to the matching
 * reader of {@code fileParseDomain}. Extensions are matched case-insensitively
 * ({@code .doc}, {@code .pdf}, {@code .xls}, {@code .txt}, {@code .html},
 * {@code .htm}).
 *
 * @param file            file to read
 * @param fileParseDomain provider of the format-specific readers
 * @return the text produced by the reader, or the empty string when the
 *         extension is not one of the supported ones
 * @throws RuntimeException if an {@code IOException} occurs while reading
 */
private static String postFix(File file, FileParseDomain fileParseDomain) {
    // Lower-case once so that e.g. "REPORT.PDF" is handled like "report.pdf";
    // a fixed locale keeps the comparison independent of the platform default.
    String lowerPath = file.getPath().toLowerCase(java.util.Locale.ENGLISH);
    String txtReader = "";
    try {
        if (lowerPath.endsWith(".doc")) {
            txtReader = fileParseDomain.readWord(file.getCanonicalPath());
        } else if (lowerPath.endsWith(".pdf")) {
            txtReader = fileParseDomain.readPDF(file.getCanonicalPath());
        } else if (lowerPath.endsWith(".xls")) {
            txtReader = fileParseDomain.readExcel(file.getCanonicalPath());
        } else if (lowerPath.endsWith(".txt")) {
            txtReader = fileParseDomain.readTxt(file.getCanonicalPath());
        } else if (lowerPath.endsWith(".html")
                || lowerPath.endsWith(".htm")) {
            txtReader = fileParseDomain.readHtmlText(file.getCanonicalPath());
        }
    } catch (IOException e) {
        log.debug("檔案讀取出錯" + e.getMessage(), e);
        // Keep the original exception as the cause so the stack trace survives.
        throw new RuntimeException("檔案讀取出錯" + e.getMessage(), e);
    }
    return txtReader;
}