目前的查詢方法過於簡單,而且無法與實際業務中的實體建立關係,因此本篇文章就來描述對查詢方法的擴充。
1.查詢多個字段的檢索方法
1.1.定義接口及輸入輸出項
查詢輸入項SingleSearchOption:
/// <summary>
/// Input for a keyword search restricted to a set of index fields.
/// </summary>
public class SingleSearchOption : SearchOptionBase
{
    /// <summary>
    /// Hit limit used when the caller does not choose one.
    /// </summary>
    private const int DefaultMaxHits = 100;

    /// <summary>
    /// Search keyword.
    /// </summary>
    public string Keyword { get; set; }

    /// <summary>
    /// Names of the index fields the search is restricted to.
    /// </summary>
    public List<string> Fields { get; set; }

    /// <summary>
    /// Creates a fully specified option.
    /// </summary>
    /// <param name="keyword">Search keyword; must not be null, empty or whitespace.</param>
    /// <param name="fields">Fields to search; <c>null</c> is stored as an empty list.</param>
    /// <param name="maxHits">Maximum number of hits to retrieve.</param>
    /// <exception cref="ArgumentException"><paramref name="keyword"/> is null, empty or whitespace.</exception>
    public SingleSearchOption(string keyword, List<string> fields, int maxHits = DefaultMaxHits)
    {
        if (string.IsNullOrWhiteSpace(keyword))
        {
            throw new ArgumentException("搜尋關鍵詞不能為空", nameof(keyword));
        }

        Keyword = keyword;
        // Never leave Fields null: consumers call Fields.ToArray() without a null check.
        Fields = fields ?? new List<string>();
        MaxHits = maxHits;
    }

    /// <summary>
    /// Creates an option whose properties are filled in afterwards
    /// (object initializer, model binding, ...).
    /// </summary>
    public SingleSearchOption()
    {
        // Same defaults as the other constructor, so a partially initialised
        // option has a usable field list and does not ask for zero hits.
        Fields = new List<string>();
        MaxHits = DefaultMaxHits;
    }
}
其中SearchOptionBase:
/// <summary>
/// Common base for search inputs; carries the settings shared by every search kind.
/// </summary>
public class SearchOptionBase : ISearchOption
{
    /// <summary>
    /// Upper bound on the number of hits a search retrieves.
    /// </summary>
    public int MaxHits { get; set; }
}
輸出結果SingleSearchResult:
/// <summary>
/// Outcome of a simple keyword search.
/// </summary>
public class SingleSearchResult : ISearchResult<SearchResultItem>
{
    /// <summary>
    /// Matched items; starts out as an empty list.
    /// </summary>
    public List<SearchResultItem> Items { get; set; } = new List<SearchResultItem>();

    /// <summary>
    /// Time the search took.
    /// </summary>
    public long Elapsed { get; set; }

    /// <summary>
    /// Number of matches.
    /// </summary>
    public int TotalHits { get; set; }
}
其中查詢結果項SearchResultItem:
/// <summary>
/// One hit of a search: its score plus the identity of the entity it points to.
/// </summary>
public class SearchResultItem : ISearchResultItem
{
    /// <summary>
    /// Score assigned to this hit.
    /// </summary>
    public float Score { get; set; }

    /// <summary>
    /// Id of the entity.
    /// </summary>
    public string EntityId { get; set; }

    /// <summary>
    /// Class name of the entity.
    /// </summary>
    public string EntityName { get; set; }
}
1.2.方法實作
/// <summary>
/// Simple search: parses the keyword against every field listed in
/// <paramref name="option"/> and maps each hit to its entity id and entity type.
/// </summary>
/// <param name="option">Keyword, fields to search and maximum number of hits.</param>
/// <returns>The scored hits, their count and the elapsed time in milliseconds.</returns>
/// <exception cref="ArgumentNullException"><paramref name="option"/> is null.</exception>
/// <exception cref="ArgumentException">The option has no field list or an empty keyword.</exception>
public SingleSearchResult SingleSearch(SingleSearchOption option)
{
    if (option == null)
    {
        throw new ArgumentNullException(nameof(option));
    }
    if (option.Fields == null)
    {
        throw new ArgumentException("Fields must not be null.", nameof(option));
    }
    if (string.IsNullOrWhiteSpace(option.Keyword))
    {
        // Keyword has a public setter, so the constructor check can be bypassed.
        throw new ArgumentException("搜尋關鍵詞不能為空", nameof(option));
    }

    SingleSearchResult result = new SingleSearchResult();
    Stopwatch watch = Stopwatch.StartNew();
    using (DirectoryReader reader = DirectoryReader.Open(Directory))
    {
        // Create the index searcher.
        IndexSearcher searcher = new IndexSearcher(reader);
        var queryParser = new MultiFieldQueryParser(LuceneVersion.LUCENE_48, option.Fields.ToArray(), Analyzer);
        Query query = queryParser.Parse(option.Keyword);
        var matches = searcher.Search(query, option.MaxHits).ScoreDocs;
        result.TotalHits = matches.Length;
        foreach (var match in matches)
        {
            var doc = searcher.Doc(match.Doc);
            // doc.Get yields null for a missing stored field instead of throwing,
            // which is also how ScoredSearch reads these two fields.
            result.Items.Add(new SearchResultItem
            {
                Score = match.Score,
                EntityId = doc.Get(CoreConstant.EntityId),
                EntityName = doc.Get(CoreConstant.EntityType)
            });
        }
    }
    watch.Stop();
    result.Elapsed = watch.ElapsedMilliseconds;
    return result;
}
其中實體標識EntityId、實體類名EntityName這兩個域是在建立索引時添加進去的,這樣可以確保每個Document和資料庫的每條記錄都能通過Id互相找到。
![](https://img.laitimes.com/img/_0nNw4CM6IyYiwiM6ICdiwiIyVGduV2YfNWawNyZuBnLjZzM2kDM3YWOkRjN5MzM5E2MzQzY4QGNxkDMmlTMklzLc52YucWbp5GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.png)
2.可設定權重的檢索方法
2.1.定義接口及輸入輸出項
輸入項為:
/// <summary>
/// Input for a weighted multi-field search: keyword, fields, per-field boosts,
/// a minimum score and an optional filter.
/// </summary>
public class ScoredSearchOption : SearchOptionBase
{
    /// <summary>
    /// Boost given to a field that has no explicit boost.
    /// </summary>
    private const float DefaultBoost = 2.0f;

    /// <summary>
    /// Search keyword.
    /// </summary>
    public string Keyword { get; set; }

    /// <summary>
    /// Names of the index fields the search is restricted to.
    /// </summary>
    public List<string> Fields { get; set; }

    /// <summary>
    /// Per-field search boosts. Keys are compared case-insensitively, so a boost
    /// registered as "Title" is also found when looked up as "title".
    /// </summary>
    private readonly Dictionary<string, float> _boosts;

    /// <summary>
    /// Per-field search boosts, completed with <see cref="DefaultBoost"/> for every
    /// entry of <see cref="Fields"/> that has no boost yet.
    /// </summary>
    internal Dictionary<string, float> Boosts
    {
        get
        {
            if (Fields != null)
            {
                foreach (string field in Fields)
                {
                    // Null entries cannot be dictionary keys; skip them instead of throwing.
                    if (field != null && !_boosts.ContainsKey(field))
                    {
                        _boosts.Add(field, DefaultBoost);
                    }
                }
            }
            return _boosts;
        }
    }

    /// <summary>
    /// Minimum score a hit must reach; documented as 0-1, higher values give more precise results.
    /// </summary>
    public float Score { get; set; } = 0.5f;

    /// <summary>
    /// Filter condition.
    /// </summary>
    public Filter Filter { get; set; }

    /// <summary>
    /// Creates a weighted search option.
    /// </summary>
    /// <param name="keyword">Search keyword; must not be null, empty or whitespace.</param>
    /// <param name="fields">Fields to search; <c>null</c> is stored as an empty list.</param>
    /// <param name="maxHits">Maximum number of hits to retrieve.</param>
    /// <param name="boosts">Optional initial boosts; the entries are copied.</param>
    /// <exception cref="ArgumentException"><paramref name="keyword"/> is null, empty or whitespace.</exception>
    public ScoredSearchOption(string keyword, List<string> fields, int maxHits = 100, Dictionary<string, float> boosts = null)
    {
        if (string.IsNullOrWhiteSpace(keyword))
        {
            throw new ArgumentException("搜尋關鍵詞不能為空", nameof(keyword));
        }

        Keyword = keyword;
        Fields = fields ?? new List<string>();
        MaxHits = maxHits;

        // Copy rather than keep the caller's instance: the Boosts getter adds
        // default entries and must not modify a dictionary the caller still owns.
        _boosts = new Dictionary<string, float>(StringComparer.OrdinalIgnoreCase);
        if (boosts != null)
        {
            foreach (KeyValuePair<string, float> pair in boosts)
            {
                // Indexer instead of Add: keys differing only in case collapse, last one wins.
                _boosts[pair.Key] = pair.Value;
            }
        }
    }

    /// <summary>
    /// Sets (or replaces) the boost of one field.
    /// </summary>
    /// <param name="field">Field name.</param>
    /// <param name="boost">Boost to apply to the field.</param>
    public void SetBoosts(string field, float boost)
    {
        _boosts[field] = boost;
    }
}
輸出項為:
/// <summary>
/// Outcome of a weighted search.
/// </summary>
public class ScoredSearchResult : ISearchResult<SearchResultItem>
{
    /// <summary>
    /// Matched items; starts out as an empty list.
    /// </summary>
    public List<SearchResultItem> Items { get; set; } = new List<SearchResultItem>();

    /// <summary>
    /// Time the search took.
    /// </summary>
    public long Elapsed { get; set; }

    /// <summary>
    /// Number of matches.
    /// </summary>
    public int TotalHits { get; set; }
}
2.2.方法實作
/// <summary>
/// Weighted search: splits the keyword into terms, combines them into one query,
/// runs it with the per-field boosts and optional filter of <paramref name="option"/>,
/// and keeps only the hits whose score reaches <c>option.Score</c>.
/// </summary>
/// <param name="option">Keyword, fields, boosts, minimum score, filter and hit limit.</param>
/// <returns>The retained hits, their count and the elapsed time in milliseconds.</returns>
/// <exception cref="ArgumentNullException"><paramref name="option"/> is null.</exception>
/// <exception cref="ArgumentException">The option has no field list or an empty keyword.</exception>
public ScoredSearchResult ScoredSearch(ScoredSearchOption option)
{
    if (option == null)
    {
        throw new ArgumentNullException(nameof(option));
    }
    if (option.Fields == null)
    {
        throw new ArgumentException("Fields must not be null.", nameof(option));
    }
    if (string.IsNullOrWhiteSpace(option.Keyword))
    {
        // Keyword has a public setter, so the constructor check can be bypassed.
        throw new ArgumentException("搜尋關鍵詞不能為空", nameof(option));
    }

    ScoredSearchResult result = new ScoredSearchResult();
    Stopwatch watch = Stopwatch.StartNew(); // start timing
    using (DirectoryReader reader = DirectoryReader.Open(Directory))
    {
        IndexSearcher searcher = new IndexSearcher(reader);
        var queryParser = new MultiFieldQueryParser(LuceneVersion.LUCENE_48, option.Fields.ToArray(), Analyzer, option.Boosts);
        var terms = Cut(option.Keyword); // split the keyword into terms
        Query query = QueryExpression(queryParser, terms); // combine the terms into one query
        Sort sort = new Sort(SortField.FIELD_SCORE); // sort by score by default
        float minimumScore = option.Score;
        var matches = searcher.Search(query, option.Filter, option.MaxHits, sort, true, true).ScoreDocs;

        // One pass with a plain comparison: no expression tree to compile per call,
        // and the hit list is not enumerated a second time just to count it.
        foreach (var match in matches)
        {
            if (match.Score >= minimumScore)
            {
                var doc = searcher.Doc(match.Doc);
                result.Items.Add(new SearchResultItem
                {
                    Score = match.Score,
                    EntityId = doc.Get(CoreConstant.EntityId),
                    EntityName = doc.Get(CoreConstant.EntityType)
                });
            }
        }
        result.TotalHits = result.Items.Count;
    }
    watch.Stop(); // stop timing
    result.Elapsed = watch.ElapsedMilliseconds;
    return result;
}
其中私有方法Cut用於關鍵詞的分割:
/// <summary>Maximum number of terms <c>Cut</c> returns.</summary>
private const int MaxTerms = 10;

/// <summary>Upper-case AND / NOT / OR followed by a space, unless glued to a preceding ASCII letter (e.g. "BRAND ").</summary>
private static readonly Regex OperatorRegex = new Regex(@"(?<![A-Za-z])(AND|NOT|OR) ", RegexOptions.Compiled);

/// <summary>A double-quoted phrase; lazy, so two quoted phrases stay two matches.</summary>
private static readonly Regex QuotedPhraseRegex = new Regex(@""".+?""", RegexOptions.Compiled);

/// <summary>A whitespace-preceded "-term"; stops at the next whitespace.</summary>
private static readonly Regex ExcludedTermRegex = new Regex(@"\s-\S+", RegexOptions.Compiled);

/// <summary>A run of characters in the range U+4E00..U+9FA5.</summary>
private static readonly Regex ChineseRunRegex = new Regex(@"[\u4e00-\u9fa5]+", RegexOptions.Compiled);

/// <summary>An English word with optional leading/trailing punctuation or symbols.</summary>
private static readonly Regex EnglishWordRegex = new Regex(@"\p{P}?[A-Z]*[a-z]*[\p{P}|\p{S}]*", RegexOptions.Compiled);

/// <summary>ASCII letters immediately followed by digits/dots, e.g. "iphone12".</summary>
private static readonly Regex LettersThenDigitsRegex = new Regex("([A-Za-z]+)([0-9.]+)", RegexOptions.Compiled);

/// <summary>
/// Splits a keyword into search terms.
/// </summary>
/// <param name="keyword">Raw keyword as typed by the user.</param>
/// <returns>
/// The original keyword plus the quoted phrases, excluded ("-x") terms, Chinese runs,
/// English words and letter+digit tokens found in it; terms shorter than two characters
/// are dropped, duplicates removed, and at most <see cref="MaxTerms"/> terms are kept,
/// longest first. A keyword of at most two characters is returned as the only term;
/// a null, empty or whitespace keyword yields an empty list.
/// </returns>
private List<string> Cut(string keyword)
{
    if (string.IsNullOrWhiteSpace(keyword))
    {
        return new List<string>();
    }

    // The keyword itself is always the first candidate.
    List<string> result = new List<string> { keyword };
    if (keyword.Length <= 2) // too short to split
    {
        return result;
    }

    // Common operator words: AND becomes '+', NOT becomes '-', OR becomes a space.
    string remaining = OperatorRegex.Replace(keyword, m =>
    {
        switch (m.Groups[1].Value)
        {
            case "AND": return "+";
            case "NOT": return "-";
            default: return " ";
        }
    });

    remaining = MoveMatches(QuotedPhraseRegex, remaining, result, false); // must match
    remaining = MoveMatches(ExcludedTermRegex, remaining, result, true);  // must not match

    result.AddRange(ChineseRunRegex.Matches(remaining).Cast<Match>().Select(m => m.Value)); // Chinese
    result.AddRange(EnglishWordRegex.Matches(remaining).Cast<Match>().Select(m => m.Value)); // English words
    result.AddRange(LettersThenDigitsRegex.Matches(remaining).Cast<Match>()
        .SelectMany(m => m.Groups.Cast<Group>().Select(g => g.Value))); // letters + digits
    //result.AddRange(new JiebaSegmenter().Cut(remaining, true)); // optional: Jieba segmentation

    result.RemoveAll(s => s.Length < 2);
    return result.Distinct().OrderByDescending(s => s.Length).Take(MaxTerms).ToList();
}

/// <summary>
/// Adds every match of <paramref name="pattern"/> in <paramref name="text"/> to
/// <paramref name="terms"/> and returns the text with those matches removed.
/// </summary>
private static string MoveMatches(Regex pattern, string text, List<string> terms, bool trim)
{
    string remaining = text;
    // Matching runs over the unmodified input; only the returned copy is edited.
    foreach (Match match in pattern.Matches(text))
    {
        terms.Add(trim ? match.Value.Trim() : match.Value);
        remaining = remaining.Replace(match.Value, "");
    }
    return remaining;
}
私有方法QueryExpression用於查詢語句的拼接:
/// <summary>
/// Combines the given terms into one boolean query.
/// </summary>
/// <param name="queryParser">Parser used to turn each term into a query.</param>
/// <param name="terms">
/// Terms to combine: a term starting with a double quote must match, a term starting
/// with '-' must not match (the whole term is prohibited), any other term may match
/// and is parsed with a trailing '~'.
/// </param>
/// <returns>The combined query.</returns>
private BooleanQuery QueryExpression(MultiFieldQueryParser queryParser, List<string> terms)
{
    BooleanQuery query = new BooleanQuery();
    foreach (string term in terms)
    {
        if (term.StartsWith("\"", StringComparison.Ordinal))
        {
            query.Add(queryParser.Parse(term.Trim('"')), Occur.MUST); // must match
        }
        else if (term.StartsWith("-", StringComparison.Ordinal))
        {
            // Strip the '-' before parsing: the parser would read it as its own
            // prohibit operator, and adding an already-negated clause as MUST_NOT
            // does not exclude the term.
            string prohibited = term.TrimStart('-');
            if (!string.IsNullOrWhiteSpace(prohibited))
            {
                query.Add(queryParser.Parse(prohibited), Occur.MUST_NOT); // must not match
            }
        }
        else
        {
            query.Add(queryParser.Parse(term.Replace("~", "") + "~"), Occur.SHOULD); // may match
        }
    }
    return query;
}
3.測試示例
寫一個示例方法對簡單查詢進行測試:
/// <summary>
/// Sample: runs the simple index search and loads the entity behind every hit.
/// </summary>
/// <param name="option">Search input handed to the search manager unchanged.</param>
/// <returns>One repository lookup result per hit, in hit order.</returns>
public List<DataContent> SingleSearch(SingleSearchOption option)
{
    SingleSearchResult hits = _searchManager.SingleSearch(option);
    // Look up each entity by the id stored with its hit.
    return hits.Items.ConvertAll<DataContent>(hit => _repository.Get(hit.EntityId));
}
目前索引的查詢和實體的查詢並沒有強關聯,因此實際上是查詢了兩次,後續會考慮根據業務需要將兩者結合起來。