Springboot集成elasticsearch 使用IK+拼音分词
docker安装ES
下载
docker pull docker.elastic.co/elasticsearch/elasticsearch:6.3.2
启动
docker run -d --name="es" -p 9200:9200 -p 9300:9300 -e "cluster.name=elasticsearch" -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:6.3.2
springboot集成
maven依赖
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
<version>2.1.4.RELEASE</version>
</dependency>
配置文件
spring:
  data:
    elasticsearch:
      cluster-name: elasticsearch
      # 将 ip 替换为 ES 服务器的实际地址
      cluster-nodes: ip:9300
实体类
package com.my.entity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.elasticsearch.annotations.Document;
/**
 * Goods document stored in Elasticsearch.
 *
 * <p>{@code indexName} is the index name (comparable to a database name). It must be
 * lowercase, otherwise {@code org.elasticsearch.indices.InvalidIndexNameException} is thrown.
 * {@code type} is the mapping type (comparable to a table name).
 *
 * <p>Accessors, {@code equals}/{@code hashCode}/{@code toString} and both constructors are
 * generated by Lombok.
 *
 * @author M.Y
 * @date 2019/5/30
 * @since 1.0.0
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Document(indexName = "contents", type = "content")
public class GoodsInfo {

    /** Document id. */
    private Long id;

    /** Display name of the goods item. */
    private String name;

    /** Free-text description. */
    private String des;
}
DAO
package com.my.dao;
import com.my.entity.GoodsInfo;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
import org.springframework.stereotype.Component;
/**
 * Spring Data Elasticsearch repository for {@link GoodsInfo} documents keyed by a
 * {@code Long} id.
 *
 * <p>Declares no methods of its own; everything callers use is inherited from
 * {@link ElasticsearchRepository}.
 *
 * @author M.Y
 * @date 2019/5/30
 * @since 1.0.0
 */
@Component
public interface GoodsRepository extends ElasticsearchRepository<GoodsInfo,Long> {
}
Controller
package com.my.controller;
import com.my.dao.GoodsRepository;
import com.my.entity.GoodsInfo;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
/**
 * REST endpoints demonstrating basic create/read/update/delete operations on
 * {@link GoodsInfo} documents through {@link GoodsRepository}.
 *
 * @author M.Y
 * @date 2019/5/30
 * @since 1.0.0
 */
@RestController
public class GoodsController {

    @Autowired
    private GoodsRepository goodsRepository;

    /**
     * Saves a new goods document whose id and name are derived from the current time.
     *
     * <p>Example: http://localhost:8080/save?des=
     *
     * @param des description stored on the new document
     * @return the literal string {@code "success"}
     */
    @GetMapping("save")
    public String save(String des) {
        // Read the clock once so the generated id and the name suffix always agree.
        long now = System.currentTimeMillis();
        goodsRepository.save(new GoodsInfo(now, "商品" + now, des));
        return "success";
    }

    /**
     * Deletes the document with the given id.
     *
     * <p>Example: http://localhost:8080/delete?id=
     *
     * @param id id of the document to delete
     * @return the literal string {@code "success"}
     */
    @GetMapping("delete")
    public String delete(long id) {
        goodsRepository.deleteById(id);
        return "success";
    }

    /**
     * Saves a document built from the given id, name and description.
     *
     * <p>Example: http://localhost:8080/update?name=修改&description=修改&id=
     * (the request parameter is {@code description}, not {@code des}).
     *
     * @param id          id of the document to save
     * @param name        new name
     * @param description new description
     * @return the literal string {@code "success"}
     */
    @GetMapping("update")
    public String update(long id, String name, String description) {
        goodsRepository.save(new GoodsInfo(id, name, description));
        return "success";
    }

    /**
     * Looks up a single document by id.
     *
     * <p>Example: http://localhost:8080/getOne?id=
     *
     * @param id id of the document to fetch
     * @return the matching document, or {@code null} when none exists
     */
    @GetMapping("getOne")
    public GoodsInfo getOne(long id) {
        return goodsRepository.findById(id).orElse(null);
    }
}
测试
谷歌elasticsearch-head插件查看
安装IK分词插件
进入容器
docker exec -it es bash
进入目录
cd /usr/share/elasticsearch/
下载安装插件(注意版本要与es一致)
./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.3.2/elasticsearch-analysis-ik-6.3.2.zip
退出容器
exit
重启容器
docker restart es
验证结果
ik_max_word:尽可能多的分词
ik_smart:尽可能少的分词
安装拼音转换插件
进入容器
docker exec -it es bash
进入目录
cd /usr/share/elasticsearch/
下载安装插件(注意版本要与es一致)
./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v6.3.2/elasticsearch-analysis-pinyin-6.3.2.zip
退出容器
exit
重启容器
docker restart es
新建索引
关闭索引(更新配置前要关闭索引,不然会报错)
创建拼音分词
{
"analysis" : {
"analyzer" : {
"pinyin_analyzer" : {
"tokenizer" : "my_pinyin"
}
},
"tokenizer" : {
"my_pinyin" : {
"type" : "pinyin",
"keep_separate_first_letter" : false,
"keep_full_pinyin" : true,
"keep_original" : true,
"limit_first_letter_length" : 16,
"lowercase" : true,
"remove_duplicated_term" : true
}
}
}
}
验证结果
Springboot集成以上分词
新建索引,新增分词配置
{
"analysis" : {
"analyzer" : {
"pinyin_analyzer" : {
"tokenizer" : "my_pinyin"
}
},
"tokenizer" : {
"my_pinyin" : {
"type" : "pinyin",
"keep_separate_first_letter" : false,
"keep_full_pinyin" : true,
"keep_original" : true,
"limit_first_letter_length" : 16,
"lowercase" : true,
"remove_duplicated_term" : true
}
}
}
}
{
"properties": {
"字段名": {
"type": "keyword",
"fields": {
"pinyin": {
"type": "text",
"store": false,
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
}
}
}
新实体类
package com.my.entity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Mapping;
import java.util.Date;
/**
 * Film document stored in the {@code new_film} index under type {@code new}.
 *
 * <p>Lombok generates the accessors and constructors; {@link #toString()} is written by
 * hand and leaves out {@code created}.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Document(indexName = "new_film", type = "new")
public class FilmEntity {

    private Long id;
    private String name;
    private String director;
    private Date created;

    /**
     * Renders the id, name and director of this film; {@code created} is not included.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("FilmEntity [id=");
        text.append(id);
        text.append(", name=").append(name);
        text.append(", director=").append(director);
        text.append("]");
        return text.toString();
    }
}
新增测试数据
// Spring-injected repository used by the endpoints below to save and search FilmEntity documents.
@Autowired
FilmRepository filmRepository;
/**
 * Saves a test {@link FilmEntity} whose id is the current epoch millisecond and whose
 * creation date is the current instant.
 *
 * @param des  text stored in the entity's {@code director} field
 * @param name text stored in the entity's {@code name} field
 * @return the literal string {@code "success"}
 */
@GetMapping("save")
public String save(String des, String name) {
    // Take the timestamp directly as an Instant. Going through LocalDateTime.now() and
    // atZone(systemDefault()) is ambiguous during a daylight-saving overlap and can
    // resolve to an instant one hour off.
    Date created = Date.from(Instant.now());
    long id = System.currentTimeMillis();
    filmRepository.save(new FilmEntity(id, name, des, created));
    return "success";
}
查询
/**
 * Searches films with the query produced by {@link #structureQuery(String)} and returns
 * the content of the resulting page.
 *
 * @param name the search text
 * @return the films on the returned result page
 */
@GetMapping("search")
public List<FilmEntity> search(String name) {
    NativeSearchQueryBuilder criteriaBuilder = new NativeSearchQueryBuilder();
    criteriaBuilder.withQuery(structureQuery(name));
    SearchQuery criteria = criteriaBuilder.build();
    return filmRepository.search(criteria).getContent();
}
/**
 * Builds the mixed Chinese / pinyin search query.
 *
 * <p>A {@code dis_max} query is used so that a document is scored by the best-scoring of
 * its sub-queries. Only {@code name}, {@code name.pinyin} and {@code director} are
 * matched; the {@code name} and {@code director} matches carry a boost of 2.
 *
 * @param content the search text
 * @return the dis_max query combining the three match queries
 */
public DisMaxQueryBuilder structureQuery(String content) {
    return QueryBuilders.disMaxQuery()
            .add(QueryBuilders.matchQuery("name", content).boost(2f))
            .add(QueryBuilders.matchQuery("name.pinyin", content))
            .add(QueryBuilders.matchQuery("director", content).boost(2f));
}
http://localhost:8080/film/search?name=中国
返回结果:
[
{
"id": 1559724973532,
"name": "ceshi",
"director": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船",
"created": "2019-06-05T08:56:13.531+0000"
},
{
"id": 1559724949646,
"name": "测试",
"director": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
"created": "2019-06-05T08:55:49.645+0000"
},
{
"id": 1559724960792,
"name": "小明",
"director": "美国留给伊拉克的是个烂摊子吗",
"created": "2019-06-05T08:56:00.792+0000"
}
]
http://localhost:8080/film/search?name=ceshi
http://localhost:8080/film/search?name=测试
[
{
"id": 1559786111119,
"name": "ceshi",
"director": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船",
"created": "2019-06-06T01:55:11.119+0000"
},
{
"id": 1559786123724,
"name": "测试",
"director": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
"created": "2019-06-06T01:55:23.724+0000"
}
]
构建高亮查询
// Spring-injected Elasticsearch transport client; the highlight query below calls prepareSearch on it directly.
@Autowired
TransportClient client;
/**
 * Runs the query from {@link #structureQuery(String)} against the {@code new_film} index
 * with highlighting enabled, and returns the hits with their {@code name} and
 * {@code director} values replaced by the highlighted text where a highlight exists.
 *
 * @param des the search text
 * @return the matching films, with matched terms wrapped in {@code <span>} tags
 */
@GetMapping("query")
public List<FilmEntity> query(String des) {
    QueryBuilder query = structureQuery(des);

    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.preTags("<span>");   // opening tag placed before each matched term
    highlightBuilder.postTags("</span>"); // closing tag placed after each matched term
    // Zero fragments makes Elasticsearch return the entire field highlighted. The
    // highlighted text overwrites the entity field below, so a partial fragment would
    // silently truncate long values.
    highlightBuilder.numOfFragments(0);
    highlightBuilder.field("name");
    highlightBuilder.field("director");
    // Original author's note: adding "name.pinyin" as a highlight field had no visible effect.

    SearchResponse response = client.prepareSearch("new_film")
            .setTypes("new")
            .setQuery(query)
            .highlighter(highlightBuilder)
            .execute()
            .actionGet();

    List<FilmEntity> result = new ArrayList<>();
    for (SearchHit hit : response.getHits()) {
        Map<String, Object> entityMap = hit.getSourceAsMap();
        FilmEntity filmEntity =
                com.alibaba.fastjson.JSON.parseObject(JSON.toJSONString(entityMap), FilmEntity.class);

        String highlightedName = firstHighlight(hit, "name");
        if (highlightedName != null) {
            filmEntity.setName(highlightedName);
        }
        String highlightedDirector = firstHighlight(hit, "director");
        if (highlightedDirector != null) {
            filmEntity.setDirector(highlightedDirector);
        }
        result.add(filmEntity);
    }
    return result;
}

/**
 * Returns the first highlight fragment of {@code field} on {@code hit}, or {@code null}
 * when the hit carries no highlight (or no fragments) for that field.
 */
private static String firstHighlight(SearchHit hit, String field) {
    if (hit.getHighlightFields().get(field) == null) {
        return null;
    }
    Text[] fragments = hit.getHighlightFields().get(field).getFragments();
    if (fragments == null || fragments.length == 0) {
        return null;
    }
    return fragments[0].toString();
}
http://localhost:8080/film/query?des=中国
[
{
"id": 1559786111119,
"name": "ceshi",
"director": "<span>中</span>韩渔警冲突调查:韩警平均每天扣1艘<span>中</span><span>国</span>渔船",
"created": "2019-06-06T01:55:11.119+0000"
},
{
"id": 1559786123724,
"name": "测试",
"director": "<span>中</span><span>国</span>驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
"created": "2019-06-06T01:55:23.724+0000"
},
{
"id": 1559786119620,
"name": "小明",
"director": "美<span>国</span>留给伊拉克的是个烂摊子吗",
"created": "2019-06-06T01:55:19.620+0000"
}
]
http://localhost:8080/film/query?des=测试
[
{
"id": 1559786123724,
"name": "<span>测试</span>",
"director": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
"created": "2019-06-06T01:55:23.724+0000"
},
{
"id": 1559786111119,
"name": "ceshi",
"director": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船",
"created": "2019-06-06T01:55:11.119+0000"
}
]
代码地址