SVN可视化平台(7) -- ElasticSearch 全文检索,Spring Data ES 和 RestHighLevelClient 的两种方式,解决最多查询一万条数据的问题

avatar 2021年12月22日08:50:51 6 5422 views
博主分享免费Java教学视频,B站账号:Java刘哥 ,长期提供技术问题解决、项目定制:本站商品点此

本文介绍 ES 查询数据的2种方式

最近在做SVN web化中,需要把数百万的SVN文件信息存储到数据库,然后存储到ES,实现全文检索功能。

最开始我用的是 Spring Data ES,后面遇到了一个问题:查询结果的总数最多只返回 10000 条。

我当时用的 Spring Data ES 写法没法解决这个限制,所以后面才改用 RestHighLevelClient 的方式

一、解决最多查询10000条数据的问题

在测试环境发现,分页返回总数最多10000条

原因是 ES 为了提高查询效率,默认只精确统计前 10000 条命中(track_total_hits 默认值是 10000);另外 index.max_result_window 默认也是 10000,限制了分页时 from + size 的最大值

 

解决办法是

第一步、

PUT http://127.0.0.1:9200/_all/_settings?preserve_existing=true
{
  "index":{
    "max_result_window":2000000000
  }
}

第二步、

在查询请求体里加上 "track_total_hits": true,让 ES 返回精确的命中总数

 

二、代码准备

1、依赖

<!-- Spring Boot starter for Spring Data Elasticsearch; the version is pinned explicitly to 2.3.4.RELEASE. -->
<dependency>
	<groupId>org.springframework.boot</groupId>
	<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
	<version>2.3.4.RELEASE</version>
</dependency>

 

2、配置文件

spring:
# Elasticsearch configuration
  elasticsearch:
    rest:
      # Node address(es) of the Elasticsearch REST endpoint (a local single node here).
      uris: ["http://127.0.0.1:9200"]
      # Connection timeout, 10 seconds.
      connection-timeout: 10s
      # Credentials are left empty in this example.
      username:
      password:

 

3、索引

PUT http://127.0.0.1:9200/common_file
{
    "settings": {
        "index": {
            "number_of_shards": "1",
            "number_of_replicas": "1"
        },
        "index.max_ngram_diff": 5,
        "analysis": {
            "analyzer": {
                "ngram_analyzer": {
                    "tokenizer": "ngram_tokenizer"
                }
            },
            "tokenizer": {
                "ngram_tokenizer": {
                    "type": "ngram",
                    "min_gram": 1,
                    "max_gram": 5,
                    "token_chars": [
                        "letter",
                        "digit"
                    ]
                }
            }
        }
    },
    "mappings": {
        "properties": {
            "repoPrefix": {
                "type": "keyword"
            },
            "fileName": {
                "type": "text",
                "analyzer": "ngram_analyzer"
            },
            "level": {
                "type": "long"
            },
            "leafFlag": {
                "type": "boolean"
            },
            "filePath": {
            	"type": "text",
                "fields": {
                    "keyword": {
                        "type": "keyword"
                    },
                    "text": {
                        "type": "text",
                        "analyzer": "ngram_analyzer"
                    }
                }
            },
            "creatorName": {
                "type": "text"
            },
            "description": {
                "type": "text"
            },
            "updateTime": {
                "type": "date"
            },
            "revision": {
                "type": "long"
            },
            "createTime": {
                "type": "date"
            },
            "fileSize": {
                "type": "long"
            },
            "updaterName": {
                "type": "text"
            },
            "_class": {
                "type": "keyword"
            },
            "id": {
                "type": "long"
            },
            "projectId": {
                "type": "long"
            },
            "fileType": {
                "type": "long"
            },
            "roleAuthList": {
            	"type": "nested",
                "properties": {
                	"roleCode" : {
                		"type": "keyword"
                	},
                	"auth" : {
                		"type": "text"
                	}
                }
            }
        }
    }
}

 

4、实体类

import lombok.Data;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;
import org.springframework.stereotype.Component;


import javax.persistence.Id;
import java.io.Serializable;
import java.util.Date;
import java.util.List;

/**
 * Elasticsearch document stored in the {@code common_file} index; one instance
 * describes a single file or folder.
 *
 * <p>Accessors, {@code equals}/{@code hashCode} and {@code toString} are generated
 * by Lombok's {@code @Data}.
 *
 * <p>The former {@code indexStoreType = "common_file"} attribute was removed:
 * that attribute is the Elasticsearch {@code index.store.type} setting, for which
 * an index name is not a valid value, so index creation through Spring Data
 * would be rejected. The annotation default is used instead.
 *
 * <p>NOTE(review): {@code @Component} registers this entity as a Spring bean,
 * which an entity normally does not need. It is kept so that any existing
 * injection point keeps working -- confirm and remove if unused.
 */
@Component
@Document(indexName = "common_file")
@Data
public class EsCommonFile implements Serializable
{


    /**
     * Document ID.
     *
     * <p>NOTE(review): the imported {@code Id} is the JPA annotation
     * ({@code javax.persistence.Id}); presumably the field is still treated as
     * the document id because it is named {@code id} -- verify, and consider
     * {@code org.springframework.data.annotation.Id} instead.
     */
    @Id
    private Long id;

    /**
     * Project ID.
     */
    private Long projectId;

    /**
     * File name.
     */
    private String fileName;

    /**
     * File type (1 = file, 2 = folder).
     */
    private Integer fileType;

    /**
     * Repository prefix (presumably the prefix of the repository the file
     * belongs to -- confirm against the code that fills it).
     */
    private String repoPrefix;

    /**
     * File path.
     */
    private String filePath;

    /**
     * File size in bytes.
     */
    private Long fileSize;


    /**
     * File revision.
     */
    private Long revision;

    /**
     * Account of the creator.
     */
    private String creatorName;

    /**
     * Account of the last modifier.
     */
    private String updaterName;

    /**
     * Description.
     */
    private String description;

    /**
     * Depth of the entry: the repository directory is 1 and every
     * sub-directory level adds 1.
     */
    private Integer level;

    /**
     * Leaf flag. The original note reads "1 = no sub-directories,
     * 0 = has sub-directories"; presumably {@code true} means the entry has no
     * sub-directories -- TODO confirm.
     */
    private Boolean leafFlag;

    /**
     * Creation time.
     */
    private Date createTime;


    /**
     * Last update time.
     */
    private Date updateTime;

    /**
     * Authorization list, mapped as a nested field.
     */
    @Field(type = FieldType.Nested)
    private List<CommonFileRoleAuth> roleAuthList;

}

 

 

三、Spring Data ES 查询方式 (不推荐,无法解决查询限制10000的问题)

Spring Data ES简单,可以用来做一些添加,删除,修改、简单查询

复杂查询有点力不从心,比如我这里的限制10000的问题

代码我也贴上

@Autowired
private EsCommonFileRepository esCommonFileRepository;

/**
 * Searches the {@code common_file} index through the Spring Data repository and
 * returns one page of files the logged-in user is allowed to read.
 *
 * <p>Optional filters, each applied only when the DTO value is non-empty:
 * <ul>
 *   <li>{@code level}: range query, {@code level >= dto.level + 1};</li>
 *   <li>{@code repoPrefix}: match query on {@code repoPrefix};</li>
 *   <li>keywords: match query (operator AND) on {@code filePath.text} when the
 *       search type is {@code "filePath"}, otherwise on {@code fileName}.</li>
 * </ul>
 * The permission filter is always applied. Results are sorted descending by
 * {@code fileType}, {@code _score} and {@code updateTime}.
 *
 * @param fileSearchDTO search criteria plus the 1-based page number and page size
 * @return page wrapper holding the total hit count and the converted rows
 */
private LayPage<CommonFileVO> searchEsFile(FileSearchDTO fileSearchDTO)
{
	// Root query; every condition below is added as a "must" clause.
	BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();

	// Level filter: from() is inclusive, so this is level >= (given level + 1).
	if (CommonUtil.isNotEmpty(fileSearchDTO.getLevel()))
	{
		boolQueryBuilder.must(QueryBuilders.rangeQuery("level").from(fileSearchDTO.getLevel() + 1));
	}
	// Repository prefix filter.
	if (CommonUtil.isNotEmpty(fileSearchDTO.getRepoPrefix()))
	{
		boolQueryBuilder.must(QueryBuilders.matchQuery("repoPrefix", fileSearchDTO.getRepoPrefix()).operator(Operator.AND));
	}

	// Keyword search.
	if (CommonUtil.isNotEmpty(fileSearchDTO.getSearchKeywords()))
	{
		// Search in the file path when requested, otherwise in the file name.
		String keywordField = "filePath".equals(fileSearchDTO.getSearchType()) ? "filePath.text" : "fileName";
		boolQueryBuilder.must(QueryBuilders.matchQuery(keywordField, fileSearchDTO.getSearchKeywords()).operator(Operator.AND));
	}

	// Permission filter.
	// Work on a copy so the list owned by the role service is never modified
	// (it may be cached, shared or unmodifiable).
	List<String> roleCodes = new ArrayList<>(prjUserRoleService.getLoginUserRoleCodeList());
	// "*" is the wildcard role code that applies to every user.
	roleCodes.add("*");
	List<String> authList = new ArrayList<>();
	authList.add("rw");
	authList.add("r");
	// Both conditions live inside ONE nested query so that they must hold for the
	// same roleAuthList entry. Two separate nested queries would also accept a
	// document where one entry matches the role and a different entry carries
	// the read permission.
	BoolQueryBuilder roleAuthQuery = new BoolQueryBuilder();
	roleAuthQuery.must(QueryBuilders.termsQuery("roleAuthList.roleCode", roleCodes));
	roleAuthQuery.must(QueryBuilders.termsQuery("roleAuthList.auth", authList));
	boolQueryBuilder.must(QueryBuilders.nestedQuery("roleAuthList", roleAuthQuery, ScoreMode.None));

	// PageRequest is 0-based and rejects negative indexes, so page numbers
	// below 1 are treated as the first page.
	int pageIndex = Math.max(fileSearchDTO.getPage(), 1) - 1;
	PageRequest pageRequest = PageRequest.of(pageIndex, fileSearchDTO.getLimit(),
			Sort.Direction.DESC, "fileType", "_score", "updateTime");
	org.springframework.data.domain.Page<EsCommonFile> esCommonFilePage = esCommonFileRepository.search(boolQueryBuilder, pageRequest);

	// Convert the index documents to view objects.
	List<CommonFileVO> commonFileVOList = new ArrayList<>();
	for (EsCommonFile esCommonFile : esCommonFilePage.getContent())
	{
		CommonFileVO commonFileVO = new CommonFileVO();
		BeanUtils.copyProperties(esCommonFile, commonFileVO);
		commonFileVOList.add(commonFileVO);
	}

	LayPage<CommonFileVO> result = new LayPage<>();
	result.setCount(esCommonFilePage.getTotalElements());
	result.setData(commonFileVOList);
	return result;
}

需要自己创建一个 repository 和开启扫包

具体可以看我之前文章

 

四、RestHighLevelClient 查询方式 (推荐)

@Autowired
private RestHighLevelClient restHighLevelClient;

/**
 * Searches the {@code common_file} index with the {@code RestHighLevelClient}
 * and returns one page of files the logged-in user is allowed to read.
 *
 * <p>Optional filters, each applied only when the DTO value is non-empty:
 * <ul>
 *   <li>{@code level}: range query, {@code level >= dto.level + 1};</li>
 *   <li>{@code repoPrefix}: match query on {@code repoPrefix};</li>
 *   <li>{@code filePath}: prefix query on {@code filePath.keyword}
 *       (like {@code filePath LIKE 'value%'});</li>
 *   <li>keywords: match query (operator AND) on {@code filePath.text} when the
 *       search type is {@code "filePath"}, otherwise on {@code fileName}.</li>
 * </ul>
 * The permission filter is always applied. {@code track_total_hits} is enabled
 * so the reported total is the real hit count. Results are sorted descending by
 * {@code fileType}, {@code _score} and {@code updateTime}.
 *
 * @param fileSearchDTO search criteria plus the 1-based page number and page size
 * @return page wrapper holding the total hit count and the converted rows
 * @throws Exception if the search request fails
 */
private LayPage<CommonFileVO> newSearchEsFile(FileSearchDTO fileSearchDTO) throws Exception
{
	// Root query; every condition below is added as a "must" clause.
	BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();

	// Level filter: from() is inclusive, so this is level >= (given level + 1).
	if (CommonUtil.isNotEmpty(fileSearchDTO.getLevel()))
	{
		boolQueryBuilder.must(QueryBuilders.rangeQuery("level").from(fileSearchDTO.getLevel() + 1));
	}
	// Repository prefix filter.
	if (CommonUtil.isNotEmpty(fileSearchDTO.getRepoPrefix()))
	{
		boolQueryBuilder.must(QueryBuilders.matchQuery("repoPrefix", fileSearchDTO.getRepoPrefix()).operator(Operator.AND));
	}
	// Path prefix filter. A prefix query takes the value literally, whereas a
	// wildcard query built as value + "*" would also interpret any '*' or '?'
	// contained in the supplied path.
	if (CommonUtil.isNotEmpty(fileSearchDTO.getFilePath()))
	{
		boolQueryBuilder.must(QueryBuilders.prefixQuery("filePath.keyword", fileSearchDTO.getFilePath()));
	}

	// Keyword search.
	if (CommonUtil.isNotEmpty(fileSearchDTO.getSearchKeywords()))
	{
		// Search in the file path when requested, otherwise in the file name.
		String keywordField = "filePath".equals(fileSearchDTO.getSearchType()) ? "filePath.text" : "fileName";
		boolQueryBuilder.must(QueryBuilders.matchQuery(keywordField, fileSearchDTO.getSearchKeywords()).operator(Operator.AND));
	}

	// Permission filter.
	// Work on a copy so the list owned by the role service is never modified
	// (it may be cached, shared or unmodifiable).
	List<String> roleCodes = new ArrayList<>(prjUserRoleService.getLoginUserRoleCodeList());
	// "*" is the wildcard role code that applies to every user.
	roleCodes.add("*");
	// Only entries granting "rw" or "r" give read access.
	List<String> authList = new ArrayList<>();
	authList.add("rw");
	authList.add("r");
	// roleAuthList is a nested field. Both conditions live inside ONE nested
	// query so that they must hold for the same entry. Two separate nested
	// queries would also accept a document where one entry matches the role and
	// a different entry carries the read permission.
	BoolQueryBuilder roleAuthQuery = new BoolQueryBuilder();
	roleAuthQuery.must(QueryBuilders.termsQuery("roleAuthList.roleCode", roleCodes));
	roleAuthQuery.must(QueryBuilders.termsQuery("roleAuthList.auth", authList));
	boolQueryBuilder.must(QueryBuilders.nestedQuery("roleAuthList", roleAuthQuery, ScoreMode.None));

	SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
	sourceBuilder.query(boolQueryBuilder);
	// Sends "track_total_hits": true.
	sourceBuilder.trackTotalHits(true);
	// Sort order.
	sourceBuilder.sort("fileType", SortOrder.DESC);
	sourceBuilder.sort("_score", SortOrder.DESC);
	sourceBuilder.sort("updateTime", SortOrder.DESC);
	// Paging: page numbers below 1 are treated as the first page.
	int pageNum = Math.max(fileSearchDTO.getPage(), 1);
	sourceBuilder.from((pageNum - 1) * fileSearchDTO.getLimit());
	sourceBuilder.size(fileSearchDTO.getLimit());

	SearchRequest request = new SearchRequest();
	// Target index.
	request.indices("common_file");
	request.source(sourceBuilder);

	SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
	SearchHits hits = response.getHits();

	// Convert every hit: JSON source -> index document -> view object.
	List<CommonFileVO> commonFileVOList = new ArrayList<>();
	for (SearchHit searchHit : hits)
	{
		EsCommonFile esCommonFile = JSON.parseObject(searchHit.getSourceAsString(), EsCommonFile.class);
		CommonFileVO commonFileVO = new CommonFileVO();
		BeanUtils.copyProperties(esCommonFile, commonFileVO);
		commonFileVOList.add(commonFileVO);
	}

	LayPage<CommonFileVO> result = new LayPage<>();
	// Total number of hits for the query.
	result.setCount(hits.getTotalHits().value);
	result.setData(commonFileVOList);
	return result;
}

 

 

  • 微信
  • 交流学习,资料分享
  • weinxin
  • 个人淘宝
  • 店铺名:言曌博客咨询部

  • (部分商品未及时上架淘宝)

发表评论

avatar 登录者:匿名
匿名评论,评论回复后会有邮件通知

  

已通过评论:0   待审核评论数:0