SVN可视化平台(7) -- ElasticSearch 全文检索,Spring Data ES 和 RestHighLevelClient 的两种方式,解决最多查询一万条数据的问题

avatar 2021年12月22日08:50:51 6 5778 views
博主分享免费Java教学视频,B站账号:Java刘哥 ,长期提供技术问题解决、项目定制:本站商品点此

本文介绍 ES 查询数据的2种方式

最近在做 SVN Web 化的过程中,需要把数百万条 SVN 文件信息存储到数据库,再同步到 ES,以实现全文检索功能。

最开始我用的是 Spring Data ES,后来发现一个问题:分页查询返回的总数最多只有 10000 条。

Spring Data ES 的 Repository 查询方式无法在请求中设置 track_total_hits,所以我后面才改用 RestHighLevelClient 的方式

一、解决最多查询10000条数据的问题

在测试环境发现,分页返回总数最多10000条

原因是 ES 为了提高查询效率,默认最多只精确统计 10000 条命中(track_total_hits 默认值为 10000),同时 index.max_result_window 默认也是 10000(from + size 不能超过该值)

 

解决办法是

第一步、

  1. PUT http://127.0.0.1:9200/_all/_settings?preserve_existing=true
  2. {
  3. "index":{
  4. "max_result_window":2000000000
  5. }
  6. }

第二步、

在请求里加 "track_total_hits": true,让 ES 返回精确的命中总数

 

二、代码准备

1、依赖

  1. <dependency>
  2. <groupId>org.springframework.boot</groupId>
  3. <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
  4. <version>2.3.4.RELEASE</version>
  5. </dependency>

 

2、配置文件

  spring:
    # Elasticsearch REST client configuration
    elasticsearch:
      rest:
        uris: ["http://127.0.0.1:9200"]
        connection-timeout: 10s
        username:
        password:

 

3、索引

  1. PUT http://127.0.0.1:9200/common_file
  2. {
  3. "settings": {
  4. "index": {
  5. "number_of_shards": "1",
  6. "number_of_replicas": "1"
  7. },
  8. "index.max_ngram_diff": 5,
  9. "analysis": {
  10. "analyzer": {
  11. "ngram_analyzer": {
  12. "tokenizer": "ngram_tokenizer"
  13. }
  14. },
  15. "tokenizer": {
  16. "ngram_tokenizer": {
  17. "type": "ngram",
  18. "min_gram": 1,
  19. "max_gram": 5,
  20. "token_chars": [
  21. "letter",
  22. "digit"
  23. ]
  24. }
  25. }
  26. }
  27. },
  28. "mappings": {
  29. "properties": {
  30. "repoPrefix": {
  31. "type": "keyword"
  32. },
  33. "fileName": {
  34. "type": "text",
  35. "analyzer": "ngram_analyzer"
  36. },
  37. "level": {
  38. "type": "long"
  39. },
  40. "leafFlag": {
  41. "type": "boolean"
  42. },
  43. "filePath": {
  44. "type": "text",
  45. "fields": {
  46. "keyword": {
  47. "type": "keyword"
  48. },
  49. "text": {
  50. "type": "text",
  51. "analyzer": "ngram_analyzer"
  52. }
  53. }
  54. },
  55. "creatorName": {
  56. "type": "text"
  57. },
  58. "description": {
  59. "type": "text"
  60. },
  61. "updateTime": {
  62. "type": "date"
  63. },
  64. "revision": {
  65. "type": "long"
  66. },
  67. "createTime": {
  68. "type": "date"
  69. },
  70. "fileSize": {
  71. "type": "long"
  72. },
  73. "updaterName": {
  74. "type": "text"
  75. },
  76. "_class": {
  77. "type": "keyword"
  78. },
  79. "id": {
  80. "type": "long"
  81. },
  82. "projectId": {
  83. "type": "long"
  84. },
  85. "fileType": {
  86. "type": "long"
  87. },
  88. "roleAuthList": {
  89. "type": "nested",
  90. "properties": {
  91. "roleCode" : {
  92. "type": "keyword"
  93. },
  94. "auth" : {
  95. "type": "text"
  96. }
  97. }
  98. }
  99. }
  100. }
  101. }

 

4、实体类

  1. import lombok.Data;
  2. import org.springframework.data.elasticsearch.annotations.Document;
  3. import org.springframework.data.elasticsearch.annotations.Field;
  4. import org.springframework.data.elasticsearch.annotations.FieldType;
  5. import org.springframework.stereotype.Component;
  6. import javax.persistence.Id;
  7. import java.io.Serializable;
  8. import java.util.Date;
  9. import java.util.List;
  10. /**
  11. * 文件类对应索引
  12. */
  13. @Component
  14. @Document(indexName = "common_file", indexStoreType = "common_file")
  15. @Data
  16. public class EsCommonFile implements Serializable
  17. {
  18. /**
  19. * ID
  20. */
  21. @Id
  22. private Long id;
  23. /**
  24. * 项目id
  25. */
  26. private Long projectId;
  27. /**
  28. * 文件名称
  29. */
  30. private String fileName;
  31. /**
  32. * 文件类型(1 file,2 folder)
  33. */
  34. private Integer fileType;
  35. /**
  36. * 文件路径
  37. */
  38. private String repoPrefix;
  39. /**
  40. * 文件路径
  41. */
  42. private String filePath;
  43. /**
  44. * 文件大小,单位字节
  45. */
  46. private Long fileSize;
  47. /**
  48. * 文件版本
  49. */
  50. private Long revision;
  51. /**
  52. * 创建人账号
  53. */
  54. private String creatorName;
  55. /**
  56. * 修改人账号
  57. */
  58. private String updaterName;
  59. /**
  60. * 描述
  61. */
  62. private String description;
  63. /**
  64. * 文件层级,仓库目录为1,子目录依次+1
  65. */
  66. private Integer level;
  67. /**
  68. * 是否存在子目录(1不存在,0存在)
  69. */
  70. private Boolean leafFlag;
  71. /**
  72. * 创建时间
  73. */
  74. private Date createTime;
  75. /**
  76. * 更新时间
  77. */
  78. private Date updateTime;
  79. /**
  80. * 授权列表
  81. */
  82. @Field(type = FieldType.Nested)
  83. private List<CommonFileRoleAuth> roleAuthList;
  84. }

 

 

三、Spring Data ES 查询方式 (不推荐,无法解决查询限制10000的问题)

Spring Data ES简单,可以用来做一些添加,删除,修改、简单查询

复杂查询有点力不从心,比如我这里的限制10000的问题

代码我也贴上

  1. @Autowired
  2. private EsCommonFileRepository esCommonFileRepository;
  3. private LayPage<CommonFileVO> searchEsFile(FileSearchDTO fileSearchDTO)
  4. {
  5. //查询对象
  6. BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
  7. // level查询
  8. if (CommonUtil.isNotEmpty(fileSearchDTO.getLevel()))
  9. {
  10. boolQueryBuilder.must(QueryBuilders.rangeQuery("level").from(fileSearchDTO.getLevel() + 1));
  11. }
  12. // repoPrefix查询
  13. if (CommonUtil.isNotEmpty(fileSearchDTO.getRepoPrefix()))
  14. {
  15. boolQueryBuilder.must(QueryBuilders.matchQuery("repoPrefix", fileSearchDTO.getRepoPrefix()).operator(Operator.AND));
  16. }
  17. // 关键字查询
  18. if (CommonUtil.isNotEmpty(fileSearchDTO.getSearchKeywords()))
  19. {
  20. // 搜文件路径
  21. if ("filePath".equals(fileSearchDTO.getSearchType()))
  22. {
  23. boolQueryBuilder.must(QueryBuilders.matchQuery("filePath.text", fileSearchDTO.getSearchKeywords()).operator(Operator.AND));
  24. }
  25. // 搜文件名称
  26. else
  27. {
  28. boolQueryBuilder.must(QueryBuilders.matchQuery("fileName", fileSearchDTO.getSearchKeywords()).operator(Operator.AND));
  29. }
  30. }
  31. // 权限
  32. List<String> loginUserRoleCodes = prjUserRoleService.getLoginUserRoleCodeList();
  33. List<String> authList = new ArrayList<>();
  34. authList.add("rw");
  35. authList.add("r");
  36. loginUserRoleCodes.add("*");
  37. boolQueryBuilder.must(QueryBuilders.nestedQuery("roleAuthList", QueryBuilders.termsQuery("roleAuthList.roleCode", loginUserRoleCodes), ScoreMode.None));
  38. boolQueryBuilder.must(QueryBuilders.nestedQuery("roleAuthList", QueryBuilders.termsQuery("roleAuthList.auth", authList), ScoreMode.None));
  39. PageRequest pageRequest = PageRequest.of(fileSearchDTO.getPage() - 1, fileSearchDTO.getLimit(),
  40. Sort.Direction.DESC, "fileType", "_score", "updateTime");
  41. org.springframework.data.domain.Page<EsCommonFile> esCommonFilePage = esCommonFileRepository.search(boolQueryBuilder, pageRequest);
  42. List<EsCommonFile> content = esCommonFilePage.getContent();
  43. List<CommonFileVO> commonFileVOList = new ArrayList<>();
  44. for (EsCommonFile esCommonFile : content)
  45. {
  46. CommonFileVO commonFileVO = new CommonFileVO();
  47. BeanUtils.copyProperties(esCommonFile, commonFileVO);
  48. commonFileVOList.add(commonFileVO);
  49. }
  50. LayPage<CommonFileVO> result = new LayPage<>();
  51. result.setCount(esCommonFilePage.getTotalElements());
  52. result.setData(commonFileVOList);
  53. return result;
  54. }

需要自己创建一个 repository 和开启扫包

具体可以看我之前文章

 

四、RestHighLevelClient 查询方式 (推荐)

  1. @Autowired
  2. private RestHighLevelClient restHighLevelClient;
  3. private LayPage<CommonFileVO> newSearchEsFile(FileSearchDTO fileSearchDTO) throws Exception
  4. {
  5. //查询对象
  6. BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
  7. // level查询 范围查询,level > 1 + #{level}
  8. if (CommonUtil.isNotEmpty(fileSearchDTO.getLevel()))
  9. {
  10. boolQueryBuilder.must(QueryBuilders.rangeQuery("level").from(fileSearchDTO.getLevel() + 1));
  11. }
  12. // repoPrefix查询,等值查询
  13. if (CommonUtil.isNotEmpty(fileSearchDTO.getRepoPrefix()))
  14. {
  15. boolQueryBuilder.must(QueryBuilders.matchQuery("repoPrefix", fileSearchDTO.getRepoPrefix()).operator(Operator.AND));
  16. }
  17. // filePath查询,模糊查询,类似 like '/项目/测试项目%'
  18. if (CommonUtil.isNotEmpty(fileSearchDTO.getFilePath()))
  19. {
  20. boolQueryBuilder.must(QueryBuilders.wildcardQuery("filePath.keyword", fileSearchDTO.getFilePath() + "*"));
  21. }
  22. // 关键字查询
  23. if (CommonUtil.isNotEmpty(fileSearchDTO.getSearchKeywords()))
  24. {
  25. // 搜文件路径,分词查询
  26. if ("filePath".equals(fileSearchDTO.getSearchType()))
  27. {
  28. boolQueryBuilder.must(QueryBuilders.matchQuery("filePath.text", fileSearchDTO.getSearchKeywords()).operator(Operator.AND));
  29. }
  30. // 搜文件名称,分词查询
  31. else
  32. {
  33. boolQueryBuilder.must(QueryBuilders.matchQuery("fileName", fileSearchDTO.getSearchKeywords()).operator(Operator.AND));
  34. }
  35. }
  36. // 根据权限过滤
  37. // 查询登录用户的角色编码
  38. List<String> loginUserRoleCodes = prjUserRoleService.getLoginUserRoleCodeList();
  39. List<String> authList = new ArrayList<>();
  40. // 只允许 auth 为 rw 或 r
  41. authList.add("rw");
  42. authList.add("r");
  43. // 只允许 roleCode 为登录用户的角色编码或 * (roleCodeList 是嵌套索引)
  44. loginUserRoleCodes.add("*");
  45. boolQueryBuilder.must(QueryBuilders.nestedQuery("roleAuthList", QueryBuilders.termsQuery("roleAuthList.roleCode", loginUserRoleCodes), ScoreMode.None));
  46. boolQueryBuilder.must(QueryBuilders.nestedQuery("roleAuthList", QueryBuilders.termsQuery("roleAuthList.auth", authList), ScoreMode.None));
  47. SearchRequest request = new SearchRequest();
  48. // 指定索引
  49. request.indices("common_file");
  50. SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
  51. // 传 "track_total_hits":true
  52. sourceBuilder.trackTotalHits(true);
  53. // 排序
  54. sourceBuilder.sort("fileType", SortOrder.DESC);
  55. sourceBuilder.sort("_score", SortOrder.DESC);
  56. sourceBuilder.sort("updateTime", SortOrder.DESC);
  57. // 设置从第几页开始,一页查询多少条
  58. int pageNum = fileSearchDTO.getPage();
  59. int start = (( pageNum < 1 ? 1 : pageNum) - 1) * fileSearchDTO.getLimit();
  60. sourceBuilder.from(start);
  61. sourceBuilder.size(fileSearchDTO.getLimit());
  62. sourceBuilder.query(boolQueryBuilder);
  63. request.source(sourceBuilder);
  64. SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
  65. SearchHits hits = response.getHits();
  66. List<CommonFileVO> commonFileVOList = new ArrayList<>();
  67. // 遍历查询结果
  68. for (SearchHit searchHit : hits) {
  69. EsCommonFile esCommonFile = JSON.parseObject(searchHit.getSourceAsString(),EsCommonFile.class);
  70. CommonFileVO commonFileVO = new CommonFileVO();
  71. BeanUtils.copyProperties(esCommonFile, commonFileVO);
  72. commonFileVOList.add(commonFileVO);
  73. }
  74. LayPage<CommonFileVO> result = new LayPage<>();
  75. // 设置查询总数
  76. result.setCount(hits.getTotalHits().value);
  77. result.setData(commonFileVOList);
  78. return result;
  79. }

 

 

  • 微信
  • 交流学习,资料分享
  • weinxin
  • 个人淘宝
  • 店铺名:言曌博客咨询部

  • (部分商品未及时上架淘宝)

发表评论

avatar 登录者:匿名
匿名评论,评论回复后会有邮件通知

  

已通过评论:0   待审核评论数:0