高级查询 · Elasticsearch7.x

演示使用的数据： ```json #创建student索引 PUT /student #添加下面数据用于演示 POST /student/_doc/1001 { "name": "zhangsan", "nickname": "zhangsan", "sex": "男", "age": 30 } POST /student/_doc/1002 { "name": "lisi", "nickname": "lisi", "sex": "男", "age": 20 } POST /student/_doc/1003 { "name": "wangwu", "nickname": "wangwu", "sex": "女", "age": 40 } POST /student/_doc/1004 { "name": "zhangsan01", "nickname": "zhangsan01", "sex": "女", "age": 50 } POST /student/_doc/1005 { "name": "zhangsan02", "nickname": "zhangsan02", "sex": "女", "age": 30 } ``` **1. 查询一个索引的所有文档** ```json #写法1 GET /student/_search #写法2 GET /student/_search { "query": { "match_all": {} } } ``` 结果如下： ```json { "took" : 0, #查询花销的时间 "timed_out" : false, #是否超时，false-不超时 "_shards" : { #分片信息 "total" : 1, #分片总数 "successful" : 1, #分片成功次数 "skipped" : 0, #分片被忽略次数 "failed" : 0 #分片失败次数 }, "hits" : { #搜索命中结果 "total" : { #搜索条件匹配文档总数 "value" : 5, #条件命中条数 "relation" : "eq" #计数规则，eq-计数准确、gte-计数不准确 }, "max_score" : 1.0, #匹配度分值 "hits" : [ #命中结果集合 { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.0, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan", "sex" : "男", "age" : 30 } }, { "_index" : "student", "_type" : "_doc", "_id" : "1002", "_score" : 1.0, "_source" : { "name" : "lisi", "nickname" : "lisi", "sex" : "男", "age" : 20 } }, { "_index" : "student", "_type" : "_doc", "_id" : "1003", "_score" : 1.0, "_source" : { "name" : "wangwu", "nickname" : "wangwu", "sex" : "女", "age" : 40 } }, { "_index" : "student", "_type" : "_doc", "_id" : "1004", "_score" : 1.0, "_source" : { "name" : "zhangsan01", "nickname" : "zhangsan01", "sex" : "女", "age" : 50 } }, { "_index" : "student", "_type" : "_doc", "_id" : "1005", "_score" : 1.0, "_source" : { "name" : "zhangsan02", "nickname" : "zhangsan02", "sex" : "女", "age" : 30 } } ] } } ``` **2. 
匹配查询** `match` 匹配类型查询，会把查询条件进行分词，然后进行查询，多个词条之间是 or 的关系。 ```json #写法1 GET /student/_search?q=name:zhangsan #写法2 GET /student/_search { "query": { "match": { "name": "zhangsan" } } } ``` 结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 1.3862942, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.3862942, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan", "sex" : "男", "age" : 30 } } ] } } ``` **3. 字段匹配查询** `multi_match` 与 `match` 类似，不同的是它可以在多个字段中查询，多个字段是 OR 关系。 ```json GET /student/_search { "query": { "multi_match": { "query": "zhangsan", "fields": ["name", "nickname"] #name=zhangsan 或者 nickname=zhangsan } } } ``` 结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 1.540445, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.540445, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan", "sex" : "男", "age" : 30 } } ] } } ``` **4. 关键字精准查询** `term` 查询，精确的关键词匹配查询，不对查询条件进行分词。 ```json GET /student/_search { "query": { "term": { "name": { "value": "zhangsan" } } } } ``` 结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 1.3862942, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.3862942, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan", "sex" : "男", "age" : 30 } } ] } } ``` **5. 
多关键字精确查询** `terms` 查询和 `term` 查询一样，但它允许你指定多值进行匹配。 ```json GET /student/_search { "query": { "terms": { "name": ["zhangsan", "lisi"] #name=zhangsan 或者name=lisi } } } ``` 结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.0, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan", "sex" : "男", "age" : 30 } }, { "_index" : "student", "_type" : "_doc", "_id" : "1002", "_score" : 1.0, "_source" : { "name" : "lisi", "nickname" : "lisi", "sex" : "男", "age" : 20 } } ] } } ``` **6. 只查询指定字段** 默认情况下，Elasticsearch 在搜索的结果中，会把文档中保存在`_source` 的所有字段都返回。如果我们只想获取其中的部分字段，我们可以添加`_source` 的过滤。 ```json #写法1 GET /student/_search?q=nickname:zhangsan&_source=name,nickname #写法2 GET /student/_search { "_source": ["name", "nickname"], #只查询name和nickname字段 "query": { "terms": { "nickname": ["zhangsan"] #条件是nickname=zhangsan } } } ``` 结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 1.3862942, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.3862942, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan" } } ] } } ``` **7. 
过滤字段** `includes`：来指定想要显示的字段。 `excludes`：来指定不想要显示的字段。（1）只查询`includes`指定的字段。 ```json GET /student/_search { "_source": { "includes": ["name", "nickname"] #只查询name、nickname字段 }, "query": { "terms": { "nickname": ["zhangsan", "lisi"] #条件是nickname=zhangsan 或者nickname=lisi } } } ``` 结果如下： ```json { "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.0, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan" } }, { "_index" : "student", "_type" : "_doc", "_id" : "1002", "_score" : 1.0, "_source" : { "name" : "lisi", "nickname" : "lisi" } } ] } } ``` （2）除了`excludes`指定的字段不查询，其他字段都查询。 ```json GET /student/_search { "_source": { "excludes": ["name", "nickname"] #不查询name、nickname字段 }, "query": { "terms": { "nickname": ["zhangsan", "lisi"] #条件是nickname=zhangsan 或者nickname=lisi } } } ``` 结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.0, "_source" : { "sex" : "男", "age" : 30 } }, { "_index" : "student", "_type" : "_doc", "_id" : "1002", "_score" : 1.0, "_source" : { "sex" : "男", "age" : 20 } } ] } } ``` **8. 
组合查询** `bool`把各种其它查询通过`must`（必须）、`must_not`（必须不）、`should`（应该）的方式进行组合。 ```json GET /student/_search { "query": { "bool": { "must": [ { "match": { "name": "zhangsan" } } ], "must_not": [ { "match": { "age": 40 } } ], "should": [ { "match": { "sex": "男" } } ] } } } ``` 结果如下： ```json { "took" : 2, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 2.261763, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 2.261763, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan", "sex" : "男", "age" : 30 } } ] } } ``` **9. 范围查询** `range` 查询找出那些落在指定区间内的数字或者时间。`range` 查询允许以下操作符。 ``` gt 大于（>） gte 大于等于（>=） lt 小于（<） lte 小于等于（<=） ``` ```json #查询age在[30, 35]区间的文档 GET /student/_search?q=age:[30 TO 35] #查询age>=40的文档 GET /student/_search?q=age:>=40 #查询age在[30, 35]区间的文档 GET /student/_search { "query": { "range": { "age": { "gte": 30, "lte": 35 } } } } ``` 查询age在[30, 35]区间结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.0, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan", "sex" : "男", "age" : 30 } }, { "_index" : "student", "_type" : "_doc", "_id" : "1005", "_score" : 1.0, "_source" : { "name" : "zhangsan02", "nickname" : "zhangsan02", "sex" : "女", "age" : 30 } } ] } } ``` **10. 
模糊查询** 返回包含与搜索字词相似的字词的文档。编辑距离是将一个术语转换为另一个术语所需的一个字符更改的次数。这些更改可以包括： * 更改字符（box → fox） * 删除字符（black → lack） * 插入字符（sic → sick） * 转置两个相邻字符（act → cat）为了找到相似的术语，`fuzzy` 查询会在指定的编辑距离内创建一组搜索词的所有可能的变体或扩展。然后查询返回每个扩展的完全匹配。通过 `fuzziness` 修改编辑距离。一般使用默认值 AUTO，根据术语的长度生成编辑距离。 ```json GET /student/_search { "query": { "fuzzy": { "name": { "value": "zhangsan" } } } } GET /student/_search { "query": { "fuzzy": { "name": { "value": "zhangsan" , "fuzziness": 1 } } } } ``` 结果如下： ```json #########不指定编辑距离fuzziness的结果######### { "took" : 3, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 3, "relation" : "eq" }, "max_score" : 1.3862942, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.3862942, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan", "sex" : "男", "age" : 30 } }, { "_index" : "student", "_type" : "_doc", "_id" : "1004", "_score" : 1.0397208, "_source" : { "name" : "zhangsan01", "nickname" : "zhangsan01", "sex" : "女", "age" : 50 } }, { "_index" : "student", "_type" : "_doc", "_id" : "1005", "_score" : 1.0397208, "_source" : { "name" : "zhangsan02", "nickname" : "zhangsan02", "sex" : "女", "age" : 30 } } ] } } #########指定编辑距离fuzziness为1的结果######### { "took" : 3, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 1.3862942, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.3862942, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan", "sex" : "男", "age" : 30 } } ] } } ``` **11. 
字段排序** ```json #先根据age降序排序，再根据_score字段降序排序 GET /student/_search { "query": { "match_all": {} }, "sort": [ { "age": { "order": "desc" #desc是降序，asc是升序 } }, { "_score": { "order": "desc" } } ] } ``` 结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 5, "relation" : "eq" }, "max_score" : null, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1004", "_score" : 1.0, "_source" : { "name" : "zhangsan01", "nickname" : "zhangsan01", "sex" : "女", "age" : 50 }, "sort" : [ 50, 1.0 ] }, { "_index" : "student", "_type" : "_doc", "_id" : "1003", "_score" : 1.0, "_source" : { "name" : "wangwu", "nickname" : "wangwu", "sex" : "女", "age" : 40 }, "sort" : [ 40, 1.0 ] }, { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.0, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan", "sex" : "男", "age" : 30 }, "sort" : [ 30, 1.0 ] }, { "_index" : "student", "_type" : "_doc", "_id" : "1005", "_score" : 1.0, "_source" : { "name" : "zhangsan02", "nickname" : "zhangsan02", "sex" : "女", "age" : 30 }, "sort" : [ 30, 1.0 ] }, { "_index" : "student", "_type" : "_doc", "_id" : "1002", "_score" : 1.0, "_source" : { "name" : "lisi", "nickname" : "lisi", "sex" : "男", "age" : 20 }, "sort" : [ 20, 1.0 ] } ] } } ``` **12. 
高亮查询** 高亮查询由`highlight`属性提供，它由下面几个标签组合完成高亮查询。 ``` pre_tags：前置标签 post_tags：后置标签 fields：需要高亮的字段 title：这里声明 title 字段需要高亮，后面可以为这个字段设置特有配置，也可以空 ``` ```json GET /student/_search { "query": { "match": { "name": "zhangsan" #在这里指定name，则只有name字段可以高亮显示 } }, "highlight": { "pre_tags": "<font color='red'>", "post_tags": "</font>", "fields": { "name": {} } } } ``` 结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 1.3862942, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1001", "_score" : 1.3862942, "_source" : { "name" : "zhangsan", "nickname" : "zhangsan", "sex" : "男", "age" : 30 }, "highlight" : { "name" : [ "<font color='red'>zhangsan</font>" ] } } ] } } ``` 返回json数据看不出高亮效果，我用Postman查询结果如下： ![](https://img.kancloud.cn/88/39/8839c823073c18d8470bcc47fe616524_1410x216.png) **13. 分页查询** `from`：当前页的起始索引，默认从 0 开始。 `$ from = (pageNum - 1) * size $`，pageNum为当前第几页。 `size`：每页显示多少条。 ```json GET /student/_search { "query": { "match_all": {} }, "sort": [ { "age": { "order": "desc" } } ], "from": 0, "size": 2 } ``` 结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 5, "relation" : "eq" }, "max_score" : null, "hits" : [ { "_index" : "student", "_type" : "_doc", "_id" : "1004", "_score" : null, "_source" : { "name" : "zhangsan01", "nickname" : "zhangsan01", "sex" : "女", "age" : 50 }, "sort" : [ 50 ] }, { "_index" : "student", "_type" : "_doc", "_id" : "1003", "_score" : null, "_source" : { "name" : "wangwu", "nickname" : "wangwu", "sex" : "女", "age" : 40 }, "sort" : [ 40 ] } ] } } ``` **14. 
聚合查询** 聚合允许使用者对 es 文档进行统计分析，类似于关系型数据库中的 group by，当然还有很多其他的聚合，例如取最大值、平均值等等。（1）求最大值、最小值等。 ```json GET /student/_search { "aggs": { "max_age": { #max_age是别名，自由定义即可 "max": { #对age字段求最大值 "field": "age" } } }, "size": 0 } GET /student/_search { "aggs": { "min_age": { "min": {"field": "age"} #对age字段求最小值 } }, "size": 0 } GET /student/_search { "aggs": { "sum_age": { "sum": {"field": "age"} #对age字段求和 } }, "size": 0 } GET /student/_search { "aggs": { "avg_age": { "avg": {"field": "age"} #对age字段求平均值 } }, "size": 0 } GET /student/_search { "aggs": { "distinct_age": { "cardinality": {"field": "age"} #对age字段的值进行去重之后再取总数 } }, "size": 0 } ``` 结果我这里就不写出来了。（2）`stats`聚合。对某个字段一次性返回 count，max，min，avg 和 sum 五个指标 ```json GET /student/_search { "aggs": { "stats_age": { "stats": {"field": "age"} } }, "size": 0 } ``` 结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 5, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "aggregations" : { "stats_age" : { "count" : 5, "min" : 20.0, "max" : 50.0, "avg" : 34.0, "sum" : 170.0 } } } ``` **15. 
桶聚合查询** 桶聚合相当于 sql 中的 group by 语句。（1）分组。 ```json GET /student/_search { "aggs": { "age_groupby": { "terms": {"field": "age"} } }, "size": 0 } ``` 结果如下： ```json { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 5, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "aggregations" : { "age_groupby" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : 30, "doc_count" : 2 #30岁这一组的人数有两个 }, { "key" : 20, "doc_count" : 1 }, { "key" : 40, "doc_count" : 1 }, { "key" : 50, "doc_count" : 1 } ] } } } ``` （2）分组聚合。 ```json #先分组，再对每一个组求和 GET /student/_search { "aggs": { "age_groupby": { "terms": {"field": "age"}, "aggs": { "sum_age": { "sum": {"field": "age"} } } } }, "size": 0 } ``` 结果如下： ```json { "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 5, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "aggregations" : { "age_groupby" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : 30, "doc_count" : 2, "sum_age" : { "value" : 60.0 #30岁这一组的求和结果为60 } }, { "key" : 20, "doc_count" : 1, "sum_age" : { "value" : 20.0 } }, { "key" : 40, "doc_count" : 1, "sum_age" : { "value" : 40.0 } }, { "key" : 50, "doc_count" : 1, "sum_age" : { "value" : 50.0 } } ] } } } ```