企业🤖AI智能体构建引擎,智能编排和调试,一键部署,支持私有化部署方案 广告
### 1. term filter 与keyword 测试数据 ~~~ POST /forum/article/_bulk { "index": { "_id": 1 }} { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden": false, "postDate": "2017-01-01" } { "index": { "_id": 2 }} { "articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden": false, "postDate": "2017-01-02" } { "index": { "_id": 3 }} { "articleID" : "JODL-X-1937-#pV7", "userID" : 2, "hidden": false, "postDate": "2017-01-01" } { "index": { "_id": 4 }} { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden": true, "postDate": "2017-01-02" } ~~~ 1. 匹配text ~~~ GET forum/_search { "query": { "constant_score": { # 固定分值,就不用计算相关度,过滤呗 "filter": { "term": { "articleID": "KDKE-B-9947-#kL5" } }, "boost": 1.2 } } } ~~~ > articleID被分词成:kdke,b,9947,kl5,而term过滤是精准过滤,KDKE-B-9947-#kL5不会被分词,还是用KDKE-B-9947-#kL5去查询过滤,所以这条过滤不能得到document。 结果啥也没搜到 ~~~ { "took": 1, "timed_out": false, "_shards": { "total": 5, "successful": 5, "failed": 0 }, "hits": { "total": 0, "max_score": null, "hits": [] } } ~~~ 2. 再用articleID.keyword去过滤 keyword参考elasticsearch基础 type=keyword ~~~ GET forum/_search { "query": { "constant_score": { "filter": { "term": { "articleID.keyword": "KDKE-B-9947-#kL5" } }, "boost": 1.2 # 固定分值,不指定默认为1 } } } ~~~ 结果 ~~~ { "took": 1, "timed_out": false, "_shards": { "total": 5, "successful": 5, "failed": 0 }, "hits": { "total": 1, "max_score": 1.2, "hits": [ { "_index": "forum", "_type": "article", "_id": "2", "_score": 1.2, "_source": { "articleID": "KDKE-B-9947-#kL5", "userID": 1, "hidden": false, "postDate": "2017-01-02" } } ] } } ~~~ 或者这么写 ~~~ GET forum/_search { "query": { "bool": { "filter": { "term": { "articleID.keyword": "KDKE-B-9947-#kL5" } } } } } ~~~ ## 2. 
组合查询 ~~~ GET forum/_search { "query": { "bool": { "should": [ {"term": {"postDate":"2017-01-01"}}, {"term": {"articleID":"QQPX-R-3956-#aD8"}} # 文档被分词,查询条件(不被分词)无法精准匹配到QQPX-R-3956-#aD8的文档 ] } } } ~~~ ~~~ GET forum/_search { "query": { "bool": { "should": [ {"term": {"postDate":"2017-01-01"}}, {"term": {"articleID.keyword":"QQPX-R-3956-#aD8"}} # text 字段keyword属性,不分词,查询到QQPX-R-3956-#aD8 ] } } } ~~~ ## 3. terms 查询 1. 查找articleID是QQPX-R-3956-#aD8 或者KDKE-B-9947-#kL5的文档 ~~~ GET forum/_search { "query": { "terms": { "articleID.keyword": [ "QQPX-R-3956-#aD8", "KDKE-B-9947-#kL5" ] } } } ~~~ ## 4. range 1. range查询 ~~~ GET forum/_search { "query": { "range": { "view_cnt": { "gte": 50, "lte": 90 } } } } ~~~ 2. range过滤 ~~~ GET forum/_search { "query": { "constant_score": { "filter": { "range": { "view_cnt": { "gte": 50, "lte": 90 } } } } } } ~~~ * 同样的效果range过滤比range查询要快 ## 5. 分组聚合 创建index和映射 ~~~ PUT /tvs { "mappings": { "sales": { "properties": { "price": { "type": "long" }, "color": { "type": "keyword" }, "brand": { "type": "keyword" }, "sold_date": { "type": "date" } } } } } ~~~ 数据 ~~~ POST /tvs/sales/_bulk { "index": {}} { "price" : 1000, "color" : "红色", "brand" : "长虹", "sold_date" : "2016-10-28" } { "index": {}} { "price" : 2000, "color" : "红色", "brand" : "长虹", "sold_date" : "2016-11-05" } { "index": {}} { "price" : 3000, "color" : "绿色", "brand" : "小米", "sold_date" : "2016-05-18" } { "index": {}} { "price" : 1500, "color" : "蓝色", "brand" : "TCL", "sold_date" : "2016-07-02" } { "index": {}} { "price" : 1200, "color" : "绿色", "brand" : "TCL", "sold_date" : "2016-08-19" } { "index": {}} { "price" : 2000, "color" : "红色", "brand" : "长虹", "sold_date" : "2016-11-05" } { "index": {}} { "price" : 8000, "color" : "红色", "brand" : "三星", "sold_date" : "2017-01-01" } { "index": {}} { "price" : 2500, "color" : "蓝色", "brand" : "小米", "sold_date" : "2017-02-12" } ~~~ 1. 
按照color分组 ~~~ GET /tvs/_search { "size": 0, # 只显示聚合结果 "aggs": { "group_by_color": { "terms": { "field": "color", "size": 10000 # 控制返回结果数量 } } } } ~~~ 得到 ~~~ "buckets": [ { "key": "红色", "doc_count": 4 }, { "key": "绿色", "doc_count": 2 }, { "key": "蓝色", "doc_count": 2 } ] } } ~~~ 2. 求每种颜色电视销售价格的平均值 ~~~ GET /tvs/_search { "size": 0, # 只显示聚合结果 "aggs": { "group_by_color": { "terms": { "field": "color", "size": 1000 }, "aggs": { "avg_price": { "avg": { "field": "price" } } } } } } ~~~ color》brand》avg ~~~ GET tvs/_search { "size": 0, "aggs": { "group_by_color": { "terms": { "field": "color", "size": 10 }, "aggs": { "group_by_brand": { "terms": { "field": "brand", "size": 10 }, "aggs": { "avg_price": { "avg": { "field": "price" } } } } } } } } ~~~ 3. 求颜色下的价格最高,最小值,价格总数 ~~~ GET /tvs/_search { "size": 0, "aggs": { "group_by_color": { "terms": { "field": "color", "size": 10000 }, "aggs": { "max_price": { "max": { "field": "price" } }, "min_price":{ "min": { "field": "price" } }, "sum_price":{ "sum": { "field": "price" } } } } } } ~~~ 得到 ~~~ "buckets": [ { "key": "红色", "doc_count": 4, "max_price": { "value": 8000 }, "min_price": { "value": 1000 }, "sum_price": { "value": 13000 } }, { "key": "绿色", "doc_count": 2, "max_price": { "value": 3000 }, "min_price": { "value": 1200 }, "sum_price": { "value": 4200 } }, { "key": "蓝色", "doc_count": 2, "max_price": { "value": 2500 }, "min_price": { "value": 1500 }, "sum_price": { "value": 4000 } ~~~ ## 6. histogram 1. 
求以2000为步长,电视的销售总价 ~~~ GET /tvs/_search { "size": 0, "aggs": { "group_by_price": { "histogram": { "field": "price", "interval": 2000 }, "aggs": { "NAME": { "sum": { "field": "price" } } } } } } ~~~ 得到 ~~~ "group_by_price": { "buckets": [ { "key": 0, "doc_count": 3, "NAME": { "value": 3700 } }, { "key": 2000, "doc_count": 4, "NAME": { "value": 9500 } }, { "key": 4000, "doc_count": 0, "NAME": { "value": 0 } }, { "key": 6000, "doc_count": 0, "NAME": { "value": 0 } }, { "key": 8000, "doc_count": 1, "NAME": { "value": 8000 } } ] ~~~ 2. 求每个月的销售总价 ## 7. 求季度每个品牌的销售额 ~~~ GET tvs/_search { "size": 0, "aggs": { "sales": { "date_histogram": { "field": "sold_date", "interval": "quarter", "format": "yyyy-MM-dd", "min_doc_count": 0, "extended_bounds":{ "min":"2016-01-01", "max":"2017-12-12" } }, "aggs": { "group_price_month": { "terms": { "field": "brand" }, "aggs": { "sales_sum": { "sum": { "field": "price" } } } } } } } } ~~~ ~~~ }, { "key_as_string": "2016-07-01", "key": 1467331200000, "doc_count": 2, "group_price_month": { "doc_count_error_upper_bound": 0, "sum_other_doc_count": 0, "buckets": [ { "key": "TCL", "doc_count": 2, "sales_sum": { "value": 2700 } } ] } }, { "key_as_string": "2016-10-01", "key": 1475280000000, "doc_count": 3, "group_price_month": { "doc_count_error_upper_bound": 0, "sum_other_doc_count": 0, "buckets": [ { "key": "长虹", "doc_count": 3, "sales_sum": { "value": 5000 } } ] } }, { ~~~ ## 8. filter与aggregation 查找电视价格大于2500的销售平均价格 首先把价格大于2500的电视过滤出来,再聚合 ~~~ GET /tvs/_search { "size": 0, "query": { "constant_score": { "filter": { "range": { "price": { "gt": 2500 } } } } }, "aggs": { "avg_lgt1200": { "avg": { "field": "price" } } } } ~~~ 得到 ~~~ "hits": [] }, "aggregations": { "avg_lgt1200": { "value": 5500 } } } ~~~ ## 9. 
最近一个月某品牌的销售总价 ~~~ GET /tvs/_search { "size": 0, "query": { "constant_score": { "filter": { "term": { "brand": "长虹" } } } }, "aggs": { "recent_moth":{ "filter": { # 在这里过滤,因为只在查询的结果中过滤,性能最优 "range": { "sold_date": { "gte": "now-30d" # now-30d 从现在开始,减30天 } } }, "aggs": { "sum_price": { "sum": { "field": "price" } } } } } } ~~~ 得到 ~~~ "aggregations": { "recent_moth": { "doc_count": 3, "sum_price": { "value": 5000 } } } } ~~~ ## 10. 聚合排序 1. 一个桶排序 ~~~ GET /tvs/_search { "size": 0, "aggs": { "groupbycolor": { "terms": { "field": "color", "order": { "avg_price": "asc" # 指定按照avg_price排序 } }, "aggs": { "avg_price": { "avg": { "field": "price" } } } } } } ~~~ 2. 2个桶排序 ~~~ GET /tvs/sales/_search { "size": 0, "aggs": { "group_by_color": { "terms": { "field": "color", "order": { "_count": "asc" # 大桶按照doc count升序 } }, "aggs": { "group_by_brand": { "terms": { "field": "brand", "order": { "avg_price": "desc" # 小桶降序 } }, "aggs": { "avg_price": { "avg": { "field": "price" } } } } } } } } ~~~ ## 11. 网站访问时间统计 ~~~ PUT /website { "mappings": { "logs":{ "properties": { "latency":{ "type": "long" }, "province":{ "type": "keyword" }, "timestamp":{ "type": "date" } } } } } ~~~ ~~~ POST /website/logs/_bulk { "index": {}} { "latency" : 105, "province" : "江苏", "timestamp" : "2016-10-28" } { "index": {}} { "latency" : 83, "province" : "江苏", "timestamp" : "2016-10-29" } { "index": {}} { "latency" : 92, "province" : "江苏", "timestamp" : "2016-10-29" } { "index": {}} { "latency" : 112, "province" : "江苏", "timestamp" : "2016-10-28" } { "index": {}} { "latency" : 68, "province" : "江苏", "timestamp" : "2016-10-28" } { "index": {}} { "latency" : 76, "province" : "江苏", "timestamp" : "2016-10-29" } { "index": {}} { "latency" : 101, "province" : "新疆", "timestamp" : "2016-10-28" } { "index": {}} { "latency" : 275, "province" : "新疆", "timestamp" : "2016-10-29" } { "index": {}} { "latency" : 166, "province" : "新疆", "timestamp" : "2016-10-29" } { "index": {}} { "latency" : 654, "province" : "新疆", "timestamp" : 
"2016-10-28" } { "index": {}} { "latency" : 389, "province" : "新疆", "timestamp" : "2016-10-28" } { "index": {}} { "latency" : 302, "province" : "新疆", "timestamp" : "2016-10-29" } ~~~ ~~~ 需求:比如有一个网站,记录下了每次请求的访问的耗时,需要统计tp50,tp90,tp99 tp50:50%的请求的耗时最长在多长时间 tp90:90%的请求的耗时最长在多长时间 tp99:99%的请求的耗时最长在多长时间 ~~~ ~~~ GET /website/_search { "aggs": { "latency_percentiles": { "percentiles": { "field": "latency", "percents": [ 50, 99 ] } } } } ~~~ 得到 ~~~ }, "aggregations": { "latency_percentiles": { "values": { "50.0": 108.5, # 50%的请求在108以内 "99.0": 624.8500000000001 # 99%的请求在624以内 } } } } ~~~ 2. 统计各省的访问情况 * 一个aggs内可以包含一个aggs * 一个aggs可以包含多个不同的metric(指标统计:平均值,最大值,,,),但是记得起名字 ~~~ GET /website/logs/_search { "size": 0, "aggs": { "group_by_province": { "terms": { "field": "province" }, "aggs": { "latency_percentiles": { "percentiles": { "field": "latency", "percents": [ 50, 95, 99 ] } }, "latency_avg": { "avg": { "field": "latency" } } } } } } ~~~ 得到,新疆的网络不行啊 ~~~ "buckets": [ { "key": "新疆", "doc_count": 6, "latency_avg": { "value": 314.5 }, "latency_percentiles": { "values": { "50.0": 288.5, "95.0": 587.75, "99.0": 640.75 } } }, { "key": "江苏", "doc_count": 6, "latency_avg": { "value": 89.33333333333333 }, "latency_percentiles": { "values": { "50.0": 87.5, "95.0": 110.25, "99.0": 111.65 } } } ] } } } ~~~ ## 12. 
percentile_ranks(求某数值所占比例) > SLA:就是你提供的服务的标准 > 我们的网站的提供的访问延时的SLA,确保所有的请求100%,都必须在200ms以内,大公司内,一般都是要求100%在200ms以内 > 如果超过1s,则需要升级到A级故障,代表网站的访问性能和用户体验急剧下降 > 需求:在200ms以内的,有百分之多少,在1000毫秒以内的有百分之多少,percentile ranks metric > 这个percentile ranks,其实比percentile还要常用 > 按照品牌分组,计算,电视机,售价在1000占比,2000占比,3000占比 ~~~ GET /website/_search { "size": 0, "aggs": { "groupby_province": { "terms": { "field": "province" }, "aggs": { "percentile_s": { "percentile_ranks": { "field": "latency", "values": [ 100, 300, 500 ] } } } } } } ~~~ 得到 ~~~ "buckets": [ { "key": "新疆", "doc_count": 6, "percentile_s": { "values": { "100.0": 8.076923076923077, "300.0": 53.3625730994152, "500.0": 65.31446540880503 } } }, { "key": "江苏", "doc_count": 6, "percentile_s": { "values": { "100.0": 46.42857142857142, "300.0": 100, "500.0": 100 } } } ] } } } ~~~ > 新疆: > 响应时间小于100毫秒占8.08% > 响应时间小于300毫秒占53.36% > 响应时间小于500毫秒占65.31% > ## 13. 文档模型 1. 建立索引,以及文件路径分词器 ~~~ PUT /fs { "settings": { "analysis": { "analyzer": { "paths": { "tokenizer": "path_hierarchy" } } } } } ~~~ path_hierarchy:路径分词器 2. 创建映射 ~~~ PUT /fs/_mapping/file { "properties": { "name": { "type": "keyword" }, "path": { "type": "keyword", # 不分词的路径,用于精准匹配 "fields": { "tree": { "type": "text", "analyzer": "paths" # 分词子field,用于所有路径的搜索 } } } } } ~~~ 3. 插入一条数据 ~~~ PUT /fs/file/1 { "name": "README.txt", "path": "/workspace/projects/helloworld", "contents": "这是我的第一个elasticsearch程序" } ~~~ 4. 搜索文件 1. 查找/workspace/projects/helloworld路径下的elasticsearch文件 ~~~ GET /fs/file/_search { "query": { "bool": { "must": [ { "match": { "contents": "elasticsearch" } }, { "constant_score": { "filter": { "term": { "path": "/workspace/projects/helloworld" } } } } ] } } } ~~~ ~~~ "hits": [ { "_index": "fs", "_type": "file", "_id": "1", "_score": 1.284885, "_source": { "name": "README.txt", "path": "/workspace/projects/helloworld", "contents": "这是我的第一个elasticsearch程序" } } ] } } ~~~ 5. 
查找/workspace目录下的所有elasticsearch文件 ~~~ GET /fs/file/_search { "query": { "bool": { "must": [ { "match": { "contents": "elasticsearch" } }, { "constant_score": { "filter": { "term": { "path.tree": "/workspace" # 用可分词的field,过滤所有路径 } } } } ] } } } ~~~