ElasticSearch DSL聚合查询语句

本来像聚合(aggregation)这种东西, 在Grafana中可以轻易的实现, 但是偶尔会有需求, 需要自己写DSL脚本实现一些功能, 于是, 只好自己动手了.

例子1

查询serverName="dns-server-1"结果里, 按hostip的数量进行排序, 取前5

GET /my-service-2020.07.22/_search
{
  "query": {
    "term": { "serverName.keyword": "dns-server-1" }
  },
  "size" : 0,
  "aggs": {
    "top-10-hostip": {
      "terms": {
      	"field": "hostip.keyword",
        "size": 5
      }
    }
  }
}

结果

{
  "took" : 17494,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "aggregations" : {
    "top-10-hostip" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 97386291,
      "buckets" : [
        {
          "key" : "192.168.11.38",
          "doc_count" : 44581004
        },
        {
          "key" : "192.168.20.5",
          "doc_count" : 3772738
        },
        {
          "key" : "NXDOMAIN",
          "doc_count" : 1982672
        },
        {
          "key" : "103.15.99.87",
          "doc_count" : 902146
        },
        {
          "key" : "103.15.99.85",
          "doc_count" : 902014
        }
      ]
    }
  }
}

python写法:

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, A

# Connect to the cluster. The timeout is generous because terms
# aggregations over large indices can be slow (the sample response
# above took ~17s).
es = Elasticsearch(
    ['172.29.4.158', '172.29.4.157', '172.29.4.156'],
    http_auth=('elastic', 'MyPassword'),
    timeout=60,
)
index = 'my-service-2020.07.22'
s = Search(using=es, index=index)
# Exact match requires the .keyword sub-field; size=0 suppresses the
# hit list because only the aggregation result is needed.
# NOTE: the server name is "dns-server-1" (was misspelled "dns-sever-1",
# which would match no documents).
s = s.filter('term', **{"serverName.keyword": "dns-server-1"}).extra(size=0)
# Top 5 hostip values by document count, same as the raw DSL above.
a = A('terms', field="hostip.keyword", size=5)
s.aggs.bucket("my_aggr", a)
response = s.execute().to_dict()
print(response['aggregations']["my_aggr"]['buckets'])

例子2

查询serverName="dns-server-1"结果里, 将每小时的数据量取出来, 形成一个报表

POST /my-service-2020.07.23/_search
{
  "query": {
    "term": { "serverName.keyword": "dns-server-1" }
  },
  "size": 0,
  "aggs": {
    "reply_per_hour": {
      "date_histogram": {
        "field": "@timestamp",
        "calendar_interval": "1h",
        "time_zone": "Asia/Shanghai",
        "min_doc_count": 1
      }
    }
  }
}

结果如下:

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "aggregations" : {
    "reply_per_hour" : {
      "buckets" : [
        {
          "key_as_string" : "2020-07-23T08:00:00.000+08:00",
          "key" : 1595462400000,
          "doc_count" : 2265522
        },
        {
          "key_as_string" : "2020-07-23T09:00:00.000+08:00",
          "key" : 1595466000000,
          "doc_count" : 7008095
        },
        {
          "key_as_string" : "2020-07-23T10:00:00.000+08:00",
          "key" : 1595469600000,
          "doc_count" : 10968247
        },
        {
          "key_as_string" : "2020-07-23T11:00:00.000+08:00",
          "key" : 1595473200000,
          "doc_count" : 11152200
        },
        {
          "key_as_string" : "2020-07-23T12:00:00.000+08:00",
          "key" : 1595476800000,
          "doc_count" : 11449941
        },
        {
          "key_as_string" : "2020-07-23T13:00:00.000+08:00",
          "key" : 1595480400000,
          "doc_count" : 10738942
        },
        {
          "key_as_string" : "2020-07-23T14:00:00.000+08:00",
          "key" : 1595484000000,
          "doc_count" : 11150819
        },
        {
          "key_as_string" : "2020-07-23T15:00:00.000+08:00",
          "key" : 1595487600000,
          "doc_count" : 11070727
        },
        {
          "key_as_string" : "2020-07-23T16:00:00.000+08:00",
          "key" : 1595491200000,
          "doc_count" : 11055146
        },
        {
          "key_as_string" : "2020-07-23T17:00:00.000+08:00",
          "key" : 1595494800000,
          "doc_count" : 10978737
        },
        {
          "key_as_string" : "2020-07-23T18:00:00.000+08:00",
          "key" : 1595498400000,
          "doc_count" : 11225315
        },
        {
          "key_as_string" : "2020-07-23T19:00:00.000+08:00",
          "key" : 1595502000000,
          "doc_count" : 10358962
        },
        {
          "key_as_string" : "2020-07-23T20:00:00.000+08:00",
          "key" : 1595505600000,
          "doc_count" : 8871878
        },
        {
          "key_as_string" : "2020-07-23T21:00:00.000+08:00",
          "key" : 1595509200000,
          "doc_count" : 6981503
        },
        {
          "key_as_string" : "2020-07-23T22:00:00.000+08:00",
          "key" : 1595512800000,
          "doc_count" : 4599943
        },
        {
          "key_as_string" : "2020-07-23T23:00:00.000+08:00",
          "key" : 1595516400000,
          "doc_count" : 2847069
        },
        {
          "key_as_string" : "2020-07-24T00:00:00.000+08:00",
          "key" : 1595520000000,
          "doc_count" : 2282815
        },
        {
          "key_as_string" : "2020-07-24T01:00:00.000+08:00",
          "key" : 1595523600000,
          "doc_count" : 1836456
        },
        {
          "key_as_string" : "2020-07-24T02:00:00.000+08:00",
          "key" : 1595527200000,
          "doc_count" : 1665192
        },
        {
          "key_as_string" : "2020-07-24T03:00:00.000+08:00",
          "key" : 1595530800000,
          "doc_count" : 1570905
        },
        {
          "key_as_string" : "2020-07-24T04:00:00.000+08:00",
          "key" : 1595534400000,
          "doc_count" : 1503438
        },
        {
          "key_as_string" : "2020-07-24T05:00:00.000+08:00",
          "key" : 1595538000000,
          "doc_count" : 1448256
        },
        {
          "key_as_string" : "2020-07-24T06:00:00.000+08:00",
          "key" : 1595541600000,
          "doc_count" : 1470739
        },
        {
          "key_as_string" : "2020-07-24T07:00:00.000+08:00",
          "key" : 1595545200000,
          "doc_count" : 1616518
        }
      ]
    }
  }
}

python写法

这里多加了几个查询条件(serverName="dns-server-1", hostip.keyword="192.168.11.12"和type.keyword="reply")

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, A

es = Elasticsearch(
    ['172.29.4.158', '172.29.4.157', '172.29.4.156'],
    http_auth=('elastic', 'MyPassword'),
    timeout=60,
)
# Use the same index as the raw-DSL example for this section
# (was '2020.07.22' fused onto the previous line, a SyntaxError).
index = 'my-service-2020.07.23'
s = Search(using=es, index=index)
# Three filters: server name (fixed typo "dns-sever-1"), host ip, and
# record type; size=0 because only the aggregation is wanted.
s = (
    s.filter('term', **{"serverName.keyword": "dns-server-1"})
     .filter('term', **{"hostip.keyword": "192.168.11.12"})
     .filter('match', type__keyword="reply")
     .extra(size=0)
)
# Hourly buckets ("1h", matching the raw DSL and the article's stated
# goal of a per-hour report; was "1s"). min_doc_count=1 hides empty
# buckets, as in the DSL example.
a = A('date_histogram', field="@timestamp", time_zone="Asia/Shanghai",
      min_doc_count=1, calendar_interval="1h")
s.aggs.bucket("my_aggr", a)
response = s.execute().to_dict()
print(response['aggregations']["my_aggr"]['buckets'])

参考文档:
Elasticsearch DSL
Max Bucket Aggregation
elasticsearch-dsl で集計を伴う検索クエリを書く

发表评论

电子邮件地址不会被公开。