Logstash's drop filter plugin

When Logstash parses logs in the filter section, it can pick out exactly the log content we want. If a log line does not contain certain fields, we can simply discard the whole event. The configuration is shown below.

input {
    kafka {
        bootstrap_servers => "k1.zhukun.net:6687,k2.zhukun.net:6687"
        #topics_pattern => "zhukun.net.log.rms-api.*"  # use this form to consume from multiple Kafka topics
        topics => ["zhukun.net_console.log"]
        group_id => "logstash-mp-ops"
        consumer_threads => 10
        decorate_events => true
        auto_offset_reset => "latest"
    }
}

filter {

    # If message does not start with 2019/2020/2021, drop the whole event
    if [message] !~ /^(2019|2020|2021)/ {
        drop { }
    }
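    # Note: per the drop filter docs linked at the end of this post, the plugin
    # also has a percentage option for sampling, e.g. drop { percentage => 40 }
    # drops roughly 40% of the matching events instead of all of them.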
	
    # Print the raw log for debugging
    #ruby {
    #    code => 'puts event.get("message")'
    #}
	
    # grok pattern matching
    grok {
        match => { "message" => '%{TIMESTAMP_ISO8601:time_local}\s*\[%{DATA:service}\]\s*%{LOGLEVEL:loglevel}\s*%{DATA:message}$' }
        overwrite => ["message"]
    }
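    # As a hypothetical example, a line such as
    #   2019-03-18 08:12:45.006 [rms-api] INFO some message text
    # would be split into the time_local, service, loglevel and message fields.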

    # Date handling
    date {
        # Assign time_local to @timestamp; the pattern on the right is the actual format of time_local, e.g. 2019-03-18 08:12:45.006
        match => ["time_local", "yyyy-MM-dd HH:mm:ss.SSS"]
        target => "@timestamp"    # @timestamp is the default target
    }
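    # Assumption: if time_local carries no timezone offset, the date filter's
    # timezone option may also be needed, e.g. timezone => "Asia/Shanghai"
    # (adjust to wherever the source logs are produced).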

    # Once the whole log line has been parsed (its content now lives in separate fields), the raw message is usually no longer needed, so consider removing it
    #mutate {
    #    remove_field => [ "message" ]
    #}

}

output {
    elasticsearch {
        hosts => ["10.18.4.24:9200","10.18.4.25:9200","10.18.4.77:9200","10.18.4.78:9200", "10.11.149.69:9200","10.16.22.149:9200"]
        index => "zhukun.net_console.log-%{+yyyy.MM.dd}"
    }
    #stdout {
    #    codec => rubydebug {
    #        metadata => true
    #    }
    #}
}
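As a variation on the same idea: by default grok tags events it cannot parse with _grokparsefailure, so instead of (or in addition to) matching on the message prefix, you can drop anything that fails the grok match. A minimal sketch, separate from the config above:

filter {
    grok {
        match => { "message" => '%{TIMESTAMP_ISO8601:time_local}\s*\[%{DATA:service}\]\s*%{LOGLEVEL:loglevel}\s*%{DATA:message}$' }
        overwrite => ["message"]
    }
    # Events grok could not parse carry the _grokparsefailure tag; drop them
    if "_grokparsefailure" in [tags] {
        drop { }
    }
}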

References

https://www.elastic.co/guide/en/logstash/current/plugins-filters-drop.html#plugins-filters-drop-options
https://discuss.elastic.co/t/logstash-grep-and-drop/35313
https://stackoverflow.com/questions/29609355/missing-grep-filter-in-logstash
