网站首页 文章专栏 ELK实战---收集网站搜索关键词

ELK实战---收集网站搜索关键词

编辑时间:2019-09-12 18:49:37 作者:苹果 浏览量:4488





    基本环境搭建,可参考上一篇文章    《简述 ELK 搭建日志平台》

    1.入口机收集

        input {        
            file {
               type => "nginx1"
                path => ["/logs/11111111.log","/logs/2222222.log"]    #日志路径
            }
        }
        filter {
           mutate {
              gsub => ["message", "\\x", "\\\x"]
           }
        }
        output {
           redis {
                host => "192.168.1.1"
                port => 6379
                data_type => "list"
                key => "logstash-keywords"   #存入redis  key为  logstash-keywords
            }
        }




    2. 客户端过滤,收集

        input {        
                 # 从redis中获取
                redis {
                        host => "192.168.1.1"
                        data_type => "list"
                        key => "logstash-keywords"
                        type => "redis-input"
                       port => "6379"
                        threads=>8
                        batch_count => 1000
                }
        }
        filter {
            json {
                source => "message"
                remove_field => "message"
            }
            urldecode {
                all_fields => true
            }
            mutate {
               split => ["request_uri","?"]       #从请求地址中,剥离地址 和参数
               add_field => ["uri", "%{request_uri[0]}"]
               add_field => ["temp_keywords", "%{request_uri[1]}"]
            }
            if [uri] == "list.html"{
                           mutate {
                                  split => [temp_keywords,"="]  #因上述关键字格式为 keyword=*** ,因此只用简单的分割即可
                                  add_field => ["search_keyword", "%{temp_keywords[1]}"]   #添加字段,自定义
                                  add_field => ["search_type", "1"]
                                  add_field => ["search_type_name", "心灵语丝"]
                              }
            }
         mutate {
                              # 删除不需要的字段,因只做关键字收集,不考虑其他,所以全部删除掉
                              remove_field => "temp_keywords"
                              remove_field => "upstream_addr"
                              remove_field => "request_time"
                              remove_field => "upstream_response_time"
                              remove_field => "request_body"
                              remove_field => "type"
                              remove_field => "@version"
                              remove_field => "http_x_forwarded_for"
                              remove_field => "body_bytes_sent"
                              remove_field => "time_local"
                              remove_field => "status"
                              remove_field => "path"
                              remove_field => "remote_addr"
                              remove_field => "request_uri"
                              remove_field => "http_referer"
                              remove_field => "uri"
                              remove_field => "http_user_agent"
                              remove_field => "host"
                              remove_field => "@timestamp"
                              remove_field => "method"
                          }
        }
        output {
               #有关键字的数据,另存到一个es 索引库中
               if [search_keyword]{
               elasticsearch {
                               hosts => "192.168.1.2:9200"
                               index=>"logstash-keywords-%{+YYYY.MM}"
                               document_type =>"keywords"
                       }
               }
        }

 

    特别注意:logstash/conf.d/ 下的所有配置文件会被合并成同一条 pipeline,事件的所有字段(变量)在各个配置文件之间共用(真的很坑)
    比如,conf.d/ 下有两个配置文件,a.conf,b.conf;
    在 a.conf 中做了处理
    假设 request_uri = "list.html?keywords=关键字"

        mutate {        
               split => ["request_uri","?"]       #此时 request_uri 已变成数组
               add_field => ["uri", "%{request_uri[0]}"]  #uri 为数组中的第一元素
             }




    在 b.conf 中可以直接使用 uri,并且 request_uri 也已经是数组格式;如果在 b.conf 中再做一次这样的操作:

        mutate {        
               add_field => ["uri", "%{request_uri[0]}"]  #此时 uri 也将变成数组,有两个元素,真的有点坑
             }



    关于 logstash 的语法格式 详见  https://www.elastic.co/guide/en/logstash/6.7/event-dependent-configuration.html#conditionals


    出自:何冰华个人网站

    地址:https://www.hebinghua.com/

    转载请注明出处


来说两句吧
最新评论