年底了,物联网设备和用户活跃度处在高位,表现在平台上就是各项指标偏高:除CPU、内存外,日志量也高。这边日志是使用loki搭建的,容器化部署,日志保留3个月,共2160个小时。配置文件如下:

[root@loki loki]# cat docker-compose.yml
---
# Compose file format version; quoted so it stays a string.
version: "3"

# One user-defined network shared by the loki, gateway and redis services.
networks:
  loki: {}
services:
  # Single-binary Loki. The entrypoint renders the Loki configuration into
  # /tmp/config.yaml from the inline heredoc and then starts Loki with it.
  # All persistent state (chunks, rules, WAL, compactor markers) lives under
  # /loki, which is bind-mounted from /data/loki on the host.
  loki:
    image: grafana/loki:2.6.1
    # command: "-config.file=/etc/loki/config.yaml"
    entrypoint:
      - sh
      - -euc
      - |
        cat <<EOF>/tmp/config.yaml
        auth_enabled: false
        server:
          http_listen_port: 3100
          grpc_listen_port: 9096
        common:
          path_prefix: /loki
          storage:
            filesystem:
              chunks_directory: /loki/chunks
              rules_directory: /loki/rules
          replication_factor: 1
          ring:
            instance_addr: 127.0.0.1
            kvstore:
              store: inmemory
        ingester:
          wal:
            enabled: true
            dir: /loki/wal
          lifecycler:
            address: 127.0.0.1
            ring:
              kvstore:
                store: inmemory
              replication_factor: 1
            final_sleep: 0s
          chunk_idle_period: 1h       # Any chunk not receiving new logs in this time will be flushed
          max_chunk_age: 1h           # All chunks will be flushed when they hit this age, default is 1h
          chunk_target_size: 1048576  # Loki will attempt to build chunks up to 1MiB, flushing first if chunk_idle_period or max_chunk_age is reached first
          # NOTE(review): an index query cache is configured below, so per the
          # rule stated on the next line 30s may be too short - confirm.
          chunk_retain_period: 30s    # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m)
          max_transfer_retries: 0     # Chunk transfers disabled

        # --- begin: added 2023-07-06 (response compression and redis caches) ---
        frontend:
          compress_responses: true
        query_range:
          #split_queries_by_interval: 24h
          results_cache:
            cache:
              redis:
                endpoint: redis:6379
                expiration: 1h
          cache_results: true

        storage_config:
          index_queries_cache_config:
            redis:
              endpoint: redis:6379
              expiration: 1h

        chunk_store_config:
          chunk_cache_config:
            redis:
              endpoint: redis:6379
              expiration: 1h
          write_dedupe_cache_config:
            redis:
              endpoint: redis:6379
              expiration: 1h
        # --- end: added 2023-07-06 ---

        # Previous in-process results cache, kept for reference:
        #query_range:
        #  results_cache:
        #    cache:
        #      embedded_cache:
        #      enabled: true
        #      max_size_mb: 100

        schema_config:
          configs:
            - from: 2020-10-24
              store: boltdb-shipper
              object_store: filesystem
              schema: v11
              index:
                prefix: index_
                period: 24h

        # https://grafana.com/docs/loki/latest/configuration/#limits_config
        limits_config:
          ingestion_rate_mb: 1000
          ingestion_burst_size_mb: 1000
          reject_old_samples: true # reject samples that are too old
          reject_old_samples_max_age: 168h # samples older than 7 days are rejected
          per_stream_rate_limit: 50MB
          per_stream_rate_limit_burst: 100MB
          max_entries_limit_per_query: 100000 # max log entries returned per query
          retention_period: 2160h  # 90 days (30 days x 3); older chunk data is deleted; set 2023-10-13
        table_manager:
          retention_deletes_enabled: true   # enable retention deletes
          retention_period: 2160h  # 90 days (30 days x 3); keep equal to limits_config.retention_period
        compactor:
          # Keep this on the persistent /loki volume. The compactor stores its
          # retention marker files (chunks pending deletion) here; under /tmp
          # they are lost whenever the container is recreated.
          working_directory: /loki/compactor
          shared_store: filesystem
          compaction_interval: 10m
          retention_enabled: true
          retention_delete_delay: 10s
          retention_delete_worker_count: 150


        ruler:
          alertmanager_url: http://localhost:9093

        # By default, Loki will send anonymous, but uniquely-identifiable usage and configuration
        # analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/
        #
        # Statistics help us better understand how Loki is used, and they show us performance
        # levels for most users. This helps us prioritize features and documentation.
        # For more information on what's sent, look at
        # https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go
        # Refer to the buildReport method to see what goes into a report.
        #
        # If you would like to disable reporting, uncomment the following lines:
        #analytics:
        #  reporting_enabled: false
        EOF
        # exec replaces the shell so Loki runs as PID 1 and receives the stop
        # signal directly, allowing a graceful shutdown.
        exec /usr/bin/loki -config.file=/tmp/config.yaml

    restart: always
    ports:
      - "3101:3100"
      - "7946"
      # NOTE(review): grpc_listen_port in the config above is 9096, so
      # publishing 9095 looks unused - confirm which port is intended.
      - "9095"
    volumes:
      - /data/loki:/loki
    environment:
      - TZ=Asia/Shanghai
    networks: &loki-dns
      loki:
        aliases:
          - loki

  # nginx reverse proxy in front of Loki. The entrypoint writes nginx.conf
  # from the inline heredoc and then starts nginx in the foreground.
  # "\$$" is deliberate: Compose turns "$$" into "$", and the backslash stops
  # the shell from expanding it inside the unquoted heredoc, so nginx receives
  # a literal variable such as $request_uri.
  gateway:
    image: nginx:latest
    restart: always
    environment:
      - TZ=Asia/Shanghai
    depends_on:
      - loki
    entrypoint:
      - sh
      - -euc
      - |
        cat <<EOF > /etc/nginx/nginx.conf
        user  nginx;
        worker_processes  5;  ## Default: 1

        events {
          worker_connections   1000;
        }

        http {
          # Docker embedded DNS; needed because proxy_pass uses variables.
          resolver 127.0.0.11;

          server {
            listen             3100;

            location = / {
              return 200 'OK';
              auth_basic off;
            }

            location = /getip {
              return 200 '\$$remote_addr';
              auth_basic off;
            }

            location = /api/prom/push {
              proxy_pass       http://loki:3100\$$request_uri;
            }

            # Upstream is the loki service: this stack has no "read" service.
            location = /api/prom/tail {
              proxy_pass       http://loki:3100\$$request_uri;
              proxy_set_header Upgrade \$$http_upgrade;
              proxy_set_header Connection "upgrade";
            }

            location ~ /api/prom/.* {
              proxy_pass       http://loki:3100\$$request_uri;
            }

            location = /loki/api/v1/push {
              proxy_pass       http://loki:3100\$$request_uri;
            }

            location = /loki/api/v1/tail {
              proxy_pass       http://loki:3100\$$request_uri;
              proxy_set_header Upgrade \$$http_upgrade;
              proxy_set_header Connection "upgrade";
            }

            location ~ /loki/api/.* {
              proxy_pass       http://loki:3100\$$request_uri;
            }
          }
        }
        EOF
        # exec hands PID 1 to the nginx entrypoint so stop signals reach nginx.
        exec /docker-entrypoint.sh nginx -g "daemon off;"
    ports:
      - "3100:3100"
    networks:
      - loki

  # Redis instance reachable as redis:6379 on the loki network; the Loki
  # configuration in this file points its cache endpoints at that address.
  redis:
    image: redis:latest
    networks:
      loki: {}
    ports:
      - "6379:6379"
    environment:
      TZ: Asia/Shanghai
    restart: always

其中,redis只用作查询缓存(配置中的缓存过期时间为1小时),日志本身保留3个月共90天。但是早上巡检看到磁盘使用率已经高于80%了。
2023-12-14T02:00:52.png

从网上搜索了一圈,怎么删日志呢?

有的说可以

curl -X DELETE 'http://10.10.19.106:3100/api/v1/purge?dryRun=false&keepHours=2160&deleteAll=true&index=loki_local'

但是测试直接提示METHOD 405,方法不支持。

反向一想,现在的日志到底到了哪里了呢?

经过测算,从今天算起,2160个小时之前是2023年9月15日,那么只要9月16日凌晨有日志,而9月15日凌晨没有日志,就说明当前日志确实保留了2160个小时。

2023-12-14T02:02:01.png

在grafana的loki插件中查询日志发现,确实是这样的。

既然这样,那就直接扩容硬盘吧,还是保留3个月。

2023-12-14T02:03:18.png
2023-12-14T02:03:23.png

这回妥了,告警也消失了。

最后修改:2024 年 05 月 11 日
如果觉得我的文章对你有用,请随意赞赏