Compilation results for clouddumps1002.wikimedia.org: System changes detected
You can retrieve this result from host.json.
Catalog differences
Summary
| Total Resources: | 3735 |
|---|---|
| Resources added: | 0 |
| Resources removed: | 0 |
| Resources modified: | 2 |
| Change percentage: | 0.05% |
Resources modified
- File[/etc/nginx/sites-available/xmldumps]
- Content differences:
--- /etc/nginx/sites-available/xmldumps.orig +++ /etc/nginx/sites-available/xmldumps @@ -4,6 +4,74 @@ ########################## limit_conn_zone $remote_addr$http_user_agent zone=addr:10m; + +# The ECS spec just uses the HTTP version numbers, so we strip the HTTP/ prefix here. +map $server_protocol $ecs_http_version { + "HTTP/1.0" "1.0"; + "HTTP/1.1" "1.1"; + "HTTP/2.0" "2.0"; + default $server_protocol; +} + +# Here we use the HTTP status code to map to event.outcome +map $status $ecs_event_outcome { + ~^2 "success"; + ~^3 "success"; + ~^4 "failure"; + ~^5 "failure"; + default "unknown"; +} + +# nginx has no way to emit a literal '$', which the Event Platform '$schema' field +# name requires, so we expose one via a map and reference it as '${dollar}schema'. +map $host $dollar { + default "$"; +} + +# This directive configures the ECS compatible JSON log format. +# The '$schema', 'meta' and 'dt' fields are required by the Event Platform so that +# these events can be ingested into the Data Lake (see T291645). 'dt' is the Event +# Platform time field; '@timestamp' is retained for ECS compatibility. 
+log_format ecs_json escape=json + '{' + '"${dollar}schema":"/development/elastic/ecs/1.0.0",' + '"meta":{' + '"stream":"webrequest.dumps.dev0",' + '"id":"$request_id",' + '"dt":"$time_iso8601",' + '"domain":"$host"' + '},' + '"dt":"$time_iso8601",' + '"@timestamp":"$time_iso8601",' + '"ecs":{"version":"1.11.0"},' + '"event":{' + '"dataset":"dumps.access",' + '"kind":"event",' + '"category":"web",' + '"type":"access",' + '"outcome":"$ecs_event_outcome"' + '},' + '"http":{' + '"request":{' + '"method":"$request_method",' + '"referrer":"$http_referer",' + '"bytes":$request_length' + '},' + '"response":{' + '"status_code":$status,' + '"body":{"bytes":$body_bytes_sent}' + '},' + '"version":"$ecs_http_version"' + '},' + '"url":{' + '"original":"$request_uri",' + '"path":"$uri",' + '"query":"$query_string"' + '},' + '"source":{"ip":"$remote_addr"},' + '"user_agent":{"original":"$http_user_agent"},' + '"destination":{"address":"$host"}' + '}'; server { listen [::]:443 default ssl ipv6only=off; @@ -22,6 +90,12 @@ add_header Strict-Transport-Security "max-age=106384710; includeSubDomains; preload" always; root /srv/dumps/xmldatadumps/public; + + # We want to send access logs to syslog in addition to the standard log files. + # The 'dumps-http' tag wil match with an entry in /etc/rsyslog.lookup.d/lookup_table_output.json + # so these logs will be sent to the kafka-logging cluster. + access_log /var/log/nginx/access.log; # duplicate of the standard configuration in the http block. + access_log syslog:server=unix:/dev/log,facility=local7,tag=dumps-http,severity=info ecs_json; add_header Server 'clouddumps1002.wikimedia.org' always;
- Content differences:
- File[/etc/rsyslog.d/30-output-kafka.conf]
- Content differences:
--- /etc/rsyslog.d/30-output-kafka.conf.orig +++ /etc/rsyslog.d/30-output-kafka.conf @@ -13,6 +13,19 @@ # define a template to be used by omkafka dynatopic template(name="kafka_topic" type="string" string="rsyslog-%syslogseverity-text%") + +# Event Platform support (T291645): messages that carry a 'meta.stream' field are +# Event Platform events (e.g. ECS formatted logs). Produce them to the +# '<datacenter>.<meta.stream>' topic so they can be ingested into the Data Lake. +# The datacenter prefix is supplied by puppet; the stream suffix is read from the +# parsed json message via the '%!meta!stream%' property. +template(name="event_platform_topic" type="string" string="eqiad.%!meta!stream%") + +# Emit the parsed json message verbatim (with $schema, meta and dt intact) so the +# event remains valid for Event Platform / Data Lake ingestion. +template(name="event_platform_json" type="list") { + property(name="$!all-json") +} # send to kafka if lookup table contains "kafka" for relevant programname # $.log_outputs defined by lookup table in lookup_output.conf @@ -42,6 +55,31 @@ # unfortunately rsyslog doesn't allow variables to be used as template # names, so the kafka action is duplicated here. if $parsesuccess == "OK" then { + # Event Platform events carry a 'meta.stream' field. Produce these only to + # the '<datacenter>.<meta.stream>' topic (T291645). Logstash consumes this + # topic via an explicit kafka input rather than the 'rsyslog-*' pattern. 
+ if ($!meta!stream != "") then { + action(type="omkafka" + name="omkafka_event_platform" + broker=["kafka-logging1001.eqiad.wmnet:9093","kafka-logging1002.eqiad.wmnet:9093","kafka-logging1003.eqiad.wmnet:9093","kafka-logging1004.eqiad.wmnet:9093","kafka-logging1005.eqiad.wmnet:9093"] + topic="event_platform_topic" + dynatopic="on" + dynatopic.cachesize="1000" + partitions.auto="on" + template="event_platform_json" + queue.type="LinkedList" queue.size="10000" queue.filename="output_kafka_event_platform" + queue.highWatermark="7000" queue.lowWatermark="6000" + queue.checkpointInterval="5" + queue.maxDiskSpace="40960000" + confParam=[ "security.protocol=ssl", + "ssl.ca.location=/etc/ssl/certs/wmf-ca-certificates.crt", + "compression.codec=snappy", + "socket.timeout.ms=10000", + "socket.keepalive.enable=true", + "queue.buffering.max.ms=50", + "batch.num.messages=1000" ] + ) + } else { action(type="omkafka" name="omkafka_syslog_cee" broker=["kafka-logging1001.eqiad.wmnet:9093","kafka-logging1002.eqiad.wmnet:9093","kafka-logging1003.eqiad.wmnet:9093","kafka-logging1004.eqiad.wmnet:9093","kafka-logging1005.eqiad.wmnet:9093"] @@ -62,6 +100,7 @@ "queue.buffering.max.ms=50", "batch.num.messages=1000" ] ) + } } else { # if ecs_170 in log_outputs, use that template to format # non-json-formatted syslog events into an ecs-compatible form
- Content differences: