--- Prometheus::Alert::Rule[check_check_jenkins_status_0566fc363c3a4922cf93444b3603f43c].orig
+++ Prometheus::Alert::Rule[check_check_jenkins_status_0566fc363c3a4922cf93444b3603f43c]
+ ensure => absent
+ for => 11m
+ runbook => https://wikitech.wikimedia.org/wiki/Jenkins#How_to
+ logs => https://logstash.wikimedia.org/app/dashboards#/view/2d343ac0-6df8-11f0-8e08-7fab0da52b33?_g=(filters:!((query:(match_phrase:(event.module:check_check_jenkins_status))),(query:(match_phrase:(host.name:{{$labels.instance|stripPort}})))))
+ alert_name => nrpe_Check_unit_status_of_jenkins
+ summary => NRPE CHECK: Check unit status of jenkins
+ group => nrpechecks
+ severity => info
+ site => eqiad
+ def_label_whitelst => ['team', 'severity']
+ description => NRPE CHECK: Check unit status of jenkins
+ dashboard => TODO
+ team => observability
+ instance => ops
+ expr => (nagios_nrpe_check_result{alert_rule_hash="0566fc363c3a4922cf93444b3603f43c",check_name="check_check_jenkins_status", status=~"(WARNING|CRITICAL)", severity=~"(warning|critical)"} > 0) * on (instance) group_left (team) role_owner
File[/lib/systemd/system/nrpe2nodexp-check_jenkins_status.timer]
- Parameters differences:
--- File[/lib/systemd/system/nrpe2nodexp-check_jenkins_status.timer].orig
+++ File[/lib/systemd/system/nrpe2nodexp-check_jenkins_status.timer]
+ ensure => absent
+ owner => root
+ notify => Exec[systemd daemon-reload for nrpe2nodexp-check_jenkins_status.timer (nrpe2nodexp-check_jenkins_status.timer)]
+ mode => 0444
+ group => root
- Content differences:
--- /lib/systemd/system/nrpe2nodexp-check_jenkins_status.timer.orig
+++ /lib/systemd/system/nrpe2nodexp-check_jenkins_status.timer
@@ -0,0 +1,14 @@
+[Unit]
+Description=Periodic execution of nrpe2nodexp-check_jenkins_status.service
+
+[Timer]
+Unit=nrpe2nodexp-check_jenkins_status.service
+# Accuracy sets the maximum time interval around the execution time we want to allow
+AccuracySec=15sec
+OnUnitInactiveSec=5min
+OnActiveSec=1s
+RandomizedDelaySec=300
+FixedRandomDelay=true
+
+[Install]
+WantedBy=multi-user.target
- Systemd::Monitor[jenkins]
- Parameters differences:
--- Systemd::Monitor[jenkins].orig
+++ Systemd::Monitor[jenkins]
+ ensure => present
+ contact_group => admins
+ migration_task => T407130
+ check_interval => 10
+ notes_url => https://wikitech.wikimedia.org/wiki/Jenkins#How_to
+ critical => False
+ retries => 2
- File[/etc/rsyslog.d/25-nrpe2nodexp-check-jenkins-status.conf]
- Parameters differences:
--- File[/etc/rsyslog.d/25-nrpe2nodexp-check-jenkins-status.conf].orig
+++ File[/etc/rsyslog.d/25-nrpe2nodexp-check-jenkins-status.conf]
+ ensure => absent
+ owner => root
+ notify => Service[rsyslog]
+ mode => 0444
+ group => root
- Content differences:
--- /etc/rsyslog.d/25-nrpe2nodexp-check-jenkins-status.conf.orig
+++ /etc/rsyslog.d/25-nrpe2nodexp-check-jenkins-status.conf
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: Apache-2.0
+if $programname contains "nrpe2nodexp-check_jenkins_status" then {
+ if ($msg contains "\"ecs.version\": \"1.7.0\"") then {
+ # Send logs to kafka
+ set $.log_outputs = "kafka ecs_170 local";
+ } else {
+ # Filter out non-relevant nrpe2nodexp messages
+ stop
+ }
+}
- File[/lib/systemd/system/nrpe2nodexp-check_jenkins_status.service]
- Parameters differences:
--- File[/lib/systemd/system/nrpe2nodexp-check_jenkins_status.service].orig
+++ File[/lib/systemd/system/nrpe2nodexp-check_jenkins_status.service]
+ ensure => absent
+ owner => root
+ notify => Exec[systemd daemon-reload for nrpe2nodexp-check_jenkins_status.service (nrpe2nodexp-check_jenkins_status.service)]
+ mode => 0444
+ group => root
- Content differences:
--- /lib/systemd/system/nrpe2nodexp-check_jenkins_status.service.orig
+++ /lib/systemd/system/nrpe2nodexp-check_jenkins_status.service
@@ -0,0 +1,11 @@
+[Unit]
+Description=execution of nrpe2nodexp for the check_check_jenkins_status command.
+Documentation=https://wikitech.wikimedia.org/wiki/Monitoring/systemd_unit_state
+
+[Service]
+Type=oneshot
+User=nagios
+
+Group=prometheus-node-exporter
+SyslogIdentifier=nrpe2nodexp-check_jenkins_status
+ExecStart=-/usr/local/bin/nrpe2nodexp --alert-rule-hash "0566fc363c3a4922cf93444b3603f43c" --timeout 10 --check-command "check_check_jenkins_status"
- Monitoring::Service[check_jenkins_status]
- Parameters differences:
--- Monitoring::Service[check_jenkins_status].orig
+++ Monitoring::Service[check_jenkins_status]
+ ensure => present
+ retry_interval => 1
+ freshness => 36000
+ check_interval => 10
+ config_dir => /etc/nagios
+ critical => False
+ passive => False
+ contact_group => admins
+ check_command => nrpe_check!check_check_jenkins_status!10
+ description => Check unit status of jenkins
+ migration_task => T407130
+ host => contint1002
+ notes_url => https://wikitech.wikimedia.org/wiki/Jenkins#How_to
+ retries => 2
- Nrpe::Check[check_check_jenkins_status]
- Parameters differences:
--- Nrpe::Check[check_check_jenkins_status].orig
+++ Nrpe::Check[check_check_jenkins_status]
+ ensure => present
+ command => /usr/local/lib/nagios/plugins/check_systemd_unit_status jenkins
+ before => Monitoring::Service[check_jenkins_status]
- Monitoring::Exported_nagios_service[contint1002 check_jenkins_status]
- Parameters differences:
--- Monitoring::Exported_nagios_service[contint1002 check_jenkins_status].orig
+++ Monitoring::Exported_nagios_service[contint1002 check_jenkins_status]
+ ensure => present
+ host_name => contint1002
+ retry_interval => 1
+ passive_checks_enabled => 1
+ servicegroups => ci_eqiad
+ is_volatile => 0
+ service_description => Check unit status of jenkins
+ notification_period => 24x7
+ notification_interval => 0
+ active_checks_enabled => 1
+ check_command => nrpe_check!check_check_jenkins_status!10
+ notifications_enabled => 1
+ check_freshness => 0
+ check_interval => 10
+ contact_groups => admins
+ notification_options => c,r,f
+ max_check_attempts => 2
+ check_period => 24x7
+ notes_url => https://wikitech.wikimedia.org/wiki/Jenkins#How_to
- Systemd::Unit[nrpe2nodexp-check_jenkins_status.service]
- Parameters differences:
--- Systemd::Unit[nrpe2nodexp-check_jenkins_status.service].orig
+++ Systemd::Unit[nrpe2nodexp-check_jenkins_status.service]
+ ensure => absent
+ require => ['Class[Systemd]']
+ override => False
+ override_filename => puppet-override.conf
+ restart => False
+ unit => nrpe2nodexp-check_jenkins_status.service
- Systemd::Unit[nrpe2nodexp-check_jenkins_status.timer]
- Parameters differences:
--- Systemd::Unit[nrpe2nodexp-check_jenkins_status.timer].orig
+++ Systemd::Unit[nrpe2nodexp-check_jenkins_status.timer]
+ ensure => absent
+ require => ['Class[Systemd]']
+ override => False
+ override_filename => puppet-override.conf
+ restart => False
+ unit => nrpe2nodexp-check_jenkins_status.timer
- Systemd::Timer[nrpe2nodexp-check_jenkins_status]
- Parameters differences:
--- Systemd::Timer[nrpe2nodexp-check_jenkins_status].orig
+++ Systemd::Timer[nrpe2nodexp-check_jenkins_status]
+ ensure => absent
+ splay => 300
+ unit_name => nrpe2nodexp-check_jenkins_status.service
+ timer_intervals => [{'start': 'OnUnitInactiveSec', 'interval': '5min'}, {'interval': '1s', 'start': 'OnActiveSec'}]
+ fixed_random_delay => True
+ accuracy => 15sec
- Rsyslog::Conf[nrpe2nodexp-check_jenkins_status]
- Parameters differences:
--- Rsyslog::Conf[nrpe2nodexp-check_jenkins_status].orig
+++ Rsyslog::Conf[nrpe2nodexp-check_jenkins_status]
+ ensure => absent
+ mode => 0444
+ priority => 25
- Nrpe::Monitor_service[check_jenkins_status]
- Parameters differences:
--- Nrpe::Monitor_service[check_jenkins_status].orig
+++ Nrpe::Monitor_service[check_jenkins_status]
+ ensure => present
+ retry_interval => 1
+ contact_group => admins
+ description => Check unit status of jenkins
+ enable_icinga_check => True
+ alertmanager_team => observability
+ alertmanager_severity_override => info
+ check_interval => 10
+ critical => False
+ enable_nrpe2nodexp => False
+ timeout => 10
+ nrpe2nodexp_parse_perf_data => False
+ migration_task => T407130
+ nrpe_command => /usr/local/lib/nagios/plugins/check_systemd_unit_status jenkins
+ notes_url => https://wikitech.wikimedia.org/wiki/Jenkins#How_to
+ retries => 2
- Systemd::Service[nrpe2nodexp-check_jenkins_status]
- Parameters differences:
--- Systemd::Service[nrpe2nodexp-check_jenkins_status].orig
+++ Systemd::Service[nrpe2nodexp-check_jenkins_status]
+ ensure => absent
+ monitoring_contact_group => admins
+ override => False
+ monitoring_critical => False
+ restart => False
+ service_params => {}
+ require => Systemd::Unit[nrpe2nodexp-check_jenkins_status.service]
+ unit_type => timer
+ monitoring_enabled => False
+ migration_task => T407130
- File[/etc/sudoers.d/nrpe-check_check_jenkins_status]
- Parameters differences:
--- File[/etc/sudoers.d/nrpe-check_check_jenkins_status].orig
+++ File[/etc/sudoers.d/nrpe-check_check_jenkins_status]
+ ensure => absent
+ owner => root
+ require => Package[nagios-nrpe-server]
+ group => root
- Class[Jenkins]
- Parameters differences:
--- Class[Jenkins].orig
+++ Class[Jenkins]
+ monitoring_notes_url => https://wikitech.wikimedia.org/wiki/Jenkins#How_to
+ monitoring_enabled => True
- Nrpe::Plugin[check_systemd_unit_status]
- Parameters differences:
--- Nrpe::Plugin[check_systemd_unit_status].orig
+++ Nrpe::Plugin[check_systemd_unit_status]
+ ensure => present
+ source => puppet:///modules/systemd/check_systemd_unit_status
- File[/usr/local/lib/nagios/plugins/check_systemd_unit_status]
- Parameters differences:
--- File[/usr/local/lib/nagios/plugins/check_systemd_unit_status].orig
+++ File[/usr/local/lib/nagios/plugins/check_systemd_unit_status]
+ ensure => file
+ tag => nrpe::plugin
+ require => File[/usr/local/lib/nagios/plugins/]
+ source => puppet:///modules/systemd/check_systemd_unit_status
+ owner => root
+ mode => 0555
+ group => root
- File[/var/lib/prometheus/node.d/check_check_jenkins_status.prom]
- Parameters differences:
--- File[/var/lib/prometheus/node.d/check_check_jenkins_status.prom].orig
+++ File[/var/lib/prometheus/node.d/check_check_jenkins_status.prom]
+ ensure => absent
+ owner => root
+ group => root
- Exec[systemd daemon-reload for nrpe2nodexp-check_jenkins_status.timer (nrpe2nodexp-check_jenkins_status.timer)]
- Parameters differences:
--- Exec[systemd daemon-reload for nrpe2nodexp-check_jenkins_status.timer (nrpe2nodexp-check_jenkins_status.timer)].orig
+++ Exec[systemd daemon-reload for nrpe2nodexp-check_jenkins_status.timer (nrpe2nodexp-check_jenkins_status.timer)]
+ refreshonly => True
+ command => /bin/systemctl daemon-reload