From 12272be1411580a6d2ce76f403e675451c9b480a Mon Sep 17 00:00:00 2001
From: jurgenhaas <juergen@paragon-es.de>
Date: Mon, 27 May 2019 18:01:58 +0200
Subject: [PATCH] Update netdata to v1.15.0 and add httpcheck health alarms

---
 defaults/main.yml               |  2 +-
 tasks/install.yml               |  1 +
 templates/health/httpcheck.conf | 95 +++++++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 1 deletion(-)
 create mode 100644 templates/health/httpcheck.conf

diff --git a/defaults/main.yml b/defaults/main.yml
index e364820..f41a640 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -1,4 +1,4 @@
-netdata_version: 'v1.14.0'
+netdata_version: 'v1.15.0'
 netdata_force_reset: false
 netdata_fluentd_buffer:
   green: 75
diff --git a/tasks/install.yml b/tasks/install.yml
index 689dca5..a2c6f2f 100644
--- a/tasks/install.yml
+++ b/tasks/install.yml
@@ -44,5 +44,6 @@
     dest: '/etc/netdata/health.d/{{ item }}.conf'
   with_items:
     - 'fluentd_buffer'
+    - 'httpcheck'
   notify:
     - "Restart NetData"
diff --git a/templates/health/httpcheck.conf b/templates/health/httpcheck.conf
new file mode 100644
index 0000000..4bb012e
--- /dev/null
+++ b/templates/health/httpcheck.conf
@@ -0,0 +1,95 @@
+template: httpcheck_last_collected_secs
+families: *
+      on: httpcheck.status
+    calc: $now - $last_collected_t
+   every: 10s
+   units: seconds ago
+    warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+    crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+   delay: down 5m multiplier 1.5 max 1h
+    info: number of seconds since the last successful data collection
+      to: sysadmin
+
+# Fast-reacting alarm without notifications (to: silent) for custom dashboards or badges: every 5s it reports the 1-minute success percentage, or 0 when that falls below 75.
+template: web_service_up
+families: *
+      on: httpcheck.status
+  lookup: average -1m unaligned percentage of success
+    calc: ($this < 75) ? (0) : ($this)
+   every: 5s
+   units: up/down
+    info: at least 75% verified responses during last 60 seconds, ideal for badges
+      to: silent
+# Percentage of bad_content results on httpcheck.status, averaged over the last 5 minutes: warning from 10%, critical from 40%.
+template: web_service_bad_content
+families: *
+      on: httpcheck.status
+  lookup: average -5m unaligned percentage of bad_content
+   every: 10s
+   units: %
+    warn: $this >= 10 AND $this < 40
+    crit: $this >= 40
+   delay: down 5m multiplier 1.5 max 1h
+    info: average of unexpected http response content during the last 5 minutes
+      to: webmaster
+# Percentage of bad_status results on httpcheck.status, averaged over the last 5 minutes: warning from 10%, critical from 40%.
+template: web_service_bad_status
+families: *
+      on: httpcheck.status
+  lookup: average -5m unaligned percentage of bad_status
+   every: 10s
+   units: %
+    warn: $this >= 10 AND $this < 40
+    crit: $this >= 40
+   delay: down 5m multiplier 1.5 max 1h
+    info: average of unexpected http status during the last 5 minutes
+      to: webmaster
+# 5-minute average percentage of timeout results; sets no warn/crit of its own and is read as $web_service_timeouts by web_service_unreachable.
+template: web_service_timeouts
+families: *
+      on: httpcheck.status
+  lookup: average -5m unaligned percentage of timeout
+   every: 10s
+   units: %
+    info: average of timeouts during the last 5 minutes
+# 5-minute average percentage of no_connection results; sets no warn/crit of its own and is read as $no_web_service_connections by web_service_unreachable.
+template: no_web_service_connections
+families: *
+      on: httpcheck.status
+  lookup: average -5m unaligned percentage of no_connection
+   every: 10s
+   units: %
+    info: average of failed requests during the last 5 minutes
+
+# Combined timeout & no-connection alarm: the value is the larger of the two 5-minute percentages; warning while either is >= 10 and both are < 40, critical once either is >= 40.
+template: web_service_unreachable
+families: *
+      on: httpcheck.status
+    calc: ($no_web_service_connections >= $web_service_timeouts) ? ($no_web_service_connections) : ($web_service_timeouts)
+   units: %
+   every: 10s
+    warn: ($no_web_service_connections >= 10 OR $web_service_timeouts >= 10) AND ($no_web_service_connections < 40 AND $web_service_timeouts < 40)
+    crit: $no_web_service_connections >= 40 OR $web_service_timeouts >= 40
+   delay: down 5m multiplier 1.5 max 1h
+    info: average of failed requests either due to timeouts or no connection during the last 5 minutes
+      to: webmaster
+# Hourly average of the "time" dimension on httpcheck.responsetime; sets no warn/crit of its own and is read as $1h_web_service_response_time by web_service_slow.
+template: 1h_web_service_response_time
+families: *
+      on: httpcheck.responsetime
+  lookup: average -1h unaligned of time
+   every: 30s
+   units: ms
+    info: average response time over the last hour
+# Compares the 5-minute average response time with the hourly average ($1h_web_service_response_time): warning above 4x, critical above 6x.
+template: web_service_slow
+families: *
+      on: httpcheck.responsetime
+  lookup: average -5m unaligned of time
+   units: ms
+   every: 10s
+    warn: ($this > ($1h_web_service_response_time * 4) )
+    crit: ($this > ($1h_web_service_response_time * 6) )
+    info: average response time over the last 5 minutes, compared to the average over the last hour
+   delay: down 5m multiplier 1.5 max 1h
+      to: webmaster
-- 
GitLab