# template_openshift_node.yml
  1. ---
  2. g_template_openshift_node:
  3. name: Template Openshift Node
  4. zitems:
  5. - key: openshift.node.process.count
  6. description: Shows number of OpenShift Node processes running
  7. type: int
  8. applications:
  9. - Openshift Node
  10. - key: openshift.node.ovs.pids.count
  11. description: Shows number of ovs process ids running
  12. type: int
  13. applications:
  14. - Openshift Node
  15. - key: openshift.node.ovs.ports.count
  16. description: Shows number of OVS ports defined
  17. type: int
  18. applications:
  19. - Openshift Node
  20. - key: openshift.node.ovs.stray.rules
  21. description: Number of OVS stray rules found/removed
  22. type: int
  23. applications:
  24. - Openshift Node
  25. - key: openshift.node.registry-pods.healthy_pct
  26. description: Shows the percentage of healthy registries in the cluster
  27. type: int
  28. applications:
  29. - Openshift Node
  30. - key: openshift.node.registry.service.ping
  31. description: Ping docker-registry service from node
  32. type: int
  33. applications:
  34. - Openshift Node
  # Triggers evaluated against the items of "Template Openshift Node".
  # Each entry carries a display name, the expression that fires it, a link to
  # the operating procedure, and a priority.
  # NOTE(review): comments below read last(#N)/min(#N)/max(#N) with Zabbix
  # semantics (#N = N-th most recent / last N values); confirm for the
  # consuming tool.
  ztriggers:
    # Fires when the two most recent healthy_pct readings are both below 100.
    - name: 'One or more Docker Registries is unhealthy according to {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#2)}<100 and {Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#1)}<100'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
      priority: avg

    # Fires when the two most recent registry service pings are both below 1.
    - name: 'Docker Registry service is unhealthy according to {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.registry.service.ping.last(#2)}<1 and {Template Openshift Node:openshift.node.registry.service.ping.last(#1)}<1'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
      priority: avg

    # NOTE(review): the URLs of the remaining triggers use 'blob/node' whereas
    # the registry triggers above use 'blob/master'; confirm the branch name
    # is intentional.

    # Fires when the maximum of the last three process counts is below 1,
    # i.e. no node process was seen in any of them.
    - name: 'Openshift Node process not running on {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1'
      url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
      priority: high

    # Fires when the minimum of the last three process counts is above 1,
    # i.e. more than one node process was seen in all of them.
    - name: 'Too many Openshift Node processes running on {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1'
      url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
      priority: high

    # Fires when the two most recent OVS pid counts both differ from 4.
    # NOTE(review): the expected count of 4 is hard-coded here; confirm it
    # matches the OVS deployment being monitored.
    - name: '[Heal] OVS may not be running on {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.ovs.pids.count.last(#1)}<>4 and {Template Openshift Node:openshift.node.ovs.pids.count.last(#2)}<>4'
      url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
      priority: high

    # Fires when the most recent OVS port count is exactly 0.
    - name: 'Number of OVS ports is 0 on {HOST.NAME}'
      expression: '{Template Openshift Node:openshift.node.ovs.ports.count.last()}=0'
      url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'
      priority: high