# template_openshift_master.yml (4.4 KB)
  # Monitoring template "Template Openshift Master": item and trigger definitions.
---
# NOTE(review): the item keys, the {Template:key.func()} expression form and the
# {HOST.NAME} macro look like Zabbix syntax -- confirm against the consumer of
# this variable. The nesting below was restored from a copy whose indentation
# had been lost; verify it against the consumer's expected schema.
g_template_openshift_master:
  name: Template Openshift Master

  # Items collected by this template. Each entry names the application(s) it is
  # grouped under.
  zitems:
    # --- Application: Openshift Master ---
    - name: create_app
      applications:
        - Openshift Master
      key: create_app

    - key: openshift.master.process.count
      description: Shows number of master processes running
      type: int
      applications:
        - Openshift Master

    - key: openshift.master.user.count
      description: Shows number of users in a cluster
      type: int
      applications:
        - Openshift Master

    - key: openshift.master.pod.running.count
      description: Shows number of pods running
      type: int
      applications:
        - Openshift Master

    - key: openshift.project.counter
      description: Shows number of projects on a cluster
      type: int
      applications:
        - Openshift Master

    # --- Application: Openshift Etcd ---
    # One success/fail counter pair per etcd action (create, delete, get, set,
    # update), followed by the watcher count and the ping result.
    - key: openshift.master.etcd.create.success
      description: Show number of successful create actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.create.fail
      description: Show number of failed create actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.delete.success
      description: Show number of successful delete actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.delete.fail
      description: Show number of failed delete actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.get.success
      description: Show number of successful get actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.get.fail
      description: Show number of failed get actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.set.success
      description: Show number of successful set actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.set.fail
      description: Show number of failed set actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.update.success
      description: Show number of successful update actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.update.fail
      description: Show number of failed update actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.watchers
      description: Show number of etcd watchers
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.ping
      description: etcd ping
      type: int
      applications:
        - Openshift Etcd

  # Triggers evaluated over the items above. Each entry carries the alert name,
  # the expression, a link to the operations SOP, and a priority.
  # NOTE(review): the per-trigger notes below assume last(#N) means "the N-th
  # most recent value" and max/min(#N) mean "over the N most recent values" --
  # confirm for the monitoring system version in use.
  ztriggers:
    # The two most recent create_app values are both 1.
    - name: 'Application creation has failed on {HOST.NAME}'
      expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
      priority: avg

    # The highest of the last three process counts is below 1.
    - name: 'Openshift Master process not running on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
      priority: high

    # The lowest of the last three process counts is above 1.
    - name: 'Too many Openshift Master processes running on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.master.process.count.min(#3)}>1'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
      priority: high

    # The most recent user count equals 0.
    - name: 'Number of users for Openshift Master on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.master.user.count.last()}=0'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
      priority: info

    # The most recent project count equals 0.
    - name: 'There are no projects running on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.project.counter.last()}=0'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
      priority: info

    # The two most recent watcher counts are both below 10.
    - name: 'Low number of etcd watchers on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
      priority: avg

    # The two most recent ping values are both 0.
    - name: 'Etcd ping failed on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
      priority: high