template_app_zabbix_server.yml 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. ---
  2. g_template_app_zabbix_server:
  3. name: Template App Zabbix Server
  4. zitems:
  5. - key: housekeeper_creates
  6. applications:
  7. - Zabbix server
  8. description: A simple count of the number of partition creates output by the housekeeper script.
  9. units: ''
  10. value_type: int
  11. zabbix_type: 5
  12. - key: housekeeper_drops
  13. applications:
  14. - Zabbix server
  15. description: A simple count of the number of partition drops output by the housekeeper script.
  16. units: ''
  17. value_type: int
  18. zabbix_type: 5
  19. - key: housekeeper_errors
  20. applications:
  21. - Zabbix server
  22. description: A simple count of the number of errors output by the housekeeper script.
  23. units: ''
  24. value_type: int
  25. zabbix_type: 5
  26. - key: housekeeper_total
  27. applications:
  28. - Zabbix server
  29. description: A simple count of the total number of lines output by the housekeeper
  30. script.
  31. units: ''
  32. value_type: int
  33. zabbix_type: 5
  34. - key: zabbix[process,alerter,avg,busy]
  35. applications:
  36. - Zabbix server
  37. description: ''
  38. units: '%'
  39. value_type: float
  40. zabbix_type: 5
  41. - key: zabbix[process,configuration syncer,avg,busy]
  42. applications:
  43. - Zabbix server
  44. description: ''
  45. units: '%'
  46. value_type: float
  47. zabbix_type: 5
  48. - key: zabbix[process,db watchdog,avg,busy]
  49. applications:
  50. - Zabbix server
  51. description: ''
  52. units: '%'
  53. value_type: float
  54. zabbix_type: 5
  55. - key: zabbix[process,discoverer,avg,busy]
  56. applications:
  57. - Zabbix server
  58. description: ''
  59. units: '%'
  60. value_type: float
  61. zabbix_type: 5
  62. - key: zabbix[process,escalator,avg,busy]
  63. applications:
  64. - Zabbix server
  65. description: ''
  66. units: '%'
  67. value_type: float
  68. zabbix_type: 5
  69. - key: zabbix[process,history syncer,avg,busy]
  70. applications:
  71. - Zabbix server
  72. description: ''
  73. units: '%'
  74. value_type: float
  75. zabbix_type: 5
  76. - key: zabbix[process,housekeeper,avg,busy]
  77. applications:
  78. - Zabbix server
  79. description: ''
  80. units: '%'
  81. value_type: float
  82. zabbix_type: 5
  83. - key: zabbix[process,http poller,avg,busy]
  84. applications:
  85. - Zabbix server
  86. description: ''
  87. units: '%'
  88. value_type: float
  89. zabbix_type: 5
  90. - key: zabbix[process,icmp pinger,avg,busy]
  91. applications:
  92. - Zabbix server
  93. description: ''
  94. units: '%'
  95. value_type: float
  96. zabbix_type: 5
  97. - key: zabbix[process,ipmi poller,avg,busy]
  98. applications:
  99. - Zabbix server
  100. description: ''
  101. units: '%'
  102. value_type: float
  103. zabbix_type: 5
  104. - key: zabbix[process,java poller,avg,busy]
  105. applications:
  106. - Zabbix server
  107. description: ''
  108. units: '%'
  109. value_type: float
  110. zabbix_type: 5
  111. - key: zabbix[process,node watcher,avg,busy]
  112. applications:
  113. - Zabbix server
  114. description: ''
  115. units: '%'
  116. value_type: float
  117. zabbix_type: 5
  118. - key: zabbix[process,poller,avg,busy]
  119. applications:
  120. - Zabbix server
  121. description: ''
  122. units: '%'
  123. value_type: float
  124. zabbix_type: 5
  125. - key: zabbix[process,proxy poller,avg,busy]
  126. applications:
  127. - Zabbix server
  128. description: ''
  129. units: '%'
  130. value_type: float
  131. zabbix_type: 5
  132. - key: zabbix[process,self-monitoring,avg,busy]
  133. applications:
  134. - Zabbix server
  135. description: ''
  136. units: '%'
  137. value_type: float
  138. zabbix_type: 5
  139. - key: zabbix[process,snmp trapper,avg,busy]
  140. applications:
  141. - Zabbix server
  142. description: ''
  143. units: '%'
  144. value_type: float
  145. zabbix_type: 5
  146. - key: zabbix[process,timer,avg,busy]
  147. applications:
  148. - Zabbix server
  149. description: ''
  150. units: '%'
  151. value_type: float
  152. zabbix_type: 5
  153. - key: zabbix[process,trapper,avg,busy]
  154. applications:
  155. - Zabbix server
  156. description: ''
  157. units: '%'
  158. value_type: float
  159. zabbix_type: 5
  160. - key: zabbix[process,unreachable poller,avg,busy]
  161. applications:
  162. - Zabbix server
  163. description: ''
  164. units: '%'
  165. value_type: float
  166. zabbix_type: 5
  167. - key: zabbix[queue,10m]
  168. applications:
  169. - Zabbix server
  170. description: ''
  171. units: ''
  172. value_type: int
  173. zabbix_type: 5
  174. interval: 600
  175. - key: zabbix[queue]
  176. applications:
  177. - Zabbix server
  178. description: ''
  179. units: ''
  180. value_type: int
  181. zabbix_type: 5
  182. interval: 600
  183. - key: zabbix[rcache,buffer,pfree]
  184. applications:
  185. - Zabbix server
  186. description: ''
  187. units: ''
  188. value_type: float
  189. zabbix_type: 5
  190. - key: zabbix[wcache,history,pfree]
  191. applications:
  192. - Zabbix server
  193. description: ''
  194. units: ''
  195. value_type: float
  196. zabbix_type: 5
  197. - key: zabbix[wcache,text,pfree]
  198. applications:
  199. - Zabbix server
  200. description: ''
  201. units: ''
  202. value_type: float
  203. zabbix_type: 5
  204. - key: zabbix[wcache,trend,pfree]
  205. applications:
  206. - Zabbix server
  207. description: ''
  208. units: ''
  209. value_type: float
  210. zabbix_type: 5
  211. - key: zabbix[wcache,values]
  212. applications:
  213. - Zabbix server
  214. description: ''
  215. units: ''
  216. value_type: float
  217. zabbix_type: 5
  218. delta: 1 # speed per second
  219. ztriggers:
  220. - description: "There has been unexpected output while running the housekeeping script\
  221. \ on the Zabbix. There are only three kinds of lines we expect to see in the output,\
  222. \ and we've gotten something new.\r\n\r\nCheck the script's output in /var/lib/zabbix/state\
  223. \ for more details."
  224. expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}+{Template App Zabbix Server:housekeeper_creates.last(0)}+{Template App Zabbix Server:housekeeper_drops.last(0)}<>{Template App Zabbix Server:housekeeper_total.last(0)}'
  225. name: Unexpected output in Zabbix DB Housekeeping
  226. priority: avg
  227. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_DB_Housekeeping.asciidoc
  228. - description: An error occurred while running the housekeeping script on the Zabbix. Check the script's output in /var/lib/zabbix/state for more details.
  229. expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}>0'
  230. name: Errors during Zabbix DB Housekeeping
  231. priority: high
  232. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  233. - description: ''
  234. expression: '{Template App Zabbix Server:zabbix[process,alerter,avg,busy].min(600)}>75'
  235. name: Zabbix alerter processes more than 75% busy
  236. priority: avg
  237. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  238. - description: ''
  239. expression: '{Template App Zabbix Server:zabbix[process,configuration syncer,avg,busy].min(600)}>75'
  240. name: Zabbix configuration syncer processes more than 75% busy
  241. priority: avg
  242. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  243. - description: ''
  244. expression: '{Template App Zabbix Server:zabbix[process,db watchdog,avg,busy].min(600)}>75'
  245. name: Zabbix db watchdog processes more than 75% busy
  246. priority: avg
  247. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  248. - description: ''
  249. expression: '{Template App Zabbix Server:zabbix[process,discoverer,avg,busy].min(600)}>75'
  250. name: Zabbix discoverer processes more than 75% busy
  251. priority: avg
  252. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  253. - description: ''
  254. expression: '{Template App Zabbix Server:zabbix[process,escalator,avg,busy].min(600)}>75'
  255. name: Zabbix escalator processes more than 75% busy
  256. priority: avg
  257. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  258. - description: ''
  259. expression: '{Template App Zabbix Server:zabbix[process,history syncer,avg,busy].min(600)}>75'
  260. name: Zabbix history syncer processes more than 75% busy
  261. priority: avg
  262. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  263. - description: ''
  264. expression: '{Template App Zabbix Server:zabbix[process,housekeeper,avg,busy].min(1800)}>75'
  265. name: Zabbix housekeeper processes more than 75% busy
  266. priority: avg
  267. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  268. - description: ''
  269. expression: '{Template App Zabbix Server:zabbix[process,http poller,avg,busy].min(600)}>75'
  270. name: Zabbix http poller processes more than 75% busy
  271. priority: avg
  272. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  273. - description: ''
  274. expression: '{Template App Zabbix Server:zabbix[process,icmp pinger,avg,busy].min(600)}>75'
  275. name: Zabbix icmp pinger processes more than 75% busy
  276. priority: avg
  277. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  278. - description: ''
  279. expression: '{Template App Zabbix Server:zabbix[process,ipmi poller,avg,busy].min(600)}>75'
  280. name: Zabbix ipmi poller processes more than 75% busy
  281. priority: avg
  282. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  283. - description: ''
  284. expression: '{Template App Zabbix Server:zabbix[process,java poller,avg,busy].min(600)}>75'
  285. name: Zabbix java poller processes more than 75% busy
  286. priority: avg
  287. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  288. - description: ''
  289. expression: '{Template App Zabbix Server:zabbix[process,node watcher,avg,busy].min(600)}>75'
  290. name: Zabbix node watcher processes more than 75% busy
  291. priority: avg
  292. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  293. - description: ''
  294. expression: '{Template App Zabbix Server:zabbix[process,poller,avg,busy].min(600)}>75'
  295. name: Zabbix poller processes more than 75% busy
  296. priority: high
  297. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  298. - description: ''
  299. expression: '{Template App Zabbix Server:zabbix[process,proxy poller,avg,busy].min(600)}>75'
  300. name: Zabbix proxy poller processes more than 75% busy
  301. priority: avg
  302. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  303. - description: ''
  304. expression: '{Template App Zabbix Server:zabbix[process,self-monitoring,avg,busy].min(600)}>75'
  305. name: Zabbix self-monitoring processes more than 75% busy
  306. priority: avg
  307. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  308. - description: ''
  309. expression: '{Template App Zabbix Server:zabbix[process,snmp trapper,avg,busy].min(600)}>75'
  310. name: Zabbix snmp trapper processes more than 75% busy
  311. priority: avg
  312. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  313. - description: Timer processes usually are busy because they have to process time
  314. based trigger functions
  315. expression: '{Template App Zabbix Server:zabbix[process,timer,avg,busy].min(600)}>75'
  316. name: Zabbix timer processes more than 75% busy
  317. priority: avg
  318. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  319. - description: ''
  320. expression: '{Template App Zabbix Server:zabbix[process,trapper,avg,busy].min(600)}>75'
  321. name: Zabbix trapper processes more than 75% busy
  322. priority: avg
  323. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  324. - description: ''
  325. expression: '{Template App Zabbix Server:zabbix[process,unreachable poller,avg,busy].min(600)}>75'
  326. name: Zabbix unreachable poller processes more than 75% busy
  327. priority: avg
  328. url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc
  329. - description: "This alert generally indicates a performance problem or a problem\
  330. \ with the zabbix-server or proxy.\r\n\r\nThe first place to check for issues\
  331. \ is Administration > Queue. Be sure to check the general view and the per-proxy\
  332. \ view."
  333. expression: '{Template App Zabbix Server:zabbix[queue,10m].min(600)}>1000'
  334. name: More than 1000 items having missing data for more than 10 minutes
  335. priority: high
  336. url: https://github.com/openshift/ops-sop/blob/master/Alerts/data_lost_overview_plugin.asciidoc
  337. - description: Consider increasing CacheSize in the zabbix_server.conf configuration
  338. file
  339. expression: '{Template App Zabbix Server:zabbix[rcache,buffer,pfree].min(600)}<5'
  340. name: Less than 5% free in the configuration cache
  341. priority: info
  342. url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc
  343. - description: ''
  344. expression: '{Template App Zabbix Server:zabbix[wcache,history,pfree].min(600)}<25'
  345. name: Less than 25% free in the history cache
  346. priority: avg
  347. url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc
  348. - description: ''
  349. expression: '{Template App Zabbix Server:zabbix[wcache,text,pfree].min(600)}<25'
  350. name: Less than 25% free in the text history cache
  351. priority: avg
  352. url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc
  353. - description: ''
  354. expression: '{Template App Zabbix Server:zabbix[wcache,trend,pfree].min(600)}<25'
  355. name: Less than 25% free in the trends cache
  356. priority: avg
  357. url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc