template_os_linux.yml 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. ---
  2. g_template_os_linux:
  3. name: Template OS Linux
  # Regular (non-discovered) items of the template.
  # Keys appear to be PCP metric names -- confirm against the PCP collector.
  zitems:
  # Kernel Items
  - key: kernel.uname.sysname
    value_type: string
    applications:
    - Kernel
  - key: kernel.all.cpu.wait.total
    value_type: float
    units: '%'
    applications:
    - Kernel
  - key: kernel.all.cpu.irq.hard
    value_type: float
    units: '%'
    applications:
    - Kernel
  - key: kernel.all.cpu.idle
    value_type: float
    units: '%'
    applications:
    - Kernel
  - key: kernel.uname.distro
    value_type: string
    applications:
    - Kernel
  - key: kernel.uname.nodename
    value_type: string
    applications:
    - Kernel
  - key: kernel.all.cpu.irq.soft
    value_type: float
    units: '%'
    applications:
    - Kernel
  - key: kernel.all.load.15_minute
    value_type: float
    applications:
    - Kernel
  - key: kernel.all.cpu.sys
    value_type: float
    units: '%'
    applications:
    - Kernel
  - key: kernel.all.load.5_minute
    value_type: float
    applications:
    - Kernel
  - key: kernel.all.cpu.nice
    value_type: float
    units: '%'
    applications:
    - Kernel
  - key: kernel.all.load.1_minute
    value_type: float
    applications:
    - Kernel
  - key: kernel.uname.version
    value_type: string
    applications:
    - Kernel
  - key: kernel.all.uptime
    value_type: int
    applications:
    - Kernel
  - key: kernel.all.cpu.user
    value_type: float
    units: '%'
    applications:
    - Kernel
  - key: kernel.uname.machine
    value_type: string
    applications:
    - Kernel
  - key: hinv.ncpu
    value_type: int
    applications:
    - Kernel
  - key: kernel.all.cpu.steal
    value_type: float
    units: '%'
    applications:
    - Kernel
  - key: kernel.all.pswitch
    value_type: int
    applications:
    - Kernel
  - key: kernel.uname.release
    value_type: string
    applications:
    - Kernel
  # Used by the "Too many TOTAL processes" trigger.
  - key: proc.nprocs
    value_type: int
    applications:
    - Kernel
  # Memory Items
  # Every item here is scaled by 1024 and reported in bytes; presumably the
  # raw values arrive in KB (the memory trigger description computes
  # "30000 KB x 1024") -- confirm against the collector.
  - key: mem.freemem
    value_type: int
    units: B
    multiplier: 1024
    description: 'PCP: free system memory metric from /proc/meminfo'
    applications:
    - Memory
  - key: mem.util.bufmem
    value_type: int
    units: B
    multiplier: 1024
    description: >-
      PCP: Memory allocated for buffer_heads.; I/O buffers metric from
      /proc/meminfo
    applications:
    - Memory
  - key: swap.used
    value_type: int
    units: B
    multiplier: 1024
    description: 'PCP: swap used metric from /proc/meminfo'
    applications:
    - Memory
  - key: swap.length
    value_type: int
    units: B
    multiplier: 1024
    description: 'PCP: total swap available metric from /proc/meminfo'
    applications:
    - Memory
  - key: mem.physmem
    value_type: int
    units: B
    multiplier: 1024
    description: >-
      PCP: The value of this metric corresponds to the "MemTotal" field
      reported by /proc/meminfo. Note that this does not necessarily
      correspond to actual installed physical memory - there may be areas of
      the physical address space mapped as ROM in various peripheral devices
      and the bios may be mirroring certain ROMs in RAM.
    applications:
    - Memory
  - key: swap.free
    value_type: int
    units: B
    multiplier: 1024
    description: 'PCP: swap free metric from /proc/meminfo'
    applications:
    - Memory
  - key: mem.util.available
    value_type: int
    units: B
    multiplier: 1024
    description: >-
      PCP: The amount of memory that is available for a new workload, without
      pushing the system into swap. Estimated from MemFree, Active(file),
      Inactive(file), and SReclaimable, as well as the "low" watermarks from
      /proc/zoneinfo.; available memory from /proc/meminfo
    applications:
    - Memory
  - key: mem.util.used
    value_type: int
    units: B
    multiplier: 1024
    description: >-
      PCP: Used memory is the difference between mem.physmem and mem.freemem;
      used memory metric from /proc/meminfo
    applications:
    - Memory
  - key: mem.util.cached
    value_type: int
    units: B
    multiplier: 1024
    description: >-
      PCP: Memory used by the page cache, including buffered file data. This
      is in-memory cache for files read from the disk (the pagecache) but
      doesn't include SwapCached.; page cache metric from /proc/meminfo
    applications:
    - Memory
  # Discovery rules. Each rule's key is referenced by the item prototypes
  # through their discoveryrule_key field.
  # lifetime: presumably the number of days a no-longer-discovered entity is
  # kept before removal -- confirm against the consuming Zabbix module.
  zdiscoveryrules:
  # Filesystems
  - name: disc.filesys
    key: disc.filesys
    lifetime: 1
    description: 'Dynamically register the filesystems'
  # Block devices
  - name: disc.disk
    key: disc.disk
    lifetime: 1
    description: 'Dynamically register disks on a node'
  # Network interfaces
  - name: disc.network
    key: disc.network
    lifetime: 1
    description: 'Dynamically register network interfaces on a node'
  # Item prototypes. Each one is bound to a discovery rule through
  # discoveryrule_key and is parameterized by that rule's {#OSO_*} macro.
  zitemprototypes:
  # Filesystem prototypes (the filesystem trigger prototypes use these keys).
  - discoveryrule_key: disc.filesys
    name: 'disc.filesys.full.{#OSO_FILESYS}'
    key: 'disc.filesys.full[{#OSO_FILESYS}]'
    value_type: float
    description: >-
      PCP filesys.full option. This is the percent full returned from pcp
      filesys.full
    applications:
    - Disk
  - discoveryrule_key: disc.filesys
    name: 'Percentage of used inodes on {#OSO_FILESYS}'
    key: 'disc.filesys.inodes.pused[{#OSO_FILESYS}]'
    value_type: float
    description: 'PCP derived value of percentage of used inodes on a filesystem.'
    applications:
    - Disk
  # Disk prototypes.
  - discoveryrule_key: disc.disk
    name: 'TPS (IOPS) for disk {#OSO_DISK}'
    key: 'disc.disk.tps[{#OSO_DISK}]'
    value_type: int
    description: >-
      PCP disk.dev.totals metric measured over a period of time. This shows
      how many disk transactions per second the disk is using
    applications:
    - Disk
  - discoveryrule_key: disc.disk
    name: 'Percent Utilized for disk {#OSO_DISK}'
    key: 'disc.disk.putil[{#OSO_DISK}]'
    value_type: float
    description: >-
      PCP disk.dev.avactive metric measured over a period of time. This is
      the '%util' in the iostat command
    applications:
    - Disk
  # Network prototypes. "delta: 1" stores the per-second rate of the raw
  # counter, as the descriptions below state.
  - discoveryrule_key: disc.network
    name: 'Bytes per second IN on network interface {#OSO_NET_INTERFACE}'
    key: 'disc.network.in.bytes[{#OSO_NET_INTERFACE}]'
    value_type: int
    units: B
    delta: 1
    description: >-
      PCP network.interface.in.bytes metric. This is setup as a delta in
      Zabbix to measure the speed per second
    applications:
    - Network
  - discoveryrule_key: disc.network
    name: 'Bytes per second OUT on network interface {#OSO_NET_INTERFACE}'
    key: 'disc.network.out.bytes[{#OSO_NET_INTERFACE}]'
    value_type: int
    units: B
    delta: 1
    description: >-
      PCP network.interface.out.bytes metric. This is setup as a delta in
      Zabbix to measure the speed per second
    applications:
    - Network
  # Trigger prototypes for discovered filesystems. The thresholds are on the
  # "percent used" items, so ">90" used is the "less than 10% free" alert.
  # Order matters: a trigger listed under "dependencies" is defined before
  # the trigger that depends on it.
  ztriggerprototypes:
  - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
    priority: high
    expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
  # The next trigger depends on the previous (high priority) one.
  # Dependencies between trigger prototypes do not work in Zabbix 2.4;
  # they will work in Zabbix 3.0.
  - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
    priority: warn
    expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
    dependencies:
    - 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
  - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
    priority: high
    expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
  # The next trigger depends on the previous (high priority) one.
  # Dependencies between trigger prototypes do not work in Zabbix 2.4;
  # they will work in Zabbix 3.0.
  - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}'
    priority: warn
    expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
    dependencies:
    - 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
  # Host-level triggers on the regular items of this template.
  ztriggers:
  - name: 'Too many TOTAL processes on {HOST.NAME}'
    priority: warn
    expression: '{Template OS Linux:proc.nprocs.last()}>5000'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'
  # Threshold is in bytes: 30000 KB x 1024 = 30720000.
  # NOTE(review): the name says "available" memory but the expression reads
  # mem.freemem, not mem.util.available -- confirm this is intended.
  - name: 'Lack of available memory on {HOST.NAME}'
    priority: warn
    expression: '{Template OS Linux:mem.freemem.last()}<30720000'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc'
    description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024'
  # CPU Utilization #
  # max(#5) takes the highest of the last five samples, so these fire only
  # when all five samples are below the threshold.
  - name: 'CPU idle less than 5% on {HOST.NAME}'
    priority: average
    expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<5'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
    description: 'CPU is less than 5% idle'
  # Depends on the 5% trigger above.
  # NOTE(review): both CPU triggers share priority "average", unlike the
  # high/warn pairs used for filesystems -- confirm this is intended.
  - name: 'CPU idle less than 10% on {HOST.NAME}'
    priority: average
    expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<10'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
    description: 'CPU is less than 10% idle'
    dependencies:
    - 'CPU idle less than 5% on {HOST.NAME}'