123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314 |
- ---
- g_template_os_linux:
- name: Template OS Linux
- zitems:
- - key: kernel.uname.sysname
- applications:
- - Kernel
- value_type: string
- - key: kernel.all.cpu.wait.total
- applications:
- - Kernel
- value_type: float
- units: '%'
- - key: kernel.all.cpu.irq.hard
- applications:
- - Kernel
- value_type: float
- units: '%'
- - key: kernel.all.cpu.idle
- applications:
- - Kernel
- value_type: float
- units: '%'
- - key: kernel.uname.distro
- applications:
- - Kernel
- value_type: string
- - key: kernel.uname.nodename
- applications:
- - Kernel
- value_type: string
- - key: kernel.all.cpu.irq.soft
- applications:
- - Kernel
- value_type: float
- units: '%'
- - key: kernel.all.load.15_minute
- applications:
- - Kernel
- value_type: float
- - key: kernel.all.cpu.sys
- applications:
- - Kernel
- value_type: float
- units: '%'
- - key: kernel.all.load.5_minute
- applications:
- - Kernel
- value_type: float
- - key: kernel.all.cpu.nice
- applications:
- - Kernel
- value_type: float
- units: '%'
- - key: kernel.all.load.1_minute
- applications:
- - Kernel
- value_type: float
- - key: kernel.uname.version
- applications:
- - Kernel
- value_type: string
- - key: kernel.all.uptime
- applications:
- - Kernel
- value_type: int
- - key: kernel.all.cpu.user
- applications:
- - Kernel
- value_type: float
- units: '%'
- - key: kernel.uname.machine
- applications:
- - Kernel
- value_type: string
- - key: hinv.ncpu
- applications:
- - Kernel
- value_type: int
- - key: kernel.all.cpu.steal
- applications:
- - Kernel
- value_type: float
- units: '%'
- - key: kernel.all.pswitch
- applications:
- - Kernel
- value_type: int
- - key: kernel.uname.release
- applications:
- - Kernel
- value_type: string
- - key: proc.nprocs
- applications:
- - Kernel
- value_type: int
- # Memory Items
- - key: mem.freemem
- applications:
- - Memory
- value_type: int
- description: "PCP: free system memory metric from /proc/meminfo"
- multiplier: 1024
- units: B
- - key: mem.util.bufmem
- applications:
- - Memory
- value_type: int
- description: "PCP: Memory allocated for buffer_heads.; I/O buffers metric from /proc/meminfo"
- multiplier: 1024
- units: B
- - key: swap.used
- applications:
- - Memory
- value_type: int
- description: "PCP: swap used metric from /proc/meminfo"
- multiplier: 1024
- units: B
- - key: swap.length
- applications:
- - Memory
- value_type: int
- description: "PCP: total swap available metric from /proc/meminfo"
- multiplier: 1024
- units: B
- - key: mem.physmem
- applications:
- - Memory
- value_type: int
- description: "PCP: The value of this metric corresponds to the \"MemTotal\" field reported by /proc/meminfo. Note that this does not necessarily correspond to actual installed physical memory - there may be areas of the physical address space mapped as ROM in various peripheral devices and the bios may be mirroring certain ROMs in RAM."
- multiplier: 1024
- units: B
- - key: swap.free
- applications:
- - Memory
- value_type: int
- description: "PCP: swap free metric from /proc/meminfo"
- multiplier: 1024
- units: B
- - key: mem.util.available
- applications:
- - Memory
- value_type: int
- description: "PCP: The amount of memory that is available for a new workload, without pushing the system into swap. Estimated from MemFree, Active(file), Inactive(file), and SReclaimable, as well as the \"low\" watermarks from /proc/zoneinfo.; available memory from /proc/meminfo"
- multiplier: 1024
- units: B
- - key: mem.util.used
- applications:
- - Memory
- value_type: int
- description: "PCP: Used memory is the difference between mem.physmem and mem.freemem; used memory metric from /proc/meminfo"
- multiplier: 1024
- units: B
- - key: mem.util.cached
- applications:
- - Memory
- value_type: int
- description: "PCP: Memory used by the page cache, including buffered file data. This is in-memory cache for files read from the disk (the pagecache) but doesn't include SwapCached.; page cache metric from /proc/meminfo"
- multiplier: 1024
- units: B
- zdiscoveryrules:
- - name: disc.filesys
- key: disc.filesys
- lifetime: 1
- description: "Dynamically register the filesystems"
- - name: disc.disk
- key: disc.disk
- lifetime: 1
- description: "Dynamically register disks on a node"
- - name: disc.network
- key: disc.network
- lifetime: 1
- description: "Dynamically register network interfaces on a node"
- zitemprototypes:
- - discoveryrule_key: disc.filesys
- name: "disc.filesys.full.{#OSO_FILESYS}"
- key: "disc.filesys.full[{#OSO_FILESYS}]"
- value_type: float
- description: "PCP filesys.full option. This is the percent full returned from pcp filesys.full"
- applications:
- - Disk
- - discoveryrule_key: disc.filesys
- name: "Percentage of used inodes on {#OSO_FILESYS}"
- key: "disc.filesys.inodes.pused[{#OSO_FILESYS}]"
- value_type: float
- description: "PCP derived value of percentage of used inodes on a filesystem."
- applications:
- - Disk
- - discoveryrule_key: disc.disk
- name: "TPS (IOPS) for disk {#OSO_DISK}"
- key: "disc.disk.tps[{#OSO_DISK}]"
- value_type: int
- description: "PCP disk.dev.totals metric measured over a period of time. This shows how many disk transactions per second the disk is using"
- applications:
- - Disk
- - discoveryrule_key: disc.disk
- name: "Percent Utilized for disk {#OSO_DISK}"
- key: "disc.disk.putil[{#OSO_DISK}]"
- value_type: float
- description: "PCP disk.dev.avactive metric measured over a period of time. This is the '%util' in the iostat command"
- applications:
- - Disk
- - discoveryrule_key: disc.network
- name: "Bytes per second IN on network interface {#OSO_NET_INTERFACE}"
- key: "disc.network.in.bytes[{#OSO_NET_INTERFACE}]"
- value_type: int
- units: B
- delta: 1
- description: "PCP network.interface.in.bytes metric. This is setup as a delta in Zabbix to measure the speed per second"
- applications:
- - Network
- - discoveryrule_key: disc.network
- name: "Bytes per second OUT on network interface {#OSO_NET_INTERFACE}"
- key: "disc.network.out.bytes[{#OSO_NET_INTERFACE}]"
- value_type: int
- units: B
- delta: 1
- description: "PCP network.interface.out.bytes metric. This is setup as a delta in Zabbix to measure the speed per second"
- applications:
- - Network
- ztriggerprototypes:
- - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
- expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
- priority: high
- # This has a dependency on the previous trigger
- # Trigger Prototypes do not work in 2.4. They will work in Zabbix 3.0
- - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}'
- expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
- priority: warn
- dependencies:
- - 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}'
- - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
- expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
- priority: high
- # This has a dependency on the previous trigger
- # Trigger Prototypes do not work in 2.4. They will work in Zabbix 3.0
- - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}'
- expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
- priority: warn
- dependencies:
- - 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}'
- ztriggers:
- - name: 'Too many TOTAL processes on {HOST.NAME}'
- expression: '{Template OS Linux:proc.nprocs.last()}>5000'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'
- priority: warn
- - name: 'Lack of available memory on {HOST.NAME}'
- expression: '{Template OS Linux:mem.freemem.last()}<30720000'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc'
- priority: warn
- description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024'
- # CPU Utilization #
- - name: 'CPU idle less than 5% on {HOST.NAME}'
- expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<5'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
- priority: average
- description: 'CPU is less than 5% idle'
- - name: 'CPU idle less than 10% on {HOST.NAME}'
- expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<10'
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc'
- priority: average
- description: 'CPU is less than 10% idle'
- dependencies:
- - 'CPU idle less than 5% on {HOST.NAME}'
|