|
@@ -52,112 +52,135 @@ g_template_os_linux:
|
|
|
- Kernel
|
|
|
value_type: float
|
|
|
|
|
|
- - key: mem.freemem
|
|
|
+ - key: kernel.all.cpu.nice
|
|
|
applications:
|
|
|
- - Memory
|
|
|
+ - Kernel
|
|
|
value_type: int
|
|
|
|
|
|
- - key: kernel.all.cpu.nice
|
|
|
+ - key: kernel.all.load.1_minute
|
|
|
applications:
|
|
|
- Kernel
|
|
|
- value_type: int
|
|
|
+ value_type: float
|
|
|
|
|
|
- - key: mem.util.bufmem
|
|
|
+ - key: kernel.uname.version
|
|
|
applications:
|
|
|
- - Memory
|
|
|
- value_type: int
|
|
|
+ - Kernel
|
|
|
+ value_type: string
|
|
|
|
|
|
- - key: swap.used
|
|
|
+ - key: kernel.all.uptime
|
|
|
applications:
|
|
|
- - Memory
|
|
|
+ - Kernel
|
|
|
value_type: int
|
|
|
|
|
|
- - key: kernel.all.load.1_minute
|
|
|
+ - key: kernel.all.cpu.user
|
|
|
applications:
|
|
|
- Kernel
|
|
|
- value_type: float
|
|
|
+ value_type: int
|
|
|
|
|
|
- - key: kernel.uname.version
|
|
|
+ - key: kernel.uname.machine
|
|
|
applications:
|
|
|
- Kernel
|
|
|
value_type: string
|
|
|
|
|
|
- - key: swap.length
|
|
|
+ - key: hinv.ncpu
|
|
|
applications:
|
|
|
- - Memory
|
|
|
+ - Kernel
|
|
|
value_type: int
|
|
|
|
|
|
- - key: mem.physmem
|
|
|
+ - key: kernel.all.cpu.steal
|
|
|
applications:
|
|
|
- - Memory
|
|
|
+ - Kernel
|
|
|
value_type: int
|
|
|
|
|
|
- - key: kernel.all.uptime
|
|
|
+ - key: kernel.all.pswitch
|
|
|
applications:
|
|
|
- Kernel
|
|
|
value_type: int
|
|
|
|
|
|
- - key: swap.free
|
|
|
+ - key: kernel.uname.release
|
|
|
applications:
|
|
|
- - Memory
|
|
|
- value_type: int
|
|
|
+ - Kernel
|
|
|
+ value_type: string
|
|
|
|
|
|
- - key: mem.util.available
|
|
|
+ - key: proc.nprocs
|
|
|
applications:
|
|
|
- - Memory
|
|
|
+ - Kernel
|
|
|
value_type: int
|
|
|
|
|
|
- - key: mem.util.used
|
|
|
+ # Memory Items
|
|
|
+ - key: mem.freemem
|
|
|
applications:
|
|
|
- Memory
|
|
|
value_type: int
|
|
|
- description: used memory
|
|
|
+ description: "PCP: free system memory metric from /proc/meminfo"
|
|
|
multiplier: 1024
|
|
|
units: B
|
|
|
|
|
|
- - key: kernel.all.cpu.user
|
|
|
+ - key: mem.util.bufmem
|
|
|
applications:
|
|
|
- - Kernel
|
|
|
+ - Memory
|
|
|
value_type: int
|
|
|
+ description: "PCP: Memory allocated for buffer_heads.; I/O buffers metric from /proc/meminfo"
|
|
|
+ multiplier: 1024
|
|
|
+ units: B
|
|
|
|
|
|
- - key: kernel.uname.machine
|
|
|
+ - key: swap.used
|
|
|
applications:
|
|
|
- - Kernel
|
|
|
- value_type: string
|
|
|
+ - Memory
|
|
|
+ value_type: int
|
|
|
+ description: "PCP: swap used metric from /proc/meminfo"
|
|
|
+ multiplier: 1024
|
|
|
+ units: B
|
|
|
|
|
|
- - key: hinv.ncpu
|
|
|
+ - key: swap.length
|
|
|
applications:
|
|
|
- - Kernel
|
|
|
+ - Memory
|
|
|
value_type: int
|
|
|
+ description: "PCP: total swap available metric from /proc/meminfo"
|
|
|
+ multiplier: 1024
|
|
|
+ units: B
|
|
|
|
|
|
- - key: mem.util.cached
|
|
|
+ - key: mem.physmem
|
|
|
applications:
|
|
|
- Memory
|
|
|
value_type: int
|
|
|
- description: cached memory
|
|
|
+ description: "PCP: The value of this metric corresponds to the \"MemTotal\" field reported by /proc/meminfo. Note that this does not necessarily correspond to actual installed physical memory - there may be areas of the physical address space mapped as ROM in various peripheral devices and the bios may be mirroring certain ROMs in RAM."
|
|
|
multiplier: 1024
|
|
|
units: B
|
|
|
|
|
|
- - key: kernel.all.cpu.steal
|
|
|
+ - key: swap.free
|
|
|
applications:
|
|
|
- - Kernel
|
|
|
+ - Memory
|
|
|
value_type: int
|
|
|
+ description: "PCP: swap free metric from /proc/meminfo"
|
|
|
+ multiplier: 1024
|
|
|
+ units: B
|
|
|
|
|
|
- - key: kernel.all.pswitch
|
|
|
+ - key: mem.util.available
|
|
|
applications:
|
|
|
- - Kernel
|
|
|
+ - Memory
|
|
|
value_type: int
|
|
|
+ description: "PCP: The amount of memory that is available for a new workload, without pushing the system into swap. Estimated from MemFree, Active(file), Inactive(file), and SReclaimable, as well as the \"low\" watermarks from /proc/zoneinfo.; available memory from /proc/meminfo"
|
|
|
+ multiplier: 1024
|
|
|
+ units: B
|
|
|
|
|
|
- - key: kernel.uname.release
|
|
|
+ - key: mem.util.used
|
|
|
applications:
|
|
|
- - Kernel
|
|
|
- value_type: string
|
|
|
+ - Memory
|
|
|
+ value_type: int
|
|
|
+ description: "PCP: Used memory is the difference between mem.physmem and mem.freemem; used memory metric from /proc/meminfo"
|
|
|
+ multiplier: 1024
|
|
|
+ units: B
|
|
|
|
|
|
- - key: proc.nprocs
|
|
|
+ - key: mem.util.cached
|
|
|
applications:
|
|
|
- - Kernel
|
|
|
+ - Memory
|
|
|
value_type: int
|
|
|
+ description: "PCP: Memory used by the page cache, including buffered file data. This is in-memory cache for files read from the disk (the pagecache) but doesn't include SwapCached.; page cache metric from /proc/meminfo"
|
|
|
+ multiplier: 1024
|
|
|
+ units: B
|
|
|
|
|
|
+ # Disk items
|
|
|
- key: filesys.full.xvda2
|
|
|
applications:
|
|
|
- Disk
|
|
@@ -169,32 +192,33 @@ g_template_os_linux:
|
|
|
value_type: float
|
|
|
|
|
|
ztriggers:
|
|
|
- - description: 'Filesystem: / has less than 10% free on {HOST.NAME}'
|
|
|
+ - name: 'Filesystem: / has less than 10% free on {HOST.NAME}'
|
|
|
expression: '{Template OS Linux:filesys.full.xvda2.last()}>90'
|
|
|
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
|
|
|
priority: warn
|
|
|
|
|
|
- - description: 'Filesystem: / has less than 5% free on {HOST.NAME}'
|
|
|
+ - name: 'Filesystem: / has less than 5% free on {HOST.NAME}'
|
|
|
expression: '{Template OS Linux:filesys.full.xvda2.last()}>95'
|
|
|
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
|
|
|
priority: high
|
|
|
|
|
|
- - description: 'Filesystem: /var has less than 10% free on {HOST.NAME}'
|
|
|
+ - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}'
|
|
|
expression: '{Template OS Linux:filesys.full.xvda3.last()}>90'
|
|
|
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
|
|
|
priority: warn
|
|
|
|
|
|
- - description: 'Filesystem: /var has less than 5% free on {HOST.NAME}'
|
|
|
+ - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}'
|
|
|
expression: '{Template OS Linux:filesys.full.xvda3.last()}>95'
|
|
|
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
|
|
|
priority: high
|
|
|
|
|
|
- - description: 'Too many TOTAL processes on {HOST.NAME}'
|
|
|
+ - name: 'Too many TOTAL processes on {HOST.NAME}'
|
|
|
expression: '{Template OS Linux:proc.nprocs.last()}>5000'
|
|
|
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'
|
|
|
priority: warn
|
|
|
|
|
|
- - description: 'Lack of available memory on {HOST.NAME}'
|
|
|
- expression: '{Template OS Linux:mem.freemem.last()}<3000'
|
|
|
+ - name: 'Lack of available memory on {HOST.NAME}'
|
|
|
+ expression: '{Template OS Linux:mem.freemem.last()}<30720000'
|
|
|
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc'
|
|
|
priority: warn
|
|
|
+ description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024'
|