0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-17 03:02:41 +00:00

go.d zfspool: collect vdev health state ()

This commit is contained in:
Ilya Mashchenko 2024-08-21 12:52:28 +03:00 committed by GitHub
parent 57308513d2
commit 622f19235b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 771 additions and 189 deletions

View file

@ -10,22 +10,44 @@ import (
)
const (
prioZpoolSpaceUtilization = 2820 + iota
prioZpoolHealthState = 2820 + iota
prioVdevHealthState
prioZpoolSpaceUtilization
prioZpoolSpaceUsage
prioZpoolFragmentation
prioZpoolHealthState
)
var zpoolChartsTmpl = module.Charts{
zpoolHealthStateChartTmpl.Copy(),
zpoolSpaceUtilizationChartTmpl.Copy(),
zpoolSpaceUsageChartTmpl.Copy(),
zpoolFragmentationChartTmpl.Copy(),
zpoolHealthStateChartTmpl.Copy(),
}
var (
zpoolHealthStateChartTmpl = module.Chart{
ID: "zfspool_%s_health_state",
Title: "Zpool health state",
Units: "state",
Fam: "health",
Ctx: "zfspool.pool_health_state",
Type: module.Line,
Priority: prioZpoolHealthState,
Dims: module.Dims{
{ID: "zpool_%s_health_state_online", Name: "online"},
{ID: "zpool_%s_health_state_degraded", Name: "degraded"},
{ID: "zpool_%s_health_state_faulted", Name: "faulted"},
{ID: "zpool_%s_health_state_offline", Name: "offline"},
{ID: "zpool_%s_health_state_unavail", Name: "unavail"},
{ID: "zpool_%s_health_state_removed", Name: "removed"},
{ID: "zpool_%s_health_state_suspended", Name: "suspended"},
},
}
zpoolSpaceUtilizationChartTmpl = module.Chart{
ID: "zfspool_%s_space_utilization",
Title: "Zpool space utilization",
@ -64,23 +86,29 @@ var (
{ID: "zpool_%s_frag", Name: "fragmentation"},
},
}
)
zpoolHealthStateChartTmpl = module.Chart{
ID: "zfspool_%s_health_state",
Title: "Zpool health state",
var vdevChartsTmpl = module.Charts{
vdevHealthStateChartTmpl.Copy(),
}
var (
vdevHealthStateChartTmpl = module.Chart{
ID: "vdev_%s_health_state",
Title: "Zpool Vdev health state",
Units: "state",
Fam: "health",
Ctx: "zfspool.pool_health_state",
Ctx: "zfspool.vdev_health_state",
Type: module.Line,
Priority: prioZpoolHealthState,
Priority: prioVdevHealthState,
Dims: module.Dims{
{ID: "zpool_%s_health_state_online", Name: "online"},
{ID: "zpool_%s_health_state_degraded", Name: "degraded"},
{ID: "zpool_%s_health_state_faulted", Name: "faulted"},
{ID: "zpool_%s_health_state_offline", Name: "offline"},
{ID: "zpool_%s_health_state_unavail", Name: "unavail"},
{ID: "zpool_%s_health_state_removed", Name: "removed"},
{ID: "zpool_%s_health_state_suspended", Name: "suspended"},
{ID: "vdev_%s_health_state_online", Name: "online"},
{ID: "vdev_%s_health_state_degraded", Name: "degraded"},
{ID: "vdev_%s_health_state_faulted", Name: "faulted"},
{ID: "vdev_%s_health_state_offline", Name: "offline"},
{ID: "vdev_%s_health_state_unavail", Name: "unavail"},
{ID: "vdev_%s_health_state_removed", Name: "removed"},
{ID: "vdev_%s_health_state_suspended", Name: "suspended"},
},
}
)
@ -104,8 +132,35 @@ func (z *ZFSPool) addZpoolCharts(name string) {
}
func (z *ZFSPool) removeZpoolCharts(name string) {
px := fmt.Sprintf("zpool_%s_", name)
px := fmt.Sprintf("zfspool_%s_", name)
z.removeCharts(px)
}
// addVdevCharts instantiates the vdev chart templates for the given
// pool/vdev pair and registers the resulting charts with the module.
func (z *ZFSPool) addVdevCharts(pool, vdev string) {
	charts := vdevChartsTmpl.Copy()

	for _, c := range *charts {
		// Chart IDs must not contain dots, so the vdev path is cleaned;
		// dimension IDs keep the raw vdev path because metric keys are
		// built from it during collection.
		c.ID = fmt.Sprintf(c.ID, cleanVdev(vdev))
		c.Labels = []module.Label{
			{Key: "pool", Value: pool},
			{Key: "vdev", Value: vdev},
		}
		for _, d := range c.Dims {
			d.ID = fmt.Sprintf(d.ID, vdev)
		}
	}

	if err := z.Charts().Add(*charts...); err != nil {
		z.Warning(err)
	}
}
// removeVdevCharts marks every chart belonging to the given vdev for removal.
func (z *ZFSPool) removeVdevCharts(vdev string) {
	z.removeCharts("vdev_" + cleanVdev(vdev) + "_")
}
func (z *ZFSPool) removeCharts(px string) {
for _, chart := range *z.Charts() {
if strings.HasPrefix(chart.ID, px) {
chart.MarkRemove()
@ -113,3 +168,8 @@ func (z *ZFSPool) removeZpoolCharts(name string) {
}
}
}
// cleanVdev sanitizes a vdev path for use in chart IDs by replacing
// dots (not allowed in chart IDs) with underscores.
func cleanVdev(vdev string) string {
	return strings.ReplaceAll(vdev, ".", "_")
}

View file

@ -2,14 +2,6 @@
package zfspool
import (
"bufio"
"bytes"
"fmt"
"strconv"
"strings"
)
var zpoolHealthStates = []string{
"online",
"degraded",
@ -20,158 +12,16 @@ var zpoolHealthStates = []string{
"suspended",
}
type zpoolStats struct {
name string
sizeBytes string
allocBytes string
freeBytes string
fragPerc string
capPerc string
dedupRatio string
health string
}
func (z *ZFSPool) collect() (map[string]int64, error) {
bs, err := z.exec.list()
if err != nil {
return nil, err
}
zpools, err := parseZpoolListOutput(bs)
if err != nil {
return nil, err
}
mx := make(map[string]int64)
z.collectZpoolListStats(mx, zpools)
if err := z.collectZpoolList(mx); err != nil {
return nil, err
}
if err := z.collectZpoolListVdev(mx); err != nil {
return mx, err
}
return mx, nil
}
func (z *ZFSPool) collectZpoolListStats(mx map[string]int64, zpools []zpoolStats) {
seen := make(map[string]bool)
for _, zpool := range zpools {
seen[zpool.name] = true
if !z.zpools[zpool.name] {
z.addZpoolCharts(zpool.name)
z.zpools[zpool.name] = true
}
px := "zpool_" + zpool.name + "_"
if v, ok := parseInt(zpool.sizeBytes); ok {
mx[px+"size"] = v
}
if v, ok := parseInt(zpool.freeBytes); ok {
mx[px+"free"] = v
}
if v, ok := parseInt(zpool.allocBytes); ok {
mx[px+"alloc"] = v
}
if v, ok := parseFloat(zpool.capPerc); ok {
mx[px+"cap"] = int64(v)
}
if v, ok := parseFloat(zpool.fragPerc); ok {
mx[px+"frag"] = int64(v)
}
for _, s := range zpoolHealthStates {
mx[px+"health_state_"+s] = 0
}
mx[px+"health_state_"+zpool.health] = 1
}
for name := range z.zpools {
if !seen[name] {
z.removeZpoolCharts(name)
delete(z.zpools, name)
}
}
}
func parseZpoolListOutput(bs []byte) ([]zpoolStats, error) {
var lines []string
sc := bufio.NewScanner(bytes.NewReader(bs))
for sc.Scan() {
if text := strings.TrimSpace(sc.Text()); text != "" {
lines = append(lines, text)
}
}
if len(lines) < 2 {
return nil, fmt.Errorf("unexpected data: wanted >= 2 lines, got %d", len(lines))
}
headers := strings.Fields(lines[0])
if len(headers) == 0 {
return nil, fmt.Errorf("unexpected data: missing headers")
}
var zpools []zpoolStats
/*
# zpool list -p
NAME SIZE ALLOC FREE EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
rpool 21367462298 9051643576 12240656794 - 33 42 1.00 ONLINE -
zion - - - - - - - FAULTED -
*/
for _, line := range lines[1:] {
values := strings.Fields(line)
if len(values) != len(headers) {
return nil, fmt.Errorf("unequal columns: headers(%d) != values(%d)", len(headers), len(values))
}
var zpool zpoolStats
for i, v := range values {
v = strings.TrimSpace(v)
switch strings.ToLower(headers[i]) {
case "name":
zpool.name = v
case "size":
zpool.sizeBytes = v
case "alloc":
zpool.allocBytes = v
case "free":
zpool.freeBytes = v
case "frag":
zpool.fragPerc = v
case "cap":
zpool.capPerc = v
case "dedup":
zpool.dedupRatio = v
case "health":
zpool.health = strings.ToLower(v)
}
if last := i+1 == len(headers); last && zpool.name != "" && zpool.health != "" {
zpools = append(zpools, zpool)
}
}
}
if len(zpools) == 0 {
return nil, fmt.Errorf("unexpected data: missing pools")
}
return zpools, nil
}
func parseInt(s string) (int64, bool) {
if s == "-" {
return 0, false
}
v, err := strconv.ParseInt(s, 10, 64)
return v, err == nil
}
func parseFloat(s string) (float64, bool) {
if s == "-" {
return 0, false
}
v, err := strconv.ParseFloat(s, 64)
return v, err == nil
}

View file

@ -0,0 +1,160 @@
// SPDX-License-Identifier: GPL-3.0-or-later
package zfspool
import (
"bufio"
"bytes"
"errors"
"fmt"
"strconv"
"strings"
)
// zpoolEntry holds one row of `zpool list -p` output.
// Numeric columns are kept as raw strings; "-" marks a value that is
// not applicable (e.g. for a faulted/unavailable pool).
type zpoolEntry struct {
	name       string // NAME column (pool name)
	sizeBytes  string // SIZE column
	allocBytes string // ALLOC column
	freeBytes  string // FREE column
	fragPerc   string // FRAG column (percentage)
	capPerc    string // CAP column (percentage)
	dedupRatio string // DEDUP column
	health     string // lowercased HEALTH column (online, degraded, ...)
}
// collectZpoolList runs `zpool list -p`, parses its output and fills mx
// with per-pool space and health metrics. It also keeps the set of seen
// pools in sync: charts are added for newly discovered pools and removed
// for pools that disappeared.
func (z *ZFSPool) collectZpoolList(mx map[string]int64) error {
	bs, err := z.exec.list()
	if err != nil {
		return err
	}

	zpools, err := parseZpoolListOutput(bs)
	if err != nil {
		return fmt.Errorf("bad zpool list output: %v", err)
	}

	seen := make(map[string]bool)

	for _, p := range zpools {
		seen[p.name] = true

		if !z.seenZpools[p.name] {
			z.addZpoolCharts(p.name)
			z.seenZpools[p.name] = true
		}

		px := "zpool_" + p.name + "_"

		// byte-exact columns: "-" (not applicable) is skipped by the parsers
		setInt := func(key, raw string) {
			if v, ok := parseInt(raw); ok {
				mx[px+key] = v
			}
		}
		// percentage columns: parsed as float, stored truncated
		setPerc := func(key, raw string) {
			if v, ok := parseFloat(raw); ok {
				mx[px+key] = int64(v)
			}
		}

		setInt("size", p.sizeBytes)
		setInt("free", p.freeBytes)
		setInt("alloc", p.allocBytes)
		setPerc("cap", p.capPerc)
		setPerc("frag", p.fragPerc)

		// one-hot encode the health state
		for _, state := range zpoolHealthStates {
			mx[px+"health_state_"+state] = 0
		}
		mx[px+"health_state_"+p.health] = 1
	}

	for name := range z.seenZpools {
		if seen[name] {
			continue
		}
		z.removeZpoolCharts(name)
		delete(z.seenZpools, name)
	}

	return nil
}
// parseZpoolListOutput parses `zpool list -p` output into one entry per pool.
//
// Expected format (values may be "-" when not applicable):
//
//	NAME   SIZE         ALLOC       FREE         EXPANDSZ  FRAG  CAP  DEDUP  HEALTH   ALTROOT
//	rpool  21367462298  9051643576  12240656794  -         33    42   1.00   ONLINE   -
//	zion   -            -           -            -         -     -    -      FAULTED  -
//
// It returns an error if the header line is missing, a row has a different
// number of columns than the header, or no pools are found.
func parseZpoolListOutput(bs []byte) ([]zpoolEntry, error) {
	var headers []string
	var zpools []zpoolEntry

	sc := bufio.NewScanner(bytes.NewReader(bs))

	for sc.Scan() {
		line := strings.TrimSpace(sc.Text())
		if line == "" {
			continue
		}

		if len(headers) == 0 {
			if !strings.HasPrefix(line, "NAME") {
				return nil, fmt.Errorf("missing headers (line '%s')", line)
			}
			headers = strings.Fields(line)
			continue
		}

		values := strings.Fields(line)
		if len(values) != len(headers) {
			return nil, fmt.Errorf("unequal columns: headers(%d) != values(%d)", len(headers), len(values))
		}

		var zpool zpoolEntry

		for i, v := range values {
			// values come from strings.Fields, so no extra trimming is needed
			switch strings.ToLower(headers[i]) {
			case "name":
				zpool.name = v
			case "size":
				zpool.sizeBytes = v
			case "alloc":
				zpool.allocBytes = v
			case "free":
				zpool.freeBytes = v
			case "frag":
				zpool.fragPerc = v
			case "cap":
				zpool.capPerc = v
			case "dedup":
				zpool.dedupRatio = v
			case "health":
				zpool.health = strings.ToLower(v)
			}
		}

		if zpool.name != "" && zpool.health != "" {
			zpools = append(zpools, zpool)
		}
	}

	// surface scanner errors (e.g. a line exceeding the scanner's buffer)
	// instead of silently returning a truncated result
	if err := sc.Err(); err != nil {
		return nil, fmt.Errorf("scanning output: %v", err)
	}

	if len(zpools) == 0 {
		return nil, errors.New("no pools found")
	}

	return zpools, nil
}
// parseInt converts a zpool numeric column into an int64.
// A "-" value means the column is not applicable; the second return
// value reports whether the conversion succeeded.
func parseInt(s string) (int64, bool) {
	if s == "-" {
		return 0, false
	}
	v, err := strconv.ParseInt(s, 10, 64)
	if err != nil {
		return 0, false
	}
	return v, true
}
// parseFloat converts a zpool percentage/ratio column into a float64.
// A "-" value means the column is not applicable; the second return
// value reports whether the conversion succeeded.
func parseFloat(s string) (float64, bool) {
	if s == "-" {
		return 0, false
	}
	v, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0, false
	}
	return v, true
}

View file

@ -0,0 +1,138 @@
// SPDX-License-Identifier: GPL-3.0-or-later
package zfspool
import (
"bufio"
"bytes"
"fmt"
"strings"
)
// vdevEntry is a single data row of `zpool list -v` output: the pool
// itself, a grouping row (e.g. logs, cache) or a concrete virtual device.
type vdevEntry struct {
	name   string // NAME column as printed (without leading indentation)
	vdev   string // The full path of the vdev within the zpool hierarchy.
	health string // lowercased HEALTH column; "-" for grouping rows
	// Represents the nesting level of the vdev within the zpool hierarchy, based on indentation.
	// A level of -1 indicates the root vdev (the pool itself).
	level int
}
// collectZpoolListVdev runs `zpool list -v` for every known pool, fills mx
// with per-vdev health metrics and keeps vdev charts in sync: charts are
// added for newly discovered vdevs and removed for vdevs that disappeared.
func (z *ZFSPool) collectZpoolListVdev(mx map[string]int64) error {
	seen := make(map[string]bool)

	for pool := range z.seenZpools {
		bs, err := z.exec.listWithVdev(pool)
		if err != nil {
			return err
		}

		vdevs, err := parseZpoolListVdevOutput(bs)
		if err != nil {
			return fmt.Errorf("bad zpool list vdev output (pool '%s'): %v", pool, err)
		}

		for _, v := range vdevs {
			// grouping rows (logs, cache, ...) carry no real health value
			if v.health == "" || v.health == "-" {
				continue
			}

			seen[v.vdev] = true

			if !z.seenVdevs[v.vdev] {
				z.seenVdevs[v.vdev] = true
				z.addVdevCharts(pool, v.vdev)
			}

			px := fmt.Sprintf("vdev_%s_", v.vdev)

			// one-hot encode the health state
			for _, state := range zpoolHealthStates {
				mx[px+"health_state_"+state] = 0
			}
			mx[px+"health_state_"+v.health] = 1
		}
	}

	for name := range z.seenVdevs {
		if seen[name] {
			continue
		}
		z.removeVdevCharts(name)
		delete(z.seenVdevs, name)
	}

	return nil
}
// parseZpoolListVdevOutput parses `zpool list -p -v <pool>` output into a
// flat list of vdev entries. The first row is the pool itself and is
// excluded from the result. Each entry's vdev field is its full
// slash-separated path within the pool hierarchy
// (e.g. "rpool/mirror-0/sdc2"), reconstructed from row indentation.
func parseZpoolListVdevOutput(bs []byte) ([]vdevEntry, error) {
	var headers []string
	var vdevs []vdevEntry

	sc := bufio.NewScanner(bytes.NewReader(bs))

	for sc.Scan() {
		line := sc.Text()
		if line == "" {
			continue
		}

		if len(headers) == 0 {
			if !strings.HasPrefix(line, "NAME") {
				return nil, fmt.Errorf("missing headers (line '%s')", line)
			}
			headers = strings.Fields(line)
			continue
		}

		values := strings.Fields(line)
		// grouping rows (logs/cache) may have fewer columns than the header
		if len(values) == 0 || len(values) > len(headers) {
			return nil, fmt.Errorf("unexpected columns: headers(%d) values(%d) (line '%s')", len(headers), len(values), line)
		}

		vdev := vdevEntry{
			// indentation depth encodes nesting within the pool
			level: len(line) - len(strings.TrimLeft(line, " ")),
		}

		for i, v := range values {
			switch strings.ToLower(headers[i]) {
			case "name":
				vdev.name = v
			case "health":
				vdev.health = strings.ToLower(v)
			}
		}

		if vdev.name != "" {
			if len(vdevs) == 0 {
				vdev.level = -1 // Pool
			}
			vdevs = append(vdevs, vdev)
		}
	}

	// surface scanner errors (e.g. a line exceeding the scanner's buffer)
	// instead of silently returning a truncated result
	if err := sc.Err(); err != nil {
		return nil, fmt.Errorf("scanning output: %v", err)
	}

	// set parent/child relationships: each entry's parent is the nearest
	// preceding entry with a smaller indentation level
	for i := range vdevs {
		v := &vdevs[i]

		if i == 0 {
			v.vdev = v.name
			continue
		}

		for j := i - 1; j >= 0; j-- {
			if vdevs[j].level < v.level {
				v.vdev = fmt.Sprintf("%s/%s", vdevs[j].vdev, v.name)
				break
			}
		}
		if v.vdev == "" {
			return nil, fmt.Errorf("no parent for vdev '%s'", v.name)
		}
	}

	// the first entry is the pool itself; at least one real vdev must follow
	if len(vdevs) < 2 {
		return nil, fmt.Errorf("no vdevs found")
	}

	return vdevs[1:], nil
}

View file

@ -39,3 +39,18 @@ func (e *zpoolCLIExec) list() ([]byte, error) {
return bs, nil
}
// listWithVdev executes `zpool list -p -v -L <pool>` with the configured
// timeout and returns its raw stdout. The -v flag is what adds the
// per-vdev rows consumed by parseZpoolListVdevOutput; -p requests exact
// (machine-parsable) numbers. NOTE(review): -L presumably resolves device
// symlinks to base names — confirm against zpool(8).
func (e *zpoolCLIExec) listWithVdev(pool string) ([]byte, error) {
	ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, e.binPath, "list", "-p", "-v", "-L", pool)
	e.Debugf("executing '%s'", cmd)

	bs, err := cmd.Output()
	if err != nil {
		// the command is included in the error for easier troubleshooting
		return nil, fmt.Errorf("error on '%s': %v", cmd, err)
	}

	return bs, nil
}

View file

@ -92,6 +92,10 @@ modules:
metric: zfspool.pool_health_state
info: "ZFS pool ${label:pool} state is faulted or unavail"
link: https://github.com/netdata/netdata/blob/master/src/health/health.d/zfs.conf
- name: zfs_vdev_health_state
metric: zfspool.vdev_health_state
info: "ZFS vdev ${label:vdev} state is faulted or degraded"
link: https://github.com/netdata/netdata/blob/master/src/health/health.d/zfs.conf
metrics:
folding:
title: Metrics
@ -136,3 +140,23 @@ modules:
- name: unavail
- name: removed
- name: suspended
- name: zfs pool vdev
description: These metrics refer to the ZFS pool virtual device.
labels:
- name: pool
description: Zpool name
- name: vdev
description: Unique identifier for a virtual device (vdev) within a ZFS pool.
metrics:
- name: zfspool.vdev_health_state
description: Zpool Vdev health state
unit: 'state'
chart_type: line
dimensions:
- name: online
- name: degraded
- name: faulted
- name: offline
- name: unavail
- name: removed
- name: suspended

View file

@ -0,0 +1,12 @@
NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
rpool 9981503995904 3046188658688 6935315337216 - - 9 30 1.00 DEGRADED -
mirror-0 9981503995904 3046188658688 6935315337216 - - 9 30 - ONLINE
sdc2 9998683602944 - - - - - - - ONLINE
sdd2 9998683602944 - - - - - - - ONLINE
logs - - - - - - - - -
mirror-1 17716740096 393216 17716346880 - - 0 0 - DEGRADED
sdb1 17951621120 - - - - - - - ONLINE
14807975228228307538 - - - - - - - - UNAVAIL
cache - - - - - - - - -
sdb2 99000254464 98755866624 239665152 - - 0 99 - ONLINE
wwn-0x500151795954c095-part2 - - - - - - - - UNAVAIL

View file

@ -0,0 +1,5 @@
NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
rpool 3985729650688 1647130456064 2338599194624 - - 55 41 1.00 ONLINE -
mirror-0 3985729650688 1647130456064 2338599194624 - - 55 41 - ONLINE
nvme2n1p3 4000249020416 - - - - - - - ONLINE
nvme0n1p3 4000249020416 - - - - - - - ONLINE

View file

@ -31,8 +31,9 @@ func New() *ZFSPool {
BinaryPath: "/usr/bin/zpool",
Timeout: web.Duration(time.Second * 2),
},
charts: &module.Charts{},
zpools: make(map[string]bool),
charts: &module.Charts{},
seenZpools: make(map[string]bool),
seenVdevs: make(map[string]bool),
}
}
@ -51,10 +52,12 @@ type (
exec zpoolCLI
zpools map[string]bool
seenZpools map[string]bool
seenVdevs map[string]bool
}
zpoolCLI interface {
list() ([]byte, error)
listWithVdev(pool string) ([]byte, error)
}
)

View file

@ -5,6 +5,7 @@ package zfspool
import (
"errors"
"os"
"strings"
"testing"
"github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module"
@ -17,7 +18,9 @@ var (
dataConfigJSON, _ = os.ReadFile("testdata/config.json")
dataConfigYAML, _ = os.ReadFile("testdata/config.yaml")
dataZpoolList, _ = os.ReadFile("testdata/zpool-list.txt")
dataZpoolList, _ = os.ReadFile("testdata/zpool-list.txt")
dataZpoolListWithVdev, _ = os.ReadFile("testdata/zpool-list-vdev.txt")
dataZpoolListWithVdevLogsCache, _ = os.ReadFile("testdata/zpool-list-vdev-logs-cache.txt")
)
func Test_testDataIsValid(t *testing.T) {
@ -25,7 +28,9 @@ func Test_testDataIsValid(t *testing.T) {
"dataConfigJSON": dataConfigJSON,
"dataConfigYAML": dataConfigYAML,
"dataZpoolList": dataZpoolList,
"dataZpoolList": dataZpoolList,
"dataZpoolListWithVdev": dataZpoolListWithVdev,
"dataZpoolListWithVdevLogsCache": dataZpoolListWithVdevLogsCache,
} {
require.NotNil(t, data, name)
@ -81,7 +86,7 @@ func TestZFSPool_Cleanup(t *testing.T) {
"after check": {
prepare: func() *ZFSPool {
zp := New()
zp.exec = prepareMockOK()
zp.exec = prepareMockOk()
_ = zp.Check()
return zp
},
@ -89,7 +94,7 @@ func TestZFSPool_Cleanup(t *testing.T) {
"after collect": {
prepare: func() *ZFSPool {
zp := New()
zp.exec = prepareMockOK()
zp.exec = prepareMockOk()
_ = zp.Collect()
return zp
},
@ -115,7 +120,7 @@ func TestZFSPool_Check(t *testing.T) {
wantFail bool
}{
"success case": {
prepareMock: prepareMockOK,
prepareMock: prepareMockOk,
wantFail: false,
},
"error on list call": {
@ -153,8 +158,186 @@ func TestZFSPool_Collect(t *testing.T) {
wantMetrics map[string]int64
}{
"success case": {
prepareMock: prepareMockOK,
prepareMock: prepareMockOk,
wantMetrics: map[string]int64{
"vdev_rpool/mirror-0/nvme0n1p3_health_state_degraded": 0,
"vdev_rpool/mirror-0/nvme0n1p3_health_state_faulted": 0,
"vdev_rpool/mirror-0/nvme0n1p3_health_state_offline": 0,
"vdev_rpool/mirror-0/nvme0n1p3_health_state_online": 1,
"vdev_rpool/mirror-0/nvme0n1p3_health_state_removed": 0,
"vdev_rpool/mirror-0/nvme0n1p3_health_state_suspended": 0,
"vdev_rpool/mirror-0/nvme0n1p3_health_state_unavail": 0,
"vdev_rpool/mirror-0/nvme2n1p3_health_state_degraded": 0,
"vdev_rpool/mirror-0/nvme2n1p3_health_state_faulted": 0,
"vdev_rpool/mirror-0/nvme2n1p3_health_state_offline": 0,
"vdev_rpool/mirror-0/nvme2n1p3_health_state_online": 1,
"vdev_rpool/mirror-0/nvme2n1p3_health_state_removed": 0,
"vdev_rpool/mirror-0/nvme2n1p3_health_state_suspended": 0,
"vdev_rpool/mirror-0/nvme2n1p3_health_state_unavail": 0,
"vdev_rpool/mirror-0_health_state_degraded": 0,
"vdev_rpool/mirror-0_health_state_faulted": 0,
"vdev_rpool/mirror-0_health_state_offline": 0,
"vdev_rpool/mirror-0_health_state_online": 1,
"vdev_rpool/mirror-0_health_state_removed": 0,
"vdev_rpool/mirror-0_health_state_suspended": 0,
"vdev_rpool/mirror-0_health_state_unavail": 0,
"vdev_zion/mirror-0/nvme0n1p3_health_state_degraded": 0,
"vdev_zion/mirror-0/nvme0n1p3_health_state_faulted": 0,
"vdev_zion/mirror-0/nvme0n1p3_health_state_offline": 0,
"vdev_zion/mirror-0/nvme0n1p3_health_state_online": 1,
"vdev_zion/mirror-0/nvme0n1p3_health_state_removed": 0,
"vdev_zion/mirror-0/nvme0n1p3_health_state_suspended": 0,
"vdev_zion/mirror-0/nvme0n1p3_health_state_unavail": 0,
"vdev_zion/mirror-0/nvme2n1p3_health_state_degraded": 0,
"vdev_zion/mirror-0/nvme2n1p3_health_state_faulted": 0,
"vdev_zion/mirror-0/nvme2n1p3_health_state_offline": 0,
"vdev_zion/mirror-0/nvme2n1p3_health_state_online": 1,
"vdev_zion/mirror-0/nvme2n1p3_health_state_removed": 0,
"vdev_zion/mirror-0/nvme2n1p3_health_state_suspended": 0,
"vdev_zion/mirror-0/nvme2n1p3_health_state_unavail": 0,
"vdev_zion/mirror-0_health_state_degraded": 0,
"vdev_zion/mirror-0_health_state_faulted": 0,
"vdev_zion/mirror-0_health_state_offline": 0,
"vdev_zion/mirror-0_health_state_online": 1,
"vdev_zion/mirror-0_health_state_removed": 0,
"vdev_zion/mirror-0_health_state_suspended": 0,
"vdev_zion/mirror-0_health_state_unavail": 0,
"zpool_rpool_alloc": 9051643576,
"zpool_rpool_cap": 42,
"zpool_rpool_frag": 33,
"zpool_rpool_free": 12240656794,
"zpool_rpool_health_state_degraded": 0,
"zpool_rpool_health_state_faulted": 0,
"zpool_rpool_health_state_offline": 0,
"zpool_rpool_health_state_online": 1,
"zpool_rpool_health_state_removed": 0,
"zpool_rpool_health_state_suspended": 0,
"zpool_rpool_health_state_unavail": 0,
"zpool_rpool_size": 21367462298,
"zpool_zion_health_state_degraded": 0,
"zpool_zion_health_state_faulted": 1,
"zpool_zion_health_state_offline": 0,
"zpool_zion_health_state_online": 0,
"zpool_zion_health_state_removed": 0,
"zpool_zion_health_state_suspended": 0,
"zpool_zion_health_state_unavail": 0,
},
},
"success case vdev logs and cache": {
prepareMock: prepareMockOkVdevLogsCache,
wantMetrics: map[string]int64{
"vdev_rpool/cache/sdb2_health_state_degraded": 0,
"vdev_rpool/cache/sdb2_health_state_faulted": 0,
"vdev_rpool/cache/sdb2_health_state_offline": 0,
"vdev_rpool/cache/sdb2_health_state_online": 1,
"vdev_rpool/cache/sdb2_health_state_removed": 0,
"vdev_rpool/cache/sdb2_health_state_suspended": 0,
"vdev_rpool/cache/sdb2_health_state_unavail": 0,
"vdev_rpool/cache/wwn-0x500151795954c095-part2_health_state_degraded": 0,
"vdev_rpool/cache/wwn-0x500151795954c095-part2_health_state_faulted": 0,
"vdev_rpool/cache/wwn-0x500151795954c095-part2_health_state_offline": 0,
"vdev_rpool/cache/wwn-0x500151795954c095-part2_health_state_online": 0,
"vdev_rpool/cache/wwn-0x500151795954c095-part2_health_state_removed": 0,
"vdev_rpool/cache/wwn-0x500151795954c095-part2_health_state_suspended": 0,
"vdev_rpool/cache/wwn-0x500151795954c095-part2_health_state_unavail": 1,
"vdev_rpool/logs/mirror-1/14807975228228307538_health_state_degraded": 0,
"vdev_rpool/logs/mirror-1/14807975228228307538_health_state_faulted": 0,
"vdev_rpool/logs/mirror-1/14807975228228307538_health_state_offline": 0,
"vdev_rpool/logs/mirror-1/14807975228228307538_health_state_online": 0,
"vdev_rpool/logs/mirror-1/14807975228228307538_health_state_removed": 0,
"vdev_rpool/logs/mirror-1/14807975228228307538_health_state_suspended": 0,
"vdev_rpool/logs/mirror-1/14807975228228307538_health_state_unavail": 1,
"vdev_rpool/logs/mirror-1/sdb1_health_state_degraded": 0,
"vdev_rpool/logs/mirror-1/sdb1_health_state_faulted": 0,
"vdev_rpool/logs/mirror-1/sdb1_health_state_offline": 0,
"vdev_rpool/logs/mirror-1/sdb1_health_state_online": 1,
"vdev_rpool/logs/mirror-1/sdb1_health_state_removed": 0,
"vdev_rpool/logs/mirror-1/sdb1_health_state_suspended": 0,
"vdev_rpool/logs/mirror-1/sdb1_health_state_unavail": 0,
"vdev_rpool/logs/mirror-1_health_state_degraded": 1,
"vdev_rpool/logs/mirror-1_health_state_faulted": 0,
"vdev_rpool/logs/mirror-1_health_state_offline": 0,
"vdev_rpool/logs/mirror-1_health_state_online": 0,
"vdev_rpool/logs/mirror-1_health_state_removed": 0,
"vdev_rpool/logs/mirror-1_health_state_suspended": 0,
"vdev_rpool/logs/mirror-1_health_state_unavail": 0,
"vdev_rpool/mirror-0/sdc2_health_state_degraded": 0,
"vdev_rpool/mirror-0/sdc2_health_state_faulted": 0,
"vdev_rpool/mirror-0/sdc2_health_state_offline": 0,
"vdev_rpool/mirror-0/sdc2_health_state_online": 1,
"vdev_rpool/mirror-0/sdc2_health_state_removed": 0,
"vdev_rpool/mirror-0/sdc2_health_state_suspended": 0,
"vdev_rpool/mirror-0/sdc2_health_state_unavail": 0,
"vdev_rpool/mirror-0/sdd2_health_state_degraded": 0,
"vdev_rpool/mirror-0/sdd2_health_state_faulted": 0,
"vdev_rpool/mirror-0/sdd2_health_state_offline": 0,
"vdev_rpool/mirror-0/sdd2_health_state_online": 1,
"vdev_rpool/mirror-0/sdd2_health_state_removed": 0,
"vdev_rpool/mirror-0/sdd2_health_state_suspended": 0,
"vdev_rpool/mirror-0/sdd2_health_state_unavail": 0,
"vdev_rpool/mirror-0_health_state_degraded": 0,
"vdev_rpool/mirror-0_health_state_faulted": 0,
"vdev_rpool/mirror-0_health_state_offline": 0,
"vdev_rpool/mirror-0_health_state_online": 1,
"vdev_rpool/mirror-0_health_state_removed": 0,
"vdev_rpool/mirror-0_health_state_suspended": 0,
"vdev_rpool/mirror-0_health_state_unavail": 0,
"vdev_zion/cache/sdb2_health_state_degraded": 0,
"vdev_zion/cache/sdb2_health_state_faulted": 0,
"vdev_zion/cache/sdb2_health_state_offline": 0,
"vdev_zion/cache/sdb2_health_state_online": 1,
"vdev_zion/cache/sdb2_health_state_removed": 0,
"vdev_zion/cache/sdb2_health_state_suspended": 0,
"vdev_zion/cache/sdb2_health_state_unavail": 0,
"vdev_zion/cache/wwn-0x500151795954c095-part2_health_state_degraded": 0,
"vdev_zion/cache/wwn-0x500151795954c095-part2_health_state_faulted": 0,
"vdev_zion/cache/wwn-0x500151795954c095-part2_health_state_offline": 0,
"vdev_zion/cache/wwn-0x500151795954c095-part2_health_state_online": 0,
"vdev_zion/cache/wwn-0x500151795954c095-part2_health_state_removed": 0,
"vdev_zion/cache/wwn-0x500151795954c095-part2_health_state_suspended": 0,
"vdev_zion/cache/wwn-0x500151795954c095-part2_health_state_unavail": 1,
"vdev_zion/logs/mirror-1/14807975228228307538_health_state_degraded": 0,
"vdev_zion/logs/mirror-1/14807975228228307538_health_state_faulted": 0,
"vdev_zion/logs/mirror-1/14807975228228307538_health_state_offline": 0,
"vdev_zion/logs/mirror-1/14807975228228307538_health_state_online": 0,
"vdev_zion/logs/mirror-1/14807975228228307538_health_state_removed": 0,
"vdev_zion/logs/mirror-1/14807975228228307538_health_state_suspended": 0,
"vdev_zion/logs/mirror-1/14807975228228307538_health_state_unavail": 1,
"vdev_zion/logs/mirror-1/sdb1_health_state_degraded": 0,
"vdev_zion/logs/mirror-1/sdb1_health_state_faulted": 0,
"vdev_zion/logs/mirror-1/sdb1_health_state_offline": 0,
"vdev_zion/logs/mirror-1/sdb1_health_state_online": 1,
"vdev_zion/logs/mirror-1/sdb1_health_state_removed": 0,
"vdev_zion/logs/mirror-1/sdb1_health_state_suspended": 0,
"vdev_zion/logs/mirror-1/sdb1_health_state_unavail": 0,
"vdev_zion/logs/mirror-1_health_state_degraded": 1,
"vdev_zion/logs/mirror-1_health_state_faulted": 0,
"vdev_zion/logs/mirror-1_health_state_offline": 0,
"vdev_zion/logs/mirror-1_health_state_online": 0,
"vdev_zion/logs/mirror-1_health_state_removed": 0,
"vdev_zion/logs/mirror-1_health_state_suspended": 0,
"vdev_zion/logs/mirror-1_health_state_unavail": 0,
"vdev_zion/mirror-0/sdc2_health_state_degraded": 0,
"vdev_zion/mirror-0/sdc2_health_state_faulted": 0,
"vdev_zion/mirror-0/sdc2_health_state_offline": 0,
"vdev_zion/mirror-0/sdc2_health_state_online": 1,
"vdev_zion/mirror-0/sdc2_health_state_removed": 0,
"vdev_zion/mirror-0/sdc2_health_state_suspended": 0,
"vdev_zion/mirror-0/sdc2_health_state_unavail": 0,
"vdev_zion/mirror-0/sdd2_health_state_degraded": 0,
"vdev_zion/mirror-0/sdd2_health_state_faulted": 0,
"vdev_zion/mirror-0/sdd2_health_state_offline": 0,
"vdev_zion/mirror-0/sdd2_health_state_online": 1,
"vdev_zion/mirror-0/sdd2_health_state_removed": 0,
"vdev_zion/mirror-0/sdd2_health_state_suspended": 0,
"vdev_zion/mirror-0/sdd2_health_state_unavail": 0,
"vdev_zion/mirror-0_health_state_degraded": 0,
"vdev_zion/mirror-0_health_state_faulted": 0,
"vdev_zion/mirror-0_health_state_offline": 0,
"vdev_zion/mirror-0_health_state_online": 1,
"vdev_zion/mirror-0_health_state_removed": 0,
"vdev_zion/mirror-0_health_state_suspended": 0,
"vdev_zion/mirror-0_health_state_unavail": 0,
"zpool_rpool_alloc": 9051643576,
"zpool_rpool_cap": 42,
"zpool_rpool_frag": 33,
@ -199,16 +382,125 @@ func TestZFSPool_Collect(t *testing.T) {
mx := zp.Collect()
assert.Equal(t, test.wantMetrics, mx)
if len(test.wantMetrics) > 0 {
assert.Len(t, *zp.Charts(), len(zpoolChartsTmpl)*len(zp.zpools))
want := len(zpoolChartsTmpl)*len(zp.seenZpools) + len(vdevChartsTmpl)*len(zp.seenVdevs)
assert.Len(t, *zp.Charts(), want, "want charts")
module.TestMetricsHasAllChartsDimsSkip(t, zp.Charts(), mx, func(chart *module.Chart) bool {
return strings.HasPrefix(chart.ID, "zfspool_zion") && !strings.HasSuffix(chart.ID, "health_state")
})
}
})
}
}
func prepareMockOK() *mockZpoolCLIExec {
func TestZFSPool_parseZpoolListDevOutput(t *testing.T) {
tests := map[string]struct {
input string
want []vdevEntry
}{
"": {
input: `
NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
store 9981503995904 3046188658688 6935315337216 - - 9 30 1.00 DEGRADED -
mirror-0 9981503995904 3046188658688 6935315337216 - - 9 30 - ONLINE
sdc2 9998683602944 - - - - - - - ONLINE
sdd2 9998683602944 - - - - - - - ONLINE
logs - - - - - - - - -
mirror-1 17716740096 393216 17716346880 - - 0 0 - DEGRADED
sdb1 17951621120 - - - - - - - ONLINE
14807975228228307538 - - - - - - - - UNAVAIL
cache - - - - - - - - -
sdb2 99000254464 98755866624 239665152 - - 0 99 - ONLINE
wwn-0x500151795954c095-part2 - - - - - - - - UNAVAIL
`,
want: []vdevEntry{
{
name: "mirror-0",
health: "online",
vdev: "store/mirror-0",
level: 2,
},
{
name: "sdc2",
health: "online",
vdev: "store/mirror-0/sdc2",
level: 4,
},
{
name: "sdd2",
health: "online",
vdev: "store/mirror-0/sdd2",
level: 4,
},
{
name: "logs",
health: "-",
vdev: "store/logs",
level: 0,
},
{
name: "mirror-1",
health: "degraded",
vdev: "store/logs/mirror-1",
level: 2,
},
{
name: "sdb1",
health: "online",
vdev: "store/logs/mirror-1/sdb1",
level: 4,
},
{
name: "14807975228228307538",
health: "unavail",
vdev: "store/logs/mirror-1/14807975228228307538",
level: 4,
},
{
name: "cache",
health: "-",
vdev: "store/cache",
level: 0,
},
{
name: "sdb2",
health: "online",
vdev: "store/cache/sdb2",
level: 2,
},
{
name: "wwn-0x500151795954c095-part2",
health: "unavail",
vdev: "store/cache/wwn-0x500151795954c095-part2",
level: 2,
},
},
},
}
for name, test := range tests {
t.Run(name, func(t *testing.T) {
v, err := parseZpoolListVdevOutput([]byte(test.input))
require.NoError(t, err)
assert.Equal(t, test.want, v)
})
}
}
func prepareMockOk() *mockZpoolCLIExec {
return &mockZpoolCLIExec{
listData: dataZpoolList,
listData: dataZpoolList,
listWithVdevData: dataZpoolListWithVdev,
}
}
// prepareMockOkVdevLogsCache returns a mock zpool CLI whose vdev listing
// fixture includes logs and cache sections
// (testdata/zpool-list-vdev-logs-cache.txt).
func prepareMockOkVdevLogsCache() *mockZpoolCLIExec {
	return &mockZpoolCLIExec{
		listData:         dataZpoolList,
		listWithVdevData: dataZpoolListWithVdevLogsCache,
	}
}
@ -233,8 +525,9 @@ Fusce et felis pulvinar, posuere sem non, porttitor eros.
}
type mockZpoolCLIExec struct {
errOnList bool
listData []byte
errOnList bool
listData []byte
listWithVdevData []byte
}
func (m *mockZpoolCLIExec) list() ([]byte, error) {
@ -244,3 +537,10 @@ func (m *mockZpoolCLIExec) list() ([]byte, error) {
return m.listData, nil
}
// listWithVdev returns the canned vdev listing with the fixture's pool
// name ("rpool") substituted once by the requested pool name, so the same
// fixture can serve multiple pools.
func (m *mockZpoolCLIExec) listWithVdev(pool string) ([]byte, error) {
	s := string(m.listWithVdevData)
	s = strings.Replace(s, "rpool", pool, 1)

	return []byte(s), nil
}

View file

@ -67,7 +67,7 @@ component: File system
type: System
component: File system
calc: $degraded
units: boolean
units: status
every: 10s
warn: $this > 0
delay: down 1m multiplier 1.5 max 1h
@ -81,10 +81,25 @@ component: File system
type: System
component: File system
calc: $faulted + $unavail
units: boolean
units: status
every: 10s
crit: $this > 0
delay: down 1m multiplier 1.5 max 1h
summary: Critical ZFS pool ${label:pool} state
info: ZFS pool ${label:pool} state is faulted or unavail
to: sysadmin
template: zfs_vdev_health_state
on: zfspool.vdev_health_state
class: Errors
type: System
component: File system
calc: $degraded + $faulted
units: status
every: 10s
warn: $this > 0
delay: down 1m multiplier 1.5 max 1h
summary: ZFS vdev ${label:vdev} pool ${label:pool} state
info: ZFS vdev ${label:vdev} state is faulted or degraded
to: sysadmin