mirror of
https://github.com/netdata/netdata.git
synced 2025-04-27 22:26:21 +00:00
adaptec_raid python module (#4429)
* adaptec_raid module init version * adaptec_raid minor * adaptec_raid minor * adaptec_raid minor * adaptec_raid arcconf command fix * adaptec_raid minor fixes * adaptec_raid add alarms * adaptec_raid add link to screenshot to the readme
This commit is contained in:
parent
c0c5318100
commit
b85833f081
7 changed files with 370 additions and 0 deletions
collectors/python.d.plugin
health
|
@ -37,6 +37,7 @@ dist_pythonconfig_DATA = \
|
|||
$(top_srcdir)/installer/.keep \
|
||||
$(NULL)
|
||||
|
||||
include adaptec_raid/Makefile.inc
|
||||
include apache/Makefile.inc
|
||||
include beanstalk/Makefile.inc
|
||||
include bind_rndc/Makefile.inc
|
||||
|
|
13
collectors/python.d.plugin/adaptec_raid/Makefile.inc
Normal file
13
collectors/python.d.plugin/adaptec_raid/Makefile.inc
Normal file
|
@ -0,0 +1,13 @@
|
|||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
# THIS IS NOT A COMPLETE Makefile
|
||||
# IT IS INCLUDED BY ITS PARENT'S Makefile.am
|
||||
# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT
|
||||
|
||||
# install these files
|
||||
dist_python_DATA += adaptec_raid/adaptec_raid.chart.py
|
||||
dist_pythonconfig_DATA += adaptec_raid/adaptec_raid.conf
|
||||
|
||||
# do not install these files, but include them in the distribution
|
||||
dist_noinst_DATA += adaptec_raid/README.md adaptec_raid/Makefile.inc
|
||||
|
27
collectors/python.d.plugin/adaptec_raid/README.md
Normal file
27
collectors/python.d.plugin/adaptec_raid/README.md
Normal file
|
@ -0,0 +1,27 @@
|
|||
# adaptec raid
|
||||
|
||||
This module collects health metrics for logical and physical devices.
|
||||
|
||||
**Requirements:**
|
||||
* The `netdata` user must be able to run the `arcconf` program via `sudo` without a password
|
||||
|
||||
To grab stats it executes:
|
||||
* `sudo -n arcconf GETCONFIG 1 LD`
|
||||
* `sudo -n arcconf GETCONFIG 1 PD`
|
||||
|
||||
|
||||
It produces:
|
||||
|
||||
1. **Logical Device Status**
|
||||
|
||||
2. **Physical Device State**
|
||||
|
||||
3. **Physical Device S.M.A.R.T warnings**
|
||||
|
||||
4. **Physical Device Temperature**
|
||||
|
||||
Screenshot:
|
||||
|
||||

|
||||
|
||||
---
|
245
collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py
Normal file
245
collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py
Normal file
|
@ -0,0 +1,245 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Description: adaptec_raid netdata python.d module
|
||||
# Author: Ilya Mashchenko (l2isbad)
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
|
||||
import re
|
||||
|
||||
from copy import deepcopy
|
||||
|
||||
from bases.FrameworkServices.ExecutableService import ExecutableService
|
||||
from bases.collection import find_binary
|
||||
|
||||
|
||||
# Default data collection frequency for this module
# (see "update_every" in adaptec_raid.conf).
update_every = 5

# Chart ids; assigned to Service.order.
ORDER = ['ld_status', 'pd_state', 'pd_smart_warnings', 'pd_temperature']

# Chart definitions. Every chart starts without dimensions ('lines' is empty);
# Service.update_charts() appends one dimension per discovered device.
#
# NOTE: the chart contexts are spelled 'adapter_raid.*' (not 'adaptec_raid.*').
# The templates in health/health.d/adaptec_raid.conf are attached to
# 'adapter_raid.ld_status' and 'adapter_raid.pd_state', so the spelling can
# only be changed in both places at the same time.
CHARTS = {
    'ld_status': {
        'options': [
            None, 'Status Is Not OK', 'bool', 'logical devices', 'adapter_raid.ld_status', 'line',
        ],
        'lines': [],
    },
    'pd_state': {
        'options': [
            None, 'State Is Not OK', 'bool', 'physical devices', 'adapter_raid.pd_state', 'line',
        ],
        'lines': [],
    },
    'pd_smart_warnings': {
        'options': [
            None, 'S.M.A.R.T warnings', 'count', 'physical devices', 'adapter_raid.smart_warnings', 'line',
        ],
        'lines': [],
    },
    'pd_temperature': {
        'options': [
            None, 'Temperature', 'celsius', 'physical devices', 'adapter_raid.temperature', 'line',
        ],
        'lines': [],
    },
}

# Names of the binaries that Service.check() looks up with find_binary().
SUDO = 'sudo'
ARCCONF = 'arcconf'

# Logical device statuses that LD.data() reports as "not OK" (value 1).
BAD_LD_STATUS = ('Degraded', 'Failed')

# Physical device states considered healthy; PD.data() reports every other
# state as "not OK" (value 1).
GOOD_PD_STATUS = ('Online',)

# Captures (logical device number, status word) pairs. No DOTALL flag is set,
# which is why find_lds() flattens the output into one line before matching.
RE_LD = re.compile(
    r'Logical device number\s+([0-9]+).*?'
    r'Status of logical device\s+: ([a-zA-Z]+)'
)
|
||||
|
||||
|
||||
def find_lds(d):
    """Parse logical devices out of the logical-device report.

    :param d: iterable of text rows (Service.get_lds() passes the output of
        the command built by ``ld_info()``).
    :return: list of LD objects, one per (number, status) pair matched by
        RE_LD; empty when nothing matches.
    """
    # A device's number and status are on different rows, and RE_LD's '.*?'
    # does not cross newlines, so collapse all rows into a single line first.
    flattened = ' '.join(line.strip() for line in d)

    lds = []
    for ld_id, status in RE_LD.findall(flattened):
        lds.append(LD(ld_id, status))
    return lds
|
||||
|
||||
|
||||
def find_pds(d):
    """Parse physical devices out of the physical-device report.

    A device section starts at a ``Device #<id>`` row. Within a section the
    ``State``, ``S.M.A.R.T. warnings`` and ``Temperature`` rows are recorded.
    A section is closed by an ``NCQ status`` row, by the next ``Device #``
    row, or by the end of the input, so devices whose section has no
    ``NCQ status`` row are still reported.

    :param d: iterable of text rows (Service.get_pds() passes the output of
        the command built by ``pd_info()``).
    :return: list of PD objects that have an id, a state and a S.M.A.R.T.
        warnings value; sections missing any of those are skipped.
    """
    pds = list()

    def _store(candidate):
        # Only keep devices for which all mandatory fields were parsed.
        if candidate.id and candidate.state and candidate.smart_warnings:
            pds.append(candidate)

    pd = PD()

    for row in d:
        row = row.strip()

        if row.startswith('Device #'):
            # The previous section may never have seen an 'NCQ status' row;
            # keep it before starting the new one.
            _store(pd)
            pd = PD()
            pd.id = row.split('#')[-1]
            continue

        if not pd.id:
            # Rows outside of a device section are ignored.
            continue

        if row.startswith('State'):
            pd.state = row.split()[-1]
        elif row.startswith('S.M.A.R.T. warnings'):
            pd.smart_warnings = row.split()[-1]
        elif row.startswith('Temperature'):
            # First word after the last colon; an empty value is skipped
            # instead of raising IndexError.
            value = row.split(':')[-1].split()
            if value:
                pd.temperature = value[0]
        elif row.startswith('NCQ status'):
            _store(pd)
            # Reset so that later rows cannot touch the stored device.
            pd = PD()

    # Last section of the input, if it was not closed by 'NCQ status'.
    _store(pd)

    return pds
|
||||
|
||||
|
||||
class LD:
    """Logical device: its number and status word as parsed by find_lds()."""

    def __init__(self, ld_id, status):
        self.id, self.status = ld_id, status

    def data(self):
        """Return ``{'ld_<id>_status': 0 or 1}``; 1 means the status is in BAD_LD_STATUS."""
        key = 'ld_{0}_status'.format(self.id)
        is_bad = self.status in BAD_LD_STATUS
        return {key: int(is_bad)}
|
||||
|
||||
|
||||
class PD:
    """Physical device; all fields start as None and are filled in by find_pds()."""

    def __init__(self):
        self.id = self.state = self.smart_warnings = self.temperature = None

    def data(self):
        """Return the metrics of this device keyed by dimension id.

        Always contains ``pd_<id>_state`` (1 when the state is not in
        GOOD_PD_STATUS, else 0) and ``pd_<id>_smart_warnings`` (the parsed
        value as is). ``pd_<id>_temperature`` is added only when the parsed
        temperature consists of digits.
        """
        metrics = dict()
        metrics['pd_{0}_state'.format(self.id)] = int(self.state not in GOOD_PD_STATUS)
        metrics['pd_{0}_smart_warnings'.format(self.id)] = self.smart_warnings

        temperature = self.temperature
        if not temperature or not temperature.isdigit():
            return metrics

        metrics['pd_{0}_temperature'.format(self.id)] = temperature
        return metrics
|
||||
|
||||
|
||||
class Arcconf:
    """Builds the arcconf command lines used to query one controller.

    :param arcconf: path (or name) of the arcconf binary.
    :param controller: controller number passed to ``GETCONFIG``. Defaults
        to ``'1'``, the value that used to be hard-coded; ints are accepted
        and converted to str so the command stays a list of strings.
    """

    def __init__(self, arcconf, controller='1'):
        self.arcconf = arcconf
        self.controller = str(controller)

    def _getconfig(self, section):
        # Command as an argument list: <arcconf> GETCONFIG <controller> <section>
        return [self.arcconf, 'GETCONFIG', self.controller, section]

    def ld_info(self):
        """Return the command that prints the logical device configuration."""
        return self._getconfig('LD')

    def pd_info(self):
        """Return the command that prints the physical device configuration."""
        return self._getconfig('PD')
|
||||
|
||||
|
||||
# TODO: hardcoded sudo...
class SudoArcconf:
    """Same commands as Arcconf, but run through ``sudo -n``.

    :param arcconf: path (or name) of the arcconf binary.
    :param sudo: path (or name) of the sudo binary.
    """

    def __init__(self, arcconf, sudo):
        self.sudo = sudo
        self.arcconf = Arcconf(arcconf)

    def _with_sudo(self, command):
        # Prepend "<sudo> -n" to an argument list.
        prefix = [self.sudo, '-n']
        return prefix + command

    def ld_info(self):
        """Return the sudo-prefixed logical device command."""
        return self._with_sudo(self.arcconf.ld_info())

    def pd_info(self):
        """Return the sudo-prefixed physical device command."""
        return self._with_sudo(self.arcconf.pd_info())
|
||||
|
||||
|
||||
class Service(ExecutableService):
    """python.d service that collects Adaptec RAID device health via arcconf."""

    def __init__(self, configuration=None, name=None):
        ExecutableService.__init__(self, configuration=configuration, name=name)
        self.order = ORDER
        # update_charts() appends dimensions to the definitions, so work on a
        # copy and leave the module-level CHARTS untouched.
        self.definitions = deepcopy(CHARTS)
        # Job option 'use_sudo'; arcconf is run through sudo unless it is
        # explicitly disabled.
        self.use_sudo = self.configuration.get('use_sudo', True)
        # Command builder (Arcconf or SudoArcconf); set by check().
        self.arcconf = None

    def execute(self, command, stderr=False):
        """Run ``command`` (an argument list) via ExecutableService._get_raw_data().

        NOTE(review): presumably returns the list of output rows (stderr
        rows when ``stderr`` is true) or a falsy value on failure - confirm
        against ExecutableService.
        """
        return self._get_raw_data(command=command, stderr=stderr)

    def check(self):
        """Verify prerequisites, discover devices and populate the charts.

        :return: True when sudo (if enabled) and arcconf are usable and at
            least one logical and one physical device were parsed.
        """
        sudo = find_binary(SUDO)
        if self.use_sudo:
            if not sudo:
                self.error('can\'t locate "{0}" binary'.format(SUDO))
                return False
            # 'sudo -n -v' is executed with stderr captured; any stderr
            # output is treated as "sudo is not usable".
            err = self.execute([sudo, '-n', '-v'], True)
            if err:
                self.error(' '.join(err))
                return False

        arcconf = find_binary(ARCCONF)
        if not arcconf:
            self.error('can\'t locate "{0}" binary'.format(ARCCONF))
            return False

        if self.use_sudo:
            self.arcconf = SudoArcconf(arcconf, sudo)
        else:
            self.arcconf = Arcconf(arcconf)

        lds = self.get_lds()
        if not lds:
            return False

        self.debug('discovered logical devices ids: {0}'.format([ld.id for ld in lds]))

        pds = self.get_pds()
        if not pds:
            return False

        self.debug('discovered physical devices ids: {0}'.format([pd.id for pd in pds]))

        self.update_charts(lds, pds)
        return True

    def get_data(self):
        """Collect the current metrics of all logical and physical devices.

        :return: dict of dimension id -> value, or None when either the
            logical or the physical device query fails.
        """
        # get_lds()/get_pds() return None on failure; iterating over that
        # would raise TypeError, so report "no data" instead.
        lds = self.get_lds()
        if not lds:
            return None

        pds = self.get_pds()
        if not pds:
            return None

        data = dict()

        for ld in lds:
            data.update(ld.data())

        for pd in pds:
            data.update(pd.data())

        return data

    def get_lds(self):
        """Run the logical device query and parse it.

        :return: non-empty list of LD objects, or None when the command
            produced no output or nothing could be parsed.
        """
        raw_lds = self.execute(self.arcconf.ld_info())
        if not raw_lds:
            return None

        lds = find_lds(raw_lds)
        if not lds:
            self.error('failed to parse "{0}" output'.format(' '.join(self.arcconf.ld_info())))
            self.debug('output: {0}'.format(raw_lds))
            return None
        return lds

    def get_pds(self):
        """Run the physical device query and parse it.

        :return: non-empty list of PD objects, or None when the command
            produced no output or nothing could be parsed.
        """
        raw_pds = self.execute(self.arcconf.pd_info())
        if not raw_pds:
            return None

        pds = find_pds(raw_pds)
        if not pds:
            self.error('failed to parse "{0}" output'.format(' '.join(self.arcconf.pd_info())))
            self.debug('output: {0}'.format(raw_pds))
            return None
        return pds

    def update_charts(self, lds, pds):
        """Add one dimension per device to the chart definitions.

        Dimension ids match the keys produced by LD.data() and PD.data().

        :param lds: iterable of LD objects.
        :param pds: iterable of PD objects.
        """
        charts = self.definitions
        for ld in lds:
            dim = ['ld_{0}_status'.format(ld.id), 'ld {0}'.format(ld.id)]
            charts['ld_status']['lines'].append(dim)

        for pd in pds:
            dim = ['pd_{0}_state'.format(pd.id), 'pd {0}'.format(pd.id)]
            charts['pd_state']['lines'].append(dim)

            dim = ['pd_{0}_smart_warnings'.format(pd.id), 'pd {0}'.format(pd.id)]
            charts['pd_smart_warnings']['lines'].append(dim)

            dim = ['pd_{0}_temperature'.format(pd.id), 'pd {0}'.format(pd.id)]
            charts['pd_temperature']['lines'].append(dim)
|
59
collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf
Normal file
59
collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf
Normal file
|
@ -0,0 +1,59 @@
|
|||
# netdata python.d.plugin configuration for adaptec raid
|
||||
#
|
||||
# This file is in YAML format. Generally the format is:
|
||||
#
|
||||
# name: value
|
||||
#
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Global Variables
|
||||
# These variables set the defaults for all JOBs, however each JOB
|
||||
# may define its own, overriding the defaults.
|
||||
|
||||
# update_every sets the default data collection frequency.
|
||||
# If unset, the python.d.plugin default is used.
|
||||
# update_every: 1
|
||||
|
||||
# priority controls the order of charts at the netdata dashboard.
|
||||
# Lower numbers move the charts towards the top of the page.
|
||||
# If unset, the default for python.d.plugin is used.
|
||||
# priority: 60000
|
||||
|
||||
# retries sets the number of retries to be made in case of failures.
|
||||
# If unset, the default for python.d.plugin is used.
|
||||
# Attempts to restore the service are made once every update_every
|
||||
# and only if the module has collected values in the past.
|
||||
# retries: 60
|
||||
|
||||
# autodetection_retry sets the job re-check interval in seconds.
|
||||
# The job is not deleted if check fails.
|
||||
# Attempts to start the job are made once every autodetection_retry.
|
||||
# This feature is disabled by default.
|
||||
# autodetection_retry: 0
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# JOBS (data collection sources)
|
||||
#
|
||||
# The default JOBS share the same *name*. JOBS with the same name
|
||||
# are mutually exclusive. Only one of them will be allowed running at
|
||||
# any time. This allows autodetection to try several alternatives and
|
||||
# pick the one that works.
|
||||
#
|
||||
# Any number of jobs is supported.
|
||||
#
|
||||
# All python.d.plugin JOBS (for all its modules) support a set of
|
||||
# predefined parameters. These are:
|
||||
#
|
||||
# job_name:
|
||||
# name: myname # the JOB's name as it will appear at the
|
||||
# # dashboard (by default is the job_name)
|
||||
# # JOBs sharing a name are mutually exclusive
|
||||
# update_every: 1 # the JOB's data collection frequency
|
||||
# priority: 60000 # the JOB's order on the dashboard
|
||||
# retries: 60 # the JOB's number of restoration attempts
|
||||
# autodetection_retry: 0 # the JOB's re-check interval in seconds
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
# IMPORTANT
|
||||
# The netdata user needs to be able to run the arcconf program via sudo without a password:
|
||||
# netdata ALL=(root) NOPASSWD: /path/to/arcconf
|
|
@ -22,6 +22,7 @@ dist_userhealthconfig_DATA = \
|
|||
healthconfigdir=$(libconfigdir)/health.d
|
||||
dist_healthconfig_DATA = \
|
||||
$(top_srcdir)/installer/.keep \
|
||||
health.d/adaptec_raid.conf \
|
||||
health.d/apache.conf \
|
||||
health.d/apcupsd.conf \
|
||||
health.d/backend.conf \
|
||||
|
|
24
health/health.d/adaptec_raid.conf
Normal file
24
health/health.d/adaptec_raid.conf
Normal file
|
@ -0,0 +1,24 @@
|
|||
|
||||
# logical device status check
|
||||
|
||||
template: adapter_raid_ld_status
|
||||
on: adapter_raid.ld_status
|
||||
lookup: max -5s
|
||||
units: bool
|
||||
every: 10s
|
||||
crit: $this > 0
|
||||
delay: down 5m multiplier 1.5 max 1h
|
||||
info: at least 1 logical device is failed or degraded
|
||||
to: sysadmin
|
||||
|
||||
# physical device state check
|
||||
|
||||
template: adapter_raid_pd_state
|
||||
on: adapter_raid.pd_state
|
||||
lookup: max -5s
|
||||
units: bool
|
||||
every: 10s
|
||||
crit: $this > 0
|
||||
delay: down 5m multiplier 1.5 max 1h
|
||||
info: at least 1 physical device is not in online state
|
||||
to: sysadmin
|
Loading…
Add table
Add a link
Reference in a new issue