mirror of
https://github.com/netdata/netdata.git
synced 2025-04-27 06:10:43 +00:00
adaptec_raid python module (#4429)
* adaptec_raid module init version * adaptec_raid minor * adaptec_raid minor * adaptec_raid minor * adaptec_raid arcconf command fix * adaptec_raid minor fixes * adaptec_raid add alarms * adaptec_raid add link to screenshot to the readme
This commit is contained in:
parent
c0c5318100
commit
b85833f081
7 changed files with 370 additions and 0 deletions
collectors/python.d.plugin
health
|
@ -37,6 +37,7 @@ dist_pythonconfig_DATA = \
|
||||||
$(top_srcdir)/installer/.keep \
|
$(top_srcdir)/installer/.keep \
|
||||||
$(NULL)
|
$(NULL)
|
||||||
|
|
||||||
|
include adaptec_raid/Makefile.inc
|
||||||
include apache/Makefile.inc
|
include apache/Makefile.inc
|
||||||
include beanstalk/Makefile.inc
|
include beanstalk/Makefile.inc
|
||||||
include bind_rndc/Makefile.inc
|
include bind_rndc/Makefile.inc
|
||||||
|
|
13
collectors/python.d.plugin/adaptec_raid/Makefile.inc
Normal file
13
collectors/python.d.plugin/adaptec_raid/Makefile.inc
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
|
# THIS IS NOT A COMPLETE Makefile
|
||||||
|
# IT IS INCLUDED BY ITS PARENT'S Makefile.am
|
||||||
|
# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT
|
||||||
|
|
||||||
|
# install these files
|
||||||
|
dist_python_DATA += adaptec_raid/adaptec_raid.chart.py
|
||||||
|
dist_pythonconfig_DATA += adaptec_raid/adaptec_raid.conf
|
||||||
|
|
||||||
|
# do not install these files, but include them in the distribution
|
||||||
|
dist_noinst_DATA += adaptec_raid/README.md adaptec_raid/Makefile.inc
|
||||||
|
|
27
collectors/python.d.plugin/adaptec_raid/README.md
Normal file
27
collectors/python.d.plugin/adaptec_raid/README.md
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
# adaptec raid
|
||||||
|
|
||||||
|
Module collects logical and physical devices health metrics.
|
||||||
|
|
||||||
|
**Requirements:**
|
||||||
|
* The `netdata` user must be able to run the `arcconf` program through `sudo` without a password
|
||||||
|
|
||||||
|
To grab stats it executes:
|
||||||
|
* `sudo -n arcconf GETCONFIG 1 LD`
|
||||||
|
* `sudo -n arcconf GETCONFIG 1 PD`
|
||||||
|
|
||||||
|
|
||||||
|
It produces:
|
||||||
|
|
||||||
|
1. **Logical Device Status**
|
||||||
|
|
||||||
|
2. **Physical Device State**
|
||||||
|
|
||||||
|
3. **Physical Device S.M.A.R.T warnings**
|
||||||
|
|
||||||
|
4. **Physical Device Temperature**
|
||||||
|
|
||||||
|
Screenshot:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
---
|
245
collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py
Normal file
245
collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py
Normal file
|
@ -0,0 +1,245 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Description: adaptec_raid netdata python.d module
|
||||||
|
# Author: Ilya Mashchenko (l2isbad)
|
||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
from bases.FrameworkServices.ExecutableService import ExecutableService
|
||||||
|
from bases.collection import find_binary
|
||||||
|
|
||||||
|
|
||||||
|
# Default collection interval for this module.
update_every = 5

# Chart ids in the order they are listed for the service.
ORDER = ['ld_status', 'pd_state', 'pd_smart_warnings', 'pd_temperature']

# Chart templates; 'lines' start empty and are filled per discovered device.
# NOTE(review): 'options' presumably follows the python.d layout
# [name, title, units, family, context, chart type] - confirm against the framework.
CHARTS = {
    'ld_status': {
        'options': [
            None,
            'Status Is Not OK',
            'bool',
            'logical devices',
            'adapter_raid.ld_status',
            'line',
        ],
        'lines': [],
    },
    'pd_state': {
        'options': [
            None,
            'State Is Not OK',
            'bool',
            'physical devices',
            'adapter_raid.pd_state',
            'line',
        ],
        'lines': [],
    },
    'pd_smart_warnings': {
        'options': [
            None,
            'S.M.A.R.T warnings',
            'count',
            'physical devices',
            'adapter_raid.smart_warnings',
            'line',
        ],
        'lines': [],
    },
    'pd_temperature': {
        'options': [
            None,
            'Temperature',
            'celsius',
            'physical devices',
            'adapter_raid.temperature',
            'line',
        ],
        'lines': [],
    },
}

# Names of the executables this module looks up.
SUDO = 'sudo'
ARCCONF = 'arcconf'

# Logical device statuses that are reported as "not OK".
BAD_LD_STATUS = ('Degraded', 'Failed')

# Physical device states that are reported as OK; anything else is "not OK".
GOOD_PD_STATUS = ('Online',)

# Captures (device number, status word) pairs from text in which a
# 'Logical device number' field is followed by 'Status of logical device'.
RE_LD = re.compile(
    r'Logical device number\s+([0-9]+).*?'
    r'Status of logical device\s+: ([a-zA-Z]+)'
)
|
||||||
|
|
||||||
|
|
||||||
|
def find_lds(d):
    """
    Extract logical devices from command output.

    :param d: iterable of output rows (strings)
    :return: list of LD objects, one per RE_LD match; empty if nothing matched
    """
    # Rows are stripped and joined with single spaces so that RE_LD can match
    # a device number and its status even though they sit on different rows.
    text = ' '.join(row.strip() for row in d)

    lds = []
    for ld_id, status in RE_LD.findall(text):
        lds.append(LD(ld_id, status))
    return lds
|
||||||
|
|
||||||
|
|
||||||
|
def find_pds(d):
    """
    Extract physical devices from command output.

    A device record is opened by a row starting with 'Device #' and is closed
    by a row starting with 'NCQ status'. A closed record is kept only if its
    id, state and S.M.A.R.T. warnings were all found.

    :param d: iterable of output rows (strings)
    :return: list of PD objects
    """
    found = []
    current = PD()

    for line in d:
        line = line.strip()

        if line.startswith('Device #'):
            # new record: everything after the last '#' is the device id
            current = PD()
            current.id = line.split('#')[-1]
            continue

        if not current.id:
            # rows outside of any device record are ignored
            continue

        if line.startswith('State'):
            current.state = line.split()[-1]
        elif line.startswith('S.M.A.R.T. warnings'):
            current.smart_warnings = line.split()[-1]
        elif line.startswith('Temperature'):
            # first whitespace-separated token after the last ':'
            current.temperature = line.split(':')[-1].split()[0]
        elif line.startswith('NCQ status'):
            # end-of-record marker: keep complete records, then reset
            if all((current.id, current.state, current.smart_warnings)):
                found.append(current)
            current = PD()

    return found
|
||||||
|
|
||||||
|
|
||||||
|
class LD:
    """Logical device: an id plus its status string."""

    def __init__(self, ld_id, status):
        self.id, self.status = ld_id, status

    def data(self):
        """
        Build the metric for this device.

        :return: dict with one key, 'ld_<id>_status', whose value is 1 when
                 the status is listed in BAD_LD_STATUS and 0 otherwise
        """
        key = 'ld_{0}_status'.format(self.id)
        is_bad = self.status in BAD_LD_STATUS
        return {key: int(is_bad)}
|
||||||
|
|
||||||
|
|
||||||
|
class PD:
    """Physical device record; every field starts as None until it is set."""

    def __init__(self):
        self.id = self.state = self.smart_warnings = self.temperature = None

    def data(self):
        """
        Build the metrics for this device.

        :return: dict with 'pd_<id>_state' (1 when the state is not listed in
                 GOOD_PD_STATUS, else 0) and 'pd_<id>_smart_warnings' (the
                 stored value, unchanged); 'pd_<id>_temperature' is added only
                 when the stored temperature is a non-empty digit string
        """
        metrics = dict()
        metrics['pd_{0}_state'.format(self.id)] = int(self.state not in GOOD_PD_STATUS)
        metrics['pd_{0}_smart_warnings'.format(self.id)] = self.smart_warnings

        temperature = self.temperature
        if temperature and temperature.isdigit():
            metrics['pd_{0}_temperature'.format(self.id)] = temperature

        return metrics
|
||||||
|
|
||||||
|
|
||||||
|
class Arcconf:
    """Builds arcconf command lines (argument lists) for a given executable."""

    def __init__(self, arcconf):
        # path or name of the arcconf executable, used as argv[0]
        self.arcconf = arcconf

    def _getconfig(self, target):
        # 'GETCONFIG 1 <target>' - the '1' argument is hard-coded
        return [self.arcconf, 'GETCONFIG', '1', target]

    def ld_info(self):
        """Return the command that queries 'LD' information."""
        return self._getconfig('LD')

    def pd_info(self):
        """Return the command that queries 'PD' information."""
        return self._getconfig('PD')
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: hardcoded sudo...
|
||||||
|
class SudoArcconf:
    """Same commands as Arcconf, each prefixed with '<sudo> -n'."""

    def __init__(self, arcconf, sudo):
        self.arcconf = Arcconf(arcconf)
        self.sudo = sudo

    def _with_sudo(self, command):
        # prepend the sudo executable and its '-n' flag to the wrapped command
        wrapped = [self.sudo, '-n']
        wrapped.extend(command)
        return wrapped

    def ld_info(self):
        """Return the sudo-prefixed command that queries 'LD' information."""
        return self._with_sudo(self.arcconf.ld_info())

    def pd_info(self):
        """Return the sudo-prefixed command that queries 'PD' information."""
        return self._with_sudo(self.arcconf.pd_info())
|
||||||
|
|
||||||
|
|
||||||
|
class Service(ExecutableService):
    """
    Collects logical/physical device metrics by running arcconf commands.

    Configuration keys read here:
      use_sudo - run arcconf through 'sudo -n' (default: True)
    """

    def __init__(self, configuration=None, name=None):
        ExecutableService.__init__(self, configuration=configuration, name=name)
        self.order = ORDER
        # deep copy: update_charts() appends to the 'lines' lists, which must
        # not leak into the module-level CHARTS template
        self.definitions = deepcopy(CHARTS)
        self.use_sudo = self.configuration.get('use_sudo', True)
        # command builder (Arcconf or SudoArcconf); set by check()
        self.arcconf = None

    def execute(self, command, stderr=False):
        """
        Run a command through the base class' _get_raw_data().

        :param command: argument list to execute
        :param stderr: passed through to _get_raw_data()
        :return: whatever _get_raw_data() returns
                 (NOTE(review): presumably a list of output rows or None - confirm)
        """
        return self._get_raw_data(command=command, stderr=stderr)

    def check(self):
        """
        Locate the required binaries, run a first collection and build charts.

        :return: True if both logical and physical devices were discovered,
                 False otherwise
        """
        sudo = find_binary(SUDO)
        if self.use_sudo:
            if not sudo:
                self.error('can\'t locate "{0}" binary'.format(SUDO))
                return False
            # any output from 'sudo -n -v' (executed with stderr=True) is
            # treated as an error
            err = self.execute([sudo, '-n', '-v'], True)
            if err:
                self.error(' '.join(err))
                return False

        arcconf = find_binary(ARCCONF)
        if not arcconf:
            self.error('can\'t locate "{0}" binary'.format(ARCCONF))
            return False

        if self.use_sudo:
            self.arcconf = SudoArcconf(arcconf, sudo)
        else:
            self.arcconf = Arcconf(arcconf)

        lds = self.get_lds()
        if not lds:
            return False

        self.debug('discovered logical devices ids: {0}'.format([ld.id for ld in lds]))

        pds = self.get_pds()
        if not pds:
            return False

        self.debug('discovered physical devices ids: {0}'.format([pd.id for pd in pds]))

        self.update_charts(lds, pds)
        return True

    def get_data(self):
        """
        Collect metrics for all logical and physical devices.

        :return: dict of dimension id -> value, or None if nothing was collected
        """
        data = dict()

        # get_lds() / get_pds() return None when the command produced no output
        # or the output could not be parsed; fall back to an empty sequence so
        # that a failed poll does not raise TypeError
        for ld in self.get_lds() or ():
            data.update(ld.data())

        for pd in self.get_pds() or ():
            data.update(pd.data())

        return data or None

    def get_lds(self):
        """
        Run the logical device query and parse its output.

        :return: non-empty list of LD objects, or None on failure
        """
        raw_lds = self.execute(self.arcconf.ld_info())
        if not raw_lds:
            return None

        lds = find_lds(raw_lds)
        if not lds:
            self.error('failed to parse "{0}" output'.format(' '.join(self.arcconf.ld_info())))
            self.debug('output: {0}'.format(raw_lds))
            return None
        return lds

    def get_pds(self):
        """
        Run the physical device query and parse its output.

        :return: non-empty list of PD objects, or None on failure
        """
        raw_pds = self.execute(self.arcconf.pd_info())
        if not raw_pds:
            return None

        pds = find_pds(raw_pds)
        if not pds:
            self.error('failed to parse "{0}" output'.format(' '.join(self.arcconf.pd_info())))
            self.debug('output: {0}'.format(raw_pds))
            return None
        return pds

    def update_charts(self, lds, pds):
        """
        Append one dimension per discovered device to the chart definitions.

        :param lds: iterable of LD objects
        :param pds: iterable of PD objects
        """
        charts = self.definitions
        for ld in lds:
            dim = ['ld_{0}_status'.format(ld.id), 'ld {0}'.format(ld.id)]
            charts['ld_status']['lines'].append(dim)

        for pd in pds:
            dim = ['pd_{0}_state'.format(pd.id), 'pd {0}'.format(pd.id)]
            charts['pd_state']['lines'].append(dim)

            dim = ['pd_{0}_smart_warnings'.format(pd.id), 'pd {0}'.format(pd.id)]
            charts['pd_smart_warnings']['lines'].append(dim)

            dim = ['pd_{0}_temperature'.format(pd.id), 'pd {0}'.format(pd.id)]
            charts['pd_temperature']['lines'].append(dim)
|
59
collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf
Normal file
59
collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
# netdata python.d.plugin configuration for adaptec raid
|
||||||
|
#
|
||||||
|
# This file is in YAML format. Generally the format is:
|
||||||
|
#
|
||||||
|
# name: value
|
||||||
|
#
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# Global Variables
|
||||||
|
# These variables set the defaults for all JOBs, however each JOB
|
||||||
|
# may define its own, overriding the defaults.
|
||||||
|
|
||||||
|
# update_every sets the default data collection frequency.
|
||||||
|
# If unset, the python.d.plugin default is used.
|
||||||
|
# update_every: 1
|
||||||
|
|
||||||
|
# priority controls the order of charts at the netdata dashboard.
|
||||||
|
# Lower numbers move the charts towards the top of the page.
|
||||||
|
# If unset, the default for python.d.plugin is used.
|
||||||
|
# priority: 60000
|
||||||
|
|
||||||
|
# retries sets the number of retries to be made in case of failures.
|
||||||
|
# If unset, the default for python.d.plugin is used.
|
||||||
|
# Attempts to restore the service are made once every update_every
|
||||||
|
# and only if the module has collected values in the past.
|
||||||
|
# retries: 60
|
||||||
|
|
||||||
|
# autodetection_retry sets the job re-check interval in seconds.
|
||||||
|
# The job is not deleted if check fails.
|
||||||
|
# Attempts to start the job are made once every autodetection_retry.
|
||||||
|
# This feature is disabled by default.
|
||||||
|
# autodetection_retry: 0
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# JOBS (data collection sources)
|
||||||
|
#
|
||||||
|
# The default JOBS share the same *name*. JOBS with the same name
|
||||||
|
# are mutually exclusive. Only one of them will be allowed running at
|
||||||
|
# any time. This allows autodetection to try several alternatives and
|
||||||
|
# pick the one that works.
|
||||||
|
#
|
||||||
|
# Any number of jobs is supported.
|
||||||
|
#
|
||||||
|
# All python.d.plugin JOBS (for all its modules) support a set of
|
||||||
|
# predefined parameters. These are:
|
||||||
|
#
|
||||||
|
# job_name:
|
||||||
|
# name: myname # the JOB's name as it will appear at the
|
||||||
|
# # dashboard (by default is the job_name)
|
||||||
|
# # JOBs sharing a name are mutually exclusive
|
||||||
|
# update_every: 1 # the JOB's data collection frequency
|
||||||
|
# priority: 60000 # the JOB's order on the dashboard
|
||||||
|
# retries: 60 # the JOB's number of restoration attempts
|
||||||
|
# autodetection_retry: 0 # the JOB's re-check interval in seconds
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
# IMPORTANT
|
||||||
|
# The netdata user needs to be able to sudo the arcconf program without password:
|
||||||
|
# netdata ALL=(root) NOPASSWD: /path/to/arcconf
|
|
@ -22,6 +22,7 @@ dist_userhealthconfig_DATA = \
|
||||||
healthconfigdir=$(libconfigdir)/health.d
|
healthconfigdir=$(libconfigdir)/health.d
|
||||||
dist_healthconfig_DATA = \
|
dist_healthconfig_DATA = \
|
||||||
$(top_srcdir)/installer/.keep \
|
$(top_srcdir)/installer/.keep \
|
||||||
|
health.d/adaptec_raid.conf \
|
||||||
health.d/apache.conf \
|
health.d/apache.conf \
|
||||||
health.d/apcupsd.conf \
|
health.d/apcupsd.conf \
|
||||||
health.d/backend.conf \
|
health.d/backend.conf \
|
||||||
|
|
24
health/health.d/adaptec_raid.conf
Normal file
24
health/health.d/adaptec_raid.conf
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
|
||||||
|
# logical device status check
|
||||||
|
|
||||||
|
template: adapter_raid_ld_status
|
||||||
|
on: adapter_raid.ld_status
|
||||||
|
lookup: max -5s
|
||||||
|
units: bool
|
||||||
|
every: 10s
|
||||||
|
crit: $this > 0
|
||||||
|
delay: down 5m multiplier 1.5 max 1h
|
||||||
|
info: at least 1 logical device is failed or degraded
|
||||||
|
to: sysadmin
|
||||||
|
|
||||||
|
# physical device state check
|
||||||
|
|
||||||
|
template: adapter_raid_pd_state
|
||||||
|
on: adapter_raid.pd_state
|
||||||
|
lookup: max -5s
|
||||||
|
units: bool
|
||||||
|
every: 10s
|
||||||
|
crit: $this > 0
|
||||||
|
delay: down 5m multiplier 1.5 max 1h
|
||||||
|
info: at least 1 physical device is not in online state
|
||||||
|
to: sysadmin
|
Loading…
Add table
Add a link
Reference in a new issue