mirror of
https://github.com/alerta/alerta.git
synced 2025-01-24 17:29:39 +00:00
478 lines
17 KiB
Python
Executable file
478 lines
17 KiB
Python
Executable file
#!/usr/bin/env python
|
|
########################################
|
|
#
|
|
# alert-notify.py - Notify alerts via SMS and email
|
|
#
|
|
########################################
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import threading
|
|
import yaml
|
|
try:
|
|
import json
|
|
except ImportError:
|
|
import simplejson as json
|
|
import stomp
|
|
import logging
|
|
import urllib
|
|
import urllib2
|
|
from email.mime.multipart import MIMEMultipart
|
|
from email.mime.text import MIMEText
|
|
from email.mime.image import MIMEImage
|
|
import smtplib
|
|
import datetime
|
|
import pytz
|
|
import uuid
|
|
|
|
__version__ = '1.0.2'

# Message broker
BROKER_LIST = [('localhost', 61613)]  # list of brokers for failover
NOTIFY_TOPIC = '/topic/notify'

# Files used by the daemon
DISABLE = '/opt/alerta/conf/alert-notify.disable'
LOGFILE = '/var/log/alerta/alert-notify.log'
PIDFILE = '/var/run/alerta/alert-notify.pid'
CONFIGFILE = '/opt/alerta/conf/alert-notify.yaml'

# Seconds an alert is held back before its notification is released
GRACEPERIOD = 300

# Email / web settings
API_SERVER = 'monitoring.guprod.gnl'
SMTP_SERVER = 'mx'
ALERTER_MAIL = 'alerta@guardian.co.uk'
TIMEZONE = 'Europe/London'

# AQL constants (USERNAME and PASSWORD are overwritten by init_config())
USERNAME = ''
PASSWORD = ''
API_URL = 'http://gw.aql.com/sms/sms_gw.php'

# AQL API responses, keyed by the status number as a string
status = {
    '0': 'SMS successfully queued',
    '1': 'SMS queued partially',
    '2': 'Authentication error',
    '3': 'Destination number(s) error',
    '4': 'Send time error',
    '5': 'Insufficient credit or invalid number of msg/destination',
    '9': 'Undefined error',
}

# Global dicts
owners = {}  # owner name -> settings, taken from the config file
hold = {}    # alert id -> time.time() value after which to notify
alert = {}   # alert id -> most recently received alert
tokens = {}  # (owner, 'sms' | 'email') -> remaining tokens

_TokenThread = None        # Worker thread object
_NotifyThread = None
_Lock = threading.Lock()   # Synchronization lock
TOKEN_LIMIT = 10
_token_rate = 60           # Add a token every 60 seconds
INITIAL_TOKENS = 5
|
|
|
|
def should_we_notify(alertid):
    """Return 1 if the stored alert has an 'sms:' or 'email:' tag, else 0."""
    tags = alert[alertid]['tags']
    wanted = any(t.startswith('sms:') or t.startswith('email:') for t in tags)
    return 1 if wanted else 0
|
|
|
|
def who_to_notify(tag):
    """Return the owner part of a tag, i.e. the text between the first two ':'.

    For 'sms:bob' this is 'bob'. A tag without ':' raises IndexError.
    """
    pieces = tag.split(':')
    recipient = pieces[1]
    logging.info('Identifing owner as %s', recipient)
    return recipient
|
|
|
|
def sms_notify(alertid, username, password, destination, url=API_URL):
    """Send the summary of alert *alertid* as an SMS through the AQL gateway.

    alertid     -- key into the global ``alert`` dict
    username    -- gateway account name
    password    -- gateway account password
    destination -- destination number(s) passed to the gateway
    url         -- gateway endpoint

    Returns 0 when the gateway reports the message as queued, otherwise None.
    Errors raised by urllib2 are not caught here.
    """
    message = alert[alertid]['summary']

    params = {
        'username': username,
        'password': password,
        'destination': destination,
        'message': message,
    }
    data = urllib.urlencode(params)

    # Log the call with the password masked so that credentials never end up
    # in the log file.
    loggable = urllib.urlencode(dict(params, password='REDACTED'))
    logging.info('Api call %s', url + '?' + loggable)

    req = urllib2.Request(url, data)
    f = urllib2.urlopen(req)
    try:
        response = f.read()
    finally:
        # Close the response even when read() fails.
        f.close()

    # Api call response syntax.
    # <status no>:<no of credits used> <description>
    logging.info('Api response %s', response)

    # Verify response
    if status['0'] in response:
        return 0

    logging.error('SMS to %s was not queued: %s', destination, response)
    return None
|
|
|
|
def _email_text_body(a, local_time):
    """Build the plain-text part of the notification email for alert dict *a*."""
    lines = [
        '[%s] %s' % (a['status'], a['summary']),
        'Alert Details',
        'Alert ID: %s' % (a['id'],),
        'Create Time: %s' % (local_time.strftime('%Y/%m/%d %H:%M:%S'),),
        'Resource: %s' % (a['resource'],),
        'Environment: %s' % (','.join(a['environment']),),
        'Service: %s' % (','.join(a['service']),),
        'Event Name: %s' % (a['event'],),
        'Event Group: %s' % (a['group'],),
        'Event Value: %s' % (a['value'],),
        'Severity: %s -> %s' % (a['previousSeverity'], a['severity']),
        'Status: %s' % (a['status'],),
        'Text: %s' % (a['text'],),
    ]
    if 'thresholdInfo' in a:
        lines.append('Threshold Info: %s' % (a['thresholdInfo'],))
    if 'duplicateCount' in a:
        lines.append('Duplicate Count: %s' % (a['duplicateCount'],))
    if 'moreInfo' in a:
        lines.append('More Info: %s' % (a['moreInfo'],))
    lines.append('Historical Data')
    for g in a.get('graphs') or []:
        lines.append('%s' % (g,))
    lines.append('Raw Alert')
    lines.append('%s' % (json.dumps(a),))
    lines.append('Generated by %s on %s at %s' % (
        'alert-notify.py', os.uname()[1],
        datetime.datetime.now().strftime("%a %d %b %H:%M:%S")))
    return '\n'.join(lines) + '\n'


def _email_html_body(a, local_time, graph_cid):
    """Build the HTML part of the notification email for alert dict *a*.

    *graph_cid* maps each graph URL to the Content-ID used for its inline
    image. Every value taken from the alert is HTML-escaped.
    """
    from xml.sax.saxutils import escape

    def esc(value):
        # Also escape double quotes because some values land in attributes.
        return escape('%s' % (value,), {'"': '&quot;'})

    heading = ('<tr><td><p align="left" style="font-size:18px;line-height:22px;'
               'color:#c25130;font-weight:bold;">%s</p>')
    row = '<tr><td><b>%s:</b></td><td>%s</td></tr>\n'

    parts = [
        # table used to center email
        '<p><table border="0" cellpadding="0" cellspacing="0" width="100%">\n',
        '<tr><td bgcolor="#ffffff" align="center">\n',
        # table used to set width of email
        '<table border="0" cellpadding="0" cellspacing="0" width="700">\n',
        '<tr><td bgcolor="#425470"><p align="center" style="font-size:24px;'
        'color:#d9fffd;font-weight:bold;"><strong>[%s] %s</strong></p></td></tr>\n'
        % (esc(a['status']), esc(a['summary'])),
        heading % 'Alert Details' + '\n',
        '<table>\n',
        # NOTE(review): API_SERVER has no URL scheme, so this href is a
        # relative link - confirm whether http:// or https:// is intended.
        '<tr><td><b>Alert ID:</b></td><td><a href="%s/alerta/details.php?id=%s" '
        'target="_blank">%s</a></td></tr>\n'
        % (API_SERVER, esc(a['id']), esc(a['id'])),
        row % ('Create Time', local_time.strftime('%Y/%m/%d %H:%M:%S')),
        row % ('Resource', esc(a['resource'])),
        row % ('Environment', esc(','.join(a['environment']))),
        row % ('Service', esc(','.join(a['service']))),
        row % ('Event Name', esc(a['event'])),
        row % ('Event Group', esc(a['group'])),
        row % ('Event Value', esc(a['value'])),
        row % ('Severity', '%s -> %s' % (esc(a['previousSeverity']),
                                         esc(a['severity']))),
        row % ('Status', esc(a['status'])),
        row % ('Text', esc(a['text'])),
    ]
    if 'thresholdInfo' in a:
        parts.append(row % ('Threshold Info', esc(a['thresholdInfo'])))
    if 'duplicateCount' in a:
        parts.append(row % ('Duplicate Count', esc(a['duplicateCount'])))
    if 'moreInfo' in a:
        parts.append(row % ('More Info',
                            '<a href="%s">ganglia</a>' % esc(a['moreInfo'])))
    parts.append('</table>\n')
    parts.append('</td></tr>\n')

    parts.append(heading % 'Historical Data' + '</td></tr>\n')
    for g in a.get('graphs') or []:
        parts.append('<tr><td><img src="cid:' + graph_cid[g] + '"></td></tr>\n')

    parts.append(heading % 'Raw Alert' + '</td></tr>\n')
    parts.append('<tr><td><p align="left" style="font-family: \'Courier New\', '
                 'Courier, monospace">%s</p></td></tr>\n' % esc(json.dumps(a)))
    parts.append('<tr><td>Generated by %s on %s at %s</td></tr>\n' % (
        'alert-notify.py', esc(os.uname()[1]),
        datetime.datetime.now().strftime("%a %d %b %H:%M:%S")))
    parts.append('</table>')             # closes the fixed-width table
    parts.append('</td></tr></table>')   # closes the centering table
    return ''.join(parts)


def email_notify(alertid, email):
    """Email the details of alert *alertid* to *email*.

    The message is multipart/related: a multipart/alternative section holding
    plain-text and HTML versions, followed by one inline image per URL in the
    alert's optional 'graphs' list. A graph that cannot be fetched is logged
    and left out. SMTP and socket errors are logged and not re-raised. A
    missing mandatory alert field raises KeyError.
    """
    a = alert[alertid]
    receive_id = a.get('lastReceiveId', alertid)

    # createTime arrives as a UTC timestamp; show it in the configured zone.
    create_time = datetime.datetime.strptime(a['createTime'],
                                             '%Y-%m-%dT%H:%M:%S.%fZ')
    create_time = create_time.replace(tzinfo=pytz.utc)
    local_time = create_time.astimezone(pytz.timezone(TIMEZONE))

    graphs = a.get('graphs') or []
    graph_cid = dict((g, str(uuid.uuid4())) for g in graphs)

    text = _email_text_body(a, local_time)
    logging.debug('Raw Text: %s', text)

    html = _email_html_body(a, local_time, graph_cid)
    logging.debug('HTML Text %s', html)

    msg_root = MIMEMultipart('related')
    msg_root['Subject'] = '[%s] %s' % (a['status'], a['summary'])
    msg_root['From'] = ALERTER_MAIL
    msg_root['To'] = email
    msg_root.preamble = 'This is a multi-part message in MIME format.'

    msg_alt = MIMEMultipart('alternative')
    msg_root.attach(msg_alt)
    msg_alt.attach(MIMEText(text, 'plain'))
    msg_alt.attach(MIMEText(html, 'html'))

    for g in graphs:
        try:
            resp = urllib2.urlopen(g)
            try:
                image = resp.read()
            finally:
                resp.close()
            msg_img = MIMEImage(image)
        except Exception as e:
            # Best effort: the email is still sent without this image.
            logging.warning('%s : Could not attach graph %s - %s',
                            receive_id, g, e)
            continue
        logging.debug('graph cid %s', graph_cid[g])
        msg_img.add_header('Content-ID', '<' + graph_cid[g] + '>')
        msg_root.attach(msg_img)

    logging.info('%s : Send email to %s', receive_id, email)
    try:
        s = smtplib.SMTP(SMTP_SERVER)
        try:
            s.sendmail(ALERTER_MAIL, email, msg_root.as_string())
        finally:
            s.quit()
    except (smtplib.SMTPException, EnvironmentError) as e:
        # EnvironmentError covers socket errors raised while connecting.
        logging.error('%s : Sendmail failed - %s', receive_id, e)
|
|
|
|
def init_tokens():
    """Set the sms and email token count of every owner to INITIAL_TOKENS.

    Any exception is logged and swallowed.
    """
    try:
        for name in owners:
            for channel in ('sms', 'email'):
                tokens[name, channel] = INITIAL_TOKENS
    except Exception as err:
        logging.error('Failed to initialize tokens %s', err)
|
|
|
|
def init_config():
    """(Re)load CONFIGFILE into the globals USERNAME, PASSWORD and owners.

    The file must contain a 'global' mapping with USERNAME and PASSWORD and
    an 'owners' mapping. If the file cannot be read or parsed, or a required
    key is missing, the error is logged and the current settings stay in
    place. After a successful load the token buckets are reset through
    init_tokens().
    """
    global owners, USERNAME, PASSWORD

    logging.info('Loading config.')

    try:
        with open(CONFIGFILE) as f:
            # NOTE(review): yaml.load can construct arbitrary Python objects.
            # If this file only holds plain data, prefer yaml.safe_load.
            config = yaml.load(f)
        username = config['global']['USERNAME']
        password = config['global']['PASSWORD']
        new_owners = config['owners'] or {}
    except Exception as e:
        # Keep running with the previous settings rather than falling
        # through to an unbound 'config'.
        logging.error('Failed to load alert config: %s', e)
        return

    # Assign only after everything parsed, so a bad file never leaves the
    # globals half-updated.
    USERNAME = username
    PASSWORD = password
    owners = new_owners

    logging.info('Loaded %d owners in config.', len(owners))

    init_tokens()
|
|
|
|
def _take_token(who, channel):
    """Consume one *channel* ('sms' or 'email') token belonging to *who*.

    The check and the decrement happen under _Lock. Returns True if a token
    was taken and False if none are left. Raises KeyError when *who* has no
    token bucket.
    """
    with _Lock:
        remaining = tokens[who, channel]
        if remaining <= 0:
            return False
        tokens[who, channel] = remaining - 1
        logging.debug('Taken a %s token from %s, there are only %d left',
                      channel, who, remaining - 1)
    return True


def send_notify(alertid):
    """Notify the owner named by each 'sms:' / 'email:' tag of alert *alertid*.

    Each tag costs its owner one token for that channel. When the owner has
    none left the notification is dropped and an error is logged. Failures
    are logged per tag, so one bad tag does not stop the others. Nothing is
    raised to the caller.
    """
    try:
        a = alert[alertid]
        receive_id = a.get('lastReceiveId', alertid)
        message = a.get('summary')
        tags = a['tags']
    except Exception as e:
        logging.error('Notify sending failed for "%s" - %s', alertid, e)
        return

    for tag in tags:
        if tag.startswith('sms:'):
            channel = 'sms'
        elif tag.startswith('email:'):
            channel = 'email'
        else:
            continue

        try:
            who = who_to_notify(tag)

            if not _take_token(who, channel):
                logging.error('%s run out of %s tokens. Failed to notify %s.',
                              who, channel, receive_id)
                continue

            if channel == 'sms':
                sms_notify(alertid, USERNAME, PASSWORD, owners[who]['mobile'])
            else:
                email_notify(alertid, owners[who]['email'])
        except Exception as e:
            logging.error('Notify sending failed for "%s" - %s - %s',
                          receive_id, message, e)
|
|
|
|
|
|
class MessageHandler(object):
    """Listener registered on the STOMP connection in main().

    Incoming alerts are stored in the global ``alert`` dict. Those tagged
    for notification get a release time in ``hold``.
    """

    def on_error(self, headers, body):
        """Log an error frame received from the broker."""
        logging.error('Received an error %s', body)

    def on_message(self, headers, body):
        """Store a received alert and start or update its hold period.

        *body* is the JSON text of one alert. Messages that are not valid
        JSON, or lack 'id', 'lastReceiveId', 'status' or 'summary', are
        logged and discarded.
        """
        logging.debug("Received alert : %s", body)

        try:
            received = json.loads(body)  # parse once, reuse below
            alertid = received['id']
            receive_id = received['lastReceiveId']
            state = received['status']
            summary = received['summary']
        except (ValueError, KeyError, TypeError) as e:
            logging.error('Discarding malformed alert - %s', e)
            return

        alert[alertid] = received

        logging.info('%s : [%s] %s', receive_id, state, summary)

        if not should_we_notify(alertid):
            logging.debug('%s : NOT PAGING for [%s] %s', receive_id, state, summary)
            del alert[alertid]
            # An earlier version of this alert may already be on hold. Drop
            # that entry too, or it would expire with no alert behind it.
            hold.pop(alertid, None)
            return

        if alertid in hold:
            if received.get('severity') == 'NORMAL':
                logging.info('%s : Dropping NORMAL alert %s', receive_id, alertid)
                # pop() because another thread may have removed the entry.
                hold.pop(alertid, None)
                alert.pop(alertid, None)
            else:
                logging.info('%s : Update alert %s details', receive_id, alertid)
        else:
            hold[alertid] = time.time() + GRACEPERIOD
            logging.info('%s : Holding onto alert %s for %s seconds',
                         receive_id, alertid, GRACEPERIOD)

    def on_disconnected(self):
        """Reconnect the global connection and resubscribe to NOTIFY_TOPIC."""
        logging.warning('Connection lost. Attempting auto-reconnect to %s', NOTIFY_TOPIC)
        try:
            conn.start()
            conn.connect(wait=True)
            conn.subscribe(destination=NOTIFY_TOPIC, ack='auto',
                           headers={'selector': "repeat = 'false'"})
        except Exception as e:
            logging.error('Stomp reconnect failed: %s', e)
|
|
|
|
class ReleaseThread(threading.Thread):
    """Worker thread that sends notifications for alerts whose hold expired.

    Roughly every 5 seconds it scans ``hold`` and, for each entry whose
    release time has passed, calls send_notify() and then forgets the alert.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        self.running = False       # True while run() is executing
        self.shuttingdown = False  # set by shutdown() to stop the loop

    def shutdown(self):
        """Ask the loop to stop and, if it is running, wait for it to end."""
        self.shuttingdown = True
        if not self.running:
            return
        self.join()

    def run(self):
        """Release expired holds until shutdown() is called."""
        self.running = True
        try:
            while not self.shuttingdown:
                now = time.time()
                # Work on a snapshot: the message listener adds and removes
                # entries in `hold` from another thread.
                expired = [aid for aid, release in list(hold.items())
                           if release < now]

                for alertid in expired:
                    logging.warning('Hold expired for %s and trigger notification', alertid)
                    try:
                        send_notify(alertid)
                    except Exception:
                        # Keep the thread alive whatever a notification does.
                        logging.exception('Unexpected error notifying %s', alertid)
                    # pop() rather than del: the listener may already have
                    # removed the entry.
                    alert.pop(alertid, None)
                    hold.pop(alertid, None)

                if not self.shuttingdown:
                    time.sleep(5)
        finally:
            self.running = False
|
|
|
|
|
|
class TokenTopUp(threading.Thread):
    """Worker thread that refills the per-owner notification token buckets.

    Once per _token_rate seconds every owner gains one sms and one email
    token, up to TOKEN_LIMIT. The wait is split into six sleeps so that
    shutdown() is noticed sooner.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        self.running = False       # True while run() is executing
        self.shuttingdown = False  # set by shutdown() to stop the loop

    def shutdown(self):
        """Ask the loop to stop and, if it is running, wait for it to end."""
        self.shuttingdown = True
        if not self.running:
            return
        self.join()

    def _top_up(self):
        """Add one token per channel to every owner below TOKEN_LIMIT."""
        # Check and increment under the lock so a concurrent token take
        # cannot be lost.
        with _Lock:
            for owner in list(owners or ()):
                for channel in ('sms', 'email'):
                    key = (owner, channel)
                    # .get(): an owner just added by a config reload may not
                    # have a bucket yet.
                    current = tokens.get(key, 0)
                    if current < TOKEN_LIMIT:
                        tokens[key] = current + 1

    def run(self):
        """Top up the buckets periodically until shutdown() is called."""
        self.running = True
        ticks = 0
        try:
            while not self.shuttingdown:
                if ticks == 6:
                    ticks = 0
                    self._top_up()

                if not self.shuttingdown:
                    # Float division: integer division would give a zero
                    # sleep for rates below 6.
                    time.sleep(_token_rate / 6.0)
                    ticks += 1
        finally:
            self.running = False
|
|
|
|
def main():
    """Run the alert-notify daemon.

    Sets up logging, writes the pid file (exiting with status 1 if it names
    a live process), waits while the DISABLE flag file exists, loads the
    config, subscribes to NOTIFY_TOPIC and starts the two worker threads.
    It then polls CONFIGFILE for changes until interrupted, at which point
    it disconnects, stops the threads, removes the pid file and exits 0.
    """
    # The thread objects are module globals; without this declaration the
    # assignments below would only create locals.
    global conn, _TokenThread, _NotifyThread

    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s alert-notify[%(process)d] %(levelname)s - %(message)s", filename=LOGFILE)
    logging.info('Starting up Alert Notify version %s', __version__)

    # Write pid file
    if os.path.isfile(PIDFILE):
        with open(PIDFILE) as f:
            pid = f.read().strip()
        try:
            os.kill(int(pid), 0)  # signal 0 only tests that the pid exists
        except (OSError, ValueError):
            # No such process, or the file content is not a pid.
            logging.warning('Stale pid file %s found, overwriting', PIDFILE)
        else:
            logging.error('%s already exists, exiting', PIDFILE)
            sys.exit(1)

    with open(PIDFILE, 'w') as f:
        f.write(str(os.getpid()))

    while os.path.isfile(DISABLE):
        logging.warning('Disable flag exists (%s). Sleeping...', DISABLE)
        time.sleep(120)

    # Initialise alert config
    init_config()
    config_mod_time = os.path.getmtime(CONFIGFILE)

    # Connect to message broker
    conn = None  # stays None if the connection object cannot be created
    try:
        conn = stomp.Connection(BROKER_LIST)
        conn.set_listener('', MessageHandler())
        conn.start()
        conn.connect(wait=True)
        conn.subscribe(destination=NOTIFY_TOPIC, ack='auto', headers={'selector': "repeat = 'false'"})
    except Exception as e:
        logging.error('Stomp connection error: %s', e)

    # Start token bucket thread
    _TokenThread = TokenTopUp()
    _TokenThread.start()

    # Start notify thread
    _NotifyThread = ReleaseThread()
    _NotifyThread.start()

    # Main Loop
    while True:
        try:
            # Read (or re-read) config as necessary
            try:
                mod_time = os.path.getmtime(CONFIGFILE)
            except OSError as e:
                # The file may be briefly missing while it is being replaced.
                logging.error('Cannot stat %s: %s', CONFIGFILE, e)
            else:
                if mod_time != config_mod_time:
                    init_config()
                    # Remember the mtime seen before loading, so a change
                    # made during the load triggers another reload.
                    config_mod_time = mod_time

            time.sleep(0.5)
        except (KeyboardInterrupt, SystemExit):
            if conn is not None:
                try:
                    conn.disconnect()
                except Exception as e:
                    logging.warning('Error disconnecting from broker: %s', e)
            _TokenThread.shutdown()
            _NotifyThread.shutdown()
            os.unlink(PIDFILE)
            sys.exit(0)


if __name__ == '__main__':
    main()
|