0
0
Fork 0
mirror of https://github.com/alerta/alerta.git synced 2025-01-24 17:29:39 +00:00
alerta_alerta/bin/alert-mailer.py
2012-09-03 10:09:05 +01:00

289 lines
12 KiB
Python
Executable file

#!/usr/bin/env python
########################################
#
# alert-mailer.py - Alert Mailer module
#
########################################
import os
import sys
import time
import threading
import urllib2
try:
import json
except ImportError:
import simplejson as json
import smtplib
# from email.MIMEMultipart import MIMEMultipart
# from email.MIMEText import MIMEText
# from email.MIMEImage import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
import stomp
import datetime
import pytz
import logging
import uuid
__version__ = '1.0.8'
BROKER_LIST = [('localhost', 61613)] # list of brokers for failover
NOTIFY_TOPIC = '/topic/notify'
ALERTA_URL = 'http://monitoring.guprod.gnl'
SMTP_SERVER = 'mx'
ALERTER_MAIL = 'alerta@guardian.co.uk'
# MAILING_LIST = ['nick.satterly@guardian.co.uk', 'simon.huggins@guardian.co.uk']
MAILING_LIST = ['nick.satterly@guardian.co.uk']
TIMEZONE = 'Europe/London'
DISABLE = '/opt/alerta/conf/alert-mailer.disable'
LOGFILE = '/var/log/alerta/alert-mailer.log'
PIDFILE = '/var/run/alerta/alert-mailer.pid'
_TokenThread = None # Worker thread object
_Lock = threading.Lock() # Synchronization lock
TOKEN_LIMIT = 20
_token_rate = 30 # Add a token every 30 seconds
tokens = 20
class MessageHandler(object):
def on_error(self, headers, body):
logging.error('Received an error %s', body)
def on_message(self, headers, body):
global tokens
logging.debug("Received alert : %s", body)
alert = dict()
alert = json.loads(body)
logging.info('%s : [%s] %s', alert['lastReceiveId'], alert['status'],alert['summary'])
# Only send a NORMAL email for alerts that have cleared
if alert['severity'] == 'NORMAL' and alert['previousSeverity'] == 'UNKNOWN':
logging.info('%s : Skip this NORMAL alert because it is not clearing a known alarm', alert['lastReceiveId'])
return
# WARNINGs to/from NORMAL or UNKNOWN severity should not trigger emails
if ((alert['severity'] == 'WARNING' and alert['previousSeverity'] in ['NORMAL','UNKNOWN']) or
(alert['severity'] == 'NORMAL' and alert['previousSeverity'] == 'WARNING')):
logging.info('%s : Skip this state change to/from WARNING alert because warnings should not trigger emails', alert['lastReceiveId'])
return
if tokens:
_Lock.acquire()
tokens -= 1
_Lock.release()
logging.debug('Taken a token, there are only %d left', tokens)
else:
logging.warning('%s : No tokens left, rate limiting this alert', alert['lastReceiveId'])
return
# Convert createTime to local time (set TIMEZONE above)
createTime = datetime.datetime.strptime(alert['createTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
createTime = createTime.replace(tzinfo=pytz.utc)
tz = pytz.timezone(TIMEZONE)
localTime = createTime.astimezone(tz)
text = ''
text += '[%s] %s\n' % (alert['status'], alert['summary'])
text += 'Alert Details\n'
text += 'Alert ID: %s\n' % (alert['id'])
text += 'Create Time: %s\n' % (localTime.strftime('%Y/%m/%d %H:%M:%S'))
text += 'Resource: %s\n' % (alert['resource'])
text += 'Environment: %s\n' % (','.join(alert['environment']))
text += 'Service: %s\n' % (','.join(alert['service']))
text += 'Event Name: %s\n' % (alert['event'])
text += 'Event Group: %s\n' % (alert['group'])
text += 'Event Value: %s\n' % (alert['value'])
text += 'Severity: %s -> %s\n' % (alert['previousSeverity'], alert['severity'])
text += 'Status: %s\n' % (alert['status'])
text += 'Text: %s\n' % (alert['text'])
if 'thresholdInfo' in alert:
text += 'Threshold Info: %s\n' % (alert['thresholdInfo'])
if 'duplicateCount' in alert:
text += 'Duplicate Count: %s\n' % (alert['duplicateCount'])
if 'moreInfo' in alert:
text += 'More Info: %s\n' % (alert['moreInfo'])
text += 'Historical Data\n'
if 'graphs' in alert:
for g in alert['graphs']:
text += '%s\n' % (g)
text += 'Raw Alert\n'
text += '%s\n' % (json.dumps(alert))
text += 'Generated by %s on %s at %s\n' % ('alert-mailer.py', os.uname()[1], datetime.datetime.now().strftime("%a %d %b %H:%M:%S"))
logging.debug('Raw Text: %s', text)
html = '<p><table border="0" cellpadding="0" cellspacing="0" width="100%">\n' # table used to center email
html += '<tr><td bgcolor="#ffffff" align="center">\n'
html += '<table border="0" cellpadding="0" cellspacing="0" width="700">\n' # table used to set width of email
html += '<tr><td bgcolor="#425470"><p align="center" style="font-size:24px;color:#d9fffd;font-weight:bold;"><strong>[%s] %s</strong></p>\n' % (alert['status'], alert['summary'])
html += '<tr><td><p align="left" style="font-size:18px;line-height:22px;color:#c25130;font-weight:bold;">Alert Details</p>\n'
html += '<table>\n'
html += '<tr><td><b>Alert ID:</b></td><td><a href="%s/alerta/details.php?id=%s" target="_blank">%s</a></td></tr>\n' % (ALERTA_URL, alert['id'], alert['id'])
html += '<tr><td><b>Create Time:</b></td><td>%s</td></tr>\n' % (localTime.strftime('%Y/%m/%d %H:%M:%S'))
html += '<tr><td><b>Resource:</b></td><td>%s</td></tr>\n' % (alert['resource'])
html += '<tr><td><b>Environment:</b></td><td>%s</td></tr>\n' % (','.join(alert['environment']))
html += '<tr><td><b>Service:</b></td><td>%s</td></tr>\n' % (','.join(alert['service']))
html += '<tr><td><b>Event Name:</b></td><td>%s</td></tr>\n' % (alert['event'])
html += '<tr><td><b>Event Group:</b></td><td>%s</td></tr>\n' % (alert['group'])
html += '<tr><td><b>Event Value:</b></td><td>%s</td></tr>\n' % (alert['value'])
html += '<tr><td><b>Severity:</b></td><td>%s -> %s</td></tr>\n' % (alert['previousSeverity'], alert['severity'])
html += '<tr><td><b>Status:</b></td><td>%s</td></tr>\n' % (alert['status'])
html += '<tr><td><b>Text:</b></td><td>%s</td></tr>\n' % (alert['text'])
if 'thresholdInfo' in alert:
html += '<tr><td><b>Threshold Info:</b></td><td>%s</td></tr>\n' % (alert['thresholdInfo'])
if 'duplicateCount' in alert:
html += '<tr><td><b>Duplicate Count:</b></td><td>%s</td></tr>\n' % (alert['duplicateCount'])
if 'moreInfo' in alert:
html += '<tr><td><b>More Info:</b></td><td><a href="%s">ganglia</a></td></tr>\n' % (alert['moreInfo'])
html += '</table>\n'
html += '</td></tr>\n'
html += '<tr><td><p align="left" style="font-size:18px;line-height:22px;color:#c25130;font-weight:bold;">Historical Data</p>\n'
if 'graphs' in alert:
graph_cid = dict()
for g in alert['graphs']:
graph_cid[g] = str(uuid.uuid4())
html += '<tr><td><img src="cid:'+graph_cid[g]+'"></td></tr>\n'
html += '<tr><td><p align="left" style="font-size:18px;line-height:22px;color:#c25130;font-weight:bold;">Raw Alert</p>\n'
html += '<tr><td><p align="left" style="font-family: \'Courier New\', Courier, monospace">%s</p></td></tr>\n' % (json.dumps(alert))
html += '<tr><td>Generated by %s on %s at %s</td></tr>\n' % ('alert-mailer.py', os.uname()[1], datetime.datetime.now().strftime("%a %d %b %H:%M:%S"))
html += '</table>'
html += '</td></tr></table>'
html += '</td></tr></table>'
logging.debug('HTML Text %s', html)
msg_root = MIMEMultipart('related')
msg_root['Subject'] = '[%s] %s' % (alert['status'], alert['summary'])
msg_root['From'] = ALERTER_MAIL
msg_root['To'] = ','.join(MAILING_LIST)
msg_root.preamble = 'This is a multi-part message in MIME format.'
msg_alt = MIMEMultipart('alternative')
msg_root.attach(msg_alt)
msg_text = MIMEText(text, 'plain')
msg_alt.attach(msg_text)
msg_html = MIMEText(html, 'html')
msg_alt.attach(msg_html)
if 'graphs' in alert:
msg_img = dict()
for g in alert['graphs']:
try:
image = urllib2.urlopen(g).read()
msg_img[g] = MIMEImage(image)
logging.debug('graph cid %s', graph_cid[g])
msg_img[g].add_header('Content-ID', '<'+graph_cid[g]+'>')
msg_root.attach(msg_img[g])
except:
pass
try:
logging.info('%s : Send email to %s', alert['lastReceiveId'], ','.join(MAILING_LIST))
s = smtplib.SMTP(SMTP_SERVER)
# s.set_debuglevel(1) # XXX - uncomment for detailed SMTP debugging
s.sendmail(ALERTER_MAIL, MAILING_LIST, msg_root.as_string())
s.quit()
except smtplib.SMTPException, e:
logging.error('%s : Sendmail failed - %s', alert['lastReceiveId'], e)
def on_disconnected(self):
global conn
logging.warning('Connection lost. Attempting auto-reconnect to %s', NOTIFY_TOPIC)
conn.start()
conn.connect(wait=True)
conn.subscribe(destination=NOTIFY_TOPIC)
class TokenTopUp(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
self.running = False
self.shuttingdown = False
def shutdown(self):
self.shuttingdown = True
if not self.running:
return
self.join()
def run(self):
global _token_rate, tokens
self.running = True
while not self.shuttingdown:
if self.shuttingdown:
break
if tokens < TOKEN_LIMIT:
_Lock.acquire()
tokens += 1
_Lock.release()
if not self.shuttingdown:
logging.debug('Added token to bucket. There are now %d tokens', tokens)
time.sleep(_token_rate)
self.running = False
def main():
global conn
logging.basicConfig(level=logging.INFO, format="%(asctime)s alert-mailer[%(process)d] %(levelname)s - %(message)s", filename=LOGFILE)
logging.info('Starting up Alert Mailer version %s', __version__)
# Write pid file if not already running
if os.path.isfile(PIDFILE):
pid = open(PIDFILE).read()
try:
os.kill(int(pid), 0)
logging.error('Process with pid %s already exists, exiting', pid)
sys.exit(1)
except OSError:
pass
file(PIDFILE, 'w').write(str(os.getpid()))
while os.path.isfile(DISABLE):
logging.warning('Disable flag exists (%s). Sleeping...', DISABLE)
time.sleep(120)
# Connect to message broker
try:
conn = stomp.Connection(
BROKER_LIST,
reconnect_sleep_increase = 5.0,
reconnect_sleep_max = 120.0,
reconnect_attempts_max = 20
)
conn.set_listener('', MessageHandler())
conn.start()
conn.connect(wait=True)
conn.subscribe(destination=NOTIFY_TOPIC)
except Exception, e:
logging.error('Stomp connection error: %s', e)
# Start token bucket thread
logging.info('Start token bucket rate limiting thread')
_TokenThread = TokenTopUp()
_TokenThread.start()
while True:
try:
time.sleep(0.01)
except (KeyboardInterrupt, SystemExit):
conn.disconnect()
_TokenThread.shutdown()
os.unlink(PIDFILE)
sys.exit(0)
if __name__ == '__main__':
main()