#!/usr/local/bin/python
#
# Downtimer.py: Load and run calculations for downtime periods from QOS logs.
#
# Doug White
import sys
import os
import string
import time
import normalDate
import rangefind
# Default location of the collected QOS data (default for Downtimer's datadir).
QOS_COLLECTEDDATA = '/usr/local/qosserver/CollectedData'


class Downtimer:
    """
    Load QOS log entries for one agent/entity and compute downtime statistics.

    Downtime periods are represented as [start, duration] pairs, where start
    is a UNIX timestamp and duration is in seconds.
    """

    def __init__(self, agent='', entity='', startdate=None, starttime=None,
                 enddate=None, endtime=None, datadir=QOS_COLLECTEDDATA):
        """
        Set up a report window.

        agent, entity      -- passed through to rangefind.RangeFind by run()
        startdate, enddate -- date text in %m/%d/%Y form
        starttime, endtime -- time-of-day text in %H:%M form
        datadir            -- stored on the instance as self.datadir

        After construction self.startdate/self.enddate hold the values
        returned by normalDate.ND, and self.starttime/self.endtime hold UNIX
        timestamps (needed for the numeric comparisons in tricky_math).

        Raises ValueError when a date or time is missing or unparseable.
        """
        self.timezone = time.strftime("%Z", time.localtime(time.time()))
        print('Downtimer timezone: %s' % self.timezone)
        (self.startdate, self.starttime) = self.convert_date_and_time(startdate, starttime)
        (self.enddate, self.endtime) = self.convert_date_and_time(enddate, endtime)
        self.agent = agent
        self.entity = entity
        self.datadir = datadir

    def run(self):
        """
        Run the downtime report.

        Returns the tuple produced by tricky_math():
            downtime list in [start, duration] format,
            uptime as a fraction of the window (multiply by 100 for percent),
            total downtime in seconds,
            longest downtime in [start, duration] format,
            average downtime duration in seconds,
            longest uptime in (start, duration) format
        """
        # NOTE(review): self.datadir is stored by __init__ but is not handed
        # to RangeFind here -- confirm whether RangeFind should receive it.
        rf = rangefind.RangeFind(agent=self.agent,
                                 entity=self.entity,
                                 startdate=self.startdate,
                                 starttime=self.starttime,
                                 enddate=self.enddate,
                                 endtime=self.endtime)
        stime = time.time()
        datalines = rf.extract()
        etime = time.time()
        print("extract split: %s" % (etime - stime))

        # now calculate the downtime ranges...
        stime = time.time()
        downtimes = self.find_downtimes(datalines)
        etime = time.time()
        print("scan split: %s" % (etime - stime))
        return self.tricky_math(downtimes)

    def tricky_math(self, downtimes):
        """
        Compute summary statistics for a time-ordered downtime list.

        downtimes -- list of [start, duration] entries (lists or tuples).
                     A final entry with duration 0 means "still down at the
                     end of the window"; it is closed at self.endtime, which
                     modifies the passed-in list.

        Returns (downtimes, uptime fraction, total downtime,
                 longest downtime entry, mean downtime duration,
                 longest uptime as (start, duration)).

        A zero-length window reports an uptime fraction of 1.0 instead of
        dividing by zero.
        """
        window = self.endtime - self.starttime

        # Close an open-ended final outage. The entry is replaced rather than
        # mutated so that tuple entries work as well as list entries.
        if downtimes and downtimes[-1][1] == 0:
            last_start = downtimes[-1][0]
            downtimes[-1] = [last_start, self.endtime - last_start]

        # Single pass: total downtime, longest outage, longest gap between
        # outages.
        totaldtime = 0
        maxdtime = (0, 0)
        maxutime = (0, 0)
        lastupstart = self.starttime
        for entry in downtimes:
            start = entry[0]
            duration = entry[1]
            totaldtime = totaldtime + duration
            if maxdtime[1] < duration:
                maxdtime = entry
            gap = start - lastupstart
            if maxutime[1] < gap:
                maxutime = (lastupstart, gap)
            lastupstart = start + duration

        if not downtimes:
            # short-circuit: up for the whole window
            maxutime = (self.starttime, window)
            avgdtime = 0
        else:
            # The stretch between the end of the last outage and the end of
            # the window is uptime too.
            trailing = self.endtime - lastupstart
            if maxutime[1] < trailing:
                maxutime = (lastupstart, trailing)
            # True arithmetic mean; float() avoids Python 2 integer division.
            avgdtime = float(totaldtime) / len(downtimes)

        if window == 0:
            uptime = 1.0
        else:
            uptime = 1 - totaldtime / float(window)

        return (downtimes, uptime, totaldtime, maxdtime, avgdtime, maxutime)

    def combine(self, targets):
        """
        OR together the downtime lists of several reports and recompute stats.

        targets -- sequence of run()/tricky_math() result tuples; only element
                   0 (the downtime list) of each is read. The input lists are
                   not modified.

        Overlapping or touching downtime periods are merged into one. Returns
        tricky_math() of the merged list, computed against this instance's
        window.
        """
        # Copy every entry into a [start, duration] list. Sorting a mix of
        # tuples and lists does not order by start time (Python 2 orders by
        # type name first; Python 3 refuses to compare them).
        newdtimes = []
        for targ in targets:
            for entry in targ[0]:
                newdtimes.append([entry[0], entry[1]])

        t = time.time()
        newdtimes.sort()
        print("Sort split: %s" % (time.time() - t))

        # Flatten: walk the sorted entries, growing the current merged period
        # while the next one starts inside (or exactly at the end of) it.
        merged = []
        for start, duration in newdtimes:
            if merged and start <= merged[-1][0] + merged[-1][1]:
                current = merged[-1]
                end = start + duration
                if end > current[0] + current[1]:
                    current[1] = end - current[0]
            else:
                merged.append([start, duration])

        # Now we have a new unique downtime list; redo the statistics.
        return self.tricky_math(merged)

    def sort_downtimes(self, first, second):
        """
        cmp-style comparator ordering downtime entries by start time.

        Returns -1, 0 or 1. Not called from within this class.
        """
        if first[0] < second[0]:
            return -1
        elif first[0] > second[0]:
            return 1
        else:
            return 0

    def find_downtimes(self, logentries):
        """
        Scan log lines and return the downtime periods they describe.

        logentries -- iterable of log line strings. Lines starting with '#'
                      are comments. The state flag is the character at index
                      20; anything other than 'G' counts as bad.

        An outage starts at the first bad line and ends at the next good
        line; timestamps come from rangefind.RangeFind.log_time(). If the
        entries end during an outage, a final [start, 0] entry marks it as
        still open (tricky_math closes it at the end of the window).

        Returns a list of [start, duration] lists.
        """
        downtimes = []
        outage_start = None
        rf = rangefind.RangeFind()
        for ent in logentries:
            # Skip blank/truncated lines (no state flag to read) and comments.
            if len(ent) <= 20 or ent[0] == '#':
                continue
            if ent[20] != 'G':
                if outage_start is None:
                    outage_start = rf.log_time(ent)
            elif outage_start is not None:
                # hit end of one range...
                outage_end = rf.log_time(ent)
                downtimes.append([outage_start, outage_end - outage_start])
                outage_start = None
        if outage_start is not None:
            downtimes.append([outage_start, 0])
        return downtimes

    ### UTILITY
    def convert_date_and_time(self, date, btime):
        """
        Parse a date and a time of day into (normalDate.ND(...), timestamp).

        date  -- date text in %m/%d/%Y form
        btime -- time text in %H:%M form

        The timestamp comes from time.mktime(), i.e. the values are taken as
        local time.

        Raises ValueError if either value is None or does not match the
        expected format.
        """
        # Historical note: an earlier version appended the timezone name and
        # parsed it with %Z to work around a FreeBSD libc bug that left times
        # one hour off after mktime(). That workaround is not active; the DST
        # decision is left to strptime()/mktime().
        if date is None or btime is None:
            raise ValueError('date and time are both required (got %r, %r)'
                             % (date, btime))
        line = '%s %s' % (btime, date)
        datetuple = time.strptime(line, "%H:%M %m/%d/%Y")
        return (normalDate.ND(datetuple), time.mktime(datetuple))