diff --git a/.gitignore b/.gitignore
index c6bdecf..77a8919 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 *.sublime-*
 build/*
 dist/*
+.idea/*
 MANIFEST
 *.pyc
\ No newline at end of file
diff --git a/sar/__init__.py b/sar/__init__.py
index 3222268..ccd067a 100644
--- a/sar/__init__.py
+++ b/sar/__init__.py
@@ -6,7 +6,7 @@
 
 """Regexp terms for finding fields in SAR parts for CPU"""
 FIELDS_CPU = [
-    '\%(usr|user)', '\%nice', '\%sys', '\%iowait', '\%idle'
+    '^\%(usr|user)', '^\%nice', '^\%sys', '^\%iowait', '^\%idle'
 ]
 
 """Pair regexp terms with field names in CPU output dictionary"""
@@ -20,14 +20,14 @@
 
 """Regexp terms for finding fields in SAR parts for memory usage"""
 FIELDS_MEM = [
-    'kbmemfree', 'kbmemused', '\%memused', 'kbbuffers', 'kbcached'
+    '^kbmemfree', '^kbmemused', '^\%memused', '^kbbuffers', '^kbcached'
 ]
 
 """Pair regexp terms with field names in memory usage output dictionary"""
 FIELD_PAIRS_MEM = {
-    'memfree': FIELDS_MEM[0], 'memused': FIELDS_MEM[1],
-    'memusedpercent': FIELDS_MEM[2], 'membuffer': FIELDS_MEM[3],
-    'memcache': FIELDS_MEM[4]
+    'memfreekb': FIELDS_MEM[0], 'memusedkb': FIELDS_MEM[1],
+    'memusedpercent': FIELDS_MEM[2], 'membufferkb': FIELDS_MEM[3],
+    'memcachekb': FIELDS_MEM[4]
 }
 
 """Swap usage regexp pattern for detecting SAR section header"""
@@ -35,12 +35,12 @@
 
 """Regexp terms for finding fields in SAR parts for swap usage"""
 FIELDS_SWP = [
-    'kbswpfree', 'kbswpused', '\%swpused'
+    '^kbswpfree', '^kbswpused', '^\%swpused'
 ]
 
 """Pair regexp terms with field names in swap usage output dictionary"""
 FIELD_PAIRS_SWP = {
-    'swapfree': FIELDS_SWP[0], 'swapused': FIELDS_SWP[1],
+    'swapfreekb': FIELDS_SWP[0], 'swapusedkb': FIELDS_SWP[1],
     'swapusedpercent': FIELDS_SWP[2]
 }
 
@@ -49,13 +49,13 @@
 
 """Regexp terms for finding fields in SAR parts for I/O usage"""
 FIELDS_IO = [
-    '^tps', '^rtps', '^wtps', 'bread\/s', 'bwrtn\/s'
+    '^tps', '^rtps', '^wtps', '^bread\/s', '^bwrtn\/s'
 ]
 
 """Pair regexp terms with field names in I/O usage output dictionary"""
 FIELD_PAIRS_IO = {
     'tps': FIELDS_IO[0], 'rtps': FIELDS_IO[1], 'wtps': FIELDS_IO[2],
-    'bread': FIELDS_IO[3], 'bwrite': FIELDS_IO[4],
+    'readb': FIELDS_IO[3], 'writeb': FIELDS_IO[4],
 }
 
 """Task creation and system switching regexp pattern for SAR section header"""
@@ -63,7 +63,7 @@
 
 """Regexp terms for finding fields in SAR parts for task creation and system switching"""
 FIELDS_TASK = [
-    'proc', 'cswch'
+    '^proc\/s', '^cswch\/s'
 ]
 
 """Pair regexp terms with field names in Task creation and system switching output dictionary"""
@@ -71,6 +71,63 @@
     'proc': FIELDS_TASK[0], 'cswch': FIELDS_TASK[1]
 }
 
+"""Network usage regexp pattern for SAR section header"""
+PATTERN_IFACE = '.*IFACE.*rxpck\/s.*txpck\/s.*rxkB\/s.*txkB\/s.*rxcmp\/s.*txcmp\/s.*rxmcst\/s'
+
+"""Regexp terms for finding fields in SAR parts for network usage"""
+FIELDS_IFACE = [
+    '^rxpck\/s','^txpck\/s','^rxkB\/s','^txkB\/s','^rxcmp\/s','^txcmp\/s','^rxmcst\/s'
+]
+
+"""Pair regexp terms with field names in network usage dictionary"""
+FIELD_PAIRS_IFACE = {
+    'rxpck':FIELDS_IFACE[0], 'txpck':FIELDS_IFACE[1], 'rxkb':FIELDS_IFACE[2],
+    'txkb':FIELDS_IFACE[3], 'rxcmp':FIELDS_IFACE[4], 'txcmp':FIELDS_IFACE[5], 'rxmcst':FIELDS_IFACE[6]
+}
+
+"""Load average and run queue regexp pattern for SAR section header"""
+PATTERN_LOAD = '.*runq-sz.*plist-sz.*ldavg-1.*ldavg-5.*ldavg-15.*blocked'
+
+"""Regexp terms for finding fields in SAR parts for load average and run queue"""
+FIELDS_LOAD = [
+    '^runq-sz','^plist-sz','^ldavg-1$','^ldavg-5','^ldavg-15$','^blocked'
+]
+
+"""Pair regexp terms with field names in load average and run queue dictionary"""
+FIELD_PAIRS_LOAD = {
+    'runq-sz':FIELDS_LOAD[0], 'plist-sz':FIELDS_LOAD[1], 'ldavg-1':FIELDS_LOAD[2], 'ldavg-5':FIELDS_LOAD[3],
+    'ldavg-15':FIELDS_LOAD[4], 'blocked':FIELDS_LOAD[5]
+}
+
+"""Block device I/O regexp pattern for SAR section header"""
+PATTERN_DEV = '.*DEV.*tps.*rd_sec\/s.*wr_sec\/s.*avgrq-sz.*avgqu-sz.*await.*svctm.*util'
+
+"""Regexp terms for finding fields in SAR parts for block device I/O"""
+FIELDS_DEV = [
+    '^tps','^rd_sec\/s','^wr_sec\/s','^avgrq-sz','^avgqu-sz','^await', '^\%util'
+]
+
+"""Pair regexp terms with field names in block device I/O dictionary"""
+FIELD_PAIRS_DEV = {
+    'tps':FIELDS_DEV[0], 'rd_sec':FIELDS_DEV[1], 'wr_sec':FIELDS_DEV[2], 'avgrq-sz':FIELDS_DEV[3],
+    'avgqu-sz':FIELDS_DEV[4], 'await':FIELDS_DEV[5], 'utilpercent':FIELDS_DEV[6]
+}
+
+"""Paging statistics regexp pattern for SAR section header"""
+PATTERN_PAGING = '.*pgpgin\/s.*pgpgout\/s.*fault\/s.*majflt\/s.*pgfree\/s.*pgscank\/s.*pgscand\/s.*pgsteal\/s.*vmeff'
+
+"""Regexp terms for finding fields in SAR parts for paging statistics"""
+FIELDS_PAGING = [
+    '^pgpgin\/s','^pgpgout\/s','^fault\/s','^majflt\/s','^pgfree\/s','^pgscank\/s', '^pgscand\/s', '^pgsteal\/s', '^\%vmeff'
+]
+
+"""Pair regexp terms with field names in paging statistics dictionary"""
+FIELD_PAIRS_PAGING = {
+    'pgpgin':FIELDS_PAGING[0], 'pgpgout':FIELDS_PAGING[1], 'fault':FIELDS_PAGING[2], 'majflt':FIELDS_PAGING[3],
+    'pgfree':FIELDS_PAGING[4], 'pgscank':FIELDS_PAGING[5], 'pgscand':FIELDS_PAGING[6], 'pgsteal': FIELDS_PAGING[7],
+    'vmeffpercent': FIELDS_PAGING[8]
+}
+
 """Restart time regexp pattern for detecting SAR restart notices"""
 PATTERN_RESTART = '.*LINUX\ RESTART.*'
 
@@ -105,11 +162,36 @@
         'PATTERN': PATTERN_TASK,
         'FIELDS': FIELDS_TASK,
         'PAIRS': FIELD_PAIRS_TASK
-
+    },
+    'IFACE': {
+        'PATTERN': PATTERN_IFACE,
+        'FIELDS': FIELDS_IFACE,
+        'PAIRS': FIELD_PAIRS_IFACE
+    },
+    'LOAD': {
+        'PATTERN': PATTERN_LOAD,
+        'FIELDS': FIELDS_LOAD,
+        'PAIRS': FIELD_PAIRS_LOAD
+    },
+    'PAGING': {
+        'PATTERN': PATTERN_PAGING,
+        'FIELDS': FIELDS_PAGING,
+        'PAIRS': FIELD_PAIRS_PAGING
+    },
+    'DEV': {
+        'PATTERN': PATTERN_DEV,
+        'FIELDS': FIELDS_DEV,
+        'PAIRS': FIELD_PAIRS_DEV
     }
 }
 
+INT_FIELDS = ['memfreekb', 'memusedkb', 'membufferkb', 'memcachekb', 'runq-sz', 'plist-sz', 'blocked']
+
+STR_FIELDS = ['iface', 'dev']
+
+MULTILINE_PATTERNS = ['CPU', 'IFACE', 'DEV']
+
 __all__ = [
-    'PATTERN_RESTART', 'PATTERN_MULTISPLIT',
-    'PATTERN_DATE', 'ALL_PATTERNS'
+    'PATTERN_RESTART', 'PATTERN_MULTISPLIT', 'PATTERN_DATE', 'ALL_PATTERNS', 'INT_FIELDS',
+    'STR_FIELDS', 'MULTILINE_PATTERNS'
 ]
diff --git a/sar/multiparser.py b/sar/multiparser.py
index 6098011..8ffeb21 100644
--- a/sar/multiparser.py
+++ b/sar/multiparser.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-'''
+"""
 :mod:`sar.multiparser` is a module containing class for parsing SAR output
 files where multiple files are merged into one huge file.
 
@@ -12,41 +12,38 @@
 (24hr output compared to AM/PM output).
 
 Following versions might support ``SAR -A`` parsing.
-'''
+"""
 
 import sar.parser as sarparse
 from sar import PATTERN_MULTISPLIT
 import mmap
 import os
 import traceback
-from types import StringType
 
 
 class Multiparser(object):
-    '''
+    """
     Multifile parser for SAR files. Derives from SAR Parser class
 
    :param filename: Name of the SAR output file, with combined data
    :type filename: str.
-    '''
+    """
 
     def __init__(self, combo_filename=''):
 
         self.__sarinfos = {}
-        '''Dictionary for multiple dictionaries from
-            :class:`com.nimium.sys.util.sar.parser.Parser`'''
+        """Dictionary for multiple dictionaries from
+            :class:`com.nimium.sys.util.sar.parser.Parser`"""
 
         self.__splitpointers = []
-        '''List of pointers inside combo file where each file starts'''
+        """List of pointers inside combo file where each file starts"""
 
         self.__filename = combo_filename
-        '''SAR output filename to be parsed'''
-
-        return None
+        """SAR output filename to be parsed"""
 
     def load_file(self):
-        '''
+        """
         Loads combined SAR format logfile in ASCII format.
 
         :return: ``True`` if loading and parsing of file went fine, \
             ``False`` if it failed (at any point)
-        '''
+        """
         daychunks = self.__split_file()
 
         if (daychunks):
@@ -61,39 +58,29 @@ def load_file(self):
                 chunk = self.__get_chunk(start, end)
 
                 parser = sarparse.Parser()
-                cpu_usage, mem_usage, swp_usage, io_usage = \
-                    parser._parse_file(parser._split_file(chunk))
-
-                self.__sarinfos[self.__get_part_date(chunk)] = {
-                    "cpu": cpu_usage,
-                    "mem": mem_usage,
-                    "swap": swp_usage,
-                    "io": io_usage
-                }
-                del(cpu_usage)
-                del(mem_usage)
-                del(swp_usage)
-                del(io_usage)
-                del(parser)
+                usage = parser._parse_file(parser._split_file(chunk))
+
+                self.__sarinfos[self.__get_part_date(chunk)] = usage
+                del usage
 
         return(True)
 
     def get_sar_info(self):
-        '''
+        """
         Returns parsed sar info
 
         :return: ``Dictionary``-style list of SAR data
-        '''
+        """
         return self.__sarinfos
 
     def __get_chunk(self, start=0, end=None):
-        '''
+        """
         Gets chunk from the sar combo file, from start to end
 
         :param start: where to start a pulled chunk
         :type start: int.
         :param end: where to end a pulled chunk
         :type end: int.
         :return: str.
-        '''
+        """
         piece = False
 
         if (self.__filename and os.access(self.__filename, os.R_OK)):
@@ -130,13 +117,13 @@ def __get_chunk(self, start=0, end=None):
         return(piece)
 
     def __split_file(self):
-        '''
+        """
         Splits combined SAR output file (in ASCII format) in order to
         extract info we need for it, in the format we want.
 
         :return: ``List``-style of SAR file sections separated by the type
             of info they contain (SAR file sections) without parsing
             what is exactly what at this point
-        '''
+        """
         # Filename passed checks through __init__
         if (self.__filename and os.access(self.__filename, os.R_OK)):
@@ -162,7 +149,7 @@ def __split_file(self):
 
             while (sfpos > -1):
 
-                '''Split by day found'''
+                """Split by day found"""
                 self.__splitpointers.append(sfpos)
 
                 # Iterate for new position
@@ -180,14 +167,14 @@ def __split_file(self):
         return False
 
     def __get_part_date(self, part=''):
-        '''
+        """
         Retrieves date of the combo part from the file
         :param part: Part of the combo file (parsed out whole SAR file
             from the combo
         :type part: str.
         :return: string containing date in ISO format (YYY-MM-DD)
-        '''
-        if (type(part) is not StringType):
+        """
+        if (type(part) is not str):
 
             # We can cope with strings only
             return False
diff --git a/sar/parser.py b/sar/parser.py
index 157859c..ac67fab 100644
--- a/sar/parser.py
+++ b/sar/parser.py
@@ -6,7 +6,7 @@
 
 Parses SAR ASCII output only, not binary files!
""" -from sar import PATTERN_RESTART, ALL_PATTERNS +from sar import PATTERN_RESTART, ALL_PATTERNS, INT_FIELDS, STR_FIELDS, MULTILINE_PATTERNS import mmap import os import re @@ -79,7 +79,7 @@ def get_sar_info(self): """ try: - test = self._sarinfo["CPU"] + test = self._sarinfo['CPU'] del test except KeyError: @@ -117,7 +117,7 @@ def _split_file(self, data=''): try: fhandle = os.open(self.__filename, os.O_RDONLY) except OSError: - print(("Couldn't open file %s" % self.__filename)) + print(('Couldn\'t open file %s' % self.__filename)) fhandle = None if fhandle or data != '': @@ -194,7 +194,7 @@ def _split_file(self, data=''): except ValueError: print(('Out of bounds (%s)!\n' % (sarmap.tell()))) # Now we repeat find. - dlpos = sarmap.find("\n\n") + dlpos = sarmap.find('\n\n') # If it wasn't the end of file, we want last piece of it if oldchunkpos < size: @@ -348,7 +348,7 @@ def __split_info(self, info_part, patternsname, patterns): elems = part_line.split() full_time = elems[0].strip() - if full_time != "Average:": + if full_time != 'Average:': # Convert time to 24hr format if needed if is_24hr is False: @@ -372,33 +372,31 @@ def __split_info(self, info_part, patternsname, patterns): return_dict[full_time] = {} fields = self.__fields[patternsname] - pairs = patterns["PAIRS"] + pairs = patterns['PAIRS'] for sectionname in pairs.iterkeys(): value = elems[fields[pairs[sectionname]]] - if sectionname == 'membuffer' or \ - sectionname == 'memcache' or \ - sectionname == 'memfree' or \ - sectionname == 'memused' or \ - sectionname == 'swapfree' or \ - sectionname == 'swapused': + if sectionname in INT_FIELDS: value = int(value) + elif sectionname in STR_FIELDS: + value = str(value) else: value = float(value) - if patternsname == 'CPU': - cpuid = elems[(1 if is_24hr is True else 2)] + if patternsname in MULTILINE_PATTERNS: + rowid = elems[(1 if is_24hr is True else 2)] try: - blah = return_dict[full_time][cpuid] + blah = return_dict[full_time][rowid] del blah except KeyError: - return_dict[full_time][cpuid] = {} - return_dict[full_time][cpuid][sectionname] = \ - value + return_dict[full_time][rowid] = {} + return_dict[full_time][rowid][sectionname] = value else: - return_dict[full_time][sectionname] = value + if 'single_line' not in return_dict[full_time]: + return_dict[full_time]['single_line'] = {} + return_dict[full_time]['single_line'][sectionname] = value return return_dict @@ -412,7 +410,7 @@ def __get_filedate(self): # Read first line of the file try: - sar_file = open(self.__filename, "r") + sar_file = open(self.__filename, 'r') except OSError: ### DEBUG