From c3ffd72bcc254e20b7a78f8334495271024b52e0 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Sun, 11 Jun 2017 20:39:07 -0700 Subject: [PATCH 1/6] trappy/ftrace: Parse data string along with special fields This is needed so that we're not dependent anymore on making sure data starts at something with '=' and will allow to parse custom systrace tracing_mark_write events which don't have an '=' in them. The new regex will also work for lines that do already have an '=' and doesn't change their behavior. The last line of the new regex in this patch works like this: r"(?P[0-9]+\.[0-9]+): (\w+:\s+)+(?P.+)" We use skip over ': ' and the next thing we find is considered the start of the data. This works well to find the start of a line for existing traces and is needed for systraces which don't have '='. Signed-off-by: Joel Fernandes --- trappy/ftrace.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/trappy/ftrace.py b/trappy/ftrace.py index dd0a2fc3..74445165 100644 --- a/trappy/ftrace.py +++ b/trappy/ftrace.py @@ -168,11 +168,10 @@ def contains_unique_word(line, unique_words=cls_for_unique_word.keys()): return True return False - special_fields_regexp = r"^\s*(?P.*)-(?P\d+)(?:\s+\(.*\))"\ + fields_regexp = r"^\s*(?P.*)-(?P\d+)(?:\s+\(.*\))"\ r"?\s+\[(?P\d+)\](?:\s+....)?\s+"\ - r"(?P[0-9]+\.[0-9]+):" - special_fields_regexp = re.compile(special_fields_regexp) - start_match = re.compile(r"[A-Za-z0-9_]+=") + r"(?P[0-9]+\.[0-9]+): (\w+:\s+)+(?P.+)" + fields_regexp = re.compile(fields_regexp) actual_trace = itertools.dropwhile(self.trace_hasnt_started(), fin) actual_trace = itertools.takewhile(self.trace_hasnt_finished(), @@ -188,13 +187,14 @@ def contains_unique_word(line, unique_words=cls_for_unique_word.keys()): line = line[:-1] - special_fields_match = special_fields_regexp.match(line) - if not special_fields_match: + fields_match = fields_regexp.match(line) + if not fields_match: raise FTraceParseError("Couldn't match special fields in '{}'".format(line)) - comm = special_fields_match.group('comm') - pid = int(special_fields_match.group('pid')) - cpu = int(special_fields_match.group('cpu')) - timestamp = float(special_fields_match.group('timestamp')) + comm = fields_match.group('comm') + pid = int(fields_match.group('pid')) + cpu = int(fields_match.group('cpu')) + timestamp = float(fields_match.group('timestamp')) + data_str = fields_match.group('data') if not self.basetime: self.basetime = timestamp @@ -207,13 +207,6 @@ def contains_unique_word(line, unique_words=cls_for_unique_word.keys()): (abs_window[1] and timestamp > abs_window[1]): return - try: - data_start_idx = start_match.search(line).start() - except AttributeError: - continue - - data_str = line[data_start_idx:] - # Remove empty arrays from the trace data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str) From 11ce74e1a28905e2210299cc492f5cf178160da8 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Sun, 23 Apr 2017 01:46:28 -0700 Subject: [PATCH 2/6] trappy: Split out parsing of trace string into separate function In preparation of being able to parse systrace, split out data string processing into separate function so it can be overridden by sub classes. Signed-off-by: Joel Fernandes --- trappy/bare_trace.py | 4 ++++ trappy/base.py | 36 +++++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/trappy/bare_trace.py b/trappy/bare_trace.py index f3fbd58c..2d4400b8 100644 --- a/trappy/bare_trace.py +++ b/trappy/bare_trace.py @@ -134,5 +134,9 @@ def add_parsed_event(self, name, dfr, pivot=None): def finalize_objects(self): for trace_class in self.trace_classes: + trace_class.tracer = self trace_class.create_dataframe() trace_class.finalize_object() + + def generate_data_dict(self): + return None diff --git a/trappy/base.py b/trappy/base.py index 6b9be4db..e603f37a 100644 --- a/trappy/base.py +++ b/trappy/base.py @@ -99,6 +99,7 @@ class Base(object): """ def __init__(self, parse_raw=False): + self.tracer = None self.data_frame = pd.DataFrame() self.data_array = [] self.time_array = [] @@ -171,6 +172,25 @@ def append_data(self, time, comm, pid, cpu, data): self.cpu_array.append(cpu) self.data_array.append(data) + def generate_data_dict(self, data_str): + data_dict = {} + prev_key = None + for field in data_str.split(): + if "=" not in field: + # Concatenation is supported only for "string" values + if type(data_dict[prev_key]) is not str: + continue + data_dict[prev_key] += ' ' + field + continue + (key, value) = field.split('=', 1) + try: + value = int(value) + except ValueError: + pass + data_dict[key] = value + prev_key = key + return data_dict + def generate_parsed_data(self): # Get a rough idea of how much memory we have to play with @@ -183,21 +203,7 @@ def generate_parsed_data(self): for (comm, pid, cpu, data_str) in zip(self.comm_array, self.pid_array, self.cpu_array, self.data_array): data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu} - prev_key = None - for field in data_str.split(): - if "=" not in field: - # Concatenation is supported only for "string" values - if type(data_dict[prev_key]) is not str: - continue - data_dict[prev_key] += ' ' + field - continue - (key, value) = field.split('=', 1) - try: - value = int(value) - except ValueError: - pass - data_dict[key] = value - prev_key = key + data_dict.update(self.generate_data_dict(data_str)) # When running out of memory, Pandas has been observed to segfault # rather than throwing a proper Python error. From ef28d83a7331ff9fc60e24e8fedf518c9e493861 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Sun, 11 Jun 2017 21:31:19 -0700 Subject: [PATCH 3/6] trappy: Add tracing_mark_write as a new event Create tracing_mark_write as an event and treat it like such. It overrides data string processing since it cannot use 'key=value' parsing. Instead it relies on the tracer to provide one (such as systrace has custom trace format from userspace events). If tracer cannot provide one, then it just returns a default dictionary with key 'string' and value as the string in the event. Signed-off-by: Joel Fernandes --- trappy/tracing_mark_write.py | 40 ++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 trappy/tracing_mark_write.py diff --git a/trappy/tracing_mark_write.py b/trappy/tracing_mark_write.py new file mode 100644 index 00000000..9f10cbc9 --- /dev/null +++ b/trappy/tracing_mark_write.py @@ -0,0 +1,40 @@ +# Copyright 2017 ARM Limited, Google and contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""This module contains the class for representing a tracing_mark_write +trace_event used for ftrace events injected from userspace. +""" + +from trappy.base import Base +from trappy.dynamic import register_ftrace_parser + +class TracingMarkWrite(Base): + """Parse tracing_mark_write events that couldn't be matched with more specific unique words + This class is always used as a fallback if nothing more specific could match the particular + tracing_mark_write event. + """ + + unique_word = "tracing_mark_write" + + def generate_data_dict(self, data_str): + if self.tracer: + data_dict = self.tracer.generate_data_dict(data_str) + if data_dict: + return data_dict + + data_dict = { 'string': data_str } + return data_dict + +register_ftrace_parser(TracingMarkWrite) From c37168fe09a5cb2e1e2738abe9224c7d03586049 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Sun, 11 Jun 2017 22:04:45 -0700 Subject: [PATCH 4/6] trappy/systrace: Provide custom data string parsing Here we provide custom data string parsing for Android userspace events. Borrowed the regex and parsing logic from Patrick's patch. Co-developed-by: Patrick Bellasi Signed-off-by: Joel Fernandes --- trappy/systrace.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/trappy/systrace.py b/trappy/systrace.py index 6e917a65..e18abf85 100644 --- a/trappy/systrace.py +++ b/trappy/systrace.py @@ -14,6 +14,10 @@ # from trappy.ftrace import GenericFTrace +import re + +SYSTRACE_EVENT = re.compile( + r'^(?P[A-Z])(\|(?P\d+)\|(?P.*)(\|(?P\d+))?)?') class drop_before_trace(object): """Object that, when called, returns True if the line is not part of @@ -75,3 +79,16 @@ def trace_hasnt_finished(self): """ return lambda x: not x.endswith("\n") + + def generate_data_dict(self, data_str): + """ Custom parsing for systrace's userspace events """ + data_dict = None + + match = SYSTRACE_EVENT.match(data_str) + if match: + data_dict = { 'event': match.group('event'), + 'pid' : match.group('pid'), + 'func' : match.group('func'), + 'data' : match.group('data') } + + return data_dict From c5482d6c1d257b859d4357002507033f98a4ca16 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 13 Jun 2017 20:38:08 -0700 Subject: [PATCH 5/6] trappy: Handle the case for special events injected with tracing_mark_write Certain fake events can be injected using tracing_mark_write and should be classified into their own event classes than tracing_mark_write. One way to handle this is to use tracing_mark_write only as a backup. For this reason lets introduced the concept of backup event classes and classify TracingMarkWrite as that. This case normally doesn't happen because dynamic events are inserted into the beginning of the event list from what I've seen. Since tracing_mark_write is a built-in event, and not dynamic, its always at after and so is less preferred. However we should protect for the future case where there might be a built-in which happens to be after TrackingMarkWrite class in the list, so lets handle it now. Signed-off-by: Joel Fernandes --- trappy/base.py | 7 ++++++- trappy/ftrace.py | 6 ++++-- trappy/tracing_mark_write.py | 3 +++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/trappy/base.py b/trappy/base.py index e603f37a..93ce60cd 100644 --- a/trappy/base.py +++ b/trappy/base.py @@ -95,10 +95,15 @@ class Base(object): :param parse_raw: If :code:`True`, raw trace data (-R option) to trace-cmd will be used + :param fallback: If :code:`True`, the parsing class will be used + only if no other candidate class's unique_word matched. subclasses + should override this (for ex. TracingMarkWrite uses it) + This class acts as a base class for all TRAPpy events """ - def __init__(self, parse_raw=False): + def __init__(self, parse_raw=False, fallback=False): + self.fallback = fallback self.tracer = None self.data_frame = pd.DataFrame() self.data_array = [] diff --git a/trappy/ftrace.py b/trappy/ftrace.py index 74445165..5bd6872e 100644 --- a/trappy/ftrace.py +++ b/trappy/ftrace.py @@ -181,9 +181,11 @@ def contains_unique_word(line, unique_words=cls_for_unique_word.keys()): for unique_word, cls in cls_for_unique_word.iteritems(): if unique_word in line: trace_class = cls - break + if not cls.fallback: + break else: - raise FTraceParseError("No unique word in '{}'".format(line)) + if not trace_class: + raise FTraceParseError("No unique word in '{}'".format(line)) line = line[:-1] diff --git a/trappy/tracing_mark_write.py b/trappy/tracing_mark_write.py index 9f10cbc9..49a23b07 100644 --- a/trappy/tracing_mark_write.py +++ b/trappy/tracing_mark_write.py @@ -37,4 +37,7 @@ def generate_data_dict(self, data_str): data_dict = { 'string': data_str } return data_dict + def __init__(self): + super(TracingMarkWrite, self).__init__(fallback=True) + register_ftrace_parser(TracingMarkWrite) From 15ea129c9be4a2cd128172b922e1123718811288 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 13 Jun 2017 21:20:04 -0700 Subject: [PATCH 6/6] trappy/systrace: Add test for parsing userspace events Signed-off-by: Joel Fernandes --- tests/test_systrace.py | 14 +- tests/trace_surfaceflinger.html | 3111 +++++++++++++++++++++++++++++++ 2 files changed, 3124 insertions(+), 1 deletion(-) create mode 100644 tests/trace_surfaceflinger.html diff --git a/tests/test_systrace.py b/tests/test_systrace.py index 608ed8ec..04424589 100644 --- a/tests/test_systrace.py +++ b/tests/test_systrace.py @@ -21,7 +21,8 @@ class TestSystrace(utils_tests.SetupDirectory): def __init__(self, *args, **kwargs): super(TestSystrace, self).__init__( - [("trace_systrace.html", "trace.html")], + [("trace_systrace.html", "trace.html"), + ("trace_surfaceflinger.html", "trace_sf.html")], *args, **kwargs) @@ -51,6 +52,17 @@ def test_cpu_counting(self): self.assertTrue(hasattr(trace, "_cpus")) self.assertEquals(trace._cpus, 3) + def test_systrace_userspace(self): + """Test parsing of userspace events""" + + trace = trappy.SysTrace("trace_sf.html") + dfr = trace.tracing_mark_write.data_frame + self.assertTrue(dfr['__pid'].iloc[2], 7459) + self.assertTrue(dfr['__comm'].iloc[2], 'RenderThread') + self.assertTrue(dfr['pid'].iloc[2], 7459) + self.assertTrue(dfr['event'].iloc[2], 'B') + self.assertTrue(dfr['func'].iloc[2], 'notifyFramePending') + self.assertTrue(dfr['data'].iloc[-2], 'HW_VSYNC_0') class TestLegacySystrace(utils_tests.SetupDirectory): diff --git a/tests/trace_surfaceflinger.html b/tests/trace_surfaceflinger.html new file mode 100644 index 00000000..aee045e8 --- /dev/null +++ b/tests/trace_surfaceflinger.html @@ -0,0 +1,3111 @@ + + + + + +Android System Trace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +