From b229490bc26a0411607e273141b1172da0d86935 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Wed, 9 May 2018 20:26:55 +0700 Subject: [PATCH 01/10] Added Symbol.raw_name --- actual symbol name (decorated). --- SymbolSort.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/SymbolSort.cs b/SymbolSort.cs index 3e51518..f59901e 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -181,6 +181,7 @@ class Symbol public int rva_end; public string name; public string short_name; + public string raw_name; //decorated symbol name public string source_filename; public string section; public SymbolFlags flags = 0; From 5d5b911036f5ad3132112039814eb77df989cc39 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Wed, 9 May 2018 20:27:10 +0700 Subject: [PATCH 02/10] Record raw_name when parsing COMDAT files. --- SymbolSort.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/SymbolSort.cs b/SymbolSort.cs index f59901e..f110767 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -561,7 +561,7 @@ private static void ReadSymbolsFromNM(List symbols, string inFilename, I private static Regex ReadSymbolsFromCOMDAT_regexName = new Regex(@"\n[ \t]*([^ \t]+)[ \t]+name", RegexOptions.Compiled); private static Regex ReadSymbolsFromCOMDAT_regexSize = new Regex(@"\n[ \t]*([A-Za-z0-9]+)[ \t]+size of raw data", RegexOptions.Compiled); - private static Regex ReadSymbolsFromCOMDAT_regexCOMDAT = new Regex(@"\n[ \t]*COMDAT; sym= \""([^\n\""]+)", RegexOptions.Compiled); + private static Regex ReadSymbolsFromCOMDAT_regexCOMDAT = new Regex(@"\n[ \t]*COMDAT; sym= \""([^\n\""]+)\"" \(([^\n()]+)\)", RegexOptions.Compiled); private static void ReadSymbolsFromCOMDAT(List symbols, string inFilename) { Regex regexName = ReadSymbolsFromCOMDAT_regexName; @@ -613,6 +613,7 @@ record += ln; m = regexCOMDAT.Match(record); symbol.name = m.Groups[1].Value; + symbol.raw_name = m.Groups[2].Value; if (symbol.name != "") { symbol.rva_start = 0; From 1a2bbdc856426d513710a9e93d2e96653f176979 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sat, 12 May 2018 00:26:01 +0700 Subject: [PATCH 03/10] Implemented function which parses mildly undecorated symbol name and extracts main class path. --- SymbolSort.cs | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/SymbolSort.cs b/SymbolSort.cs index f110767..4cf7374 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -322,6 +322,74 @@ private static string ExtractGroupedSubstrings(string name, char groupBegin, cha return ungrouped_name; } + //extracts the classpath (i.e. namespaces::classes::method) from undecorated symbol name + //input symbol must be stripped of return value and parameters (output from undname.exe with some flags) + //function may return null if symbol format is unknown + private static string[] GetMainClassPath(string short_name) { + Func splitByColons = x => x.Split(new string[] { "::" }, StringSplitOptions.None); + string allowedSpecials = @"<=>,\[\]()!~^&|+\-*\/%" + "$"; + string reClassWord = @"[\w " + allowedSpecials + "]+"; + string reClassPath = String.Format(@"({0}::)*{0}", reClassWord); + + //(all string constaints) + if (short_name == "`string'") + return new string[] { short_name }; + + // Array, Allocator>>::Buffer::capacity" + // std::_Error_objects::_System_object$initializer$ + Regex regexClassPath = new Regex("^" + reClassPath + "$"); + if (regexClassPath.IsMatch(short_name)) + return splitByColons(short_name); + + // std::bad_alloc `RTTI Type Descriptor' + const string rttiDescr = " `RTTI Type Descriptor'"; + if (short_name.EndsWith(rttiDescr)) { + string[] res = GetMainClassPath(short_name.Substring(0, short_name.Length - rttiDescr.Length)); + if (res == null) return null; + return res.Concat(new string[] { rttiDescr.Substring(1) }).ToArray(); + } + + // `CustomHeap::~CustomHeap'::`1'::dtor$0 + // `std::basic_string,std::allocator >::_Copy'::`1'::catch$0 + // `CustomHeap::instance'::`2'::some_var + // `HeapWrap < ShapeImpl >::Stub::get'::`7'::`local static guard' + // `HeapWrap::Stub::get'::`7'::`dynamic atexit destructor for 'g_myHeap'' + // `Mesh::projectPoints'::`13'::$S1 + // `GroupElement::getNumElements'::`2'::MyCounter::`vftable' + string reLocalEnd = @".*"; //@"(`.+'|[\w]+(\$0)?)"; + Regex regexFuncLocalVar = new Regex(String.Format(@"^`({0})'::`[\d]+'::{1}$", reClassPath, reLocalEnd)); + if (regexFuncLocalVar.IsMatch(short_name)) + return GetMainClassPath(regexFuncLocalVar.Match(short_name).Groups[1].Value); + + // `dynamic initializer for 'BoundingBox::Invalid'' + // `dynamic initializer for 'std::_Error_objects::_System_object'' + // std::`dynamic initializer for '_Tuple_alloc'' + // UniquePtr::`scalar deleting destructor' + if (short_name.EndsWith("'")) + { + int backtickPos = short_name.IndexOf('`'); + if (backtickPos >= 0) + { + string prefix = short_name.Substring(0, backtickPos); + string quoted = short_name.Substring(backtickPos + 1, short_name.Length - backtickPos - 2); + if (quoted.Count(c => c == '\'') == 2) + { + int left = quoted.IndexOf('\''); + int right = quoted.LastIndexOf('\''); + quoted = quoted.Substring(left + 1, right - left - 1); + } + string[] quotedWords = GetMainClassPath(quoted); + if (quotedWords == null) + return null; + string[] prefixWords = splitByColons(prefix); + return prefixWords.Take(prefixWords.Length-1).Concat(quotedWords).ToArray(); + } + } + + //Console.WriteLine(short_name); + return null; + } + private static string[] SplitIntoCmdArgs(string text) { //replace spaces inside quotes From 6b54dc5b49a48b351896ce979bddc3863e318a52 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sat, 12 May 2018 16:45:45 +0700 Subject: [PATCH 04/10] Precompile regexes in GetMainClassPath, wrap it into class. --- SymbolSort.cs | 131 +++++++++++++++++++++++++++----------------------- 1 file changed, 70 insertions(+), 61 deletions(-) diff --git a/SymbolSort.cs b/SymbolSort.cs index 4cf7374..7e8a134 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -322,72 +322,81 @@ private static string ExtractGroupedSubstrings(string name, char groupBegin, cha return ungrouped_name; } - //extracts the classpath (i.e. namespaces::classes::method) from undecorated symbol name - //input symbol must be stripped of return value and parameters (output from undname.exe with some flags) - //function may return null if symbol format is unknown - private static string[] GetMainClassPath(string short_name) { - Func splitByColons = x => x.Split(new string[] { "::" }, StringSplitOptions.None); - string allowedSpecials = @"<=>,\[\]()!~^&|+\-*\/%" + "$"; - string reClassWord = @"[\w " + allowedSpecials + "]+"; - string reClassPath = String.Format(@"({0}::)*{0}", reClassWord); - - //(all string constaints) - if (short_name == "`string'") - return new string[] { short_name }; - - // Array, Allocator>>::Buffer::capacity" - // std::_Error_objects::_System_object$initializer$ - Regex regexClassPath = new Regex("^" + reClassPath + "$"); - if (regexClassPath.IsMatch(short_name)) - return splitByColons(short_name); - - // std::bad_alloc `RTTI Type Descriptor' - const string rttiDescr = " `RTTI Type Descriptor'"; - if (short_name.EndsWith(rttiDescr)) { - string[] res = GetMainClassPath(short_name.Substring(0, short_name.Length - rttiDescr.Length)); - if (res == null) return null; - return res.Concat(new string[] { rttiDescr.Substring(1) }).ToArray(); - } - - // `CustomHeap::~CustomHeap'::`1'::dtor$0 - // `std::basic_string,std::allocator >::_Copy'::`1'::catch$0 - // `CustomHeap::instance'::`2'::some_var - // `HeapWrap < ShapeImpl >::Stub::get'::`7'::`local static guard' - // `HeapWrap::Stub::get'::`7'::`dynamic atexit destructor for 'g_myHeap'' - // `Mesh::projectPoints'::`13'::$S1 - // `GroupElement::getNumElements'::`2'::MyCounter::`vftable' - string reLocalEnd = @".*"; //@"(`.+'|[\w]+(\$0)?)"; - Regex regexFuncLocalVar = new Regex(String.Format(@"^`({0})'::`[\d]+'::{1}$", reClassPath, reLocalEnd)); - if (regexFuncLocalVar.IsMatch(short_name)) - return GetMainClassPath(regexFuncLocalVar.Match(short_name).Groups[1].Value); - - // `dynamic initializer for 'BoundingBox::Invalid'' - // `dynamic initializer for 'std::_Error_objects::_System_object'' - // std::`dynamic initializer for '_Tuple_alloc'' - // UniquePtr::`scalar deleting destructor' - if (short_name.EndsWith("'")) - { - int backtickPos = short_name.IndexOf('`'); - if (backtickPos >= 0) + private class MainClassPathGetter + { + private static string[] splitByColons(string x) + { + return x.Split(new string[] { "::" }, StringSplitOptions.None); + } + + private static string allowedSpecials = @"<=>,\[\]()!~^&|+\-*\/%" + "$"; + private static string reClassWord = @"[\w " + allowedSpecials + "]+"; + private static string reClassPath = String.Format(@"({0}::)*{0}", reClassWord); + private static Regex regexClassPath = new Regex("^" + reClassPath + "$", RegexOptions.Compiled); + private static string reLocalEnd = @".*"; //@"(`.+'|[\w]+(\$0)?)"; + private static Regex regexFuncLocalVar = new Regex(String.Format(@"^`({0})'::`[\d]+'::{1}$", reClassPath, reLocalEnd)); + + //extracts the classpath (i.e. namespaces::classes::method) from undecorated symbol name + //input symbol must be stripped of return value and parameters (output from undname.exe with some flags) + //function may return null if symbol format is unknown + public static string[] Run(string short_name) + { + //(all string constaints) + if (short_name == "`string'") + return new string[] { short_name }; + + // Array, Allocator>>::Buffer::capacity" + // std::_Error_objects::_System_object$initializer$ + if (regexClassPath.IsMatch(short_name)) + return splitByColons(short_name); + + // std::bad_alloc `RTTI Type Descriptor' + const string rttiDescr = " `RTTI Type Descriptor'"; + if (short_name.EndsWith(rttiDescr)) { - string prefix = short_name.Substring(0, backtickPos); - string quoted = short_name.Substring(backtickPos + 1, short_name.Length - backtickPos - 2); - if (quoted.Count(c => c == '\'') == 2) + string[] res = Run(short_name.Substring(0, short_name.Length - rttiDescr.Length)); + if (res == null) return null; + return res.Concat(new string[] { rttiDescr.Substring(1) }).ToArray(); + } + + // `CustomHeap::~CustomHeap'::`1'::dtor$0 + // `std::basic_string,std::allocator >::_Copy'::`1'::catch$0 + // `CustomHeap::instance'::`2'::some_var + // `HeapWrap < ShapeImpl >::Stub::get'::`7'::`local static guard' + // `HeapWrap::Stub::get'::`7'::`dynamic atexit destructor for 'g_myHeap'' + // `Mesh::projectPoints'::`13'::$S1 + // `GroupElement::getNumElements'::`2'::MyCounter::`vftable' + if (regexFuncLocalVar.IsMatch(short_name)) + return Run(regexFuncLocalVar.Match(short_name).Groups[1].Value); + + // `dynamic initializer for 'BoundingBox::Invalid'' + // `dynamic initializer for 'std::_Error_objects::_System_object'' + // std::`dynamic initializer for '_Tuple_alloc'' + // UniquePtr::`scalar deleting destructor' + if (short_name.EndsWith("'")) + { + int backtickPos = short_name.IndexOf('`'); + if (backtickPos >= 0) { - int left = quoted.IndexOf('\''); - int right = quoted.LastIndexOf('\''); - quoted = quoted.Substring(left + 1, right - left - 1); + string prefix = short_name.Substring(0, backtickPos); + string quoted = short_name.Substring(backtickPos + 1, short_name.Length - backtickPos - 2); + if (quoted.Count(c => c == '\'') == 2) + { + int left = quoted.IndexOf('\''); + int right = quoted.LastIndexOf('\''); + quoted = quoted.Substring(left + 1, right - left - 1); + } + string[] quotedWords = Run(quoted); + if (quotedWords == null) + return null; + string[] prefixWords = splitByColons(prefix); + return prefixWords.Take(prefixWords.Length - 1).Concat(quotedWords).ToArray(); } - string[] quotedWords = GetMainClassPath(quoted); - if (quotedWords == null) - return null; - string[] prefixWords = splitByColons(prefix); - return prefixWords.Take(prefixWords.Length-1).Concat(quotedWords).ToArray(); } - } - //Console.WriteLine(short_name); - return null; + //Console.WriteLine(short_name); + return null; + } } private static string[] SplitIntoCmdArgs(string text) From 1baf22bbcd55a4eaab114fc76a1a8971713f6c85 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sat, 12 May 2018 15:43:29 +0700 Subject: [PATCH 05/10] Implemented undname.exe runner. --- SymbolSort.cs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/SymbolSort.cs b/SymbolSort.cs index 7e8a134..4b8616f 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -17,6 +17,7 @@ using System.Runtime.InteropServices; using System.Text; using System.Text.RegularExpressions; +using System.Threading; using Dia2Lib; // Most of the interop with msdia90.dll can be generated automatically @@ -399,6 +400,39 @@ public static string[] Run(string short_name) } } + private static string[] RunUndName(string[] symbols, uint flags) + { + //write all symbols to temporary file + string inFName = Path.GetTempFileName(); + var inWriter = new StreamWriter(inFName); + foreach (string s in symbols) + inWriter.WriteLine(s); + inWriter.Close(); + + //run undname.exe on the file + var arguments = String.Format("0x{0:X} {1}", flags, inFName); + var process = new Process + { + StartInfo = new ProcessStartInfo("undname", arguments) + { + UseShellExecute = false, + RedirectStandardOutput = true, + CreateNoWindow = true, + } + }; + + process.Start(); + string output = process.StandardOutput.ReadToEnd(); + System.Threading.Thread.Sleep(50); //just to be sure + Debug.Assert(process.HasExited); + + //postprocess output + string[] lines = output.Split("\r\n".ToCharArray(), StringSplitOptions.RemoveEmptyEntries); + Debug.Assert(lines.Length == symbols.Length); + + return lines; + } + private static string[] SplitIntoCmdArgs(string text) { //replace spaces inside quotes From 07de5e9cef1d04e9612b0bf4cb0d0b5b1ce1bd30 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sat, 12 May 2018 18:23:49 +0700 Subject: [PATCH 06/10] undname.exe caller fix: check for exception. --- SymbolSort.cs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/SymbolSort.cs b/SymbolSort.cs index 4b8616f..558f567 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -421,16 +421,23 @@ private static string[] RunUndName(string[] symbols, uint flags) } }; - process.Start(); - string output = process.StandardOutput.ReadToEnd(); - System.Threading.Thread.Sleep(50); //just to be sure - Debug.Assert(process.HasExited); + try + { + process.Start(); + string output = process.StandardOutput.ReadToEnd(); + System.Threading.Thread.Sleep(50); //just to be sure + Debug.Assert(process.HasExited); - //postprocess output - string[] lines = output.Split("\r\n".ToCharArray(), StringSplitOptions.RemoveEmptyEntries); - Debug.Assert(lines.Length == symbols.Length); + //postprocess output + string[] lines = output.Split("\r\n".ToCharArray(), StringSplitOptions.RemoveEmptyEntries); + Debug.Assert(lines.Length == symbols.Length); - return lines; + return lines; + } + catch + { + return new string[symbols.Length]; + } } private static string[] SplitIntoCmdArgs(string text) From bf6102df049f88ac15858c3d90de7ab04d0d58bc Mon Sep 17 00:00:00 2001 From: stgatilov Date: Mon, 14 May 2018 12:44:31 +0700 Subject: [PATCH 07/10] Fixed splitting in undname.exe caller (handle empty lines properly). --- SymbolSort.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/SymbolSort.cs b/SymbolSort.cs index 558f567..76deac3 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -429,7 +429,9 @@ private static string[] RunUndName(string[] symbols, uint flags) Debug.Assert(process.HasExited); //postprocess output - string[] lines = output.Split("\r\n".ToCharArray(), StringSplitOptions.RemoveEmptyEntries); + string[] lines = output.Split(new string[] { "\r\n" }, StringSplitOptions.None); + if (lines.Length > symbols.Length && lines.Skip(symbols.Length).All(x => x == "")) + lines = lines.Take(symbols.Length).ToArray(); Debug.Assert(lines.Length == symbols.Length); return lines; From 04ff022999c04e5d089b9af31c70f9cda8433b4f Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sat, 12 May 2018 11:49:26 +0700 Subject: [PATCH 08/10] Refactor DumpFolderStats for custom paths. --- SymbolSort.cs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/SymbolSort.cs b/SymbolSort.cs index 76deac3..cf8b47e 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -1456,20 +1456,20 @@ private static void WriteSourceStatsList(TextWriter writer, IEnumerable symbolList, int maxCount, bool showDifferences, List pathReplacements) + private static void DumpFolderStats(TextWriter writer, List symbolList, int maxCount, bool showDifferences, Func pathFunc, string separator) { Dictionary sourceStats = new Dictionary(); int childCount = 0; foreach (Symbol s in symbolList) { - string filename = s.source_filename; - filename = PerformRegexReplacements(filename, pathReplacements); - for ( ; ; ) + string[] parts = pathFunc(s); + for (int k = parts.Length; k > 0; k--) { SymbolSourceStats stat; - if (sourceStats.ContainsKey(filename)) + string currentname = String.Join(separator, parts, 0, k); + if (sourceStats.ContainsKey(currentname)) { - stat = sourceStats[filename]; + stat = sourceStats[currentname]; } else { @@ -1477,17 +1477,12 @@ private static void DumpFolderStats(TextWriter writer, List symbolList, stat.count = 0; stat.size = 0; stat.singleChild = false; - sourceStats.Add(filename, stat); + sourceStats.Add(currentname, stat); } stat.count += s.count; stat.size += s.size; stat.singleChild = (stat.count == childCount); childCount = stat.count; - - int searchPos = filename.LastIndexOf('\\'); - if (searchPos < 0) - break; - filename = filename.Remove(searchPos); } } @@ -1498,9 +1493,6 @@ private static void DumpFolderStats(TextWriter writer, List symbolList, return s1.Value.size - s0.Value.size; } ); - writer.WriteLine("File Contributions"); - writer.WriteLine("--------------------------------------"); - if (showDifferences) { writer.WriteLine("Increases in Size"); @@ -2041,7 +2033,14 @@ static void Main(string[] args) } Console.WriteLine("Building folder stats..."); - DumpFolderStats(writer, symbols, opts.maxCount, opts.differenceFiles.Any(), opts.pathReplacements); + writer.WriteLine("File Contributions"); + writer.WriteLine("--------------------------------------"); + DumpFolderStats(writer, symbols, opts.maxCount, opts.differenceFiles.Any(), + delegate(Symbol s) + { + string path = PerformRegexReplacements(s.source_filename, opts.pathReplacements); + return path.Split("/\\".ToCharArray()); + }, "\\"); Console.WriteLine("Computing section stats..."); writer.WriteLine("Merged Sections / Types"); From a0001a73068d9ac3c96ec3b5f3152dcdf1ab8410 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sat, 12 May 2018 11:49:58 +0700 Subject: [PATCH 09/10] Added reporting classpath stats (using undname.exe) --- SymbolSort.cs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/SymbolSort.cs b/SymbolSort.cs index cf8b47e..43270e0 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -185,6 +185,7 @@ class Symbol public string raw_name; //decorated symbol name public string source_filename; public string section; + public string[] classpath; public SymbolFlags flags = 0; }; @@ -2042,6 +2043,28 @@ static void Main(string[] args) return path.Split("/\\".ToCharArray()); }, "\\"); + Console.WriteLine("Building class and namespace stats..."); + writer.WriteLine("Namespaces and classes Contributions"); + writer.WriteLine("--------------------------------------"); + string[] easyUndecoratedNames = RunUndName(symbols.Select(s => s.raw_name).ToArray(), 0x29FFF); + for (int i = 0; i < symbols.Count; i++) + { + string n = easyUndecoratedNames[i]; + n = ExtractGroupedSubstrings(n, '<', '>', "T"); + string[] parts = MainClassPathGetter.Run(n); + symbols[i].classpath = parts; + } + DumpFolderStats(writer, symbols, opts.maxCount, opts.differenceFiles.Any(), + delegate (Symbol s) + { + string[] parts = s.classpath; + if (parts == null || parts.Length == 0) { + return new string[] { "[unknown]" }; + } + parts = new string[] { "." }.Concat(parts.Take(parts.Length - 1)).ToArray(); + return parts; + }, "::"); + Console.WriteLine("Computing section stats..."); writer.WriteLine("Merged Sections / Types"); DumpMergedSymbols( From 6dd2efe26e4cd15b7efef3c7758c0a5a5a20468a Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sat, 12 May 2018 18:24:21 +0700 Subject: [PATCH 10/10] undname.exe usage fix: check if undecorated name is present. --- SymbolSort.cs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/SymbolSort.cs b/SymbolSort.cs index 43270e0..b4e985a 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -2050,9 +2050,12 @@ static void Main(string[] args) for (int i = 0; i < symbols.Count; i++) { string n = easyUndecoratedNames[i]; - n = ExtractGroupedSubstrings(n, '<', '>', "T"); - string[] parts = MainClassPathGetter.Run(n); - symbols[i].classpath = parts; + if (n != null) + { + n = ExtractGroupedSubstrings(n, '<', '>', "T"); + string[] parts = MainClassPathGetter.Run(n); + symbols[i].classpath = parts; + } } DumpFolderStats(writer, symbols, opts.maxCount, opts.differenceFiles.Any(), delegate (Symbol s)