Skip to content

Commit 2d3ec7b

Browse files
authored
ref: Parse ProGuard files smarter (#55)
Previously, `ProguardCache` and `ProguardMapper` both implemented their (structurally very similar) parsing functions. These parsing functions left a lot to be desired; they simply took records in the order they were encountered and were unable to use later information to refine previously encountered information. This PR splits the parsing logic out into a new module `builder` (please feel free to bikeshed about this), which contains a bunch of auxiliary types and the main `ParsedProguardMapping` type. `ParsedProguardMapping` can then in turn be processed into a `ProguardMapper` or `ProguardCache`, as required. Fixes #54. Fixes RUSTPRO-4. As a follow-up I would like to use the newly introduced `ObfuscatedName` and `OriginalName` types throughout the crate.
1 parent 248d6b5 commit 2d3ec7b

File tree

7 files changed

+483
-264
lines changed

7 files changed

+483
-264
lines changed

benches/proguard_parsing.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ fn criterion_benchmark(c: &mut Criterion) {
1818

1919
let mut group = c.benchmark_group("Proguard Parsing");
2020
group.bench_function("Proguard Mapper", |b| {
21-
b.iter(|| proguard_mapper(black_box(mapping.clone())))
21+
b.iter(|| proguard_mapper(black_box(mapping)))
2222
});
2323

2424
group.bench_function("Proguard Cache creation", |b| {

src/builder.rs

Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
//! Contains functionality for parsing ProGuard mapping files into a
2+
//! structured representation ([`ParsedProguardMapping`]) that can be
3+
//! used to create a [`ProguardMapper`](crate::ProguardMapper) or
4+
//! [`ProguardCache`](crate::ProguardCache).
5+
6+
use std::collections::{HashMap, HashSet};
7+
use std::hash::Hash;
8+
9+
use crate::{mapping::R8Header, ProguardMapping, ProguardRecord};
10+
11+
/// An obfuscated class or method name, borrowed as a `&str`.
///
/// This is a distinct type from [`OriginalName`], so the two kinds of
/// names cannot be mixed up by accident.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct ObfuscatedName<'s>(&'s str);

impl<'s> ObfuscatedName<'s> {
    /// Returns the wrapped string slice, keeping its full `'s` lifetime.
    pub(crate) fn as_str(&self) -> &'s str {
        let Self(raw) = *self;
        raw
    }
}

impl std::ops::Deref for ObfuscatedName<'_> {
    type Target = str;

    /// Dereferences to the wrapped `str`, so `str` methods can be called
    /// directly on the name.
    fn deref(&self) -> &str {
        self.as_str()
    }
}
28+
29+
/// An original (deobfuscated) class or method name, borrowed as a `&str`.
///
/// This is a distinct type from [`ObfuscatedName`], so the two kinds of
/// names cannot be mixed up by accident.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct OriginalName<'s>(&'s str);

impl<'s> OriginalName<'s> {
    /// Returns the wrapped string slice, keeping its full `'s` lifetime.
    pub(crate) fn as_str(&self) -> &'s str {
        let Self(raw) = *self;
        raw
    }
}

impl std::ops::Deref for OriginalName<'_> {
    type Target = str;

    /// Dereferences to the wrapped `str`, so `str` methods can be called
    /// directly on the name.
    fn deref(&self) -> &str {
        self.as_str()
    }
}
46+
47+
/// Per-class information collected from a ProGuard file.
#[derive(Debug, Clone, Default)]
pub(crate) struct ClassInfo<'s> {
    /// The source file in which the class is defined, if one is known.
    pub(crate) source_file: Option<&'s str>,
}
53+
54+
/// The receiver of a method.
55+
///
56+
/// This enum is used to keep track of whether
57+
/// a method's receiver is the class under which
58+
/// it is encountered (`ThisClass`) or another
59+
/// class (`OtherClass`).
60+
///
61+
/// # Example
62+
/// Consider this mapping:
63+
/// ```text
64+
/// example.Main -> a:
65+
/// 1:1 run() 1:1 -> a
66+
/// 2:2 example.Other.run() 1:1 -> b
67+
/// ```
68+
/// The `receiver` of the first method would be
69+
/// `ThisClass("example.Main")` (because it is defined
70+
/// under `"example.Main"` and has no explicit receiver),
71+
/// while the receiver of the second method would be
72+
/// `OtherClass("example.Other")`.
73+
#[derive(Clone, Copy, Debug)]
74+
pub(crate) enum MethodReceiver<'s> {
75+
ThisClass(OriginalName<'s>),
76+
OtherClass(OriginalName<'s>),
77+
}
78+
79+
impl<'s> MethodReceiver<'s> {
80+
pub(crate) fn name(&self) -> OriginalName<'s> {
81+
match self {
82+
Self::ThisClass(name) => *name,
83+
Self::OtherClass(name) => *name,
84+
}
85+
}
86+
}
87+
88+
impl PartialEq for MethodReceiver<'_> {
89+
fn eq(&self, other: &Self) -> bool {
90+
self.name() == other.name()
91+
}
92+
}
93+
94+
impl Eq for MethodReceiver<'_> {}
95+
96+
impl std::hash::Hash for MethodReceiver<'_> {
97+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
98+
self.name().hash(state)
99+
}
100+
}
101+
102+
/// A key that uniquely identifies a method.
103+
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
104+
pub(crate) struct MethodKey<'s> {
105+
/// The method's receiver.
106+
pub(crate) receiver: MethodReceiver<'s>,
107+
/// The method's name.
108+
pub(crate) name: OriginalName<'s>,
109+
/// The method's argument string.
110+
pub(crate) arguments: &'s str,
111+
}
112+
113+
/// Per-method information collected from a ProGuard file.
///
/// The struct currently has no fields.
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct MethodInfo {}
116+
117+
/// A member record in a Proguard file.
118+
#[derive(Clone, Copy, Debug)]
119+
pub(crate) struct Member<'s> {
120+
/// The method the member refers to.
121+
pub(crate) method: MethodKey<'s>,
122+
/// The obfuscated/minified start line.
123+
pub(crate) startline: usize,
124+
/// The obfuscated/minified end line.
125+
pub(crate) endline: usize,
126+
/// The original start line.
127+
pub(crate) original_startline: usize,
128+
/// The original end line.
129+
pub(crate) original_endline: Option<usize>,
130+
}
131+
132+
/// A collection of member records for a particular class
133+
/// and obfuscated method.
134+
#[derive(Clone, Debug, Default)]
135+
pub(crate) struct Members<'s> {
136+
/// The complete list of members for the class and method.
137+
pub(crate) all: Vec<Member<'s>>,
138+
/// The complete list of members for the class and method,
139+
/// grouped by arguments string.
140+
pub(crate) by_params: HashMap<&'s str, Vec<Member<'s>>>,
141+
}
142+
143+
/// A parsed representation of a [`ProguardMapping`].
144+
#[derive(Clone, Debug, Default)]
145+
pub(crate) struct ParsedProguardMapping<'s> {
146+
/// A mapping from obfuscated to original class names.
147+
pub(crate) class_names: HashMap<ObfuscatedName<'s>, OriginalName<'s>>,
148+
/// A mapping from original class names to class information.
149+
pub(crate) class_infos: HashMap<OriginalName<'s>, ClassInfo<'s>>,
150+
/// A mapping from method keys to method information.
151+
pub(crate) method_infos: HashMap<MethodKey<'s>, MethodInfo>,
152+
/// A mapping from obfuscated class and method names to members.
153+
pub(crate) members: HashMap<(ObfuscatedName<'s>, ObfuscatedName<'s>), Members<'s>>,
154+
}
155+
156+
impl<'s> ParsedProguardMapping<'s> {
157+
pub(crate) fn parse(mapping: ProguardMapping<'s>, initialize_param_mapping: bool) -> Self {
158+
let mut slf = Self::default();
159+
let mut current_class_name = None;
160+
let mut current_class = ClassInfo::default();
161+
let mut unique_methods: HashSet<(&str, &str, &str)> = HashSet::new();
162+
163+
let mut records = mapping.iter().filter_map(Result::ok).peekable();
164+
165+
while let Some(record) = records.next() {
166+
match record {
167+
ProguardRecord::Field { .. } => {}
168+
ProguardRecord::Header { .. } => {}
169+
ProguardRecord::R8Header(_) => {
170+
// R8 headers are already handled in the class case below.
171+
}
172+
ProguardRecord::Class {
173+
original,
174+
obfuscated,
175+
} => {
176+
// Flush the previous class if there is one.
177+
if let Some((obfuscated, original)) = current_class_name {
178+
slf.class_names.insert(obfuscated, original);
179+
slf.class_infos.insert(original, current_class);
180+
}
181+
182+
current_class_name = Some((ObfuscatedName(obfuscated), OriginalName(original)));
183+
current_class = ClassInfo::default();
184+
unique_methods.clear();
185+
186+
// Consume R8 headers attached to this class.
187+
while let Some(ProguardRecord::R8Header(r8_header)) = records.peek() {
188+
match r8_header {
189+
R8Header::SourceFile { file_name } => {
190+
current_class.source_file = Some(file_name);
191+
}
192+
R8Header::Other => {}
193+
}
194+
195+
records.next();
196+
}
197+
}
198+
199+
ProguardRecord::Method {
200+
original,
201+
obfuscated,
202+
original_class,
203+
line_mapping,
204+
arguments,
205+
..
206+
} => {
207+
let current_line = if initialize_param_mapping {
208+
line_mapping
209+
} else {
210+
None
211+
};
212+
// in case the mapping has no line records, we use `0` here.
213+
let (startline, endline) =
214+
line_mapping.as_ref().map_or((0, 0), |line_mapping| {
215+
(line_mapping.startline, line_mapping.endline)
216+
});
217+
let (original_startline, original_endline) =
218+
line_mapping.map_or((0, None), |line_mapping| {
219+
match line_mapping.original_startline {
220+
Some(original_startline) => {
221+
(original_startline, line_mapping.original_endline)
222+
}
223+
None => (line_mapping.startline, Some(line_mapping.endline)),
224+
}
225+
});
226+
227+
let Some((current_class_obfuscated, current_class_original)) =
228+
current_class_name
229+
else {
230+
// `current_class_name` is only `None` before the first class entry is encountered.
231+
// If we hit this case, there's a member record before the first class record, which
232+
// is an error. Properly handling this would be nice here, for now we return an empty `Self`.
233+
return Self::default();
234+
};
235+
236+
let members = slf
237+
.members
238+
.entry((current_class_obfuscated, ObfuscatedName(obfuscated)))
239+
.or_default();
240+
241+
let method = MethodKey {
242+
// Save the receiver name, keeping track of whether it's the current class
243+
// (i.e. the one to which this member record belongs) or another class.
244+
receiver: match original_class {
245+
Some(original_class) => {
246+
MethodReceiver::OtherClass(OriginalName(original_class))
247+
}
248+
None => MethodReceiver::ThisClass(current_class_original),
249+
},
250+
name: OriginalName(original),
251+
arguments,
252+
};
253+
254+
// This does nothing for now because we are not saving any per-method information.
255+
let _method_info: &mut MethodInfo = slf.method_infos.entry(method).or_default();
256+
257+
let member = Member {
258+
method,
259+
startline,
260+
endline,
261+
original_startline,
262+
original_endline,
263+
};
264+
265+
members.all.push(member);
266+
267+
if !initialize_param_mapping {
268+
continue;
269+
}
270+
// If the next line has the same leading line range then this method
271+
// has been inlined by the code minification process, as a result
272+
// it can't show in method traces and can be safely ignored.
273+
if let Some(ProguardRecord::Method {
274+
line_mapping: Some(next_line),
275+
..
276+
}) = records.peek()
277+
{
278+
if let Some(current_line_mapping) = current_line {
279+
if (current_line_mapping.startline == next_line.startline)
280+
&& (current_line_mapping.endline == next_line.endline)
281+
{
282+
continue;
283+
}
284+
}
285+
}
286+
287+
let key = (obfuscated, arguments, original);
288+
if unique_methods.insert(key) {
289+
members
290+
.by_params
291+
.entry(arguments)
292+
.or_insert_with(|| Vec::with_capacity(1))
293+
.push(member);
294+
}
295+
} // end ProguardRecord::Method
296+
}
297+
}
298+
299+
// Flush the last class
300+
if let Some((obfuscated, original)) = current_class_name {
301+
slf.class_names.insert(obfuscated, original);
302+
slf.class_infos.insert(original, current_class);
303+
}
304+
305+
slf
306+
}
307+
}

0 commit comments

Comments
 (0)