From 68a1e1fb6ed8acf9215454ed0c978cdfb1baa7c7 Mon Sep 17 00:00:00 2001 From: Sebastian Zivota Date: Wed, 25 Jun 2025 11:11:54 +0200 Subject: [PATCH 1/4] ref: Robustly parse R8 headers R8 headers have a different format from "normal" ProGuard headers. This splits the parsing of the two header formats apart and uses `serde` for R8 headers (they are specified to be in JSON format). --- Cargo.lock | 21 +++++++------ Cargo.toml | 2 ++ src/cache/raw.rs | 13 +++----- src/mapper.rs | 9 +++--- src/mapping.rs | 80 ++++++++++++++++++++++++++++++------------------ 5 files changed, 74 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ee8633b..5d60706 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -338,6 +338,8 @@ version = "5.5.0" dependencies = [ "criterion", "lazy_static", + "serde", + "serde_json", "thiserror", "uuid", "watto", @@ -418,18 +420,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.203" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.203" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", @@ -438,11 +440,12 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] @@ -455,9 +458,9 @@ checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" [[package]] name = "syn" -version = "2.0.66" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 1dfb82c..c1f57d9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,8 @@ uuid = ["dep:uuid", "lazy_static"] [dependencies] lazy_static = { version = "1.4.0", optional = true } +serde = "1.0.219" +serde_json = "1.0.140" thiserror = "1.0.61" uuid = { version = "1.0.0", features = ["v5"], optional = true } watto = { version = "0.1.0", features = ["writer", "strings"] } diff --git a/src/cache/raw.rs b/src/cache/raw.rs index 8650492..d965bec 100644 --- a/src/cache/raw.rs +++ b/src/cache/raw.rs @@ -3,6 +3,7 @@ use std::io::Write; use watto::{Pod, StringTable}; +use crate::mapping::R8Header; use crate::{ProguardMapping, ProguardRecord}; use super::{CacheError, CacheErrorKind}; @@ -194,15 +195,11 @@ impl<'data> ProguardCache<'data> { let mut records = mapping.iter().filter_map(Result::ok).peekable(); while let Some(record) = records.next() { match record { - ProguardRecord::Header { - key, - value: Some(file_name), - } => { - if key == "sourceFile" { - current_class.class.file_name_offset = - string_table.insert(file_name) as u32; - } + ProguardRecord::R8Header(R8Header::SourceFile { file_name }) => { + current_class.class.file_name_offset = string_table.insert(file_name) as u32; } + ProguardRecord::Header { .. } => {} + ProguardRecord::R8Header(R8Header::Other) => {} ProguardRecord::Class { original, obfuscated, diff --git a/src/mapper.rs b/src/mapper.rs index 78c1efc..63e2119 100644 --- a/src/mapper.rs +++ b/src/mapper.rs @@ -5,6 +5,7 @@ use std::fmt::{Error as FmtError, Write}; use std::iter::FusedIterator; use crate::java; +use crate::mapping::R8Header; use crate::mapping::{ProguardMapping, ProguardRecord}; use crate::stacktrace::{self, StackFrame, StackTrace, Throwable}; @@ -236,11 +237,11 @@ impl<'s> ProguardMapper<'s> { let mut records = mapping.iter().filter_map(Result::ok).peekable(); while let Some(record) = records.next() { match record { - ProguardRecord::Header { key, value } => { - if key == "sourceFile" { - class.file_name = value; - } + ProguardRecord::R8Header(R8Header::SourceFile { file_name }) => { + class.file_name = Some(file_name); } + ProguardRecord::Header { .. } => {} + ProguardRecord::R8Header(R8Header::Other) => {} ProguardRecord::Class { original, obfuscated, diff --git a/src/mapping.rs b/src/mapping.rs index bdafe5a..be461fb 100644 --- a/src/mapping.rs +++ b/src/mapping.rs @@ -7,6 +7,8 @@ use std::fmt; use std::ops::Range; use std::str; +use serde::Deserialize; + #[cfg(feature = "uuid")] use uuid::Uuid; @@ -282,7 +284,7 @@ impl<'s> Iterator for ProguardRecordIter<'s> { /// Maps start/end lines of a minified file to original start/end lines. /// /// All line mappings are 1-based and inclusive. -#[derive(Clone, Copy, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct LineMapping { /// Start Line, 1-based. pub startline: usize, @@ -294,8 +296,26 @@ pub struct LineMapping { pub original_endline: Option, } +/// An R8 header, as described in +/// . +/// +/// The format is a line starting with `#` and followed by a JSON object. +#[derive(Debug, Clone, Deserialize, PartialEq, Eq)] +#[serde(tag = "id", rename_all = "camelCase")] +pub enum R8Header<'s> { + /// A source file header, stating what source file a class originated from. + /// + /// See . + #[serde(rename_all = "camelCase")] + SourceFile { file_name: &'s str }, + + /// Catchall variant for headers we don't support. + #[serde(other)] + Other, +} + /// A Proguard Mapping Record. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum ProguardRecord<'s> { /// A Proguard Header. Header { @@ -304,6 +324,8 @@ pub enum ProguardRecord<'s> { /// Optional value if the Header is a KV pair. value: Option<&'s str>, }, + /// An R8 Header. + R8Header(R8Header<'s>), /// A Class Mapping. Class { /// Original name of the class. @@ -436,7 +458,9 @@ impl<'s> ProguardRecord<'s> { fn parse_proguard_record(bytes: &[u8]) -> (Result, &[u8]) { let bytes = consume_leading_newlines(bytes); - let result = if bytes.starts_with(b"#") { + let result = if bytes.starts_with(b"# {") { + parse_r8_header(bytes) + } else if bytes.starts_with(b"#") { parse_proguard_header(bytes) } else if bytes.starts_with(b" ") { parse_proguard_field_or_method(bytes) @@ -459,38 +483,35 @@ fn parse_proguard_record(bytes: &[u8]) -> (Result, & } } -const SOURCE_FILE_PREFIX: &[u8; 32] = br#" {"id":"sourceFile","fileName":""#; - /// Parses a single Proguard Header from a Proguard File. fn parse_proguard_header(bytes: &[u8]) -> Result<(ProguardRecord, &[u8]), ParseError> { let bytes = parse_prefix(bytes, b"#")?; - if let Ok(bytes) = parse_prefix(bytes, SOURCE_FILE_PREFIX) { - let (value, bytes) = parse_until(bytes, |c| *c == b'"')?; - let bytes = parse_prefix(bytes, br#""}"#)?; + // Existing logic for `key: value` format + let (key, bytes) = parse_until(bytes, |c| *c == b':' || is_newline(c))?; - let record = ProguardRecord::Header { - key: "sourceFile", - value: Some(value), - }; + let (value, bytes) = match parse_prefix(bytes, b":") { + Ok(bytes) => parse_until(bytes, is_newline).map(|(v, bytes)| (Some(v), bytes)), + Err(_) => Ok((None, bytes)), + }?; - Ok((record, consume_leading_newlines(bytes))) - } else { - // Existing logic for `key: value` format - let (key, bytes) = parse_until(bytes, |c| *c == b':' || is_newline(c))?; + let record = ProguardRecord::Header { + key: key.trim(), + value: value.map(|v| v.trim()), + }; - let (value, bytes) = match parse_prefix(bytes, b":") { - Ok(bytes) => parse_until(bytes, is_newline).map(|(v, bytes)| (Some(v), bytes)), - Err(_) => Ok((None, bytes)), - }?; + Ok((record, consume_leading_newlines(bytes))) +} - let record = ProguardRecord::Header { - key: key.trim(), - value: value.map(|v| v.trim()), - }; +fn parse_r8_header(bytes: &[u8]) -> Result<(ProguardRecord, &[u8]), ParseError> { + let bytes = parse_prefix(bytes, b"#")?; + let (header, rest) = parse_until(bytes, is_newline)?; - Ok((record, consume_leading_newlines(bytes))) - } + let header = serde_json::from_str(header).unwrap(); + Ok(( + ProguardRecord::R8Header(header), + consume_leading_newlines(rest), + )) } /// Parses a single Proguard Field or Method from a Proguard File. @@ -763,10 +784,9 @@ mod tests { let parsed = ProguardRecord::try_parse(bytes); assert_eq!( parsed, - Ok(ProguardRecord::Header { - key: "sourceFile", - value: Some("Foobar.kt") - }) + Ok(ProguardRecord::R8Header(R8Header::SourceFile { + file_name: "Foobar.kt", + })) ); } From 31deacdc2e6778c15caf36b0531dec0cc029bb2e Mon Sep 17 00:00:00 2001 From: Sebastian Zivota Date: Wed, 25 Jun 2025 11:42:15 +0200 Subject: [PATCH 2/4] Fix serde feature --- Cargo.toml | 2 +- src/cache/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c1f57d9..8106ec0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ uuid = ["dep:uuid", "lazy_static"] [dependencies] lazy_static = { version = "1.4.0", optional = true } -serde = "1.0.219" +serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.140" thiserror = "1.0.61" uuid = { version = "1.0.0", features = ["v5"], optional = true } diff --git a/src/cache/mod.rs b/src/cache/mod.rs index cb401dd..9ab3652 100644 --- a/src/cache/mod.rs +++ b/src/cache/mod.rs @@ -92,7 +92,7 @@ pub struct CacheError { } impl CacheError { - /// Returns the corresponding [`ErrorKind`] for this error. + /// Returns the corresponding [`CacheErrorKind`] for this error. pub fn kind(&self) -> CacheErrorKind { self.kind } From e62354331cc6966963b844d4fa454e2487cffe1f Mon Sep 17 00:00:00 2001 From: Sebastian Zivota Date: Wed, 25 Jun 2025 15:41:30 +0200 Subject: [PATCH 3/4] Improve parsing --- src/mapping.rs | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/src/mapping.rs b/src/mapping.rs index be461fb..ed6b32d 100644 --- a/src/mapping.rs +++ b/src/mapping.rs @@ -458,10 +458,15 @@ impl<'s> ProguardRecord<'s> { fn parse_proguard_record(bytes: &[u8]) -> (Result, &[u8]) { let bytes = consume_leading_newlines(bytes); - let result = if bytes.starts_with(b"# {") { - parse_r8_header(bytes) - } else if bytes.starts_with(b"#") { - parse_proguard_header(bytes) + let result = if let Some(bytes) = bytes.strip_prefix(b"#") { + // ProGuard / R8 headers + + let bytes = bytes.trim_ascii_start(); + if bytes.starts_with(b"{") { + parse_r8_header(bytes) + } else { + parse_proguard_header(bytes) + } } else if bytes.starts_with(b" ") { parse_proguard_field_or_method(bytes) } else { @@ -485,7 +490,7 @@ fn parse_proguard_record(bytes: &[u8]) -> (Result, & /// Parses a single Proguard Header from a Proguard File. fn parse_proguard_header(bytes: &[u8]) -> Result<(ProguardRecord, &[u8]), ParseError> { - let bytes = parse_prefix(bytes, b"#")?; + // Note: the leading `#` has already been parsed. // Existing logic for `key: value` format let (key, bytes) = parse_until(bytes, |c| *c == b':' || is_newline(c))?; @@ -504,7 +509,8 @@ fn parse_proguard_header(bytes: &[u8]) -> Result<(ProguardRecord, &[u8]), ParseE } fn parse_r8_header(bytes: &[u8]) -> Result<(ProguardRecord, &[u8]), ParseError> { - let bytes = parse_prefix(bytes, b"#")?; + // Note: the leading `#` has already been parsed. + let (header, rest) = parse_until(bytes, is_newline)?; let header = serde_json::from_str(header).unwrap(); @@ -790,6 +796,26 @@ mod tests { ); } + #[test] + fn try_parse_r8_headers() { + let bytes = br#"# {"id":"foobar"}"#; + assert_eq!( + ProguardRecord::try_parse(bytes).unwrap(), + ProguardRecord::R8Header(R8Header::Other), + ); + + let bytes = br#"#{"id":"foobar"}"#; + assert_eq!( + ProguardRecord::try_parse(bytes).unwrap(), + ProguardRecord::R8Header(R8Header::Other), + ); + let bytes = br#"# {"id":"foobar"}"#; + assert_eq!( + ProguardRecord::try_parse(bytes).unwrap(), + ProguardRecord::R8Header(R8Header::Other), + ); + } + #[test] fn try_parse_class() { let bytes = b"android.support.v4.app.RemoteActionCompatParcelizer -> android.support.v4.app.RemoteActionCompatParcelizer:"; From e0dd9822bf76b8a955304db68aea6dd8806f48fb Mon Sep 17 00:00:00 2001 From: Sebastian Zivota Date: Wed, 25 Jun 2025 16:58:28 +0200 Subject: [PATCH 4/4] Headers can be indented --- src/mapping.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mapping.rs b/src/mapping.rs index 6d91447..690a27b 100644 --- a/src/mapping.rs +++ b/src/mapping.rs @@ -459,7 +459,7 @@ impl<'s> ProguardRecord<'s> { fn parse_proguard_record(bytes: &[u8]) -> (Result, &[u8]) { let bytes = consume_leading_newlines(bytes); - let result = if let Some(bytes) = bytes.strip_prefix(b"#") { + let result = if let Some(bytes) = bytes.trim_ascii_start().strip_prefix(b"#") { // ProGuard / R8 headers let bytes = bytes.trim_ascii_start(); @@ -805,7 +805,7 @@ mod tests { ProguardRecord::R8Header(R8Header::Other), ); - let bytes = br#"#{"id":"foobar"}"#; + let bytes = br#" #{"id":"foobar"}"#; assert_eq!( ProguardRecord::try_parse(bytes).unwrap(), ProguardRecord::R8Header(R8Header::Other), @@ -1051,6 +1051,7 @@ androidx.activity.OnBackPressedCallback -> c.a.b: boolean mEnabled -> a java.util.ArrayDeque mOnBackPressedCallbacks -> b 1:4:void onBackPressed():184:187 -> c + # {\"id\":\"com.android.tools.r8.synthesized\"} androidx.activity.OnBackPressedCallback -> c.a.b: "; @@ -1103,6 +1104,7 @@ androidx.activity.OnBackPressedCallback original_endline: Some(187), }), }), + Ok(ProguardRecord::R8Header(R8Header::Other)), Err(ParseError { line: b"androidx.activity.OnBackPressedCallback \n", kind: ParseErrorKind::ParseError("line is not a valid proguard record"),