diff --git a/CHANGELOG.md b/CHANGELOG.md
index 98c318d..bdc60ac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
+## [0.4.2] - 2026-01-14
+
+### Fixed
+- RSS 2.0 feeds with self-closing XML elements (e.g., `<atom:link ... />`) now parse items correctly (#45)
+- Empty elements at both channel and item level are handled properly
+- Self-closing enclosure elements no longer break item parsing
+- Empty `itunes:image` elements now populate `feed.feed.image`
+
## [0.4.1] - 2025-01-12
### Changed
@@ -167,7 +175,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Comprehensive test coverage
- Documentation with examples
-[Unreleased]: https://github.com/bug-ops/feedparser-rs/compare/v0.4.1...HEAD
+[Unreleased]: https://github.com/bug-ops/feedparser-rs/compare/v0.4.2...HEAD
+[0.4.2]: https://github.com/bug-ops/feedparser-rs/compare/v0.4.1...v0.4.2
[0.4.1]: https://github.com/bug-ops/feedparser-rs/compare/v0.4.0...v0.4.1
[0.4.0]: https://github.com/bug-ops/feedparser-rs/compare/v0.3.0...v0.4.0
[0.3.0]: https://github.com/bug-ops/feedparser-rs/compare/v0.2.1...v0.3.0
diff --git a/Cargo.lock b/Cargo.lock
index 462dc11..371fd59 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -310,9 +310,9 @@ dependencies = [
[[package]]
name = "clap_lex"
-version = "0.7.6"
+version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
+checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32"
[[package]]
name = "cmake"
@@ -600,7 +600,7 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "feedparser-rs"
-version = "0.4.1"
+version = "0.4.2"
dependencies = [
"ammonia",
"chrono",
@@ -623,7 +623,7 @@ dependencies = [
[[package]]
name = "feedparser-rs-node"
-version = "0.4.1"
+version = "0.4.2"
dependencies = [
"feedparser-rs",
"napi",
@@ -633,7 +633,7 @@ dependencies = [
[[package]]
name = "feedparser-rs-py"
-version = "0.4.1"
+version = "0.4.2"
dependencies = [
"chrono",
"feedparser-rs",
@@ -649,9 +649,9 @@ checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41"
[[package]]
name = "flate2"
-version = "1.1.5"
+version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
+checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369"
dependencies = [
"crc32fast",
"miniz_oxide",
@@ -1782,7 +1782,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
"rand_chacha",
- "rand_core 0.9.3",
+ "rand_core 0.9.5",
]
[[package]]
@@ -1792,7 +1792,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
- "rand_core 0.9.3",
+ "rand_core 0.9.5",
]
[[package]]
@@ -1803,9 +1803,9 @@ checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "rand_core"
-version = "0.9.3"
+version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
dependencies = [
"getrandom 0.3.4",
]
@@ -2390,9 +2390,9 @@ dependencies = [
[[package]]
name = "tower"
-version = "0.5.2"
+version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
+checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
dependencies = [
"futures-core",
"futures-util",
@@ -3088,6 +3088,6 @@ dependencies = [
[[package]]
name = "zmij"
-version = "1.0.13"
+version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac93432f5b761b22864c774aac244fa5c0fd877678a4c37ebf6cf42208f9c9ec"
+checksum = "bd8f3f50b848df28f887acb68e41201b5aea6bc8a8dacc00fb40635ff9a72fea"
diff --git a/Cargo.toml b/Cargo.toml
index 809d27b..cfefec4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,7 @@ members = [
resolver = "2"
[workspace.package]
-version = "0.4.1"
+version = "0.4.2"
edition = "2024"
rust-version = "1.88.0"
authors = ["bug-ops"]
diff --git a/crates/feedparser-rs-core/src/parser/rss.rs b/crates/feedparser-rs-core/src/parser/rss.rs
index fcf15b8..ee57693 100644
--- a/crates/feedparser-rs-core/src/parser/rss.rs
+++ b/crates/feedparser-rs-core/src/parser/rss.rs
@@ -155,7 +155,12 @@ fn parse_channel(
loop {
match reader.read_event_into(&mut buf) {
- Ok(Event::Start(e) | Event::Empty(e)) => {
+ Ok(event @ (Event::Start(_) | Event::Empty(_))) => {
+ let is_empty = matches!(event, Event::Empty(_));
+ let (Event::Start(e) | Event::Empty(e)) = &event else {
+ unreachable!()
+ };
+
*depth += 1;
check_depth(*depth, limits.max_nesting_depth)?;
@@ -163,19 +168,21 @@ fn parse_channel(
// We need owned tag data to pass &mut buf to helper functions simultaneously.
// Potential future optimization: restructure helpers to avoid this allocation.
let tag = e.name().as_ref().to_vec();
- let (attrs, has_attr_errors) = collect_attributes(&e);
+ let (attrs, has_attr_errors) = collect_attributes(e);
if has_attr_errors {
feed.bozo = true;
feed.bozo_exception = Some(MALFORMED_ATTRIBUTES_ERROR.to_string());
}
// Extract xml:lang before matching to avoid borrow issues
- let item_lang = extract_xml_lang(&e, limits.max_attribute_length);
+ let item_lang = extract_xml_lang(e, limits.max_attribute_length);
// Use full qualified name to distinguish standard RSS tags from namespaced tags
match tag.as_slice() {
b"title" | b"link" | b"description" | b"language" | b"pubDate"
- | b"managingEditor" | b"webMaster" | b"generator" | b"ttl" | b"category" => {
+ | b"managingEditor" | b"webMaster" | b"generator" | b"ttl" | b"category"
+ if !is_empty =>
+ {
parse_channel_standard(
reader,
&mut buf,
@@ -186,12 +193,12 @@ fn parse_channel(
channel_lang,
)?;
}
- b"image" => {
+ b"image" if !is_empty => {
if let Ok(image) = parse_image(reader, &mut buf, limits, depth) {
feed.feed.image = Some(image);
}
}
- b"item" => {
+ b"item" if !is_empty => {
parse_channel_item(
item_lang.as_deref(),
reader,
@@ -205,7 +212,7 @@ fn parse_channel(
}
_ => {
parse_channel_extension(
- reader, &mut buf, &tag, &attrs, feed, limits, depth,
+ reader, &mut buf, &tag, &attrs, feed, limits, depth, is_empty,
)?;
}
}
@@ -265,6 +272,7 @@ fn parse_channel_item(
/// Parse channel extension elements (iTunes, Podcast, namespaces)
#[inline]
+#[allow(clippy::too_many_arguments)]
fn parse_channel_extension(
reader: &mut Reader<&[u8]>,
buf: &mut Vec,
@@ -273,16 +281,18 @@ fn parse_channel_extension(
feed: &mut ParsedFeed,
limits: &ParserLimits,
depth: &mut usize,
+ is_empty: bool,
) -> Result<()> {
- let mut handled = parse_channel_itunes(reader, buf, tag, attrs, feed, limits, depth)?;
+ let mut handled = parse_channel_itunes(reader, buf, tag, attrs, feed, limits, depth, is_empty)?;
if !handled {
- handled = parse_channel_podcast(reader, buf, tag, attrs, feed, limits)?;
+ handled = parse_channel_podcast(reader, buf, tag, attrs, feed, limits, is_empty)?;
}
if !handled {
- handled = parse_channel_namespace(reader, buf, tag, feed, limits, *depth)?;
+ handled = parse_channel_namespace(reader, buf, tag, feed, limits, *depth, is_empty)?;
}
- if !handled {
+ // Only skip element content if this is NOT an empty element
+ if !handled && !is_empty {
skip_element(reader, buf, limits, *depth)?;
}
@@ -401,6 +411,7 @@ fn parse_channel_standard(
/// Parse iTunes namespace tags at channel level
///
/// Returns `Ok(true)` if the tag was recognized and handled, `Ok(false)` if not recognized.
+#[allow(clippy::too_many_arguments)]
fn parse_channel_itunes(
reader: &mut Reader<&[u8]>,
buf: &mut Vec,
@@ -409,80 +420,107 @@ fn parse_channel_itunes(
feed: &mut ParsedFeed,
limits: &ParserLimits,
depth: &mut usize,
+ is_empty: bool,
) -> Result {
if is_itunes_tag(tag, b"author") {
- let text = read_text(reader, buf, limits)?;
- let itunes = feed
- .feed
- .itunes
- .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
- itunes.author = Some(text);
+ if !is_empty {
+ let text = read_text(reader, buf, limits)?;
+ let itunes = feed
+ .feed
+ .itunes
+ .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
+ itunes.author = Some(text);
+ }
Ok(true)
} else if is_itunes_tag(tag, b"owner") {
- let itunes = feed
- .feed
- .itunes
- .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
- if let Ok(owner) = parse_itunes_owner(reader, buf, limits, depth) {
- itunes.owner = Some(owner);
+ if !is_empty {
+ let itunes = feed
+ .feed
+ .itunes
+ .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
+ if let Ok(owner) = parse_itunes_owner(reader, buf, limits, depth) {
+ itunes.owner = Some(owner);
+ }
}
Ok(true)
} else if is_itunes_tag(tag, b"category") {
- parse_itunes_category(reader, buf, attrs, feed, limits);
+ parse_itunes_category(reader, buf, attrs, feed, limits, is_empty);
Ok(true)
} else if is_itunes_tag(tag, b"explicit") {
- let text = read_text(reader, buf, limits)?;
- let itunes = feed
- .feed
- .itunes
- .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
- itunes.explicit = parse_explicit(&text);
+ if !is_empty {
+ let text = read_text(reader, buf, limits)?;
+ let itunes = feed
+ .feed
+ .itunes
+ .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
+ itunes.explicit = parse_explicit(&text);
+ }
Ok(true)
} else if is_itunes_tag(tag, b"image") {
if let Some(value) = find_attribute(attrs, b"href") {
+ let url = truncate_to_length(value, limits.max_attribute_length);
let itunes = feed
.feed
.itunes
.get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
- itunes.image = Some(truncate_to_length(value, limits.max_attribute_length).into());
+ itunes.image = Some(url.clone().into());
+ // Also set feed.image if not already set (for Python feedparser compatibility)
+ if feed.feed.image.is_none() {
+ feed.feed.image = Some(Image {
+ url: url.into(),
+ title: None,
+ link: None,
+ width: None,
+ height: None,
+ description: None,
+ });
+ }
}
Ok(true)
} else if is_itunes_tag(tag, b"keywords") {
- let text = read_text(reader, buf, limits)?;
- let itunes = feed
- .feed
- .itunes
- .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
- itunes.keywords = text
- .split(',')
- .map(|s| s.trim().to_string())
- .filter(|s| !s.is_empty())
- .collect();
+ if !is_empty {
+ let text = read_text(reader, buf, limits)?;
+ let itunes = feed
+ .feed
+ .itunes
+ .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
+ itunes.keywords = text
+ .split(',')
+ .map(|s| s.trim().to_string())
+ .filter(|s| !s.is_empty())
+ .collect();
+ }
Ok(true)
} else if is_itunes_tag(tag, b"type") {
- let text = read_text(reader, buf, limits)?;
- let itunes = feed
- .feed
- .itunes
- .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
- itunes.podcast_type = Some(text);
+ if !is_empty {
+ let text = read_text(reader, buf, limits)?;
+ let itunes = feed
+ .feed
+ .itunes
+ .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
+ itunes.podcast_type = Some(text);
+ }
Ok(true)
} else if is_itunes_tag(tag, b"complete") {
- let text = read_text(reader, buf, limits)?;
- let itunes = feed
- .feed
- .itunes
- .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
- itunes.complete = Some(text.trim().eq_ignore_ascii_case("Yes"));
- Ok(true)
- } else if is_itunes_tag(tag, b"new-feed-url") {
- let text = read_text(reader, buf, limits)?;
- if !text.is_empty() {
+ if !is_empty {
+ let text = read_text(reader, buf, limits)?;
let itunes = feed
.feed
.itunes
.get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
- itunes.new_feed_url = Some(text.trim().to_string().into());
+ itunes.complete = Some(text.trim().eq_ignore_ascii_case("Yes"));
+ }
+ Ok(true)
+ } else if is_itunes_tag(tag, b"new-feed-url") {
+ if !is_empty {
+ let text = read_text(reader, buf, limits)?;
+ if !text.is_empty() {
+ let itunes = feed
+ .feed
+ .itunes
+ .get_or_insert_with(|| Box::new(ItunesFeedMeta::default()));
+ itunes.new_feed_url = Some(text.trim().to_string().into());
+ }
}
Ok(true)
} else {
@@ -497,20 +535,39 @@ fn parse_itunes_category(
attrs: &[(Vec, String)],
feed: &mut ParsedFeed,
limits: &ParserLimits,
+ is_empty: bool,
) {
let category_text = find_attribute(attrs, b"text")
.map(|v| truncate_to_length(v, limits.max_attribute_length))
.unwrap_or_default();
- // Parse potential nested subcategory
+ // Parse potential nested subcategory (only if not an empty element)
let mut subcategory_text = None;
- let mut nesting = 0;
- loop {
- match reader.read_event_into(buf) {
- Ok(Event::Start(sub_e)) => {
- if is_itunes_tag(sub_e.name().as_ref(), b"category") {
- nesting += 1;
- if nesting == 1 {
+ if !is_empty {
+ let mut nesting = 0;
+ loop {
+ match reader.read_event_into(buf) {
+ Ok(Event::Start(sub_e)) => {
+ if is_itunes_tag(sub_e.name().as_ref(), b"category") {
+ nesting += 1;
+ if nesting == 1 {
+ for attr in sub_e.attributes().flatten() {
+ if attr.key.as_ref() == b"text"
+ && let Ok(value) = attr.unescape_value()
+ {
+ subcategory_text = Some(
+ value.chars().take(limits.max_attribute_length).collect(),
+ );
+ break;
+ }
+ }
+ }
+ }
+ }
+ Ok(Event::Empty(sub_e)) => {
+ if is_itunes_tag(sub_e.name().as_ref(), b"category")
+ && subcategory_text.is_none()
+ {
for attr in sub_e.attributes().flatten() {
if attr.key.as_ref() == b"text"
&& let Ok(value) = attr.unescape_value()
@@ -522,32 +579,19 @@ fn parse_itunes_category(
}
}
}
- }
- Ok(Event::Empty(sub_e)) => {
- if is_itunes_tag(sub_e.name().as_ref(), b"category") && subcategory_text.is_none() {
- for attr in sub_e.attributes().flatten() {
- if attr.key.as_ref() == b"text"
- && let Ok(value) = attr.unescape_value()
- {
- subcategory_text =
- Some(value.chars().take(limits.max_attribute_length).collect());
+ Ok(Event::End(end_e)) => {
+ if is_itunes_tag(end_e.name().as_ref(), b"category") {
+ if nesting == 0 {
break;
}
+ nesting -= 1;
}
}
+ Ok(Event::Eof) | Err(_) => break,
+ _ => {}
}
- Ok(Event::End(end_e)) => {
- if is_itunes_tag(end_e.name().as_ref(), b"category") {
- if nesting == 0 {
- break;
- }
- nesting -= 1;
- }
- }
- Ok(Event::Eof) | Err(_) => break,
- _ => {}
+ buf.clear();
}
- buf.clear();
}
let itunes = feed
@@ -571,24 +615,31 @@ fn parse_channel_podcast(
attrs: &[(Vec, String)],
feed: &mut ParsedFeed,
limits: &ParserLimits,
+ is_empty: bool,
) -> Result {
if tag.starts_with(b"podcast:guid") {
- let text = read_text(reader, buf, limits)?;
- let podcast = feed
- .feed
- .podcast
- .get_or_insert_with(|| Box::new(PodcastMeta::default()));
- podcast.guid = Some(text);
+ if !is_empty {
+ let text = read_text(reader, buf, limits)?;
+ let podcast = feed
+ .feed
+ .podcast
+ .get_or_insert_with(|| Box::new(PodcastMeta::default()));
+ podcast.guid = Some(text);
+ }
Ok(true)
} else if tag.starts_with(b"podcast:funding") {
let url = find_attribute(attrs, b"url")
.map(|v| truncate_to_length(v, limits.max_attribute_length))
.unwrap_or_default();
- let message_text = read_text(reader, buf, limits)?;
- let message = if message_text.is_empty() {
+ let message = if is_empty {
None
} else {
- Some(message_text)
+ let message_text = read_text(reader, buf, limits)?;
+ if message_text.is_empty() {
+ None
+ } else {
+ Some(message_text)
+ }
};
let podcast = feed
.feed
@@ -603,7 +654,9 @@ fn parse_channel_podcast(
);
Ok(true)
} else if tag.starts_with(b"podcast:value") {
- parse_podcast_value(reader, buf, attrs, feed, limits)?;
+ if !is_empty {
+ parse_podcast_value(reader, buf, attrs, feed, limits)?;
+ }
Ok(true)
} else {
Ok(false)
@@ -619,24 +672,35 @@ fn parse_channel_namespace(
feed: &mut ParsedFeed,
limits: &ParserLimits,
depth: usize,
+ is_empty: bool,
) -> Result {
if let Some(dc_element) = is_dc_tag(tag) {
- let dc_elem = dc_element.to_string();
- let text = read_text(reader, buf, limits)?;
- dublin_core::handle_feed_element(&dc_elem, &text, &mut feed.feed);
+ if !is_empty {
+ let dc_elem = dc_element.to_string();
+ let text = read_text(reader, buf, limits)?;
+ dublin_core::handle_feed_element(&dc_elem, &text, &mut feed.feed);
+ }
Ok(true)
} else if let Some(_content_element) = is_content_tag(tag) {
- skip_element(reader, buf, limits, depth)?;
+ if !is_empty {
+ skip_element(reader, buf, limits, depth)?;
+ }
Ok(true)
} else if let Some(_media_element) = is_media_tag(tag) {
- skip_element(reader, buf, limits, depth)?;
+ if !is_empty {
+ skip_element(reader, buf, limits, depth)?;
+ }
Ok(true)
} else if let Some(georss_element) = is_georss_tag(tag) {
- let text = read_text(reader, buf, limits)?;
- georss::handle_feed_element(georss_element.as_bytes(), &text, &mut feed.feed, limits);
+ if !is_empty {
+ let text = read_text(reader, buf, limits)?;
+ georss::handle_feed_element(georss_element.as_bytes(), &text, &mut feed.feed, limits);
+ }
Ok(true)
} else if tag.starts_with(b"creativeCommons:license") || tag == b"license" {
- feed.feed.license = Some(read_text(reader, buf, limits)?);
+ if !is_empty {
+ feed.feed.license = Some(read_text(reader, buf, limits)?);
+ }
Ok(true)
} else {
Ok(false)
@@ -694,7 +758,9 @@ fn parse_item(
.enclosures
.try_push_limited(enclosure, limits.max_enclosures);
}
- skip_element(reader, buf, limits, *depth)?;
+ if !is_empty {
+ skip_element(reader, buf, limits, *depth)?;
+ }
}
b"source" => {
if let Ok(source) = parse_source(reader, buf, limits, depth) {
@@ -716,7 +782,7 @@ fn parse_item(
)?;
}
- if !handled {
+ if !handled && !is_empty {
skip_element(reader, buf, limits, *depth)?;
}
}
diff --git a/crates/feedparser-rs-core/tests/issue45_test.rs b/crates/feedparser-rs-core/tests/issue45_test.rs
new file mode 100644
index 0000000..d80ade3
--- /dev/null
+++ b/crates/feedparser-rs-core/tests/issue45_test.rs
@@ -0,0 +1,539 @@
+//! Tests for GitHub issue #45: RSS 2.0 feeds with atom namespace don't parse items
+//!
+//! This module tests handling of self-closing (empty) XML elements in RSS feeds.
+//! The root cause was that empty elements like `<atom:link ... />` were treated
+//! identically to `<atom:link>...</atom:link>`, causing `skip_element()` to consume
+//! subsequent events looking for a closing tag that doesn't exist.
+
+#![allow(missing_docs)]
+#![allow(clippy::unwrap_used)]
+
+// =============================================================================
+// Basic regression test for issue #45
+// =============================================================================
+
+#[test]
+fn test_rss20_with_atom_namespace() {
+ let xml = r#"
+
+
+ Example Feed
+
+ https://example.com
+ -
+ First Post
+ https://example.com/post/1
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(
+ feed.entries.len(),
+ 1,
+ "Should parse the item after atom:link"
+ );
+ assert_eq!(feed.feed.title.as_deref(), Some("Example Feed"));
+ assert_eq!(feed.entries[0].title.as_deref(), Some("First Post"));
+}
+
+// =============================================================================
+// Multiple empty elements at channel level
+// =============================================================================
+
+#[test]
+fn test_multiple_empty_atom_links_in_channel() {
+ let xml = r#"
+
+
+ Multiple Empty Elements Feed
+
+
+
+ https://example.com
+ -
+ First Post
+ https://example.com/post/1
+
+ -
+ Second Post
+ https://example.com/post/2
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(
+ feed.entries.len(),
+ 2,
+ "Should parse all items after multiple atom:link elements"
+ );
+ assert_eq!(feed.entries[0].title.as_deref(), Some("First Post"));
+ assert_eq!(feed.entries[1].title.as_deref(), Some("Second Post"));
+}
+
+#[test]
+fn test_empty_elements_interleaved_with_items() {
+ let xml = r#"
+
+
+ Interleaved Feed
+
+ -
+ First Post
+
+
+ -
+ Second Post
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(
+ feed.entries.len(),
+ 2,
+ "Should parse items even when interleaved with empty elements"
+ );
+}
+
+// =============================================================================
+// Empty elements at item level
+// =============================================================================
+
+// Fixed: An empty atom:link inside `<item>` elements now works correctly.
+// The is_empty check is now applied at item level in parse_item() (line 771).
+#[test]
+fn test_empty_atom_link_in_item() {
+ let xml = r#"
+
+
+ Item Level Empty Elements
+
-
+ Post with atom:link
+
+ Item description after empty atom:link
+
+ -
+ Second Post
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(feed.entries.len(), 2);
+ assert!(
+ feed.entries[0].summary.is_some(),
+ "Should parse description after empty atom:link in item"
+ );
+}
+
+// Fixed: Multiple empty namespace elements inside `<item>` now work correctly.
+// Same fix as test_empty_atom_link_in_item.
+#[test]
+fn test_multiple_empty_elements_in_item() {
+ let xml = r#"
+
+
+ Multiple Empty Elements in Item
+
-
+ Media Post
+
+
+
+ Description should be parsed
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(feed.entries.len(), 1);
+ assert!(feed.entries[0].summary.is_some());
+ assert_eq!(feed.entries[0].media_thumbnails.len(), 1);
+ assert_eq!(feed.entries[0].media_content.len(), 1);
+}
+
+// =============================================================================
+// Mixed empty and non-empty namespace elements
+// =============================================================================
+
+#[test]
+fn test_mixed_empty_and_nonempty_namespace_tags() {
+ let xml = r#"
+
+
+ Mixed Elements Feed
+
+ John Doe
+
+ Copyright 2024
+ -
+ Test Post
+ Jane Doe
+
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(feed.entries.len(), 1);
+ assert_eq!(feed.feed.dc_creator.as_deref(), Some("John Doe"));
+ assert_eq!(feed.feed.dc_rights.as_deref(), Some("Copyright 2024"));
+ assert_eq!(feed.entries[0].dc_creator.as_deref(), Some("Jane Doe"));
+}
+
+#[test]
+fn test_atom_link_before_and_after_content() {
+ let xml = r#"
+
+
+
+ Title After Empty Element
+ https://example.com
+
+ Description after second empty element
+ -
+ Post
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(
+ feed.feed.title.as_deref(),
+ Some("Title After Empty Element")
+ );
+ assert_eq!(
+ feed.feed.subtitle.as_deref(),
+ Some("Description after second empty element")
+ );
+ assert_eq!(feed.entries.len(), 1);
+}
+
+// =============================================================================
+// Empty iTunes/Podcast namespace elements
+// =============================================================================
+
+// Fixed: Empty itunes:image at channel level now extracts href attribute correctly.
+// The itunes:image handler also sets feed.feed.image for Python feedparser compatibility.
+#[test]
+fn test_empty_itunes_image_in_channel() {
+ let xml = r#"
+
+
+ Podcast Feed
+
+ -
+ Episode 1
+
+ Episode description
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(feed.entries.len(), 1);
+ assert_eq!(
+ feed.feed.image.as_ref().map(|i| &*i.url),
+ Some("https://example.com/artwork.jpg")
+ );
+ assert!(feed.entries[0].summary.is_some());
+}
+
+#[test]
+fn test_empty_itunes_category() {
+ let xml = r#"
+
+
+ Podcast Feed
+
+
+
+
+ -
+ Episode 1
+
+ -
+ Episode 2
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(
+ feed.entries.len(),
+ 2,
+ "Should parse items after itunes:category elements"
+ );
+}
+
+#[test]
+fn test_empty_podcast_namespace_elements() {
+ let xml = r#"
+
+
+ Podcast 2.0 Feed
+ no
+ Support the show
+ -
+ Episode 1
+
+
+ Episode with podcast 2.0 elements
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(feed.entries.len(), 1);
+ assert!(feed.entries[0].summary.is_some());
+}
+
+// =============================================================================
+// Real-world feed patterns
+// =============================================================================
+
+// Fixed: Real-world podcast feed parsing now works correctly.
+// Both itunes:image and enclosure elements are handled properly as empty elements.
+#[test]
+fn test_realistic_podcast_feed_with_atom_self_link() {
+ let xml = r#"
+
+
+ Tech Podcast
+ https://techpodcast.example.com
+ A weekly podcast about technology
+
+
+
+ Tech Team
+
+
+ -
+ Episode 100: Milestone Episode
+ https://techpodcast.example.com/ep100
+ Our 100th episode celebration
+
+ Mon, 01 Jan 2024 10:00:00 +0000
+ 01:23:45
+
+
+
+ -
+ Episode 101: Future of AI
+ https://techpodcast.example.com/ep101
+ Discussion about artificial intelligence
+ Full show notes with HTML
]]>
+
+ Mon, 08 Jan 2024 10:00:00 +0000
+
+
+ -
+ Episode 102: Cloud Computing
+ https://techpodcast.example.com/ep102
+
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+
+ assert_eq!(feed.feed.title.as_deref(), Some("Tech Podcast"));
+ assert_eq!(feed.entries.len(), 3, "Should parse all 3 episodes");
+
+ assert_eq!(
+ feed.entries[0].title.as_deref(),
+ Some("Episode 100: Milestone Episode")
+ );
+ assert_eq!(feed.entries[0].enclosures.len(), 1);
+
+ assert_eq!(
+ feed.entries[1].title.as_deref(),
+ Some("Episode 101: Future of AI")
+ );
+ assert!(
+ !feed.entries[1].content.is_empty(),
+ "Should have content:encoded"
+ );
+
+ assert_eq!(
+ feed.entries[2].title.as_deref(),
+ Some("Episode 102: Cloud Computing")
+ );
+}
+
+#[test]
+fn test_wordpress_style_feed_with_atom_link() {
+ let xml = r#"
+
+
+ WordPress Blog
+
+ https://blog.example.com
+ A WordPress blog
+ Mon, 15 Jan 2024 12:00:00 +0000
+ en-US
+ hourly
+ 1
+
+ -
+ First Blog Post
+ https://blog.example.com/first-post/
+
+ Mon, 15 Jan 2024 10:00:00 +0000
+
+ https://blog.example.com/?p=1
+
+ Full post content]]>
+ https://blog.example.com/first-post/feed/
+ 5
+
+
+ -
+ Second Blog Post
+ https://blog.example.com/second-post/
+
+ Tue, 16 Jan 2024 10:00:00 +0000
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+
+ assert_eq!(feed.feed.title.as_deref(), Some("WordPress Blog"));
+ assert_eq!(feed.entries.len(), 2);
+ assert_eq!(feed.entries[0].dc_creator.as_deref(), Some("admin"));
+ assert_eq!(feed.entries[1].dc_creator.as_deref(), Some("editor"));
+}
+
+// =============================================================================
+// Edge cases with empty standard RSS elements (defensive tests)
+// =============================================================================
+
+#[test]
+fn test_empty_standard_elements_ignored() {
+ let xml = r#"
+
+
+ Feed with empty elements
+
+
+ -
+ Item title
+
+
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(feed.entries.len(), 1);
+ assert_eq!(feed.entries[0].title.as_deref(), Some("Item title"));
+}
+
+// Fixed: Self-closing enclosure elements now work correctly.
+// The is_empty check is now applied before calling skip_element for enclosure elements.
+#[test]
+fn test_self_closing_enclosure_followed_by_content() {
+ let xml = r#"
+
+
+ Enclosure Test
+ -
+ Episode with enclosure
+
+ Description after enclosure
+ Mon, 01 Jan 2024 10:00:00 +0000
+
+ -
+ Second Episode
+
+
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(feed.entries.len(), 2);
+ assert_eq!(feed.entries[0].enclosures.len(), 1);
+ assert!(
+ feed.entries[0].summary.is_some(),
+ "Description after empty enclosure should be parsed"
+ );
+ assert!(
+ feed.entries[0].published.is_some(),
+ "pubDate after empty enclosure should be parsed"
+ );
+ assert_eq!(feed.entries[1].enclosures.len(), 1);
+}
+
+// =============================================================================
+// Combination stress tests
+// =============================================================================
+
+#[test]
+fn test_many_empty_elements_followed_by_many_items() {
+ let xml = r#"
+
+
+ Stress Test Feed
+
+
+
+
+
+ - Item 1
+ - Item 2
+ - Item 3
+ - Item 4
+ - Item 5
+ - Item 6
+ - Item 7
+ - Item 8
+ - Item 9
+ - Item 10
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(
+ feed.entries.len(),
+ 10,
+ "All 10 items should be parsed after multiple empty atom:link elements"
+ );
+ for (i, entry) in feed.entries.iter().enumerate() {
+ assert_eq!(
+ entry.title.as_deref(),
+ Some(format!("Item {}", i + 1).as_str()),
+ "Item {} should have correct title",
+ i + 1
+ );
+ }
+}
+
+#[test]
+fn test_alternating_empty_elements_and_items() {
+ let xml = r#"
+
+
+ Alternating Test
+
+ - Item 1
+
+ - Item 2
+
+ - Item 3
+
+ - Item 4
+
+ - Item 5
+
+"#;
+ let feed = feedparser_rs::parse(xml.as_bytes()).unwrap();
+ assert_eq!(
+ feed.entries.len(),
+ 5,
+ "All items should be parsed when alternating with empty elements"
+ );
+}
diff --git a/crates/feedparser-rs-node/package.json b/crates/feedparser-rs-node/package.json
index fb73df3..13e8345 100644
--- a/crates/feedparser-rs-node/package.json
+++ b/crates/feedparser-rs-node/package.json
@@ -1,6 +1,6 @@
{
"name": "feedparser-rs",
- "version": "0.4.1",
+ "version": "0.4.2",
"description": "High-performance RSS/Atom/JSON Feed parser for Node.js",
"main": "index.js",
"types": "index.d.ts",
diff --git a/crates/feedparser-rs-py/pyproject.toml b/crates/feedparser-rs-py/pyproject.toml
index e332776..b259ff7 100644
--- a/crates/feedparser-rs-py/pyproject.toml
+++ b/crates/feedparser-rs-py/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
[project]
name = "feedparser-rs"
-version = "0.4.1"
+version = "0.4.2"
description = "High-performance RSS/Atom/JSON Feed parser with feedparser-compatible API"
readme = "README.md"
license = { text = "MIT OR Apache-2.0" }
@@ -37,5 +37,5 @@ module-name = "feedparser_rs._feedparser_rs"
[dependency-groups]
dev = [
- "pytest<9",
+ "pytest>=9.0,<10",
]
diff --git a/crates/feedparser-rs-py/uv.lock b/crates/feedparser-rs-py/uv.lock
index 3df9d41..b311d84 100644
--- a/crates/feedparser-rs-py/uv.lock
+++ b/crates/feedparser-rs-py/uv.lock
@@ -29,7 +29,7 @@ wheels = [
[[package]]
name = "feedparser-rs"
-version = "0.3.0"
+version = "0.4.2"
source = { editable = "." }
[package.dev-dependencies]