From e3bc934d6675603855eec39397ec3237f46ded4b Mon Sep 17 00:00:00 2001 From: MineRobber___T Date: Thu, 7 Nov 2019 23:42:39 -0500 Subject: [PATCH 1/2] Add summary to metadata The first div with the `xcontrast_txt` class is the summary/description of the work. --- fanfiction/scraper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fanfiction/scraper.py b/fanfiction/scraper.py index 3ef6b53..f263daf 100644 --- a/fanfiction/scraper.py +++ b/fanfiction/scraper.py @@ -59,6 +59,7 @@ def scrape_story_metadata(self, story_id): metadata_text = metadata_div.find(class_='xgray xcontrast_txt').text metadata_parts = metadata_text.split('-') genres = self.get_genres(metadata_parts[2].strip()) + summary = soup.find("div",{"class":"xcontrast_txt"}).text metadata = { 'id': story_id, 'canon_type': pre_story_links[0].text, @@ -68,7 +69,8 @@ def scrape_story_metadata(self, story_id): 'updated': int(times[0]['data-xutime']), 'published': int(times[1]['data-xutime']), 'lang': metadata_parts[1].strip(), - 'genres': genres + 'genres': genres, + 'summary': summary } for parts in metadata_parts: parts = parts.strip() From 6e52ea241a5e8c91b63c6bb9fc8a41ff6f9e46d4 Mon Sep 17 00:00:00 2001 From: MineRobber___T Date: Thu, 7 Nov 2019 23:44:35 -0500 Subject: [PATCH 2/2] Update README.md Document the addition of `summary`. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d22b359..2e0886a 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ fanfiction.Scraper.get_story_metadata(story_id) * **num_follows** [int] * **num_words** [int]: Total number of words in all chapters of the story * **rated** [str]: The story's fiction rating. i.e. K, K+, T, M + * **summary** [str]: The summary (or description) of the work. ``` fanfiction.Scraper.scrape_story(story_id, keep_html=False)