Skip to content

Was scraping some of the earlier stories, and kept getting the list index out of range error. #2

@ethanbreck

Description

@ethanbreck

So here for profile 3000 and profile 3304 this error came up.

Traceback (most recent call last):
File "fanfiction-api_test.py", line 44, in
user_write_story_id = get_story_id(user_id)
File "fanfiction-api_test.py", line 30, in get_story_id
scraper.scrape_story_metadata(story["data-storyid"])
File "/usr/local/lib/python3.7/site-packages/fanfiction/scraper.py", line 65, in scrape_story_metadata
'canon': pre_story_links[1].text,
IndexError: list index out of range

All my code, just in case:

from bs4 import BeautifulSoup as bs
from time import sleep
from random import randint
import requests
import re
from fanfiction import Scraper
import csv
import sys

sys.setrecursionlimit(30000)

response = None
response_user_id = None
story_id = None

def get_story_id(user_id):
global response_user_id
global response
if response_user_id != user_id:
response = requests.get('https://www.fanfiction.net/u/{}'.format(user_id))
response_user_id = user_id
sleep(randint(4, 6))
if response.status_code == requests.codes.ok:
soup = bs(response.text, 'html.parser')
my_stories = soup.select("div.mystories")
stories = []
scraper = Scraper()
for story in my_stories:
story_data = [
scraper.scrape_story_metadata(story["data-storyid"])
]
stories.append(story_data)

    return stories
else:
    print("Couldn't download for user id {}".format(user_id))

#completed: 3299

user_id = 3300

while user_id < 10000:
user_id += 1
user_write_story_id = get_story_id(user_id)
story_id = get_story_id(user_id)
if len(user_write_story_id) != 0:
with open("data/stories_id_test.csv", 'a+') as write_stories_ids:
csv_writer = csv.writer(write_stories_ids, quoting=csv.QUOTE_MINIMAL)
for line in user_write_story_id:
csv_writer.writerow(line)
print(user_id)

else:
print("We are at the final user for this batch")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions