Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion goose/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def initialize(self):
)

# test to write a dummy file to the directory
# to check is directory is writtable
# to check if the directory is writable
level, path = mkstemp(dir=self.config.local_storage_path)
try:
f = os.fdopen(level, "w")
Expand Down
2 changes: 1 addition & 1 deletion goose/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(self):
# interface to build your own
self.enable_image_fetching = True

# set this valriable to False if you want to force
# set this variable to False if you want to force
# the article language. Otherwise it will attempt to
# find meta language and use the correct stopwords dictionary
self.use_meta_language = True
Expand Down
4 changes: 2 additions & 2 deletions goose/extractors/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def is_boostable(self, node):
it should be connected to other paragraphs,
at least for the first n paragraphs so we'll want to make sure that
the next sibling is a paragraph and has at
least some substatial weight to it
least some substantial weight to it
"""
para = "p"
steps_away = 0
Expand Down Expand Up @@ -181,7 +181,7 @@ def walk_siblings(self, node):

def add_siblings(self, top_node):
# in case the extraction used known attributes
# we don't want to add sibilings
# we don't want to add siblings
if self.is_articlebody(top_node):
return top_node
baselinescore_siblings_para = self.get_siblings_score(top_node)
Expand Down
4 changes: 2 additions & 2 deletions goose/extractors/title.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def clean_title(self, title):
title = pattern.sub("", title).strip()

# split the title into words
# TechCrunch | my wonderfull article
# my wonderfull article | TechCrunch
# TechCrunch | my wonderful article
# my wonderful article | TechCrunch
title_words = title.split()

# check for an empty title
Expand Down
4 changes: 2 additions & 2 deletions goose/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class StopWordsChinese(StopWords):
Chinese segmentation
"""
def __init__(self, language='zh'):
# force zh languahe code
# force zh language code
super(StopWordsChinese, self).__init__(language='zh')

def candiate_words(self, stripped_input):
Expand All @@ -153,7 +153,7 @@ class StopWordsArabic(StopWords):
Arabic segmentation
"""
def __init__(self, language='ar'):
# force ar languahe code
# force ar language code
super(StopWordsArabic, self).__init__(language='ar')

def remove_punctuation(self, content):
Expand Down
4 changes: 2 additions & 2 deletions tests/extractors/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,12 +204,12 @@ def test_detail(self):
for k, v in self.expected_results.items():
self.assertEqual(getattr(image_detail, k), v)

# test image_detail get_ methode
# test image_detail get_ method
for k, v in self.expected_results.items():
attr = 'get_%s' % k
self.assertEqual(getattr(image_detail, attr)(), v)

# test image_detail set_ methode
# test image_detail set_ method
expected_results = {
'width': 10,
'height': 10,
Expand Down