diff --git a/goose/__init__.py b/goose/__init__.py index 409b5732..14bb0663 100644 --- a/goose/__init__.py +++ b/goose/__init__.py @@ -87,7 +87,7 @@ def initialize(self): ) # test to write a dummy file to the directory - # to check is directory is writtable + # to check if directory is writable level, path = mkstemp(dir=self.config.local_storage_path) try: f = os.fdopen(level, "w") diff --git a/goose/configuration.py b/goose/configuration.py index fcfa5b9a..1d7c98e1 100644 --- a/goose/configuration.py +++ b/goose/configuration.py @@ -49,7 +49,7 @@ def __init__(self): # interface to build your own self.enable_image_fetching = True - # set this valriable to False if you want to force + # set this variable to False if you want to force # the article language. OtherWise it will attempt to # find meta language and use the correct stopwords dictionary self.use_meta_language = True diff --git a/goose/extractors/content.py b/goose/extractors/content.py index e0703d55..b2e3a43c 100644 --- a/goose/extractors/content.py +++ b/goose/extractors/content.py @@ -149,7 +149,7 @@ def is_boostable(self, node): it should be connected to other paragraphs, at least for the first n paragraphs so we'll want to make sure that the next sibling is a paragraph and has at - least some substatial weight to it + least some substantial weight to it """ para = "p" steps_away = 0 @@ -181,7 +181,7 @@ def walk_siblings(self, node): def add_siblings(self, top_node): # in case the extraction used known attributes - # we don't want to add sibilings + # we don't want to add siblings if self.is_articlebody(top_node): return top_node baselinescore_siblings_para = self.get_siblings_score(top_node) diff --git a/goose/extractors/title.py b/goose/extractors/title.py index a59dca92..74775c0c 100644 --- a/goose/extractors/title.py +++ b/goose/extractors/title.py @@ -47,8 +47,8 @@ def clean_title(self, title): title = pattern.sub("", title).strip() # split the title in words - # TechCrunch | my wonderfull article 
- # my wonderfull article | TechCrunch + # TechCrunch | my wonderful article + # my wonderful article | TechCrunch title_words = title.split() # check for an empty title diff --git a/goose/text.py b/goose/text.py index 3ef63d6b..89f1aab2 100644 --- a/goose/text.py +++ b/goose/text.py @@ -137,7 +137,7 @@ class StopWordsChinese(StopWords): Chinese segmentation """ def __init__(self, language='zh'): - # force zh languahe code + # force zh language code super(StopWordsChinese, self).__init__(language='zh') def candiate_words(self, stripped_input): @@ -153,7 +153,7 @@ class StopWordsArabic(StopWords): Arabic segmentation """ def __init__(self, language='ar'): - # force ar languahe code + # force ar language code super(StopWordsArabic, self).__init__(language='ar') def remove_punctuation(self, content): diff --git a/tests/extractors/images.py b/tests/extractors/images.py index e47a1dde..ff650d64 100644 --- a/tests/extractors/images.py +++ b/tests/extractors/images.py @@ -204,12 +204,12 @@ def test_detail(self): for k, v in self.expected_results.items(): self.assertEqual(getattr(image_detail, k), v) - # test image_detail get_ methode + # test image_detail get_ method for k, v in self.expected_results.items(): attr = 'get_%s' % k self.assertEqual(getattr(image_detail, attr)(), v) - # test image_detail set_ methode + # test image_detail set_ method expected_results = { 'width': 10, 'height': 10,