From c7d103f570989fd7d2d2a51fb3283c2044e95f6d Mon Sep 17 00:00:00 2001 From: lokas Date: Fri, 11 Aug 2023 08:27:35 +0300 Subject: [PATCH 1/5] add infobox_football_biography --- tasks/infobox_football_biography/__init__.py | 0 .../pseudocode_structure.md | 121 +++++++++++ tasks/infobox_football_biography/todo.md | 194 ++++++++++++++++++ tasks/test.py | 34 +++ 4 files changed, 349 insertions(+) create mode 100644 tasks/infobox_football_biography/__init__.py create mode 100644 tasks/infobox_football_biography/pseudocode_structure.md create mode 100644 tasks/infobox_football_biography/todo.md create mode 100644 tasks/test.py diff --git a/tasks/infobox_football_biography/__init__.py b/tasks/infobox_football_biography/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tasks/infobox_football_biography/pseudocode_structure.md b/tasks/infobox_football_biography/pseudocode_structure.md new file mode 100644 index 00000000..9a2228e4 --- /dev/null +++ b/tasks/infobox_football_biography/pseudocode_structure.md @@ -0,0 +1,121 @@ +# Football Player Data Translation Bot - pseudocode structure + +## Observer Pattern: Monitoring and Error Handling + +```python +class ErrorObserver: + def update(self, message): +# Log error messages + + +class ProgressObserver: + def update(self, message): +# Log progress messages + +``` + +## Pipeline Pattern: Structured Data Processing + +```python +class DataExtractor: + def extract_data(self, article_url): + wikitext = self.fetch_wikitext(article_url) + parsed_data = self.parse_infobox_template(wikitext) + return parsed_data + + +class DataTranslator: + def __init__(self, translation_array): + self.translation_array = translation_array + + def translate(self, english_data): + # Translation logic using translation_array + pass + + +class TemplateIntegrator: + def integrate(self, arabic_article_url, integrated_template): + arabic_wikitext = self.fetch_wikitext(arabic_article_url) + arabic_template = 
self.extract_arabic_template(arabic_wikitext) + # Integration logic using integrated_template + pass + + +class QualityAssurer: + def validate_translation(self, translation): + # Validation logic + pass + + def validate_integration(self, integrated_template): + # Validation logic + pass +``` + +## Decorator Pattern: Quality Assurance + +```python +class TranslationQualityChecker: + def __init__(self, translator): + self.translator = translator + + def translate(self, english_data): + translated_data = self.translator.translate(english_data) + # Validate translated_data + return translated_data + + +class IntegrationQualityChecker: + def __init__(self, integrator): + self.integrator = integrator + + def integrate(self, arabic_article_url, integrated_template): + self.integrator.integrate(arabic_article_url, integrated_template) + # Validate integrated_template +``` + +## Template Method Pattern: Bot Operation Structure + +```python + +class FootballPlayerBot: + def __init__(self): + self.error_observer = ErrorObserver() + self.progress_observer = ProgressObserver() + self.translation_array = load_custom_translation_array() + + def notify_error(self, message): + self.error_observer.update(message) + + def notify_progress(self, message): + self.progress_observer.update(message) + + # ... 
(Other methods) + + def bot_operation(self): + self.notify_progress("Starting bot operation...") + + data_extractor = DataExtractor() + data_translator = DataTranslator(self.translation_array) + template_integrator = TemplateIntegrator() + + quality_translator = TranslationQualityChecker(data_translator) + quality_integrator = IntegrationQualityChecker(template_integrator) + + for each article in English Wikipedia: + english_data = data_extractor.extract_data(article.url) + translated_data = quality_translator.translate(english_data) + + arabic_wikitext = self.fetch_wikitext(article.arabic_url) + integrated_template = quality_integrator.integrate(article.arabic_url, translated_data) + + # ... (Other steps) + + self.notify_progress("Bot operation completed.") +``` + +# Instantiate and run the bot + +```python +bot = FootballPlayerBot() +bot.bot_operation() +``` diff --git a/tasks/infobox_football_biography/todo.md b/tasks/infobox_football_biography/todo.md new file mode 100644 index 00000000..4a7dce00 --- /dev/null +++ b/tasks/infobox_football_biography/todo.md @@ -0,0 +1,194 @@ +# Football Player Data Translation Bot - Arabic Wikipedia + +## Observer Pattern: Monitoring and Error Handling + +- [ ] Implement an observer pattern to monitor the bot's progress and detect errors. +- [ ] Set up error handling mechanisms for each stage of the bot's operation. +- [ ] Create a logging system to record errors and progress. + +## Pipeline Pattern: Structured Data Processing + +- [ ] Data Extraction Stage from English Wikipedia: + - [ ] Identify English Wikipedia articles with Infobox football biography templates. + - [ ] Configure the wikitext parsing library for English. + - [ ] Extract data from the English Infobox template and organize it. + +- [ ] Translation Stage (Using Custom Array): + - [ ] Create a custom translation array mapping English keys to Arabic keys. + - [ ] Implement a translation function using the custom array. 
+ +- [ ] Template Integration Stage on Arabic Wikipedia: + - [ ] Identify target Arabic Wikipedia articles and corresponding templates. + - [ ] Set up the wikitext parsing library for Arabic Wikipedia. + - [ ] Parse the Arabic article wikitext and extract the Arabic template. + - [ ] Integrate the translated and translated data into the Arabic template. + +- [ ] Error Handling and Quality Assurance Stage: + - [ ] Implement quality checks for translations and integrated data. + - [ ] Ensure accurate and contextually appropriate integration. + +## Decorator Pattern: Quality Assurance + +- [ ] Implement quality checks as decorators for translation and integration stages. +- [ ] Validate translations and integrated data for accuracy and context. + +## Template Method Pattern: Bot Operation Structure + +- [ ] Design a template method for the entire bot operation: + - [ ] Extract data from English Wikipedia articles. + - [ ] Translate data using the custom array. + - [ ] Integrate translated data into Arabic Wikipedia templates. + - [ ] Perform error handling and quality assurance. + - [ ] Log progress and errors through the observer pattern. + +## Testing and Iteration + +- [ ] Test the bot operation on a small subset of articles. +- [ ] Identify and address issues with data extraction, translation, or template integration. +- [ ] Iterate based on testing results and feedback. + +## Automation and Scaling (Optional) + +- [ ] Evaluate the feasibility of automating the bot for a larger number of articles. +- [ ] Develop automation scripts or tools, considering Wikipedia's guidelines and rate limits. +- [ ] Strategize to manage potential server load and ensure adherence to Wikipedia's rules. + +## Documentation and Reporting + +- [ ] Document the bot's operation, including setup, implementation, and usage instructions. +- [ ] Prepare a report summarizing the project, challenges encountered, and solutions applied. 
+ +## Legal and Ethical Considerations + +- [ ] Review and adhere to Wikipedia's terms of use and guidelines for bot operation and content modification. +- [ ] Ensure compliance with relevant data protection and copyright regulations. + +## Project Completion + +- [ ] Conduct a final review of the bot's operation and components. +- [ ] Ensure that all tasks are completed and thoroughly tested. +- [ ] Conclude the project, including documentation and any required reporting. + +# Football Player Data Translation Bot - pseduo code + +## Observer Pattern: Monitoring and Error Handling + +```python +class ErrorObserver: + def update(self, message): +# Log error messages + + +class ProgressObserver: + def update(self, message): +# Log progress messages + + +class FootballPlayerBot: + def __init__(self): + self.error_observer = ErrorObserver() + self.progress_observer = ProgressObserver() + self.translation_array = load_custom_translation_array() + + def notify_error(self, message): + self.error_observer.update(message) + + def notify_progress(self, message): + self.progress_observer.update(message) + + # ... 
(Other methods) +``` + +## Pipeline Pattern: Structured Data Processing + +```python +class DataExtractor: + def extract_data(self, article_url): + wikitext = self.fetch_wikitext(article_url) + parsed_data = self.parse_infobox_template(wikitext) + return parsed_data + + +class DataTranslator: + def __init__(self, translation_array): + self.translation_array = translation_array + + def translate(self, english_data): + # Translation logic using translation_array + pass + + +class TemplateIntegrator: + def integrate(self, arabic_article_url, integrated_template): + arabic_wikitext = self.fetch_wikitext(arabic_article_url) + arabic_template = self.extract_arabic_template(arabic_wikitext) + # Integration logic using integrated_template + pass + + +class QualityAssurer: + def validate_translation(self, translation): + # Validation logic + pass + + def validate_integration(self, integrated_template): + # Validation logic + pass +``` + +## Decorator Pattern: Quality Assurance + +```python +class TranslationQualityChecker: + def __init__(self, translator): + self.translator = translator + + def translate(self, english_data): + translated_data = self.translator.translate(english_data) + # Validate translated_data + return translated_data + + +class IntegrationQualityChecker: + def __init__(self, integrator): + self.integrator = integrator + + def integrate(self, arabic_article_url, integrated_template): + self.integrator.integrate(arabic_article_url, integrated_template) + # Validate integrated_template +``` + +## Template Method Pattern: Bot Operation Structure + +```python +class FootballPlayerBot: + # ... 
(Other methods) + + def bot_operation(self): + self.notify_progress("Starting bot operation...") + + data_extractor = DataExtractor() + data_translator = DataTranslator(self.translation_array) + template_integrator = TemplateIntegrator() + + quality_translator = TranslationQualityChecker(data_translator) + quality_integrator = IntegrationQualityChecker(template_integrator) + + for each article in English Wikipedia: + english_data = data_extractor.extract_data(article.url) + translated_data = quality_translator.translate(english_data) + + arabic_wikitext = self.fetch_wikitext(article.arabic_url) + integrated_template = quality_integrator.integrate(article.arabic_url, translated_data) + + # ... (Other steps) + + self.notify_progress("Bot operation completed.") +``` + +# Instantiate and run the bot + +```python +bot = FootballPlayerBot() +bot.bot_operation() +``` diff --git a/tasks/test.py b/tasks/test.py new file mode 100644 index 00000000..628b96f5 --- /dev/null +++ b/tasks/test.py @@ -0,0 +1,34 @@ +import pywikibot + +site = pywikibot.Site("ar", "wikipedia") + +# page_name2 = "مستخدم:لوقا/ملعب 37" +# +# page2 = pywikibot.Page(site, page_name2) +# +# print(page2.get_parsed_page()) +# +# exit() + +page_name = "دوال مثلثية" + +page = pywikibot.Page(site, page_name) + +# +rev = page.getOldVersion(63408845) + +print(rev) + +# +# replace_text = """== هوامش وملاحظات == +# {{مراجع|مجموعة=ملاحظة}} +# == مراجع == +# ;فهرس المراجع +# {{مراجع|محاذاة=نعم}} +# """ +# +# rev = rev.replace(replace_text, "") +# +# page2.text = rev +# +# page2.save("تجربة", minor=False, botflag=True) From 6213d4110e4c1c25eab6da45ebe11f8f7ede598f Mon Sep 17 00:00:00 2001 From: lokas Date: Fri, 11 Aug 2023 08:41:44 +0300 Subject: [PATCH 2/5] fix --- tasks/infobox_football_biography/todo.md | 124 ----------------------- tasks/test.py | 34 ------- 2 files changed, 158 deletions(-) delete mode 100644 tasks/test.py diff --git a/tasks/infobox_football_biography/todo.md 
b/tasks/infobox_football_biography/todo.md index 4a7dce00..c2687e2a 100644 --- a/tasks/infobox_football_biography/todo.md +++ b/tasks/infobox_football_biography/todo.md @@ -68,127 +68,3 @@ - [ ] Conduct a final review of the bot's operation and components. - [ ] Ensure that all tasks are completed and thoroughly tested. - [ ] Conclude the project, including documentation and any required reporting. - -# Football Player Data Translation Bot - pseduo code - -## Observer Pattern: Monitoring and Error Handling - -```python -class ErrorObserver: - def update(self, message): -# Log error messages - - -class ProgressObserver: - def update(self, message): -# Log progress messages - - -class FootballPlayerBot: - def __init__(self): - self.error_observer = ErrorObserver() - self.progress_observer = ProgressObserver() - self.translation_array = load_custom_translation_array() - - def notify_error(self, message): - self.error_observer.update(message) - - def notify_progress(self, message): - self.progress_observer.update(message) - - # ... 
(Other methods) -``` - -## Pipeline Pattern: Structured Data Processing - -```python -class DataExtractor: - def extract_data(self, article_url): - wikitext = self.fetch_wikitext(article_url) - parsed_data = self.parse_infobox_template(wikitext) - return parsed_data - - -class DataTranslator: - def __init__(self, translation_array): - self.translation_array = translation_array - - def translate(self, english_data): - # Translation logic using translation_array - pass - - -class TemplateIntegrator: - def integrate(self, arabic_article_url, integrated_template): - arabic_wikitext = self.fetch_wikitext(arabic_article_url) - arabic_template = self.extract_arabic_template(arabic_wikitext) - # Integration logic using integrated_template - pass - - -class QualityAssurer: - def validate_translation(self, translation): - # Validation logic - pass - - def validate_integration(self, integrated_template): - # Validation logic - pass -``` - -## Decorator Pattern: Quality Assurance - -```python -class TranslationQualityChecker: - def __init__(self, translator): - self.translator = translator - - def translate(self, english_data): - translated_data = self.translator.translate(english_data) - # Validate translated_data - return translated_data - - -class IntegrationQualityChecker: - def __init__(self, integrator): - self.integrator = integrator - - def integrate(self, arabic_article_url, integrated_template): - self.integrator.integrate(arabic_article_url, integrated_template) - # Validate integrated_template -``` - -## Template Method Pattern: Bot Operation Structure - -```python -class FootballPlayerBot: - # ... 
(Other methods) - - def bot_operation(self): - self.notify_progress("Starting bot operation...") - - data_extractor = DataExtractor() - data_translator = DataTranslator(self.translation_array) - template_integrator = TemplateIntegrator() - - quality_translator = TranslationQualityChecker(data_translator) - quality_integrator = IntegrationQualityChecker(template_integrator) - - for each article in English Wikipedia: - english_data = data_extractor.extract_data(article.url) - translated_data = quality_translator.translate(english_data) - - arabic_wikitext = self.fetch_wikitext(article.arabic_url) - integrated_template = quality_integrator.integrate(article.arabic_url, translated_data) - - # ... (Other steps) - - self.notify_progress("Bot operation completed.") -``` - -# Instantiate and run the bot - -```python -bot = FootballPlayerBot() -bot.bot_operation() -``` diff --git a/tasks/test.py b/tasks/test.py deleted file mode 100644 index 628b96f5..00000000 --- a/tasks/test.py +++ /dev/null @@ -1,34 +0,0 @@ -import pywikibot - -site = pywikibot.Site("ar", "wikipedia") - -# page_name2 = "مستخدم:لوقا/ملعب 37" -# -# page2 = pywikibot.Page(site, page_name2) -# -# print(page2.get_parsed_page()) -# -# exit() - -page_name = "دوال مثلثية" - -page = pywikibot.Page(site, page_name) - -# -rev = page.getOldVersion(63408845) - -print(rev) - -# -# replace_text = """== هوامش وملاحظات == -# {{مراجع|مجموعة=ملاحظة}} -# == مراجع == -# ;فهرس المراجع -# {{مراجع|محاذاة=نعم}} -# """ -# -# rev = rev.replace(replace_text, "") -# -# page2.text = rev -# -# page2.save("تجربة", minor=False, botflag=True) From 23b332222e717ecc153326c7be311f787ecd85e8 Mon Sep 17 00:00:00 2001 From: lokas Date: Sun, 1 Oct 2023 11:32:57 +0300 Subject: [PATCH 3/5] add log --- .../core/__init__.py | 0 .../core/football_player_bot.py | 24 ++++++++++++++ .../core/logger/__init__.py | 0 .../core/logger/abstract_logger.py | 31 +++++++++++++++++++ .../core/logger/console_logger.py | 13 ++++++++ .../core/logger/error_logger.py 
| 10 ++++++ .../core/logger/file_logger.py | 10 ++++++ tasks/infobox_football_biography/run.py | 3 ++ tasks/infobox_football_biography/todo.md | 6 ++-- 9 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 tasks/infobox_football_biography/core/__init__.py create mode 100644 tasks/infobox_football_biography/core/football_player_bot.py create mode 100644 tasks/infobox_football_biography/core/logger/__init__.py create mode 100644 tasks/infobox_football_biography/core/logger/abstract_logger.py create mode 100644 tasks/infobox_football_biography/core/logger/console_logger.py create mode 100644 tasks/infobox_football_biography/core/logger/error_logger.py create mode 100644 tasks/infobox_football_biography/core/logger/file_logger.py create mode 100644 tasks/infobox_football_biography/run.py diff --git a/tasks/infobox_football_biography/core/__init__.py b/tasks/infobox_football_biography/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tasks/infobox_football_biography/core/football_player_bot.py b/tasks/infobox_football_biography/core/football_player_bot.py new file mode 100644 index 00000000..f412ea7b --- /dev/null +++ b/tasks/infobox_football_biography/core/football_player_bot.py @@ -0,0 +1,24 @@ +from tasks.infobox_football_biography.core.logger.abstract_logger import AbstractLogger +from tasks.infobox_football_biography.core.logger.console_logger import ConsoleLogger +from tasks.infobox_football_biography.core.logger.error_logger import ErrorLogger +from tasks.infobox_football_biography.core.logger.file_logger import FileLogger + + +class FootballPlayerBot: + + def getChainOfLoggers(self) -> AbstractLogger: + errorLogger = ErrorLogger(AbstractLogger.ERROR) + fileLogger = FileLogger(AbstractLogger.DEBUG) + consoleLogger = ConsoleLogger(AbstractLogger.INFO) + + errorLogger.nextLogger = fileLogger + fileLogger.nextLogger = consoleLogger + + return errorLogger + + def __init__(self): + self.logger = self.getChainOfLoggers() + + 
self.logger.logMessage(AbstractLogger.INFO, "Start") + self.logger.logMessage(AbstractLogger.DEBUG, "debug message") + self.logger.logMessage(AbstractLogger.ERROR, "error message") diff --git a/tasks/infobox_football_biography/core/logger/__init__.py b/tasks/infobox_football_biography/core/logger/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tasks/infobox_football_biography/core/logger/abstract_logger.py b/tasks/infobox_football_biography/core/logger/abstract_logger.py new file mode 100644 index 00000000..8c1e20a2 --- /dev/null +++ b/tasks/infobox_football_biography/core/logger/abstract_logger.py @@ -0,0 +1,31 @@ +from abc import ABC, abstractmethod + + +class AbstractLogger(ABC): + INFO = 1 + DEBUG = 2 + WARNING = 3 + ERROR = 4 + FATAL = 5 + + _level = None + # next element in chain or responsibility + _nextLogger = None + + @property + def nextLogger(self): + return self._nextLogger + + @nextLogger.setter + def nextLogger(self, value): + self._nextLogger = value + + def logMessage(self, level, message): + if self._level <= level: + self.write(message) + if self._nextLogger is not None: + self._nextLogger.logMessage(level, message) + + @abstractmethod + def write(self, message): + pass diff --git a/tasks/infobox_football_biography/core/logger/console_logger.py b/tasks/infobox_football_biography/core/logger/console_logger.py new file mode 100644 index 00000000..b1e22fad --- /dev/null +++ b/tasks/infobox_football_biography/core/logger/console_logger.py @@ -0,0 +1,13 @@ +from abc import ABC + +from tasks.infobox_football_biography.core.logger.abstract_logger import AbstractLogger + + +class ConsoleLogger(AbstractLogger, ABC): + def __init__(self, level): + super().__init__() + self._level = level + + def write(self, message): + # todo: add Timestamps, context, etc + print("Standard Console::Logger: " + message) diff --git a/tasks/infobox_football_biography/core/logger/error_logger.py 
b/tasks/infobox_football_biography/core/logger/error_logger.py new file mode 100644 index 00000000..26176b51 --- /dev/null +++ b/tasks/infobox_football_biography/core/logger/error_logger.py @@ -0,0 +1,10 @@ +from tasks.infobox_football_biography.core.logger.abstract_logger import AbstractLogger + + +class ErrorLogger(AbstractLogger): + def __init__(self, level): + super().__init__() + self._level = level + + def write(self, message): + print("Error Console::Logger: " + message) diff --git a/tasks/infobox_football_biography/core/logger/file_logger.py b/tasks/infobox_football_biography/core/logger/file_logger.py new file mode 100644 index 00000000..bac7fa9f --- /dev/null +++ b/tasks/infobox_football_biography/core/logger/file_logger.py @@ -0,0 +1,10 @@ +from tasks.infobox_football_biography.core.logger.abstract_logger import AbstractLogger + + +class FileLogger(AbstractLogger): + def __init__(self, level): + super().__init__() + self._level = level + + def write(self, message): + print("FileLogger::Logger: " + message) diff --git a/tasks/infobox_football_biography/run.py b/tasks/infobox_football_biography/run.py new file mode 100644 index 00000000..4ab7dabe --- /dev/null +++ b/tasks/infobox_football_biography/run.py @@ -0,0 +1,3 @@ +from tasks.infobox_football_biography.core.football_player_bot import FootballPlayerBot + +bot = FootballPlayerBot() diff --git a/tasks/infobox_football_biography/todo.md b/tasks/infobox_football_biography/todo.md index c2687e2a..8b90a25f 100644 --- a/tasks/infobox_football_biography/todo.md +++ b/tasks/infobox_football_biography/todo.md @@ -2,9 +2,9 @@ ## Observer Pattern: Monitoring and Error Handling -- [ ] Implement an observer pattern to monitor the bot's progress and detect errors. -- [ ] Set up error handling mechanisms for each stage of the bot's operation. -- [ ] Create a logging system to record errors and progress. +- [x] Implement an Chain of Responsibility Pattern to monitor the bot's progress and detect errors. 
+- [x] Set up error handling mechanisms for each stage of the bot's operation. +- [x] Create a logging system to record errors and progress. ## Pipeline Pattern: Structured Data Processing From 57ef6a6972b7f69f5d6bbcae49904a25e171d3d3 Mon Sep 17 00:00:00 2001 From: lokas Date: Sun, 1 Oct 2023 12:35:33 +0300 Subject: [PATCH 4/5] add DataExtractor --- tasks/infobox_football_biography/run.py | 21 +++++++- .../{core => src}/__init__.py | 0 .../data_extraction}/__init__.py | 0 .../src/data_extraction/data_extractor.py | 52 +++++++++++++++++++ .../src/data_extraction/templates/__init__.py | 0 .../templates/infobox_football_biography.py | 36 +++++++++++++ .../{core => src}/football_player_bot.py | 8 +-- .../src/logger/__init__.py | 0 .../{core => src}/logger/abstract_logger.py | 12 ++--- .../{core => src}/logger/console_logger.py | 2 +- .../{core => src}/logger/error_logger.py | 2 +- .../{core => src}/logger/file_logger.py | 2 +- tasks/infobox_football_biography/todo.md | 8 +-- 13 files changed, 125 insertions(+), 18 deletions(-) rename tasks/infobox_football_biography/{core => src}/__init__.py (100%) rename tasks/infobox_football_biography/{core/logger => src/data_extraction}/__init__.py (100%) create mode 100644 tasks/infobox_football_biography/src/data_extraction/data_extractor.py create mode 100644 tasks/infobox_football_biography/src/data_extraction/templates/__init__.py create mode 100644 tasks/infobox_football_biography/src/data_extraction/templates/infobox_football_biography.py rename tasks/infobox_football_biography/{core => src}/football_player_bot.py (65%) create mode 100644 tasks/infobox_football_biography/src/logger/__init__.py rename tasks/infobox_football_biography/{core => src}/logger/abstract_logger.py (82%) rename tasks/infobox_football_biography/{core => src}/logger/console_logger.py (76%) rename tasks/infobox_football_biography/{core => src}/logger/error_logger.py (69%) rename tasks/infobox_football_biography/{core => src}/logger/file_logger.py (69%) 
diff --git a/tasks/infobox_football_biography/run.py b/tasks/infobox_football_biography/run.py index 4ab7dabe..0f56feb0 100644 --- a/tasks/infobox_football_biography/run.py +++ b/tasks/infobox_football_biography/run.py @@ -1,3 +1,22 @@ -from tasks.infobox_football_biography.core.football_player_bot import FootballPlayerBot +import pywikibot + +from tasks.infobox_football_biography.src.data_extraction.templates.infobox_football_biography import \ + InfoboxFootballBiography +from tasks.infobox_football_biography.src.football_player_bot import FootballPlayerBot bot = FootballPlayerBot() + +site = pywikibot.Site("en", "wikipedia") +page_title = "Paul_Abasolo" +page = pywikibot.Page(site, page_title) + +template = InfoboxFootballBiography( + logger=bot.getChainOfLoggers(), + text_page=page.text +) + +template.parse() +template.template_name() +template.parameters_list() +if template.check(): + template.list.sort(key=lambda x: x["name"]) diff --git a/tasks/infobox_football_biography/core/__init__.py b/tasks/infobox_football_biography/src/__init__.py similarity index 100% rename from tasks/infobox_football_biography/core/__init__.py rename to tasks/infobox_football_biography/src/__init__.py diff --git a/tasks/infobox_football_biography/core/logger/__init__.py b/tasks/infobox_football_biography/src/data_extraction/__init__.py similarity index 100% rename from tasks/infobox_football_biography/core/logger/__init__.py rename to tasks/infobox_football_biography/src/data_extraction/__init__.py diff --git a/tasks/infobox_football_biography/src/data_extraction/data_extractor.py b/tasks/infobox_football_biography/src/data_extraction/data_extractor.py new file mode 100644 index 00000000..2fcc1055 --- /dev/null +++ b/tasks/infobox_football_biography/src/data_extraction/data_extractor.py @@ -0,0 +1,52 @@ +from abc import abstractmethod, ABC + +import wikitextparser as wtp + +from core.utils.helpers import prepare_str +from tasks.infobox_football_biography.src.logger.abstract_logger 
import AbstractLogger + + +class DataExtractor(ABC): + def __init__(self, text_page: str, logger: AbstractLogger): + self.text_page = text_page + self.logger = logger + self.list = [] + + @abstractmethod + def template_name(self) -> str: + pass + + @abstractmethod + def parameters_list(self) -> list: + pass + + def check(self) -> bool: + return len(self.list) > 0 + + def parse(self): + self.logger.logMessage(AbstractLogger.INFO, "start extract data") + parsed = wtp.parse(self.text_page) + self.logger.logMessage(AbstractLogger.INFO, "end extract data") + self.logger.logMessage(AbstractLogger.INFO, "start extract template") + templates = parsed.templates + if not len(templates): + self.logger.logMessage(AbstractLogger.WARNING, "no template found in page") + return + if self.template_name() is None: + self.logger.logMessage(AbstractLogger.ERROR, "no template name set in class you are using") + return + if self.parameters_list() is None or len(self.parameters_list()) == 0: + self.logger.logMessage(AbstractLogger.ERROR, "no parameters set in class you are using") + return + for template in templates: + if prepare_str(template.name) == prepare_str(self.template_name()): + self.logger.logMessage(AbstractLogger.INFO, "start extract parameters") + for parameter in self.parameters_list(): + for param in template.arguments: + if prepare_str(param.name) == prepare_str(parameter): + self.logger.logMessage(AbstractLogger.INFO, f"{param.name}: {param.value}") + self.list.append({ + "name": param.name, + "value": param.value + }) + break diff --git a/tasks/infobox_football_biography/src/data_extraction/templates/__init__.py b/tasks/infobox_football_biography/src/data_extraction/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tasks/infobox_football_biography/src/data_extraction/templates/infobox_football_biography.py b/tasks/infobox_football_biography/src/data_extraction/templates/infobox_football_biography.py new file mode 100644 index 
00000000..5464894a --- /dev/null +++ b/tasks/infobox_football_biography/src/data_extraction/templates/infobox_football_biography.py @@ -0,0 +1,36 @@ +from abc import ABC + +from tasks.infobox_football_biography.src.data_extraction.data_extractor import DataExtractor +from tasks.infobox_football_biography.src.logger.abstract_logger import AbstractLogger + + +class InfoboxFootballBiography(DataExtractor, ABC): + def __init__(self, text_page: str, logger: AbstractLogger): + super().__init__(text_page=text_page, logger=logger) + + def template_name(self) -> str: + return "Infobox football biography" + + def parameters_list(self) -> list: + return [ + "position", + "years1", "clubs1", "caps1", "goals1", + "years2", "clubs2", "caps2", "goals2", + "years3", "clubs3", "caps3", "goals3", + "years4", "clubs4", "caps4", "goals4", + "years5", "clubs5", "caps5", "goals5", + "years6", "clubs6", "caps6", "goals6", + "years7", "clubs7", "caps7", "goals7", + "years8", "clubs8", "caps8", "goals8", + "years9", "clubs9", "caps9", "goals9", + "years10", "clubs10", "caps10", "goals10", + "years11", "clubs11", "caps11", "goals11", + "years12", "clubs12", "caps12", "goals12", + "years13", "clubs13", "caps13", "goals13", + "years14", "clubs14", "caps14", "goals14", + "years15", "clubs15", "caps15", "goals15", + "years16", "clubs16", "caps16", "goals16", + "years17", "clubs17", "caps17", "goals17", + "years18", "clubs18", "caps18", "goals18", + "years19", "clubs19", "caps19", "goals19", + ] diff --git a/tasks/infobox_football_biography/core/football_player_bot.py b/tasks/infobox_football_biography/src/football_player_bot.py similarity index 65% rename from tasks/infobox_football_biography/core/football_player_bot.py rename to tasks/infobox_football_biography/src/football_player_bot.py index f412ea7b..6e3b7747 100644 --- a/tasks/infobox_football_biography/core/football_player_bot.py +++ b/tasks/infobox_football_biography/src/football_player_bot.py @@ -1,7 +1,7 @@ -from 
tasks.infobox_football_biography.core.logger.abstract_logger import AbstractLogger -from tasks.infobox_football_biography.core.logger.console_logger import ConsoleLogger -from tasks.infobox_football_biography.core.logger.error_logger import ErrorLogger -from tasks.infobox_football_biography.core.logger.file_logger import FileLogger +from tasks.infobox_football_biography.src.logger.abstract_logger import AbstractLogger +from tasks.infobox_football_biography.src.logger.console_logger import ConsoleLogger +from tasks.infobox_football_biography.src.logger.error_logger import ErrorLogger +from tasks.infobox_football_biography.src.logger.file_logger import FileLogger class FootballPlayerBot: diff --git a/tasks/infobox_football_biography/src/logger/__init__.py b/tasks/infobox_football_biography/src/logger/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tasks/infobox_football_biography/core/logger/abstract_logger.py b/tasks/infobox_football_biography/src/logger/abstract_logger.py similarity index 82% rename from tasks/infobox_football_biography/core/logger/abstract_logger.py rename to tasks/infobox_football_biography/src/logger/abstract_logger.py index 8c1e20a2..e18be231 100644 --- a/tasks/infobox_football_biography/core/logger/abstract_logger.py +++ b/tasks/infobox_football_biography/src/logger/abstract_logger.py @@ -2,13 +2,13 @@ class AbstractLogger(ABC): - INFO = 1 - DEBUG = 2 - WARNING = 3 - ERROR = 4 - FATAL = 5 + INFO: int = 1 + DEBUG: int = 2 + WARNING: int = 3 + ERROR: int = 4 + FATAL: int = 5 - _level = None + _level: int = None # next element in chain or responsibility _nextLogger = None diff --git a/tasks/infobox_football_biography/core/logger/console_logger.py b/tasks/infobox_football_biography/src/logger/console_logger.py similarity index 76% rename from tasks/infobox_football_biography/core/logger/console_logger.py rename to tasks/infobox_football_biography/src/logger/console_logger.py index b1e22fad..dbeb112d 100644 --- 
a/tasks/infobox_football_biography/core/logger/console_logger.py +++ b/tasks/infobox_football_biography/src/logger/console_logger.py @@ -1,6 +1,6 @@ from abc import ABC -from tasks.infobox_football_biography.core.logger.abstract_logger import AbstractLogger +from tasks.infobox_football_biography.src.logger.abstract_logger import AbstractLogger class ConsoleLogger(AbstractLogger, ABC): diff --git a/tasks/infobox_football_biography/core/logger/error_logger.py b/tasks/infobox_football_biography/src/logger/error_logger.py similarity index 69% rename from tasks/infobox_football_biography/core/logger/error_logger.py rename to tasks/infobox_football_biography/src/logger/error_logger.py index 26176b51..cda643bc 100644 --- a/tasks/infobox_football_biography/core/logger/error_logger.py +++ b/tasks/infobox_football_biography/src/logger/error_logger.py @@ -1,4 +1,4 @@ -from tasks.infobox_football_biography.core.logger.abstract_logger import AbstractLogger +from tasks.infobox_football_biography.src.logger.abstract_logger import AbstractLogger class ErrorLogger(AbstractLogger): diff --git a/tasks/infobox_football_biography/core/logger/file_logger.py b/tasks/infobox_football_biography/src/logger/file_logger.py similarity index 69% rename from tasks/infobox_football_biography/core/logger/file_logger.py rename to tasks/infobox_football_biography/src/logger/file_logger.py index bac7fa9f..2a3231dd 100644 --- a/tasks/infobox_football_biography/core/logger/file_logger.py +++ b/tasks/infobox_football_biography/src/logger/file_logger.py @@ -1,4 +1,4 @@ -from tasks.infobox_football_biography.core.logger.abstract_logger import AbstractLogger +from tasks.infobox_football_biography.src.logger.abstract_logger import AbstractLogger class FileLogger(AbstractLogger): diff --git a/tasks/infobox_football_biography/todo.md b/tasks/infobox_football_biography/todo.md index 8b90a25f..dfd7adf1 100644 --- a/tasks/infobox_football_biography/todo.md +++ b/tasks/infobox_football_biography/todo.md @@ -8,10 
+8,10 @@ ## Pipeline Pattern: Structured Data Processing -- [ ] Data Extraction Stage from English Wikipedia: - - [ ] Identify English Wikipedia articles with Infobox football biography templates. - - [ ] Configure the wikitext parsing library for English. - - [ ] Extract data from the English Infobox template and organize it. +- [X] Data Extraction Stage from English Wikipedia: + - [X] Identify English Wikipedia articles with Infobox football biography templates. + - [X] Configure the wikitext parsing library for English. + - [X] Extract data from the English Infobox template and organize it. - [ ] Translation Stage (Using Custom Array): - [ ] Create a custom translation array mapping English keys to Arabic keys. From 85406ffb916936aec0783ba920fd820a3f1b68ef Mon Sep 17 00:00:00 2001 From: lokas Date: Sun, 1 Oct 2023 17:30:40 +0300 Subject: [PATCH 5/5] add translation_chain --- .../src/data_translator/__init__.py | 0 .../data_translator/classification_context.py | 13 +++++++ .../data_classification/__init__.py | 0 .../normal_text_classification_strategy.py | 7 ++++ .../number_classification_strategy.py | 11 ++++++ .../wikilink_classification_strategy.py | 10 +++++ .../data_translation/__init__.py | 0 .../normal_text_translation_handler.py | 7 ++++ .../number_translation_handler.py | 7 ++++ .../wikilink_translation_handler.py | 7 ++++ .../data_translation_handler.py | 7 ++++ .../src/data_translator/translation_chain.py | 13 +++++++ .../value_classification_strategy.py | 7 ++++ tasks/infobox_football_biography/test.py | 38 +++++++++++++++++++ 14 files changed, 127 insertions(+) create mode 100644 tasks/infobox_football_biography/src/data_translator/__init__.py create mode 100644 tasks/infobox_football_biography/src/data_translator/classification_context.py create mode 100644 tasks/infobox_football_biography/src/data_translator/data_classification/__init__.py create mode 100644 
tasks/infobox_football_biography/src/data_translator/data_classification/normal_text_classification_strategy.py create mode 100644 tasks/infobox_football_biography/src/data_translator/data_classification/number_classification_strategy.py create mode 100644 tasks/infobox_football_biography/src/data_translator/data_classification/wikilink_classification_strategy.py create mode 100644 tasks/infobox_football_biography/src/data_translator/data_translation/__init__.py create mode 100644 tasks/infobox_football_biography/src/data_translator/data_translation/normal_text_translation_handler.py create mode 100644 tasks/infobox_football_biography/src/data_translator/data_translation/number_translation_handler.py create mode 100644 tasks/infobox_football_biography/src/data_translator/data_translation/wikilink_translation_handler.py create mode 100644 tasks/infobox_football_biography/src/data_translator/data_translation_handler.py create mode 100644 tasks/infobox_football_biography/src/data_translator/translation_chain.py create mode 100644 tasks/infobox_football_biography/src/data_translator/value_classification_strategy.py create mode 100644 tasks/infobox_football_biography/test.py diff --git a/tasks/infobox_football_biography/src/data_translator/__init__.py b/tasks/infobox_football_biography/src/data_translator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tasks/infobox_football_biography/src/data_translator/classification_context.py b/tasks/infobox_football_biography/src/data_translator/classification_context.py new file mode 100644 index 00000000..41765f2e --- /dev/null +++ b/tasks/infobox_football_biography/src/data_translator/classification_context.py @@ -0,0 +1,13 @@ +class ClassificationContext: + def __init__(self): + self.strategies = [] + + def add_strategy(self, strategy): + self.strategies.append(strategy) + + def classify(self, value): + for strategy in self.strategies: + result = strategy.classify(value) + if result: + return result + 
return "unknown" diff --git a/tasks/infobox_football_biography/src/data_translator/data_classification/__init__.py b/tasks/infobox_football_biography/src/data_translator/data_classification/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tasks/infobox_football_biography/src/data_translator/data_classification/normal_text_classification_strategy.py b/tasks/infobox_football_biography/src/data_translator/data_classification/normal_text_classification_strategy.py new file mode 100644 index 00000000..8784cd31 --- /dev/null +++ b/tasks/infobox_football_biography/src/data_translator/data_classification/normal_text_classification_strategy.py @@ -0,0 +1,7 @@ +from tasks.infobox_football_biography.src.data_translator.value_classification_strategy import \ + ValueClassificationStrategy + + +class NormalTextClassificationStrategy(ValueClassificationStrategy): + def classify(self, value): + return "normal_text" diff --git a/tasks/infobox_football_biography/src/data_translator/data_classification/number_classification_strategy.py b/tasks/infobox_football_biography/src/data_translator/data_classification/number_classification_strategy.py new file mode 100644 index 00000000..72d8350d --- /dev/null +++ b/tasks/infobox_football_biography/src/data_translator/data_classification/number_classification_strategy.py @@ -0,0 +1,11 @@ +from tasks.infobox_football_biography.src.data_translator.value_classification_strategy import \ + ValueClassificationStrategy + + +class NumberClassificationStrategy(ValueClassificationStrategy): + def classify(self, value): + try: + float(value) + return "number" + except ValueError: + return None diff --git a/tasks/infobox_football_biography/src/data_translator/data_classification/wikilink_classification_strategy.py b/tasks/infobox_football_biography/src/data_translator/data_classification/wikilink_classification_strategy.py new file mode 100644 index 00000000..d6ba71fd --- /dev/null +++ 
b/tasks/infobox_football_biography/src/data_translator/data_classification/wikilink_classification_strategy.py @@ -0,0 +1,10 @@ +from tasks.infobox_football_biography.src.data_translator.value_classification_strategy import \ + ValueClassificationStrategy + + +class WikiLinkClassificationStrategy(ValueClassificationStrategy): + def classify(self, value): + # todo: use wikitextparser + if value.startswith("[[") and value.endswith("]]"): + return "wiki_link" + return None diff --git a/tasks/infobox_football_biography/src/data_translator/data_translation/__init__.py b/tasks/infobox_football_biography/src/data_translator/data_translation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tasks/infobox_football_biography/src/data_translator/data_translation/normal_text_translation_handler.py b/tasks/infobox_football_biography/src/data_translator/data_translation/normal_text_translation_handler.py new file mode 100644 index 00000000..c783ece0 --- /dev/null +++ b/tasks/infobox_football_biography/src/data_translator/data_translation/normal_text_translation_handler.py @@ -0,0 +1,7 @@ +from tasks.infobox_football_biography.src.data_translator.data_translation_handler import DataTranslationHandler + + +class NormalTextTranslationHandler(DataTranslationHandler): + def translate(self, value): + # Translate the normal text to Arabic (implement this logic) + return value diff --git a/tasks/infobox_football_biography/src/data_translator/data_translation/number_translation_handler.py b/tasks/infobox_football_biography/src/data_translator/data_translation/number_translation_handler.py new file mode 100644 index 00000000..f391dfa8 --- /dev/null +++ b/tasks/infobox_football_biography/src/data_translator/data_translation/number_translation_handler.py @@ -0,0 +1,7 @@ +from tasks.infobox_football_biography.src.data_translator.data_translation_handler import DataTranslationHandler + + +class NumberTranslationHandler(DataTranslationHandler): + def translate(self,
value): + # Translate the number to Arabic (implement this logic); for now only str() is applied + return str(value) diff --git a/tasks/infobox_football_biography/src/data_translator/data_translation/wikilink_translation_handler.py b/tasks/infobox_football_biography/src/data_translator/data_translation/wikilink_translation_handler.py new file mode 100644 index 00000000..5e35ca48 --- /dev/null +++ b/tasks/infobox_football_biography/src/data_translator/data_translation/wikilink_translation_handler.py @@ -0,0 +1,7 @@ +from tasks.infobox_football_biography.src.data_translator.data_translation_handler import DataTranslationHandler + + +class WikiLinkTranslationHandler(DataTranslationHandler): + def translate(self, value): + # Translate the wiki link to Arabic (implement this logic) + return value diff --git a/tasks/infobox_football_biography/src/data_translator/data_translation_handler.py b/tasks/infobox_football_biography/src/data_translator/data_translation_handler.py new file mode 100644 index 00000000..ebbc1590 --- /dev/null +++ b/tasks/infobox_football_biography/src/data_translator/data_translation_handler.py @@ -0,0 +1,7 @@ +from abc import abstractmethod, ABC + + +class DataTranslationHandler(ABC): + @abstractmethod + def translate(self, value): + pass diff --git a/tasks/infobox_football_biography/src/data_translator/translation_chain.py b/tasks/infobox_football_biography/src/data_translator/translation_chain.py new file mode 100644 index 00000000..fa0f4a02 --- /dev/null +++ b/tasks/infobox_football_biography/src/data_translator/translation_chain.py @@ -0,0 +1,13 @@ +class TranslationChain: + def __init__(self): + self.handlers = [] + + def add_handler(self, handler): + self.handlers.append(handler) + + def translate(self, value): + for handler in self.handlers: + translated_value = handler.translate(value) + if translated_value: + return translated_value + return value diff --git a/tasks/infobox_football_biography/src/data_translator/value_classification_strategy.py
b/tasks/infobox_football_biography/src/data_translator/value_classification_strategy.py new file mode 100644 index 00000000..404f54a6 --- /dev/null +++ b/tasks/infobox_football_biography/src/data_translator/value_classification_strategy.py @@ -0,0 +1,7 @@ +from abc import abstractmethod, ABC + + +class ValueClassificationStrategy(ABC): + @abstractmethod + def classify(self, value): + pass diff --git a/tasks/infobox_football_biography/test.py b/tasks/infobox_football_biography/test.py new file mode 100644 index 00000000..272d5534 --- /dev/null +++ b/tasks/infobox_football_biography/test.py @@ -0,0 +1,38 @@ +from tasks.infobox_football_biography.src.data_translator.classification_context import ClassificationContext +from tasks.infobox_football_biography.src.data_translator.data_classification.normal_text_classification_strategy import \ + NormalTextClassificationStrategy +from tasks.infobox_football_biography.src.data_translator.data_classification.number_classification_strategy import \ + NumberClassificationStrategy +from tasks.infobox_football_biography.src.data_translator.data_classification.wikilink_classification_strategy import \ + WikiLinkClassificationStrategy +from tasks.infobox_football_biography.src.data_translator.data_translation.number_translation_handler import \ + NumberTranslationHandler +from tasks.infobox_football_biography.src.data_translator.translation_chain import TranslationChain + +classification_context = ClassificationContext() +classification_context.add_strategy(NumberClassificationStrategy()) +classification_context.add_strategy(WikiLinkClassificationStrategy()) +classification_context.add_strategy(NormalTextClassificationStrategy()) + +# Create translation chain and add translation handlers +translation_chain = TranslationChain() +translation_chain.add_handler(NumberTranslationHandler()) +# Add handlers for WikiLink and NormalText as needed + +# Sample data +data = [ + {"name": "param1", "value": "42"}, + {"name": "param2", "value": 
"[[link to article]]"}, + {"name": "param3", "value": "This is normal text."} +] + +for item in data: + value = item["value"] + classification = classification_context.classify(value) + translation = translation_chain.translate(value) + + print(f"Name: {item['name']}") + print(f"Value: {value}") + print(f"Classification: {classification}") + print(f"Translation: {translation}") + print("---")