From 6f5f63a6d820ae4004563cc8ff02dd46bc740e72 Mon Sep 17 00:00:00 2001 From: Jesper Hogstrom Date: Wed, 30 Aug 2023 13:33:26 +0200 Subject: [PATCH] Added replacement facility for page content. By using either command line replacement pairs or specifying a file with json definitions of replacements, an arbitrary regexp ("pattern") can be detected and replaced with another string, including expanding captured groups in the pattern. The replacement phase takes place just before upsert, so all other textual manipulations are done by that time. Replacements happen in a deterministic sequence. There are ample opportunities to get unexpected (but logically consistent) results by inadvertently replacing the result of a previous replacement. Format of json file: ``` { "environment": [ { "import": "", "path": "" } ], "replacements":[ { "name": "", "pattern": "", "new_value": "", "evaluate": false } ] } ``` The `environment` block is optional and used for very dynamic replacements. A python source file specified there is dynamically imported at run time. The `new_value` field can then specify a `module.function` that returns a string value. As an example, the following replaces "TODAY" with an iso-formatted datetime. ``` { "environment": [ { "import": "funcs", "path": "funcs.py" } ], "replacements":[ { "name": "Todays date", "pattern": "TODAY", "new_value": "funcs.today", "evaluate": true } ] } ``` funcs.py: ``` import datetime def today(term): return datetime.datetime.now().isoformat() ``` The parameter `term` is the Match object that https://docs.python.org/3/library/re.html#re.subn passes to a replacement function. 
--- README.md | 63 ++++++++++++++++++++++++++++++++++++++ md2cf/__main__.py | 19 ++++++++++++ md2cf/replacements.py | 70 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+) create mode 100644 md2cf/replacements.py diff --git a/README.md b/README.md index 18750cf..3bd259e 100644 --- a/README.md +++ b/README.md @@ -293,6 +293,69 @@ folderA/ ``` +## Replacements + +By using either command line replacement pairs or specifying a file with json definitions of replacements, an arbitrary regexp ("pattern") can be detected and replaced with another string, including expanding captured groups in the pattern. + +The replacement phase takes place just before upsert, so all other textual manipulations are done by that time. + +Replacements happen in a deterministic sequence. There are ample opportunities to get unexpected (but logically consistent) results by inadvertently replacing the result of a previous replacement. +
+Format of json file + +```json +{ + "environment": [ + { + "import": "", + "path": "" + } + ], + "replacements":[ + { + "name": "", + "pattern": "", + "new_value": "", + "evaluate": false + } + ] +} +``` +
+ +### Advanced replacements + +The `environment` block is optional and used for very dynamic replacements. A python source file specified there is dynamically imported at run time. The `new_value` field can then specify a `module.function` that returns a string value. As an example, the following replaces "TODAY" with an iso-formatted datetime. + +```json +{ + "environment": [ + { + "import": "funcs", + "path": "funcs.py" + } + ], + "replacements":[ + { + "name": "Todays date", + "pattern": "TODAY", + "new_value": "funcs.today", + "evaluate": true + } + ] +} +``` + +funcs.py +```python +import datetime + +def today(term): + return datetime.datetime.now().isoformat() +``` + +The parameter `term` is the Match object that [re.subn](https://docs.python.org/3/library/re.html#re.subn) passes to a replacement function. + ## Terminal output format By default, `md2cf` produces rich output with animated progress bars that are meant for human consumption. If the output is redirected to a file, the progress bars will not be displayed and only the final result will be written to the file. Error messages are always printed to standard error. diff --git a/md2cf/__main__.py b/md2cf/__main__.py index 7db41fb..7eb6808 100644 --- a/md2cf/__main__.py +++ b/md2cf/__main__.py @@ -23,6 +23,7 @@ minimal_output_console, ) from md2cf.document import Page +from md2cf.replacements import create_replacements from md2cf.tui import Md2cfTUI from md2cf.upsert import upsert_attachment, upsert_page @@ -258,6 +259,20 @@ def get_parser(): help="number of retry attempts if any API call fails", ) + parser.add_argument( + "--replace", + nargs="+", + action="append", + dest="replacements", + help="Specify replacements on the form =. 
Can be repeated many times", + ) + + parser.add_argument( + "--replacements", + dest="replacementfile", + help="Filename with replacement definition in json format", + ) + return parser @@ -299,6 +314,8 @@ def main(): console.quiet = True json_output_console.quiet = False + replacements = create_replacements(args.replacements, args.replacementfile) + confluence = api.MinimalConfluence( host=args.host, username=args.username, @@ -398,6 +415,8 @@ def main(): for page in pages_to_upload: pre_process_page(page, args, postface_markup, preface_markup, space_info) tui.start_item_task(page.original_title) + for replacement in replacements: + page = replacement.replace(page) upsert_page_result = None try: tui.set_item_progress_label(page.original_title, "Upserting") diff --git a/md2cf/replacements.py b/md2cf/replacements.py new file mode 100644 index 0000000..a5caa48 --- /dev/null +++ b/md2cf/replacements.py @@ -0,0 +1,70 @@ +import importlib.util +import json +import re +from typing import List + +from md2cf.console_output import console + + +class Replacement: + def __init__( + self, name: str, pattern: str, new_value: str, evaluate: bool = False + ) -> None: + self.name = name + self.pattern = pattern + self.new_value = new_value + self.evaluate = evaluate + + def replace(self, page): + console.print(f"Performing replacement '{self.name}'") + if self.evaluate: + new_value = eval(self.new_value) + else: + new_value = self.new_value + page.body, count = re.subn(f"({self.pattern})", new_value, page.body) + console.print(f">> {count} replacements made") + return page + + def __repr__(self) -> str: + return self.name + + +def create_replacements(replacements, replacementfile: str) -> List[Replacement]: + result = [] + commandline_replacements = ( + [item for sublist in replacements for item in sublist] if replacements else [] + ) + + # Create Replacement objects for the commandline replacements + for i, r in enumerate(commandline_replacements): + 
result.append(Replacement(f"CLI replacement {i}", *r.split("=", 1))) + + # Opt out if no file specified + if not replacementfile: + return result + + file_replacements = json.load(open(replacementfile)) + # Do we need to load any modules? + for env in file_replacements.get("environment", []): + if env.get("import"): + spec = importlib.util.spec_from_file_location( + env["import"], env.get("path") + ) + globals()[env["import"]] = importlib.util.module_from_spec(spec) + spec.loader.exec_module(globals()[env["import"]]) + + # Get the replacement definitions + for i, r in enumerate(file_replacements["replacements"]): + new_value = r["new_value"] + if isinstance(new_value, list): + new_value = "\n".join(new_value) + result.append( + Replacement( + r.get("name", f"File replacement {i}"), + r["pattern"], + new_value, + r.get("evaluate", False), + ) + ) + + return result