From eb4bfb3d6da49f4c84b7beab2a7574da56256736 Mon Sep 17 00:00:00 2001 From: pverkind Date: Fri, 8 Jul 2022 14:56:31 +0200 Subject: [PATCH 1/5] Fix issue with spaces and less than operator The solution to the spaces problem is to wrap the argument in quotation marks (this only works if there is only one pair of quotation marks!) The problem with the less than character is that CALL does not allow the less than character (see https://docs.microsoft.com/en-us/windows-server/administration/windows-commands/call): "Do not use pipes (|) or redirection symbols (< or >) with call." The solution could be to replace that character with a placeholder, and to replace that placeholder with a less than character in seriatim.py. I propose using LESSTHAN rather than the more common "LT" because "LT" is common enough in natural language to create problems if it is replaced with "<". --- bin/passim.cmd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bin/passim.cmd b/bin/passim.cmd index 3769854..6e684fb 100644 --- a/bin/passim.cmd +++ b/bin/passim.cmd @@ -1,3 +1,2 @@ @echo off -call %~dp0seriatim --all-pairs ^ - --fields "xxhash64(series)^ as^ gid" -f gid^ Date: Fri, 8 Jul 2022 15:01:23 +0200 Subject: [PATCH 2/5] Fix less than issue (Windows) A solution to the fact that the less than character is not allowed in CALL (in passim.cmd); use a placeholder "LESSTHAN" which is replaced in the config object with "<". Same approach could be used for the other problematic characters in CALL: pipe and greater than. 
--- passim/seriatim.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/passim/seriatim.py b/passim/seriatim.py index a5ca6ad..5a5ebe1 100644 --- a/passim/seriatim.py +++ b/passim/seriatim.py @@ -597,6 +597,11 @@ def main(args): parser.add_argument('outputPath', metavar='', help='output') config = parser.parse_args(args) + # replace placeholders in the filterpairs argument: + config.filterpairs = config.filterpairs.replace("LESSTHAN", "<") + config.filterpairs = config.filterpairs.replace("GREATERTHAN", ">") + config.filterpairs = config.filterpairs.replace("PIPE", "|") + print(config) spark = SparkSession.builder.appName('Passim Alignment').getOrCreate() From 8281e0b79b7982bef194220b4bfda6d8f424549d Mon Sep 17 00:00:00 2001 From: pverkind Date: Sat, 9 Jul 2022 00:43:11 +0200 Subject: [PATCH 3/5] Generalize the replacement commands Replace special characters <, >, |, & in all arguments (not just the built-in filterfields argument) with a placeholder which will be replaced with the original character in seriatim.py NB: unfortunately, this does not work with the equals sign; use _EQ_, _LTE_ and _GTE_ for such operators instead. --- bin/passim.cmd | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/bin/passim.cmd b/bin/passim.cmd index 6e684fb..c6bbfa3 100644 --- a/bin/passim.cmd +++ b/bin/passim.cmd @@ -1,2 +1,29 @@ @echo off -call %~dp0seriatim --all-pairs --fields "xxhash64(series) as gid" -f gidLESSTHANgid2 %* + +:: Replace special characters <, >, |, & in all arguments with a placeholder +:: which will be replaced with the original character in seriatim.py +:: NB: unfortunately, this does not work with the equals sign; use _EQ_, _LTE_ and _GTE_ instead. 
+ +:: make sure variables are not expanded before the script is run: +SETLOCAL EnableDelayedExpansion + +:: first, set up an empty variable that will contain all modified arguments: +set modifiedArgs= + +:: loop through all arguments passed to the script (%*) +:: and replace all special characters in them +:: finally, add each argument to the modified arguments list +for %%i in (%*) do ( + set arg=%%i + set arg=!arg:^<=_LT_! + set arg=!arg:^>=_GT_! + set arg=!arg:^|=_PIPE_! + set arg=!arg:^&=_AMPERSAND_! + set modifiedArgs=!modifiedArgs! !arg! + echo !arg! +) +::echo MODIFIED ARGS: +::echo %modifiedArgs% + +echo %~dp0seriatim.cmd --all-pairs --fields "xxhash64(series) as gid" -f gid_LT_gid2 %modifiedArgs% +call %~dp0seriatim.cmd --all-pairs --fields "xxhash64(series) as gid" -f gid_LT_gid2 %modifiedArgs% From b35fbc38d0fe0484fd4fca9a3b7419087a8f1b79 Mon Sep 17 00:00:00 2001 From: pverkind Date: Sat, 9 Jul 2022 00:51:34 +0200 Subject: [PATCH 4/5] placeholder replacement for all relevant fields I added replacement of placeholders for special characters to all fields I thought would need them. 
--- passim/seriatim.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/passim/seriatim.py b/passim/seriatim.py index 5a5ebe1..8d5c4f2 100644 --- a/passim/seriatim.py +++ b/passim/seriatim.py @@ -597,11 +597,9 @@ def main(args): parser.add_argument('outputPath', metavar='', help='output') config = parser.parse_args(args) - # replace placeholders in the filterpairs argument: - config.filterpairs = config.filterpairs.replace("LESSTHAN", "<") - config.filterpairs = config.filterpairs.replace("GREATERTHAN", ">") - config.filterpairs = config.filterpairs.replace("PIPE", "|") - + # replace placeholders in the relevant arguments: + config.filterpairs = config.filterpairs.replace("_LT_", "<").replace("_LTE_", "<=").replace("_GT_", ">").replace("_GTE_", ">=").replace("_PIPE_", "|").replace("_AMPERSAND_", "&") + config.link_features = config.link_features.replace("_LT_", "<").replace("_LTE_", "<=").replace("_GT_", ">").replace("_GTE_", ">=").replace("_PIPE_", "|").replace("_AMPERSAND_", "&") print(config) spark = SparkSession.builder.appName('Passim Alignment').getOrCreate() From 55117924bbb8721b9c7043760e6c640b364d12e2 Mon Sep 17 00:00:00 2001 From: pverkind Date: Sat, 9 Jul 2022 01:03:32 +0200 Subject: [PATCH 5/5] wrap replacement in function with try except --- passim/seriatim.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/passim/seriatim.py b/passim/seriatim.py index 8d5c4f2..3780089 100644 --- a/passim/seriatim.py +++ b/passim/seriatim.py @@ -545,6 +545,12 @@ def clusterJoin(self, config, corpus): setattr(DataFrame, 'clusterJoin', clusterJoin) +def replace_placeholders(s): + try: + return s.replace("_LT_", "<").replace("_LTE_", "<=").replace("_GT_", ">").replace("_GTE_", ">=").replace("_PIPE_", "|").replace("_AMPERSAND_", "&") + except: + return s + def main(args): parser = argparse.ArgumentParser(description='Passim Alignment', formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -598,8 +604,8 @@ def 
main(args): config = parser.parse_args(args) # replace placeholders in the relevant arguments: - config.filterpairs = config.filterpairs.replace("_LT_", "<").replace("_LTE_", "<=").replace("_GT_", ">").replace("_GTE_", ">=").replace("_PIPE_", "|").replace("_AMPERSAND_", "&") - config.link_features = config.link_features.replace("_LT_", "<").replace("_LTE_", "<=").replace("_GT_", ">").replace("_GTE_", ">=").replace("_PIPE_", "|").replace("_AMPERSAND_", "&") + config.filterpairs = replace_placeholders(config.filterpairs) + config.link_features = replace_placeholders(config.link_features) print(config) spark = SparkSession.builder.appName('Passim Alignment').getOrCreate()