diff --git a/decoy/UDFs/reference_udf_specs/ref_udfspec_full_args.yml b/decoy/UDF/reference_udf_specs/ref_udfspec_full_args.yml similarity index 100% rename from decoy/UDFs/reference_udf_specs/ref_udfspec_full_args.yml rename to decoy/UDF/reference_udf_specs/ref_udfspec_full_args.yml diff --git a/decoy/UDFs/reference_udf_specs/ref_udfspec_no_args.yml b/decoy/UDF/reference_udf_specs/ref_udfspec_no_args.yml similarity index 99% rename from decoy/UDFs/reference_udf_specs/ref_udfspec_no_args.yml rename to decoy/UDF/reference_udf_specs/ref_udfspec_no_args.yml index 6f4b74d..5956159 100644 --- a/decoy/UDFs/reference_udf_specs/ref_udfspec_no_args.yml +++ b/decoy/UDF/reference_udf_specs/ref_udfspec_no_args.yml @@ -1052,10 +1052,10 @@ mimesis_en: arguments: [] dispatch: no_arg return_type: INTEGER - development.dsn: - arguments: [] - dispatch: no_arg - return_type: VARCHAR + # development.dsn: + # arguments: [] + # dispatch: no_arg + # return_type: VARCHAR development.os: arguments: [] dispatch: no_arg diff --git a/decoy/UDFs/udf_arrow.py b/decoy/UDF/udf_arrow.py similarity index 100% rename from decoy/UDFs/udf_arrow.py rename to decoy/UDF/udf_arrow.py diff --git a/decoy/UDFs/udf_custom_functions.py b/decoy/UDF/udf_custom_functions.py similarity index 100% rename from decoy/UDFs/udf_custom_functions.py rename to decoy/UDF/udf_custom_functions.py diff --git a/decoy/UDFs/udf_numpy.py b/decoy/UDF/udf_numpy.py similarity index 72% rename from decoy/UDFs/udf_numpy.py rename to decoy/UDF/udf_numpy.py index 19b8b15..4c31b70 100644 --- a/decoy/UDFs/udf_numpy.py +++ b/decoy/UDF/udf_numpy.py @@ -21,10 +21,10 @@ def numpy_sample(): def numpy_choice(a, p): """ - In order to utilise the numpy.random.choice() function, the inputs and outputs need to be formatted in a specific way. + In order to utilise the numpy.random.choice() function, the inputs and outputs need to be formatted in a specific way. Parameters: a (Str): a string of the choices in the format "choice1, choice2, choice3" as string will be split on commas(,) - p (Str): a string of all the choice probabilities, the same length and format as choices. + p (Str): a string of all the choice probabilities, the same length and format as choices. Returns: choice (Str): A STRING of the choice from the choices list (a) based on the probability list (p) """ @@ -48,7 +48,7 @@ def numpy_chisquare(df): def numpy_dirichlet(alpha): """ - In order to use the dirichlet function properly 2+ fields for alpha are needed. + In order to use the dirichlet function properly 2+ fields for alpha are needed. alpha in this function needs to be a comma separated string. eg. '0.2, 0.4, 0.8' as pvals. It also needs to return a string due to SQL not liking variable length arrays @@ -111,7 +111,7 @@ def numpy_logseries(p): def numpy_multinomial(n, pvals): """ pvals in this function needs to be a comma separated string. - eg. '0.2, 0.4, 0.8' as pvals + eg. '0.2, 0.4, 0.8' as pvals """ pvals = [float(x) for x in pvals.split(",")] return npr.multinomial(n=n, pvals=pvals) @@ -198,120 +198,120 @@ def register_numpy_random_functions(con: duckdb.DuckDBPyConnection) -> None: con.create_function( name='numpy_rand', function=numpy_rand, - return_type=None, - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=None, side_effects=True, ) con.create_function( name='numpy_randn', function=numpy_rand, - return_type=None, - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=None, side_effects=True, ) con.create_function( name='numpy_randint', function=numpy_randint, - return_type=[ducktypes.INTEGER, ducktypes.INTEGER], - parameters=ducktypes.INTEGER, + return_type=ducktypes.INTEGER, + parameters=[ducktypes.INTEGER, ducktypes.INTEGER], side_effects=True, ) con.create_function( name='numpy_sample', function=numpy_sample, - return_type=None, - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=None, side_effects=True, ) con.create_function( name='numpy_choice', function=numpy_choice, - return_type=[ducktypes.VARCHAR, ducktypes.VARCHAR], - parameters=ducktypes.VARCHAR, + return_type=ducktypes.VARCHAR, + parameters=[ducktypes.VARCHAR, ducktypes.VARCHAR], side_effects=True, ) con.create_function( name='numpy_beta', function=numpy_beta, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_binomial', function=numpy_binomial, - return_type=[ducktypes.INTEGER, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.INTEGER, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_chisquare', function=numpy_chisquare, - return_type=[ducktypes.INTEGER], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.INTEGER], side_effects=True, ) con.create_function( name='numpy_dirichlet', function=numpy_dirichlet, - return_type=[ducktypes.VARCHAR], - parameters=ducktypes.VARCHAR, + return_type=ducktypes.VARCHAR, + parameters=[ducktypes.VARCHAR], side_effects=True, ) con.create_function( name='numpy_exponential', function=numpy_exponential, - return_type=[ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_f', function=numpy_f, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_gamma', function=numpy_gamma, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_geometric', function=numpy_geometric, - return_type=[ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_gumbel', function=numpy_gumbel, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_hypergeometric', function=numpy_hypergeometric, - return_type=[ducktypes.INTEGER, ducktypes.INTEGER, ducktypes.INTEGER], - parameters=ducktypes.INTEGER, + return_type=ducktypes.INTEGER, + parameters=[ducktypes.INTEGER, ducktypes.INTEGER, ducktypes.INTEGER], side_effects=True, ) @@ -320,191 +320,191 @@ def register_numpy_random_functions(con: duckdb.DuckDBPyConnection) -> None: con.create_function( name='numpy_laplace', function=numpy_laplace, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_logistic', function=numpy_logistic, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_lognormal', function=numpy_lognormal, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_logseries', function=numpy_logseries, - return_type=[ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_multinomial', function=numpy_multinomial, - return_type=[ducktypes.FLOAT, ducktypes.VARCHAR], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.VARCHAR], side_effects=True, ) con.create_function( name='numpy_negative_binomial', function=numpy_negative_binomial, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_noncentral_chisquare', function=numpy_noncentral_chisquare, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_noncentral_f', function=numpy_noncentral_f, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_normal', function=numpy_normal, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_pareto', function=numpy_pareto, - return_type=[ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_poisson', function=numpy_poisson, - return_type=[ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_power', function=numpy_power, - return_type=[ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_rayleigh', function=numpy_rayleigh, - return_type=[ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_standard_cauchy', function=numpy_standard_cauchy, - return_type=[], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[], side_effects=True, ) con.create_function( name='numpy_standard_exponential', function=numpy_standard_exponential, - return_type=[], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[], side_effects=True, ) con.create_function( name='numpy_standard_gamma', function=numpy_standard_gamma, - return_type=[ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_standard_normal', function=numpy_standard_normal, - return_type=[], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[], side_effects=True, ) con.create_function( name='numpy_standard_t', function=numpy_standard_t, - return_type=[ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_triangular', function=numpy_triangular, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_uniform', function=numpy_uniform, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_vonmises', function=numpy_vonmises, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_wald', function=numpy_wald, - return_type=[ducktypes.FLOAT, ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT, ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_weibull', function=numpy_weibull, - return_type=[ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT], side_effects=True, ) con.create_function( name='numpy_zipf', function=numpy_zipf, - return_type=[ducktypes.FLOAT], - parameters=ducktypes.FLOAT, + return_type=ducktypes.FLOAT, + parameters=[ducktypes.FLOAT], side_effects=True, ) diff --git a/decoy/UDFs/udf_scalar.py b/decoy/UDF/udf_scalar.py similarity index 100% rename from decoy/UDFs/udf_scalar.py rename to decoy/UDF/udf_scalar.py diff --git a/decoy/cli_io.py b/decoy/cli_io.py index 3387dcd..60bfd03 100644 --- a/decoy/cli_io.py +++ b/decoy/cli_io.py @@ -5,7 +5,7 @@ from prompt_toolkit.lexers import PygmentsLexer from pygments.lexers.sql import PlPgsqlLexer -from decoy.UDFs.udf_scalar import clear_column_cache +from decoy.UDF.udf_scalar import clear_column_cache exception_list = ( duckdb.ParserException, diff --git a/decoy/database.py b/decoy/database.py index 97cbba1..4b743a7 100644 --- a/decoy/database.py +++ b/decoy/database.py @@ -10,17 +10,17 @@ from mimesis import Generic, Locale from decoy.settings import settings -from decoy.UDFs.udf_arrow import ( +from decoy.UDF.udf_arrow import ( intratable_sample, messy_data_junkadder, messy_data_nullifier, random_shuffle, ) -from decoy.UDFs.udf_scalar import oversample +from decoy.UDF.udf_scalar import oversample from decoy.xeger import xeger_cached -from decoy.UDFs.udf_numpy import register_numpy_random_functions +from decoy.UDF.udf_numpy import register_numpy_random_functions -import decoy.UDFs.udf_custom_functions as c_funcs +import decoy.UDF.udf_custom_functions as c_funcs def getattr_submodule(mod: Any, fpath: str): @@ -51,11 +51,8 @@ def get_connection(register_funcs=True) -> duckdb.DuckDBPyConnection: return con -def register_udf_library( - con: duckdb.DuckDBPyConnection, library, library_config, library_name: str -): - library_functions = {k: v for k, - v in library_config.items() if k != "_meta"} +def register_udf_library(con: duckdb.DuckDBPyConnection, library, library_config, library_name: str): + library_functions = {k: v for k, v in library_config.items() if k != "_meta"} for fname, fconfig in library_functions.items(): rtype = getattr(ducktypes, fconfig["return_type"]) @@ -74,16 +71,16 @@ def register_udf_library( con.create_function( name=function_name, function=getattr_submodule(library, fname), - return_type=fargs, - parameters=rtype, + return_type=rtype, + parameters=fargs, side_effects=True, ) case "no_arg": con.create_function( name=function_name, function=getattr_submodule_noargs(library, fname), - return_type=[], - parameters=rtype, + return_type=rtype, + parameters=[], side_effects=True, ) case default: @@ -112,8 +109,7 @@ def register_custom_udfs(con: duckdb.DuckDBPyConnection) -> None: fargs = [] if config['parameters'] is not None: for farg in config['parameters'].split(','): - fargs.append( - getattr(ducktypes, farg.strip())) + fargs.append(getattr(ducktypes, farg.strip())) match config["function_type"]: case 'scalar': @@ -124,28 +120,27 @@ def register_custom_udfs(con: duckdb.DuckDBPyConnection) -> None: con.create_function( name=fname, function=func[1], - return_type=fargs, - parameters=rtype, + return_type=rtype, + parameters=fargs, side_effects=True, type=ftype, ) def register_udfs(con: duckdb.DuckDBPyConnection) -> None: - con.create_function( name="xeger", function=xeger_cached, - return_type=[ducktypes.VARCHAR], - parameters=ducktypes.VARCHAR, + parameters=[ducktypes.VARCHAR], + return_type=ducktypes.VARCHAR, side_effects=True, ) con.create_function( name="shuffle", function=random_shuffle, - return_type=[ducktypes.VARCHAR], - parameters=ducktypes.VARCHAR, + return_type=ducktypes.VARCHAR, + parameters=[ducktypes.VARCHAR], side_effects=True, type=duckdb.functional.PythonUDFType.ARROW, ) @@ -153,8 +148,8 @@ def register_udfs(con: duckdb.DuckDBPyConnection) -> None: con.create_function( name="intratable_sample", function=intratable_sample, - return_type=[ducktypes.VARCHAR], - parameters=ducktypes.VARCHAR, + return_type=ducktypes.VARCHAR, + parameters=[ducktypes.VARCHAR], side_effects=True, type=duckdb.functional.PythonUDFType.ARROW, ) @@ -162,8 +157,8 @@ def register_udfs(con: duckdb.DuckDBPyConnection) -> None: con.create_function( name="messy_data_nullifier", function=messy_data_nullifier, - return_type=[ducktypes.VARCHAR], - parameters=ducktypes.VARCHAR, + return_type=ducktypes.VARCHAR, + parameters=[ducktypes.VARCHAR], side_effects=True, type=duckdb.functional.PythonUDFType.ARROW, ) @@ -171,8 +166,8 @@ def register_udfs(con: duckdb.DuckDBPyConnection) -> None: con.create_function( name="messy_data_junkadder", function=messy_data_junkadder, - return_type=[ducktypes.VARCHAR], - parameters=ducktypes.VARCHAR, + return_type=ducktypes.VARCHAR, + parameters=[ducktypes.VARCHAR], side_effects=True, type=duckdb.functional.PythonUDFType.ARROW, ) @@ -180,7 +175,7 @@ def register_udfs(con: duckdb.DuckDBPyConnection) -> None: con.create_function( name="oversample", function=oversample, - return_type=[ducktypes.VARCHAR, ducktypes.VARCHAR], - parameters=ducktypes.VARCHAR, + return_type=ducktypes.VARCHAR, + parameters=[ducktypes.VARCHAR, ducktypes.VARCHAR], side_effects=True, ) diff --git a/decoy/kernel.py b/decoy/kernel.py index 599be08..f4c8b65 100644 --- a/decoy/kernel.py +++ b/decoy/kernel.py @@ -3,7 +3,7 @@ from decoy.cli_io import exception_list from decoy.database import get_connection -from decoy.UDFs.udf_scalar import clear_column_cache +from decoy.UDF.udf_scalar import clear_column_cache class DecoyKernel(Kernel): diff --git a/decoy/udfspec.yml b/decoy/udfspec.yml index 02581c9..20157e9 100644 --- a/decoy/udfspec.yml +++ b/decoy/udfspec.yml @@ -1052,10 +1052,10 @@ mimesis_en: arguments: [] dispatch: no_arg return_type: INTEGER - development.dsn: - arguments: [] - dispatch: no_arg - return_type: VARCHAR + # development.dsn: + # arguments: [] + # dispatch: no_arg + # return_type: VARCHAR development.os: arguments: [] dispatch: no_arg @@ -1236,10 +1236,10 @@ mimesis_en: arguments: [] dispatch: no_arg return_type: VARCHAR - internet.emoji: - arguments: [] - dispatch: no_arg - return_type: VARCHAR + # internet.emoji: + # arguments: [] + # dispatch: no_arg + # return_type: VARCHAR internet.hostname: arguments: [] dispatch: no_arg @@ -1404,10 +1404,10 @@ mimesis_en: arguments: [] dispatch: no_arg return_type: VARCHAR - person.age: - arguments: [] - dispatch: no_arg - return_type: INTEGER + # person.age: + # arguments: [] + # dispatch: no_arg + # return_type: INTEGER person.blood_type: arguments: [] dispatch: no_arg @@ -1500,10 +1500,10 @@ mimesis_en: arguments: [] dispatch: no_arg return_type: INTEGER - person.work_experience: - arguments: [] - dispatch: no_arg - return_type: INTEGER + # person.work_experience: + # arguments: [] + # dispatch: no_arg + # return_type: INTEGER person.worldview: arguments: [] dispatch: no_arg @@ -1596,7 +1596,7 @@ numpy: _meta: arg_type: anotated # randint: - # arguments: + # arguments: # - default: "" # kind: POSITIONAL_OR_KEYWORD # name: low @@ -1608,11 +1608,11 @@ random: arg_type: basic betavariate: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: alpha type: FLOAT - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: beta type: FLOAT @@ -1620,7 +1620,7 @@ random: return_type: FLOAT choice: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: seq type: BLOB @@ -1644,7 +1644,7 @@ random: # return_type: VARCHAR expovariate: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: lambd type: FLOAT @@ -1652,11 +1652,11 @@ random: return_type: FLOAT gammavariate: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: alpha type: FLOAT - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: beta type: FLOAT @@ -1664,11 +1664,11 @@ random: return_type: FLOAT gauss: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: mu type: FLOAT - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: sigma type: FLOAT @@ -1676,11 +1676,11 @@ random: return_type: FLOAT lognormvariate: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: mu type: FLOAT - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: sigma type: FLOAT @@ -1688,11 +1688,11 @@ random: return_type: FLOAT normalvariate: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: mu type: FLOAT - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: sigma type: FLOAT @@ -1700,7 +1700,7 @@ random: return_type: FLOAT paretovariate: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: alpha type: FLOAT @@ -1708,7 +1708,7 @@ random: return_type: FLOAT randbytes: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: n type: INTEGER @@ -1716,11 +1716,11 @@ random: return_type: BLOB randint: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: a type: INTEGER - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: b type: INTEGER @@ -1728,15 +1728,15 @@ random: return_type: INTEGER randrange: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: start type: INTEGER - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: stop type: INTEGER - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: step type: INTEGER @@ -1764,15 +1764,15 @@ random: # return_type: VARCHAR triangular: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: low type: FLOAT - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: high type: FLOAT - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: mode type: FLOAT @@ -1780,11 +1780,11 @@ random: return_type: FLOAT uniform: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: a type: FLOAT - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: b type: FLOAT @@ -1792,11 +1792,11 @@ random: return_type: FLOAT vonmisesvariate: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: mu type: FLOAT - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: kappa type: FLOAT @@ -1804,11 +1804,11 @@ random: return_type: FLOAT weibullvariate: arguments: - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: alpha type: FLOAT - - default: "" + - default: '' kind: POSITIONAL_OR_KEYWORD name: beta type: FLOAT diff --git a/decoy/xeger.py b/decoy/xeger.py index fa605d3..a7cf993 100644 --- a/decoy/xeger.py +++ b/decoy/xeger.py @@ -1,6 +1,7 @@ import random import re import sre_parse +import sre_constants ascii_lowercase = "abcdefghijklmnopqrstuvwxyz" ascii_uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" @@ -12,13 +13,15 @@ printable = digits + ascii_letters + punctuation + control + newline printableNotNL = digits + ascii_letters + punctuation + control +# Default limit for repetitions limit = 10 - +# Cache for generated patterns xeger_cache = {} def xeger_cached(er: str): + """Generate a string that matches the given regular expression.""" if er not in xeger_cache: xeger_cache[er] = Xeger(er) @@ -26,46 +29,316 @@ def xeger_cached(er: str): class Xeger: - def __init__(self, regex: str) -> str: - self.re = sre_parse.parse(regex) + """Class to generate random strings that match a given regular expression.""" + + def __init__(self, regex: str) -> None: + """Initialize the Xeger generator with a regex pattern.""" + try: + self.re = sre_parse.parse(regex) + self.debug = False # Set to True to see detailed debug output + except Exception as e: + print(f"Error parsing regex: {e}") + raise def generate(self): - s = "" - for x in self.re: - s += self.generate_from_regexp(x) - return s + """Generate a random string that matches the regex pattern.""" + try: + s = "" + for x in self.re: + result = self.generate_from_regexp(x) + s += result + return s + except Exception as e: + print(f"Error generating string: {e}") + return "" def generate_from_regexp(self, reg) -> str: - match reg[0]: - case sre_parse.LITERAL: - return chr(reg[1]) - case sre_parse.MAX_REPEAT: - rpt_sub = reg[1][2] - rpt = reg[1][0] - if reg[1][0] != reg[1][1]: - if reg[1][1] == re._constants.MAXREPEAT: - rpt = random.randint(1, limit) + """Generate a string fragment for a specific regex pattern element.""" + try: + pattern_type = reg[0] + pattern_value = reg[1] + + # LITERAL: Represents a single character literal + if pattern_type == sre_parse.LITERAL: + return chr(pattern_value) + + # MAX_REPEAT: Represents repetition patterns like a*, a+, a{m,n} + elif pattern_type == sre_parse.MAX_REPEAT: + try: + min_repeat, max_repeat, subpattern = pattern_value + + # Determine how many times to repeat + if min_repeat != max_repeat: + if max_repeat == sre_constants.MAXREPEAT: + repeat_count = random.randint(min_repeat, min_repeat + limit) + else: + repeat_count = random.randint(min_repeat, max_repeat) else: - rpt = random.randint(reg[1][0], reg[1][1]) - return self.generate_from_subexp(rpt_sub, rpt) - case sre_parse.IN: - s = "" - for r in reg[1]: - s += chr(random.randint(r[1][0], r[1][1])) - return s - case sre_parse.SUBPATTERN: - ss = "" - for x in reg[1][3]: - ss += self.generate_from_regexp(x) - return ss - case _: - print("Skipped:", reg) - return "" + repeat_count = min_repeat - def generate_from_subexp(self, reg, count) -> str: - s = "" - for _ in range(0, count): - for sub in reg: - s += self.generate_from_regexp(sub) + return self.generate_from_subexp(subpattern, repeat_count) + except Exception as e: + if self.debug: + print(f"Error in MAX_REPEAT: {e}") + return "" + + # IN: Represents character sets like [a-z], [^abc] + elif pattern_type == sre_parse.IN: + try: + charset = pattern_value + + # Handle empty character sets + if not charset: + return random.choice(printableNotNL) + + # Check for negation + negative = False + char_choices = [] + + for item in charset: + # Handle negation marker + if not isinstance(item, tuple): + if item == sre_parse.NEGATE: + negative = True + continue + + try: + item_type, item_value = item + + # Handle literal character + if item_type == sre_parse.LITERAL: + char_choices.append(chr(item_value)) + + # Handle character range like a-z + elif item_type == sre_parse.RANGE: + min_char, max_char = item_value + for i in range(min_char, max_char + 1): + char_choices.append(chr(i)) + + # Handle categories like \d, \w, \s + elif item_type == sre_parse.CATEGORY: + category_chars = self._get_category_chars(item_value) + char_choices.extend(category_chars) + except (TypeError, ValueError): + continue + + # Handle negated character class [^...] + if negative: + available_chars = [c for c in printableNotNL if c not in char_choices] + if available_chars: + return random.choice(available_chars) + return random.choice(printableNotNL) + + # Return a random character from the set + if char_choices: + return random.choice(char_choices) + + # Fallback + return random.choice(printableNotNL) + except Exception as e: + if self.debug: + print(f"Error in IN: {e}") + return random.choice(printableNotNL) + + # SUBPATTERN: Represents capturing groups (...) + elif pattern_type == sre_parse.SUBPATTERN: + try: + # Handle different subpattern formats + subpattern = None + + if isinstance(pattern_value, tuple): + # Try to extract the subpattern based on the structure + # Structure might differ between Python versions + if len(pattern_value) >= 4: # Python 3.6+ + subpattern = pattern_value[3] + elif len(pattern_value) > 0: + # Try to find a list in the tuple + for item in pattern_value: + if isinstance(item, list): + subpattern = item + break + + # If subpattern is still None, use the last item + if subpattern is None and isinstance(pattern_value[-1], (list, tuple)): + subpattern = pattern_value[-1] + + # If we still couldn't get the subpattern, use a fallback + if subpattern is None: + return "" + + # Generate from the subpattern + result = "" + for x in subpattern: + result += self.generate_from_regexp(x) + return result + except Exception as e: + if self.debug: + print(f"Error in SUBPATTERN: {e}") + return "" + + # BRANCH: Represents alternation (a|b|c) + elif pattern_type == sre_parse.BRANCH: + try: + # The first element is typically None, the second is the list of branches + branches = None + + if isinstance(pattern_value, tuple) and len(pattern_value) >= 2: + branches = pattern_value[1] + else: + branches = pattern_value - return s + # Ensure we have valid branches + if branches and isinstance(branches, list) and len(branches) > 0: + branch = random.choice(branches) + result = "" + for x in branch: + result += self.generate_from_regexp(x) + return result + + return "" + except Exception as e: + if self.debug: + print(f"Error in BRANCH: {e}") + return "" + + # ANY: Represents the dot . (any character except newline) + elif pattern_type == sre_parse.ANY: + return random.choice(printableNotNL) + + # AT: Represents anchors like ^, $, \b + elif pattern_type == sre_parse.AT: + # Anchors don't generate characters + return "" + + # CATEGORY: Represents character categories like \d, \w, \s + elif pattern_type == sre_parse.CATEGORY: + return self._handle_category(pattern_value) + + # GROUPREF: Represents backreferences like \1, \2 + elif pattern_type == sre_parse.GROUPREF: + # For simplicity, generate a random character + return random.choice(printableNotNL) + + # NOT_LITERAL: Represents negated literals like [^a] + elif pattern_type == sre_parse.NOT_LITERAL: + try: + char = chr(pattern_value) + available_chars = [c for c in printableNotNL if c != char] + if available_chars: + return random.choice(available_chars) + return random.choice(printableNotNL) + except Exception: + return random.choice(printableNotNL) + + # Handle other pattern types with a sensible default + else: + if self.debug: + print(f"Unhandled pattern type: {pattern_type}") + return "" + + except Exception as e: + if self.debug: + print(f"Error in generate_from_regexp: {e} for pattern: {reg}") + return "" + + def _handle_category(self, category_value): + """Handle character categories like \d, \w, \s.""" + try: + # Handle _NamedIntConstant objects + if hasattr(category_value, 'name'): + category_name = category_value.name + + if 'DIGIT' in category_name: + if 'NOT' in category_name: + return random.choice("".join(c for c in printableNotNL if c not in digits)) + return random.choice(digits) + + elif 'WORD' in category_name: + if 'NOT' in category_name: + return random.choice("".join( + c for c in printableNotNL if c not in ascii_letters and c not in digits and c != "_")) + return random.choice(ascii_letters + digits + "_") + + elif 'SPACE' in category_name: + if 'NOT' in category_name: + return random.choice("".join(c for c in printableNotNL if c not in " \t\n\r\f\v")) + return random.choice(" \t\n\r\f\v") + + # Handle direct integers for categories + elif isinstance(category_value, int): + category_map = { + sre_constants.CATEGORY_DIGIT: digits, + sre_constants.CATEGORY_NOT_DIGIT: "".join(c for c in printableNotNL if c not in digits), + sre_constants.CATEGORY_WORD: ascii_letters + digits + "_", + sre_constants.CATEGORY_NOT_WORD: "".join( + c for c in printableNotNL if c not in ascii_letters and c not in digits and c != "_"), + sre_constants.CATEGORY_SPACE: " \t\n\r\f\v", + sre_constants.CATEGORY_NOT_SPACE: "".join(c for c in printableNotNL if c not in " \t\n\r\f\v"), + } + + if category_value in category_map: + return random.choice(category_map[category_value]) + + # Fallback for unknown categories + return random.choice(printableNotNL) + except Exception as e: + if self.debug: + print(f"Error in _handle_category: {e}") + return random.choice(printableNotNL) + + def _get_category_chars(self, category_value): + """Get the characters for a specific category value.""" + try: + # Handle _NamedIntConstant objects + if hasattr(category_value, 'name'): + category_name = category_value.name + + if 'DIGIT' in category_name: + if 'NOT' in category_name: + return [c for c in printableNotNL if c not in digits] + return list(digits) + + elif 'WORD' in category_name: + if 'NOT' in category_name: + return [c for c in printableNotNL if c not in ascii_letters and c not in digits and c != "_"] + return list(ascii_letters + digits + "_") + + elif 'SPACE' in category_name: + if 'NOT' in category_name: + return [c for c in printableNotNL if c not in " \t\n\r\f\v"] + return list(" \t\n\r\f\v") + + # Handle direct integers for categories + elif isinstance(category_value, int): + category_map = { + sre_constants.CATEGORY_DIGIT: list(digits), + sre_constants.CATEGORY_NOT_DIGIT: [c for c in printableNotNL if c not in digits], + sre_constants.CATEGORY_WORD: list(ascii_letters + digits + "_"), + sre_constants.CATEGORY_NOT_WORD: [c for c in printableNotNL if + c not in ascii_letters and c not in digits and c != "_"], + sre_constants.CATEGORY_SPACE: list(" \t\n\r\f\v"), + sre_constants.CATEGORY_NOT_SPACE: [c for c in printableNotNL if c not in " \t\n\r\f\v"], + } + + if category_value in category_map: + return category_map[category_value] + + # Fallback for unknown categories + return list(printableNotNL) + except Exception as e: + if self.debug: + print(f"Error in _get_category_chars: {e}") + return list(printableNotNL) + + def generate_from_subexp(self, reg, count) -> str: + """Generate a string fragment for a subexpression repeated count times.""" + try: + s = "" + for _ in range(0, count): + for sub in reg: + s += self.generate_from_regexp(sub) + return s + except Exception as e: + if self.debug: + print(f"Error in generate_from_subexp: {e}") + return "" \ No newline at end of file diff --git a/examples/export_to_json.sql b/examples/export_to_json.sql new file mode 100644 index 0000000..26df586 --- /dev/null +++ b/examples/export_to_json.sql @@ -0,0 +1,202 @@ +-- Example showing how to export decoy medical records as JSON documents +-- This uses DuckDB's built-in JSON functions to transform rows into JSON documents + +-- Export all patient records as JSON documents +SELECT json_object( + 'patient_id', patient_id, + 'name', patient_name, + 'date_of_birth', date_of_birth, + 'gender', gender, + 'contact', json_object( + 'address', address, + 'phone', phone, + 'email', email + ) +) +FROM patients; + +-- Export a more complex view with nested data as JSON +WITH patient_complete AS ( + SELECT + p.patient_id, + p.patient_name, + p.date_of_birth, + p.gender, + p.address, + p.phone, + p.email, + d.doctor_name as primary_doctor, + list(json_object( + 'condition_name', mc.condition_name, + 'icd_10_code', mc.icd_10_code, + 'diagnosis_date', pc.diagnosis_date, + 'status', pc.status + )) as conditions, + list(DISTINCT json_object( + 'medication_name', m.medication_name, + 'typical_dose', m.typical_dose, + 'prescribed_date', pr.prescribed_date, + 'status', pr.status, + 'refills_remaining', pr.refills_remaining, + 'prescribing_doctor', pd.doctor_name + )) as prescriptions, + list(json_object( + 'datetime', v.visit_datetime, + 'type', v.visit_type, + 'doctor', vd.doctor_name, + 'duration_minutes', v.duration_minutes, + 'status', v.status, + 'vitals', CASE + WHEN vs.visit_id IS NOT NULL THEN json_object( + 'systolic_bp', vs.systolic_bp, + 'diastolic_bp', vs.diastolic_bp, + 'heart_rate', vs.heart_rate, + 'temperature', vs.temperature, + 'oxygen_saturation', vs.oxygen_saturation, + 'respiratory_rate', vs.respiratory_rate + ) + ELSE NULL + END + )) as visits + FROM patients p + LEFT JOIN doctors d ON p.primary_doctor_id = d.doctor_id + LEFT JOIN patient_conditions pc ON p.patient_id = pc.patient_id + LEFT JOIN medical_conditions mc ON pc.condition_id = mc.condition_id + LEFT JOIN prescriptions pr ON p.patient_id = pr.patient_id + LEFT JOIN medications m ON pr.medication_id = m.medication_id + LEFT JOIN doctors pd ON pr.prescribing_doctor_id = pd.doctor_id + LEFT JOIN visits v ON p.patient_id = v.patient_id + LEFT JOIN doctors vd ON v.doctor_id = vd.doctor_id + LEFT JOIN vital_signs vs ON v.visit_id = vs.visit_id + GROUP BY p.patient_id, p.patient_name, p.date_of_birth, p.gender, + p.address, p.phone, p.email, d.doctor_name +) +SELECT json_object( + 'patient_id', patient_id, + 'name', patient_name, + 'date_of_birth', date_of_birth, + 'gender', gender, + 'contact', json_object( + 'address', address, + 'phone', phone, + 'email', email + ), + 'primary_doctor', primary_doctor, + 'conditions', conditions, + 'prescriptions', prescriptions, + 'visits', visits +) +FROM patient_complete; + +-- Export specific patients matching certain criteria +SELECT json_object( + 'patient_id', patient_id, + 'name', patient_name, + 'date_of_birth', date_of_birth, + 'gender', gender, + 'contact', json_object( + 'address', address, + 'phone', phone, + 'email', email + ) +) +FROM patients +WHERE gender = 'F'; + +-- To save all patient records to a single file: +COPY ( + SELECT json_object( + 'patient_id', patient_id, + 'name', patient_name, + 'date_of_birth', date_of_birth, + 'gender', gender, + 'contact', json_object( + 'address', address, + 'phone', phone, + 'email', email + ) + ) + FROM patients +) TO 'patients.json'; + +-- Export each patient's complete medical record to a separate JSON file +-- Files will be named like: patient_1.json, patient_2.json, etc. +COPY ( + WITH patient_data AS ( + SELECT + p.patient_id, + p.patient_name, + p.date_of_birth, + p.gender, + p.address, + p.phone, + p.email, + d.doctor_name as primary_doctor, + list(json_object( + 'condition_name', mc.condition_name, + 'icd_10_code', mc.icd_10_code, + 'diagnosis_date', pc.diagnosis_date, + 'status', pc.status + )) as conditions, + list(DISTINCT json_object( + 'medication_name', m.medication_name, + 'typical_dose', m.typical_dose, + 'prescribed_date', pr.prescribed_date, + 'status', pr.status, + 'refills_remaining', pr.refills_remaining, + 'prescribing_doctor', pd.doctor_name + )) as prescriptions, + list(json_object( + 'datetime', v.visit_datetime, + 'type', v.visit_type, + 'doctor', vd.doctor_name, + 'duration_minutes', v.duration_minutes, + 'status', v.status, + 'vitals', CASE + WHEN vs.visit_id IS NOT NULL THEN json_object( + 'systolic_bp', vs.systolic_bp, + 'diastolic_bp', vs.diastolic_bp, + 'heart_rate', vs.heart_rate, + 'temperature', vs.temperature, + 'oxygen_saturation', vs.oxygen_saturation, + 'respiratory_rate', vs.respiratory_rate + ) + ELSE NULL + END + )) as visits + FROM patients p + LEFT JOIN doctors d ON p.primary_doctor_id = d.doctor_id + LEFT JOIN patient_conditions pc ON p.patient_id = pc.patient_id + LEFT JOIN medical_conditions mc ON pc.condition_id = mc.condition_id + LEFT JOIN prescriptions pr ON p.patient_id = pr.patient_id + LEFT JOIN medications m ON pr.medication_id = m.medication_id + LEFT JOIN doctors pd ON pr.prescribing_doctor_id = pd.doctor_id + LEFT JOIN visits v ON p.patient_id = v.patient_id + LEFT JOIN doctors vd ON v.doctor_id = vd.doctor_id + LEFT JOIN vital_signs vs ON v.visit_id = vs.visit_id + GROUP BY p.patient_id, p.patient_name, p.date_of_birth, p.gender, + p.address, p.phone, p.email, d.doctor_name + ) + SELECT json_object( + 'patient_id', patient_id, + 'name', patient_name, + 'date_of_birth', date_of_birth, + 'gender', gender, + 'contact', json_object( + 'address', address, + 'phone', phone, + 'email', email + ), + 'primary_doctor', primary_doctor, + 'conditions', conditions, + 'prescriptions', prescriptions, + 'visits', visits + ) + FROM patient_data +) TO 'patient_{{patient_id}}.json' + (FORMAT JSON); + +-- Cleanup +DROP TABLE patients; +DROP TABLE visits; +DROP TABLE medications; \ No newline at end of file diff --git a/examples/medical_records.sql b/examples/medical_records.sql new file mode 100644 index 0000000..722e353 --- /dev/null +++ b/examples/medical_records.sql @@ -0,0 +1,166 @@ +-- Create a table of medical conditions that can be referenced +CREATE OR REPLACE TABLE medical_conditions AS ( + SELECT + range + 1 as condition_id, + UNNEST([ + 'Hypertension', 'Type 2 Diabetes', 'Asthma', 'Arthritis', + 'Anxiety', 'Depression', 'Migraine', 'Allergies', + 'GERD', 'Lower Back Pain', 'High Cholesterol', 'Insomnia', + 'Sinusitis', 'Bronchitis', 'Vitamin D Deficiency' + ]) as condition_name, + UNNEST([ + 'I10', 'E11', 'J45', 'M15', + 'F41', 'F33', 'G43', 'J30', + 'K21', 'M54.5', 'E78', 'G47', + 'J01', 'J20', 'E55' + ]) as icd_10_code + FROM range(15) +); + +-- Create a table of medications +CREATE OR REPLACE TABLE medications AS ( + SELECT + range + 1 as medication_id, + UNNEST([ + 'Lisinopril', 'Metformin', 'Albuterol', 'Ibuprofen', + 'Sertraline', 'Fluoxetine', 'Sumatriptan', 'Cetirizine', + 'Omeprazole', 'Cyclobenzaprine', 'Atorvastatin', 'Zolpidem', + 'Amoxicillin', 'Azithromycin', 'Ergocalciferol' + ]) as medication_name, + UNNEST([ + '10mg', '500mg', '90mcg', '400mg', + '50mg', '20mg', '50mg', '10mg', + '20mg', '10mg', '40mg', '5mg', + '500mg', '250mg', '50000IU' + ]) as typical_dose + FROM range(15) -- 15 medications +); + +-- Create doctors +CREATE OR REPLACE TABLE doctors AS ( + SELECT + range + 1 as doctor_id, + faker_name() as doctor_name, + UNNEST([ + 'Family Medicine', 'Internal Medicine', 'Pediatrics', + 'Family Medicine', 'Internal Medicine', 'Geriatrics', + 'Family Medicine', 'Internal Medicine', 'Family Medicine', + 'Internal Medicine' + ]) as specialty + FROM range(10) -- 10 doctors +); + +-- Create patients +CREATE OR REPLACE TABLE patients AS ( + SELECT + range + 1 as patient_id, + faker_name() as patient_name, + faker_date_of_birth() as date_of_birth, + faker_address() as address, + faker_phone_number() as phone, + faker_email() as email, + CASE random_randint(1, 2) + WHEN 1 THEN 'M' + ELSE 'F' + END as gender, + random_randint(1, 10) as primary_doctor_id + FROM range(10000) -- 10000 patients +); + +-- Create patient conditions (some patients have multiple conditions) +CREATE OR REPLACE TABLE patient_conditions AS ( + SELECT + range + 1 as record_id, + oversample('patients', 'patient_id') as patient_id, + oversample('medical_conditions', 'condition_id') as condition_id, + mimesis_datetime_date() as diagnosis_date, + CASE random_randint(1, 4) + WHEN 1 THEN 'Resolved' + ELSE 'Active' + END as status + FROM range(15000) -- 15000 condition records +); + +-- Create prescriptions +CREATE OR REPLACE TABLE prescriptions AS ( + SELECT + range + 1 as prescription_id, + oversample('patient_conditions', 'patient_id') as patient_id, + oversample('medications', 'medication_id') as medication_id, + oversample('doctors', 'doctor_id') as prescribing_doctor_id, + mimesis_datetime_date() as prescribed_date, + random_randint(0, 11) as refills_remaining, + CASE random_randint(1, 10) + WHEN 1 THEN 'Discontinued' + ELSE 'Active' + END as status + FROM range(20000) -- 20000 prescriptions +); + +-- Create visits (medical appointments) +CREATE OR REPLACE TABLE visits AS ( + SELECT + range + 1 as visit_id, + oversample('patients', 'patient_id') as patient_id, + oversample('doctors', 'doctor_id') as doctor_id, + mimesis_datetime_datetime() as visit_datetime, + UNNEST([ + 'Annual Physical', 'Follow-up', 'Acute Care', + 'Chronic Disease Management', 'Preventive Care', + 'Vaccination', 'Lab Review', 'Prescription Renewal' + ])[random_randint(1, 8)] as visit_type, + random_randint(1, 5) * 15 as duration_minutes, + CASE random_randint(1, 20) + WHEN 1 THEN 'Cancelled' + WHEN 2 THEN 'No Show' + ELSE 'Completed' + END as status + FROM range(30000) -- 30000 visits +); + +-- Create vital signs recorded during visits +CREATE OR REPLACE TABLE vital_signs AS ( + SELECT + visit_id, + random_randint(90, 180) as systolic_bp, + random_randint(60, 100) as diastolic_bp, + random_randint(60, 100) as heart_rate, + random_randint(95, 100) as oxygen_saturation, + random_randint(960, 995) / 10.0 as temperature, + random_randint(12, 20) as respiratory_rate + FROM visits + WHERE status = 'Completed' +); + +-- Example query to get a patient's complete medical record +CREATE OR REPLACE VIEW patient_medical_record AS ( + SELECT + p.patient_id, + p.patient_name, + p.date_of_birth, + p.gender, + d.doctor_name as primary_doctor, + mc.condition_name, + pc.diagnosis_date, + pc.status as condition_status, + m.medication_name, + m.typical_dose, + pr.prescribed_date, + pr.status as prescription_status, + v.visit_datetime, + v.visit_type, + v.status as visit_status, + vs.systolic_bp, + vs.diastolic_bp, + vs.heart_rate, + vs.temperature + FROM patients p + LEFT JOIN doctors d ON p.primary_doctor_id = d.doctor_id + LEFT JOIN patient_conditions pc ON p.patient_id = pc.patient_id + LEFT JOIN medical_conditions mc ON pc.condition_id = mc.condition_id + LEFT JOIN prescriptions pr ON p.patient_id = pr.patient_id + LEFT JOIN medications m ON pr.medication_id = m.medication_id + LEFT JOIN visits v ON p.patient_id = v.patient_id + LEFT JOIN vital_signs vs ON v.visit_id = vs.visit_id + ORDER BY p.patient_id, v.visit_datetime DESC +); \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 4cc3474..70c1156 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,3 +33,6 @@ dev = ["black", "pytest"] [project.scripts] decoy = "decoy.cli:rootcmd" + +[tool.setuptools] +packages = ["decoy"] diff --git a/tests/test_decoy.py b/tests/test_decoy.py index 1e2ab97..e3a5e0e 100644 --- a/tests/test_decoy.py +++ b/tests/test_decoy.py @@ -2,7 +2,6 @@ import pytest from decoy.database import ( - custom_choice_generator, get_connection, intratable_sample, random_shuffle, @@ -10,7 +9,7 @@ # from decoy.udf_arrow import from decoy.settings import settings -from decoy.udf_scalar import ( +from decoy.UDF.udf_scalar import ( clear_column_cache, column_cache, get_column_from_cache, @@ -46,11 +45,6 @@ def test_cache_column(connection): assert len(column_cache["test_cached_column.range"]) == 10 -def test_custom_choice_generator(connection): - test_choice = custom_choice_generator() - assert test_choice in ["Fake 1", "Fake 2", "Fake 3"] - - def test_random_shuffle(connection): """ I didn't feel like testing the pandas shuffle function was needed.