diff --git a/README.md b/README.md index 9652a8d..460aa77 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ This script will test the UDF by We would love your contributions. See the [contributing](contributing.md) page for more details on creating a fork of the project and a pull request of your contribution. -> Pull requests will be tested using a Github workflow which leverages the above testing scripts. Please execute these script prior to submitting a pull request to ensure the request is approved quickly. When executed in the test enviornment the [RedshiftRole](#redshift-role) will be defined as follows. You can create a similar role in your local environment for testing. +> Pull requests will be tested using a Github workflow which leverages the above testing scripts. Please execute these scripts prior to submitting a pull request to ensure the request is approved quickly. When executed in the test environment the [RedshiftRole](#redshift-role) will be defined as follows. You can create a similar role in your local environment for testing. 
##Appendix diff --git a/lambda-udfs/fn_lambda_levenshtein_distance(varchar,varchar)/lambda.yaml b/lambda-udfs/fn_lambda_levenshtein_distance(varchar,varchar)/lambda.yaml index 5831093..0c03040 100644 --- a/lambda-udfs/fn_lambda_levenshtein_distance(varchar,varchar)/lambda.yaml +++ b/lambda-udfs/fn_lambda_levenshtein_distance(varchar,varchar)/lambda.yaml @@ -85,8 +85,8 @@ Resources: except Exception as e: print (str(e)) ret['error_msg'] = str(e) - ret['sucess'] = False - ret['sucess'] = True + ret['success'] = False + ret['success'] = True ret['results'] = res return json.dumps(ret) Handler: index.lambda_handler diff --git a/python-udfs/f_bitwise_to_delimited(bigint,int,char)/function.sql b/python-udfs/f_bitwise_to_delimited(bigint,int,char)/function.sql index 6bc20f9..2737c4c 100644 --- a/python-udfs/f_bitwise_to_delimited(bigint,int,char)/function.sql +++ b/python-udfs/f_bitwise_to_delimited(bigint,int,char)/function.sql @@ -11,13 +11,13 @@ Arguments: test 2015-10-15: created by Joe Harris (https://github.com/joeharris76) */ -CREATE OR REPLACE FUNCTION f_bitwise_to_delimited(bitwise_column BIGINT, bits_in_column INT, delimter CHAR(1)) +CREATE OR REPLACE FUNCTION f_bitwise_to_delimited(bitwise_column BIGINT, bits_in_column INT, delimiter CHAR(1)) RETURNS VARCHAR(512) STABLE AS $$ # Convert column to binary, strip "0b" prefix, pad out with zeroes b = bin(bitwise_column)[2:].zfill(bits_in_column) # Convert each character to a member of an array, join array into string using delimiter - o = delimter.join([b[i:i+1] for i in range(0, len(b), 1)]) + o = delimiter.join([b[i:i+1] for i in range(0, len(b), 1)]) return o $$ LANGUAGE plpythonu; diff --git a/python-udfs/f_format_number(float,varchar,varchar,int,int,bool)/function.sql b/python-udfs/f_format_number(float,varchar,varchar,int,int,bool)/function.sql index c7182e5..947b577 100644 --- a/python-udfs/f_format_number(float,varchar,varchar,int,int,bool)/function.sql +++ 
b/python-udfs/f_format_number(float,varchar,varchar,int,int,bool)/function.sql @@ -48,12 +48,12 @@ AS $$ try: value_float = float(value) except ValueError, e: - print('A problem occured with formatting, numeric value was expected.') + print('A problem occurred with formatting, numeric value was expected.') raise(e) try: assert decimal_sep != group_sep except AssertionError, e: - print('A problem occured with formatting, group and decimal separators should not be equal!') + print('A problem occurred with formatting, group and decimal separators should not be equal!') raise(e) if value < 0: diff --git a/sql-udfs/f_mask_bigint(bigint,varchar,varchar)/function.sql b/sql-udfs/f_mask_bigint(bigint,varchar,varchar)/function.sql index a46ca57..011d19b 100644 --- a/sql-udfs/f_mask_bigint(bigint,varchar,varchar)/function.sql +++ b/sql-udfs/f_mask_bigint(bigint,varchar,varchar)/function.sql @@ -8,7 +8,7 @@ Purpose: inputs: src - the table column which needs to be masked/unmasked class - the classification of data, i.e. different class values may have different masking partial or full masking rules. - priv - the level of privilage allowed for this user. e.g. if + priv - the level of privilege allowed for this user. e.g. if not supplied/null, function should return null if 'N' - no masking, will return source value if 'F' - the data should be fully masked diff --git a/sql-udfs/f_mask_timestamp(timestamp,varchar,varchar)/function.sql b/sql-udfs/f_mask_timestamp(timestamp,varchar,varchar)/function.sql index ffe4861..460d147 100644 --- a/sql-udfs/f_mask_timestamp(timestamp,varchar,varchar)/function.sql +++ b/sql-udfs/f_mask_timestamp(timestamp,varchar,varchar)/function.sql @@ -8,14 +8,14 @@ Purpose: inputs: src - the table column which needs to be masked/unmasked class - the classification of data, i.e. different class values may have different masking partial or full masking rules. - priv - the level of privilage allowed for this user. e.g. 
if + priv - the level of privilege allowed for this user. e.g. if not supplied/null, function should return null if 'N' - no masking, will return source value if 'F' - the data should be fully masked if 'P' - the data should be partially masked note: - this function is volitile and will fail on the test for full masking because it uses a RANDOM function, this is by design. + this function is volatile and will fail on the test for full masking because it uses a RANDOM function, this is by design. 2021-09-03: written by rjvgupta */ diff --git a/sql-udfs/f_mask_varchar(varchar,varchar,varchar)/function.sql b/sql-udfs/f_mask_varchar(varchar,varchar,varchar)/function.sql index 3046610..0b87843 100644 --- a/sql-udfs/f_mask_varchar(varchar,varchar,varchar)/function.sql +++ b/sql-udfs/f_mask_varchar(varchar,varchar,varchar)/function.sql @@ -8,7 +8,7 @@ Purpose: inputs: src - the table column which needs to be masked/unmasked class - the classification of data, i.e. different class values may have different masking partial or full masking rules. - priv - the level of privilage allowed for this user. e.g. if + priv - the level of privilege allowed for this user. e.g. 
if not supplied/null, function should return null if 'N' - no masking, will return source value if 'F' - the data should be fully masked diff --git a/stored-procedures/sp_analyze_minimal/sp_analyze_minimal.sql b/stored-procedures/sp_analyze_minimal/sp_analyze_minimal.sql index 36a0920..40d06bc 100644 --- a/stored-procedures/sp_analyze_minimal/sp_analyze_minimal.sql +++ b/stored-procedures/sp_analyze_minimal/sp_analyze_minimal.sql @@ -36,7 +36,7 @@ BEGIN schema_n := NVL(schema_name,'public'); -- Default to 1 percent anlyz_pct := NVL(analyze_percent,1); - -- Generagte ANALYZE SQL + -- Generate ANALYZE SQL anlyz_set := 'SET ANALYZE_THRESHOLD_PERCENT TO '||anlyz_pct::varchar||'; '; SELECT INTO anlyz_sql 'ANALYZE '||n.nspname||'.'||c.relname||' ('||NVL(NVL(srtk.attname,dstk.attname),cols.attname)||');' AS sql FROM pg_namespace n diff --git a/stored-procedures/sp_check_foreign_key/sp_check_foreign_key.sql b/stored-procedures/sp_check_foreign_key/sp_check_foreign_key.sql index 7cd2a57..4e3f0ec 100644 --- a/stored-procedures/sp_check_foreign_key/sp_check_foreign_key.sql +++ b/stored-procedures/sp_check_foreign_key/sp_check_foreign_key.sql @@ -1,7 +1,7 @@ /********************************************************************************************** Purpose: Check the integrity of the FOREIGN KEY declared on a table and a column -Notes: If the foreign key dosen't have a primary key, it will raise a info. +Notes: If the foreign key doesn't have a primary key, it will raise an info. Parameters: batch_time : Timestamp for this batch. 
Can be used to group multiple fixes diff --git a/stored-procedures/sp_check_primary_key/sp_check_primary_key.sql b/stored-procedures/sp_check_primary_key/sp_check_primary_key.sql index bb847f6..be7deee 100644 --- a/stored-procedures/sp_check_primary_key/sp_check_primary_key.sql +++ b/stored-procedures/sp_check_primary_key/sp_check_primary_key.sql @@ -86,7 +86,7 @@ BEGIN EXECUTE 'INSERT INTO tmp_sp_fix_pk'|| ' SELECT DISTINCT * FROM '||check_table||' WHERE ('||pk_columns||') IN (SELECT '||pk_columns|| ' FROM '||check_table||' GROUP BY '||pk_columns||' HAVING COUNT(*) > 1)'; - --Check that PK duplciates are removed in the temp table + --Check that PK duplicates are removed in the temp table EXECUTE 'SELECT COUNT(*) FROM (SELECT '||pk_columns|| ' FROM tmp_sp_fix_pk GROUP BY '||pk_columns||' HAVING COUNT(*) > 1)' INTO temp_count ; IF temp_count > 0 THEN diff --git a/stored-procedures/sp_kmeans/sp_kmeans.sql b/stored-procedures/sp_kmeans/sp_kmeans.sql index 9cdd537..83e0be8 100644 --- a/stored-procedures/sp_kmeans/sp_kmeans.sql +++ b/stored-procedures/sp_kmeans/sp_kmeans.sql @@ -3,7 +3,7 @@ Purpose: Generate K-Means clusters from vector embeddings. Notes: This procedure is used to support vector search capabilities by creating K-Means clusters generated and loaded into Redshift from embeddings; typically created by foundational models. - The ouput of this procedure will be the _kmeans table containing the cluster & centroid + The output of this procedure will be the _kmeans table containing the cluster & centroid and a _kmeans_clusters table contain the member records of the cluster. 
See the following article for more info: https://repost.aws/articles/ARPoweQIN2ROOXZiJAtSQvkQ/vector-search-with-amazon-redshift @@ -38,7 +38,7 @@ BEGIN -- create kmeans tables and choose random starting centroids EXECUTE 'CREATE TABLE IF NOT EXISTS ' || tablename || '_kmeans ( cluster int, centroid SUPER, startts timestamp, endts timestamp, - interations int) DISTSTYLE ALL'; + iterations int) DISTSTYLE ALL'; EXECUTE 'TRUNCATE TABLE ' || tablename || '_kmeans'; EXECUTE 'CREATE TABLE IF NOT EXISTS ' || tablename || '_kmeans_clusters ( @@ -89,7 +89,7 @@ BEGIN ' || tablename || '_kmeans m, m.centroid mv at mvi where m.cluster = '|| cluster ||' and kvi = mvi' INTO similarity; COMMIT; - EXECUTE 'UPDATE ' || tablename || '_kmeans SET centroid = (select centroid from #centroid), endts = CURRENT_TIMESTAMP, interations = '|| i ||' where cluster = ' || cluster; + EXECUTE 'UPDATE ' || tablename || '_kmeans SET centroid = (select centroid from #centroid), endts = CURRENT_TIMESTAMP, iterations = '|| i ||' where cluster = ' || cluster; COMMIT; i := i+1; COMMIT; diff --git a/stored-procedures/sp_update_permissions/README.md b/stored-procedures/sp_update_permissions/README.md index 4ac5f6d..d06985b 100644 --- a/stored-procedures/sp_update_permissions/README.md +++ b/stored-procedures/sp_update_permissions/README.md @@ -1,6 +1,6 @@ # sp_update_permissions -Stored procedure that reads user, group and role permission matrix from Amazon S3 and updates authorisation in Redshift accordingly. It helps startups and small to medium organisations that haven't integrated Amazon Redshift with an identity provider to streamline security measures and acceess control for their data warehouse built with Amazon Redshift. This SP can be used for bulk update of permissions for principals mentioned above, at schema, table, column and row level. +Stored procedure that reads user, group and role permission matrix from Amazon S3 and updates authorisation in Redshift accordingly. 
It helps startups and small to medium organisations that haven't integrated Amazon Redshift with an identity provider to streamline security measures and access control for their data warehouse built with Amazon Redshift. This SP can be used for bulk update of permissions for principals mentioned above, at schema, table, column and row level. It expects the file in delimited text format with following schema and "|" as delimiter:      id|operation|principal|principal_type|object_type|object_name|access_option diff --git a/stored-procedures/sp_update_permissions/test_scenarios.sql b/stored-procedures/sp_update_permissions/test_scenarios.sql index f57a4c5..35959d0 100644 --- a/stored-procedures/sp_update_permissions/test_scenarios.sql +++ b/stored-procedures/sp_update_permissions/test_scenarios.sql @@ -77,7 +77,7 @@ SELECT f_sql_greater (10,20); /*--- Test 4: REVOKE execute function permission from user1 & GRANT execute function permission to group1. Note that the same mechanism works for stored procedures as well. ---*/ -- Load user_access_details4.csv into S3 bucket, delete user_access_details3.csv and execute SP (as superuser) --- Expected result: user1 should NOT be able to execute function. user2 should be able to execute function, sinc ehe is member of group1 +-- Expected result: user1 should NOT be able to execute function. 
user2 should be able to execute function, since he is a member of group1 SET SESSION AUTHORIZATION 'user1'; SELECT f_sql_greater (10,20); @@ -86,7 +86,7 @@ SET SESSION AUTHORIZATION 'user2'; SELECT f_sql_greater (10,20); /*--- Test 5: GRANT access to all tables in schema & grant access with RBAC ---*/ --- First creatae role +-- First create role SET SESSION AUTHORIZATION '[superuser]'; CREATE ROLE role1; GRANT ROLE role1 to user1; diff --git a/stored-procedures/sp_vector_search/sp_vector_search.sql b/stored-procedures/sp_vector_search/sp_vector_search.sql index 15b9d48..eb26418 100644 --- a/stored-procedures/sp_vector_search/sp_vector_search.sql +++ b/stored-procedures/sp_vector_search/sp_vector_search.sql @@ -1,14 +1,14 @@ /********************************************************************************************** Purpose: Execute vector search leveraging vector indexes Notes: - This procedure is used to execute a vectorized searh of your query string. The procedure will create - and on-the-fly embedding using the LambdaUDF f_titan_embeding(varchar) and compare the result to your + This procedure is used to execute a vectorized search of your query string. The procedure will create + an on-the-fly embedding using the LambdaUDF f_titan_embedding(varchar) and compare the result to your K-Means clusters create using the stored procedure sp_vector_search. See the following article for more info: https://repost.aws/articles/ARPoweQIN2ROOXZiJAtSQvkQ/vector-search-with-amazon-redshift Parameters: tablename : The table which was the source of the data which contains the batch embeddings and K-Means clusters. - search : The texst you want to search + search : The text you want to search cnt : The number of results you want to return tmp_name : The name of the temp table that will be created to return your search results. 
@@ -22,7 +22,7 @@ Requirements: centroid SUPER, startts timestamp, endts timestamp, - interations int) DISTSTYLE ALL; + iterations int) DISTSTYLE ALL; CREATE TABLE $(tablename)_kmeans_clusters ( cluster int, "recordId" VARCHAR(15), diff --git a/stored-procedures/sp_vector_search_all/sp_vector_search_all.sql b/stored-procedures/sp_vector_search_all/sp_vector_search_all.sql index b5909cf..8716e95 100644 --- a/stored-procedures/sp_vector_search_all/sp_vector_search_all.sql +++ b/stored-procedures/sp_vector_search_all/sp_vector_search_all.sql @@ -1,14 +1,14 @@ /********************************************************************************************** Purpose: Execute vector search leveraging vector indexes Notes: - This procedure is used to execute a vectorized searh of your query string. The procedure will create - and on-the-fly embedding using the LambdaUDF f_titan_embeding(varchar) and compare the result to all + This procedure is used to execute a vectorized search of your query string. The procedure will create + an on-the-fly embedding using the LambdaUDF f_titan_embedding(varchar) and compare the result to all data in your $(tablename)_embeddings table. See the following article for more info: https://repost.aws/articles/ARPoweQIN2ROOXZiJAtSQvkQ/vector-search-with-amazon-redshift Parameters: tablename : The table which was the source of the data which contains the batch embeddings and K-Means clusters. - search : The texst you want to search + search : The text you want to search cnt : The number of results you want to return tmp_name : The name of the temp table that will be created to return your search results.