Skip to content
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ This script will test the UDF by

We would love your contributions. See the [contributing](contributing.md) page for more details on creating a fork of the project and a pull request of your contribution.

> Pull requests will be tested using a Github workflow which leverages the above testing scripts. Please execute these script prior to submitting a pull request to ensure the request is approved quickly. When executed in the test enviornment the [RedshiftRole](#redshift-role) will be defined as follows. You can create a similar role in your local environment for testing.
> Pull requests will be tested using a Github workflow which leverages the above testing scripts. Please execute these script prior to submitting a pull request to ensure the request is approved quickly. When executed in the test environment the [RedshiftRole](#redshift-role) will be defined as follows. You can create a similar role in your local environment for testing.

## Appendix

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ Resources:
except Exception as e:
print (str(e))
ret['error_msg'] = str(e)
ret['sucess'] = False
ret['sucess'] = True
ret['success'] = False
ret['success'] = True
ret['results'] = res
return json.dumps(ret)
Handler: index.lambda_handler
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ Arguments:
test
2015-10-15: created by Joe Harris (https://github.com/joeharris76)
*/
-- Converts a bitwise-packed BIGINT column into a delimited string of bits,
-- e.g. f_bitwise_to_delimited(5, 4, ',') -> '0,1,0,1'.
-- Inputs:
--   bitwise_column  - the packed integer value to expand
--   bits_in_column  - total number of bit positions to emit (zero-padded on the left)
--   delimiter       - single character placed between each bit
-- Returns: VARCHAR(512) of '0'/'1' characters separated by the delimiter.
-- NOTE(review): the diff paste had duplicated old/new lines here (misspelled
-- "delimter" variants); this is the corrected, deduplicated definition.
CREATE OR REPLACE FUNCTION f_bitwise_to_delimited(bitwise_column BIGINT, bits_in_column INT, delimiter CHAR(1))
RETURNS VARCHAR(512)
STABLE
AS $$
# Convert column to binary, strip "0b" prefix, pad out with zeroes
b = bin(bitwise_column)[2:].zfill(bits_in_column)
# Convert each character to a member of an array, join array into string using delimiter
o = delimiter.join([b[i:i+1] for i in range(0, len(b), 1)])
return o
$$ LANGUAGE plpythonu;
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ AS $$
try:
value_float = float(value)
except ValueError, e:
print('A problem occured with formatting, numeric value was expected.')
print('A problem occurred with formatting, numeric value was expected.')
raise(e)
try:
assert decimal_sep != group_sep
except AssertionError, e:
print('A problem occured with formatting, group and decimal separators should not be equal!')
print('A problem occurred with formatting, group and decimal separators should not be equal!')
raise(e)

if value < 0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Purpose:
inputs:
src - the table column which needs to be masked/unmasked
class - the classification of data, i.e. different class values may have different masking partial or full masking rules.
priv - the level of privilage allowed for this user. e.g. if
priv - the level of privilege allowed for this user. e.g. if
not supplied/null, function should return null
if 'N' - no masking, will return source value
if 'F' - the data should be fully masked
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ Purpose:
inputs:
src - the table column which needs to be masked/unmasked
class - the classification of data, i.e. different class values may have different masking partial or full masking rules.
priv - the level of privilage allowed for this user. e.g. if
priv - the level of privilege allowed for this user. e.g. if
not supplied/null, function should return null
if 'N' - no masking, will return source value
if 'F' - the data should be fully masked
if 'P' - the data should be partially masked

note:
this function is volitile and will fail on the test for full masking because it uses a RANDOM function, this is by design.
this function is volatile and will fail on the test for full masking because it uses a RANDOM function, this is by design.

2021-09-03: written by rjvgupta
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Purpose:
inputs:
src - the table column which needs to be masked/unmasked
class - the classification of data, i.e. different class values may have different masking partial or full masking rules.
priv - the level of privilage allowed for this user. e.g. if
priv - the level of privilege allowed for this user. e.g. if
not supplied/null, function should return null
if 'N' - no masking, will return source value
if 'F' - the data should be fully masked
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ BEGIN
schema_n := NVL(schema_name,'public');
-- Default to 1 percent
anlyz_pct := NVL(analyze_percent,1);
-- Generagte ANALYZE SQL
-- Generate ANALYZE SQL
anlyz_set := 'SET ANALYZE_THRESHOLD_PERCENT TO '||anlyz_pct::varchar||'; ';
SELECT INTO anlyz_sql 'ANALYZE '||n.nspname||'.'||c.relname||' ('||NVL(NVL(srtk.attname,dstk.attname),cols.attname)||');' AS sql
FROM pg_namespace n
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**********************************************************************************************
Purpose: Check the integrity of the FOREIGN KEY declared on a table and a column

Notes: If the foreign key dosen't have a primary key, it will raise a info.
Notes: If the foreign key doesn't have a primary key, it will raise an info message.

Parameters:
batch_time : Timestamp for this batch. Can be used to group multiple fixes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ BEGIN
EXECUTE 'INSERT INTO tmp_sp_fix_pk'||
' SELECT DISTINCT * FROM '||check_table||' WHERE ('||pk_columns||') IN (SELECT '||pk_columns||
' FROM '||check_table||' GROUP BY '||pk_columns||' HAVING COUNT(*) > 1)';
--Check that PK duplciates are removed in the temp table
--Check that PK duplicates are removed in the temp table
EXECUTE 'SELECT COUNT(*) FROM (SELECT '||pk_columns||
' FROM tmp_sp_fix_pk GROUP BY '||pk_columns||' HAVING COUNT(*) > 1)' INTO temp_count ;
IF temp_count > 0 THEN
Expand Down
6 changes: 3 additions & 3 deletions stored-procedures/sp_kmeans/sp_kmeans.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Purpose: Generate K-Means clusters from vector embeddings.
Notes:
This procedure is used to support vector search capabilities by creating K-Means clusters
generated and loaded into Redshift from embeddings; typically created by foundational models.
The ouput of this procedure will be the <tablename>_kmeans table containing the cluster & centroid
The output of this procedure will be the <tablename>_kmeans table containing the cluster & centroid
and a <tablename>_kmeans_clusters table containing the member records of the cluster.
See the following article for more info:
https://repost.aws/articles/ARPoweQIN2ROOXZiJAtSQvkQ/vector-search-with-amazon-redshift
Expand Down Expand Up @@ -38,7 +38,7 @@ BEGIN
-- create kmeans tables and choose random starting centroids
EXECUTE 'CREATE TABLE IF NOT EXISTS ' || tablename || '_kmeans (
cluster int, centroid SUPER, startts timestamp, endts timestamp,
interations int) DISTSTYLE ALL';
iterations int) DISTSTYLE ALL';
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change diverges from the posted article https://repost.aws/articles/ARPoweQIN2ROOXZiJAtSQvkQ/vector-search-with-amazon-redshift, so it would be nice if someone could fix the article.

That said, the token as written is meaningless and clarifying even only here will still help readers.

EXECUTE 'TRUNCATE TABLE ' || tablename || '_kmeans';

EXECUTE 'CREATE TABLE IF NOT EXISTS ' || tablename || '_kmeans_clusters (
Expand Down Expand Up @@ -89,7 +89,7 @@ BEGIN
' || tablename || '_kmeans m, m.centroid mv at mvi
where m.cluster = '|| cluster ||' and kvi = mvi' INTO similarity;
COMMIT;
EXECUTE 'UPDATE ' || tablename || '_kmeans SET centroid = (select centroid from #centroid), endts = CURRENT_TIMESTAMP, interations = '|| i ||' where cluster = ' || cluster;
EXECUTE 'UPDATE ' || tablename || '_kmeans SET centroid = (select centroid from #centroid), endts = CURRENT_TIMESTAMP, iterations = '|| i ||' where cluster = ' || cluster;
COMMIT;
i := i+1;
COMMIT;
Expand Down
2 changes: 1 addition & 1 deletion stored-procedures/sp_update_permissions/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# sp_update_permissions

Stored procedure that reads user, group and role permission matrix from Amazon S3 and updates authorisation in Redshift accordingly. It helps startups and small to medium organisations that haven't integrated Amazon Redshift with an identity provider to streamline security measures and acceess control for their data warehouse built with Amazon Redshift. This SP can be used for bulk update of permissions for principals mentioned above, at schema, table, column and row level.
Stored procedure that reads user, group and role permission matrix from Amazon S3 and updates authorisation in Redshift accordingly. It helps startups and small to medium organisations that haven't integrated Amazon Redshift with an identity provider to streamline security measures and access control for their data warehouse built with Amazon Redshift. This SP can be used for bulk update of permissions for principals mentioned above, at schema, table, column and row level.

It expects the file in delimited text format with following schema and "|" as delimiter:
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;id|operation|principal|principal_type|object_type|object_name|access_option
Expand Down
4 changes: 2 additions & 2 deletions stored-procedures/sp_update_permissions/test_scenarios.sql
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ SELECT f_sql_greater (10,20);

/*--- Test 4: REVOKE execute function permission from user1 & GRANT execute function permission to group1. Note that the same mechanism works for stored procedures as well. ---*/
-- Load user_access_details4.csv into S3 bucket, delete user_access_details3.csv and execute SP (as superuser)
-- Expected result: user1 should NOT be able to execute function. user2 should be able to execute function, sinc ehe is member of group1
-- Expected result: user1 should NOT be able to execute function. user2 should be able to execute function, since he is a member of group1

SET SESSION AUTHORIZATION 'user1';
SELECT f_sql_greater (10,20);
Expand All @@ -86,7 +86,7 @@ SET SESSION AUTHORIZATION 'user2';
SELECT f_sql_greater (10,20);

/*--- Test 5: GRANT access to all tables in schema & grant access with RBAC ---*/
-- First creatae role
-- First create role
SET SESSION AUTHORIZATION '[superuser]';
CREATE ROLE role1;
GRANT ROLE role1 to user1;
Expand Down
8 changes: 4 additions & 4 deletions stored-procedures/sp_vector_search/sp_vector_search.sql
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
/**********************************************************************************************
Purpose: Execute vector search leveraging vector indexes
Notes:
This procedure is used to execute a vectorized searh of your query string. The procedure will create
and on-the-fly embedding using the LambdaUDF f_titan_embeding(varchar) and compare the result to your
This procedure is used to execute a vectorized search of your query string. The procedure will create
an on-the-fly embedding using the LambdaUDF f_titan_embedding(varchar) and compare the result to your
K-Means clusters created using the stored procedure sp_kmeans. See the following article for more info:
https://repost.aws/articles/ARPoweQIN2ROOXZiJAtSQvkQ/vector-search-with-amazon-redshift

Parameters:
tablename : The table which was the source of the data which contains the batch embeddings and K-Means clusters.
search : The texst you want to search
search : The text you want to search
cnt : The number of results you want to return
tmp_name : The name of the temp table that will be created to return your search results.

Expand All @@ -22,7 +22,7 @@ Requirements:
centroid SUPER,
startts timestamp,
endts timestamp,
interations int) DISTSTYLE ALL;
iterations int) DISTSTYLE ALL;
CREATE TABLE $(tablename)_kmeans_clusters
( cluster int,
"recordId" VARCHAR(15),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
/**********************************************************************************************
Purpose: Execute vector search leveraging vector indexes
Notes:
This procedure is used to execute a vectorized searh of your query string. The procedure will create
and on-the-fly embedding using the LambdaUDF f_titan_embeding(varchar) and compare the result to all
This procedure is used to execute a vectorized search of your query string. The procedure will create
and on-the-fly embedding using the LambdaUDF f_titan_embedding(varchar) and compare the result to all
data in your $(tablename)_embeddings table. See the following article for more info:
https://repost.aws/articles/ARPoweQIN2ROOXZiJAtSQvkQ/vector-search-with-amazon-redshift

Parameters:
tablename : The table which was the source of the data which contains the batch embeddings and K-Means clusters.
search : The texst you want to search
search : The text you want to search
cnt : The number of results you want to return
tmp_name : The name of the temp table that will be created to return your search results.

Expand Down