Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

if sys.version_info < (3, 8):
raise RuntimeError("skyflow requires Python 3.8+")
current_version = '2.0.0b6'
current_version = '2.0.0b6.dev0+c3095a9'

setup(
name='skyflow',
Expand Down
3 changes: 2 additions & 1 deletion skyflow/utils/_skyflow_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ class Error(Enum):
INVALID_PLAIN_TEXT_ENTITIES_IN_REIDENTIFY= f"{error_prefix} Validation error. The plainTextEntities field must be an array of DetectEntities enums. Specify a valid plainTextEntities."

INVALID_DEIDENTIFY_FILE_REQUEST= f"{error_prefix} Validation error. Invalid deidentify file request. Specify a valid deidentify file request."
INVALID_DEIDENTIFY_FILE_INPUT= f"{error_prefix} Validation error. Invalid deidentify file input. Please provide either a file or a file path."
EMPTY_FILE_OBJECT= f"{error_prefix} Validation error. File object cannot be empty. Specify a valid file object."
INVALID_FILE_FORMAT= f"{error_prefix} Validation error. Invalid file format. Specify a valid file format."
MISSING_FILE_SOURCE= f"{error_prefix} Validation error. Provide exactly one of filePath, base64, or fileObject."
Expand Down Expand Up @@ -197,7 +198,7 @@ class Error(Enum):
INVALID_FILE_OR_ENCODED_FILE= f"{error_prefix} . Error while decoding base64 and saving file"
INVALID_FILE_TYPE = f"{error_prefix} Validation error. Invalid file type. Specify a valid file type."
INVALID_FILE_NAME= f"{error_prefix} Validation error. Invalid file name. Specify a valid file name."
FILE_READ_ERROR= f"{error_prefix} Validation error. Unable to read file. Verify the file path."
INVALID_DEIDENTIFY_FILE_PATH= f"{error_prefix} Validation error. Invalid file path. Specify a valid file path."
INVALID_BASE64_HEADER= f"{error_prefix} Validation error. Invalid base64 header. Specify a valid base64 header."
INVALID_WAIT_TIME= f"{error_prefix} Validation error. Invalid wait time. Specify a valid wait time as number and should not be greater than 64 secs."
INVALID_OUTPUT_DIRECTORY= f"{error_prefix} Validation error. Invalid output directory. Specify a valid output directory as string."
Expand Down
18 changes: 7 additions & 11 deletions skyflow/utils/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,6 @@ def get_metrics():
}
return details_dic


def parse_insert_response(api_response, continue_on_error):
# Retrieve the headers and data from the API response
api_response_headers = api_response.headers
Expand Down Expand Up @@ -239,13 +238,13 @@ def parse_insert_response(api_response, continue_on_error):
error = {
'request_index': idx,
'request_id': request_id,
'error': response['Body']['error']
'error': response['Body']['error'],
'http_code': response['Status'],
}
errors.append(error)

insert_response.inserted_fields = inserted_fields
insert_response.errors = errors

insert_response.errors = errors if len(errors) > 0 else None
else:
for record in api_response_data.records:
field_data = {
Expand All @@ -257,6 +256,7 @@ def parse_insert_response(api_response, continue_on_error):

inserted_fields.append(field_data)
insert_response.inserted_fields = inserted_fields
insert_response.errors = None

return insert_response

Expand All @@ -275,21 +275,17 @@ def parse_delete_response(api_response: V1BulkDeleteRecordResponse):
delete_response = DeleteResponse()
deleted_ids = api_response.record_id_response
delete_response.deleted_ids = deleted_ids
delete_response.errors = []
delete_response.errors = None
return delete_response


def parse_get_response(api_response: V1BulkGetRecordResponse):
get_response = GetResponse()
data = []
errors = []
for record in api_response.records:
field_data = {field: value for field, value in record.fields.items()}
data.append(field_data)

get_response.data = data
get_response.errors = errors

return get_response

def parse_detokenize_response(api_response: HttpResponse[V1DetokenizeResponse]):
Expand Down Expand Up @@ -320,7 +316,7 @@ def parse_detokenize_response(api_response: HttpResponse[V1DetokenizeResponse]):
errors = errors
detokenize_response = DetokenizeResponse()
detokenize_response.detokenized_fields = detokenized_fields
detokenize_response.errors = errors
detokenize_response.errors = errors if len(errors) > 0 else None

return detokenize_response

Expand Down Expand Up @@ -357,7 +353,7 @@ def parse_invoke_connection_response(api_response: requests.Response):
if 'x-request-id' in api_response.headers:
metadata['request_id'] = api_response.headers['x-request-id']

return InvokeConnectionResponse(data=data, metadata=metadata)
return InvokeConnectionResponse(data=data, metadata=metadata, errors=None)
except Exception as e:
raise SkyflowError(SkyflowMessages.Error.RESPONSE_NOT_JSON.value.format(content), status_code)
except HTTPError:
Expand Down
2 changes: 1 addition & 1 deletion skyflow/utils/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
SDK_VERSION = '2.0.0b6'
SDK_VERSION = '2.0.0b6.dev0+c3095a9'
34 changes: 34 additions & 0 deletions skyflow/utils/validations/_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from skyflow.utils.logger import log_info, log_error_log
from skyflow.vault.detect import DeidentifyTextRequest, ReidentifyTextRequest, TokenFormat, Transformations, \
GetDetectRunRequest, Bleep, DeidentifyFileRequest
from skyflow.vault.detect._file_input import FileInput

valid_vault_config_keys = ["vault_id", "cluster_id", "credentials", "env"]
valid_connection_config_keys = ["connection_id", "connection_url", "credentials"]
Expand Down Expand Up @@ -257,9 +258,42 @@ def validate_update_connection_config(logger, config):

return True

def validate_file_from_request(file_input: FileInput):
if file_input is None:
raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_INPUT.value, invalid_input_error_code)

has_file = hasattr(file_input, 'file') and file_input.file is not None
has_file_path = hasattr(file_input, 'file_path') and file_input.file_path is not None

# Must provide exactly one of file or file_path
if (has_file and has_file_path) or (not has_file and not has_file_path):
raise SkyflowError(SkyflowMessages.Error.INVALID_DEIDENTIFY_FILE_INPUT.value, invalid_input_error_code)

if has_file:
file = file_input.file
# Validate file object has required attributes
if not hasattr(file, 'name') or not isinstance(file.name, str) or not file.name.strip():
raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_TYPE.value, invalid_input_error_code)

# Validate file name
file_name = os.path.splitext(file.name)[0]
if not file_name or not file_name.strip():
raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_NAME.value, invalid_input_error_code)

elif has_file_path:
file_path = file_input.file_path
if not isinstance(file_path, str) or not file_path.strip():
raise SkyflowError(SkyflowMessages.Error.INVALID_DEIDENTIFY_FILE_PATH.value, invalid_input_error_code)

if not os.path.exists(file_path) or not os.path.isfile(file_path):
raise SkyflowError(SkyflowMessages.Error.INVALID_DEIDENTIFY_FILE_PATH.value, invalid_input_error_code)

def validate_deidentify_file_request(logger, request: DeidentifyFileRequest):
if not hasattr(request, 'file') or request.file is None:
raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_INPUT.value, invalid_input_error_code)

# Validate file input first
validate_file_from_request(request.file)

# Optional: entities
if hasattr(request, 'entities') and request.entities is not None:
Expand Down
5 changes: 3 additions & 2 deletions skyflow/vault/connection/_invoke_connection_response.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
class InvokeConnectionResponse:
def __init__(self, data=None, metadata=None):
def __init__(self, data=None, metadata=None, errors=None):
self.data = data
self.metadata = metadata if metadata else {}
self.errors = errors if errors else None

def __repr__(self):
return f"ConnectionResponse('data'={self.data},'metadata'={self.metadata})"
return f"ConnectionResponse('data'={self.data},'metadata'={self.metadata}), 'errors'={self.errors})"

def __str__(self):
return self.__repr__()
36 changes: 30 additions & 6 deletions skyflow/vault/controller/_detect.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import io
import json
import os
from skyflow.error import SkyflowError
Expand All @@ -20,6 +21,7 @@
from skyflow.vault.detect import DeidentifyTextRequest, DeidentifyTextResponse, ReidentifyTextRequest, \
ReidentifyTextResponse, DeidentifyFileRequest, DeidentifyFileResponse, GetDetectRunRequest


class Detect:
def __init__(self, vault_client):
self.__vault_client = vault_client
Expand Down Expand Up @@ -124,10 +126,22 @@ def output_to_dict_list(output):
word_count = getattr(word_character_count, "word_count", None)
char_count = getattr(word_character_count, "character_count", None)

base64_string = first_output.get("file", None)
extension = first_output.get("extension", None)

file_obj = None
if base64_string is not None:
file_bytes = base64.b64decode(base64_string)
file_obj = io.BytesIO(file_bytes)
file_obj.name = f"deidentified.{extension}" if extension else "processed_file"
else:
file_obj = None

return DeidentifyFileResponse(
file=first_output.get("file", None),
file_base64=base64_string,
file=file_obj, # File class will be instantiated in DeidentifyFileResponse
type=first_output.get("type", None),
extension=first_output.get("extension", None),
extension=extension,
word_count=word_count,
char_count=char_count,
size_in_kb=size,
Expand All @@ -137,7 +151,7 @@ def output_to_dict_list(output):
entities=entities,
run_id=run_id_val,
status=status_val,
errors=[]
errors=None
)

def __get_token_format(self, request):
Expand Down Expand Up @@ -216,16 +230,26 @@ def reidentify_text(self, request: ReidentifyTextRequest) -> ReidentifyTextRespo
log_error_log(SkyflowMessages.ErrorLogs.REIDENTIFY_TEXT_REQUEST_REJECTED.value, self.__vault_client.get_logger())
handle_exception(e, self.__vault_client.get_logger())

def __get_file_from_request(self, request: DeidentifyFileRequest):
file_input = request.file

# Check for file
if hasattr(file_input, 'file') and file_input.file is not None:
return file_input.file

# Check for file_path if file is not provided
if hasattr(file_input, 'file_path') and file_input.file_path is not None:
return open(file_input.file_path, 'rb')

def deidentify_file(self, request: DeidentifyFileRequest):
log_info(SkyflowMessages.Info.DETECT_FILE_TRIGGERED.value, self.__vault_client.get_logger())
validate_deidentify_file_request(self.__vault_client.get_logger(), request)
self.__initialize()
files_api = self.__vault_client.get_detect_file_api().with_raw_response
file_obj = request.file
file_obj = self.__get_file_from_request(request)
file_name = getattr(file_obj, 'name', None)
file_extension = self._get_file_extension(file_name) if file_name else None
file_content = file_obj.read()

base64_string = base64.b64encode(file_content).decode('utf-8')

try:
Expand Down Expand Up @@ -375,7 +399,7 @@ def deidentify_file(self, request: DeidentifyFileRequest):
file_name_only = 'processed-'+os.path.basename(file_name)
output_file_path = f"{request.output_directory}/{file_name_only}"
with open(output_file_path, 'wb') as output_file:
output_file.write(base64.b64decode(parsed_response.file))
output_file.write(base64.b64decode(parsed_response.file_base64))
log_info(SkyflowMessages.Info.DETECT_FILE_SUCCESS.value, self.__vault_client.get_logger())
return parsed_response

Expand Down
2 changes: 0 additions & 2 deletions skyflow/vault/data/_insert_response.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
class InsertResponse:
def __init__(self, inserted_fields = None, errors=None):
if errors is None:
errors = list()
self.inserted_fields = inserted_fields
self.errors = errors

Expand Down
2 changes: 1 addition & 1 deletion skyflow/vault/data/_query_response.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
class QueryResponse:
def __init__(self):
self.fields = []
self.errors = []
self.errors = None

def __repr__(self):
return f"QueryResponse(fields={self.fields}, errors={self.errors})"
Expand Down
2 changes: 1 addition & 1 deletion skyflow/vault/data/_update_response.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
class UpdateResponse:
def __init__(self, updated_field = None, errors=None):
self.updated_field = updated_field
self.errors = errors if errors is not None else []
self.errors = errors

def __repr__(self):
return f"UpdateResponse(updated_field={self.updated_field}, errors={self.errors})"
Expand Down
3 changes: 2 additions & 1 deletion skyflow/vault/detect/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@
from ._deidentify_file_request import DeidentifyFileRequest
from ._audio_bleep import Bleep
from ._deidentify_file_response import DeidentifyFileResponse
from ._get_detect_run_request import GetDetectRunRequest
from ._get_detect_run_request import GetDetectRunRequest
from ._file_input import FileInput
3 changes: 2 additions & 1 deletion skyflow/vault/detect/_deidentify_file_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from skyflow.vault.detect import TokenFormat, Transformations
from skyflow.vault.detect._audio_bleep import Bleep
from skyflow.utils.enums import MaskingMethod, DetectOutputTranscriptions
from skyflow.vault.detect._file_input import FileInput

class DeidentifyFileRequest:
def __init__(
Expand All @@ -24,7 +25,7 @@ def __init__(
output_directory: Optional[str] = None,
wait_time: Optional[Union[int, float]] = None
):
self.file: object = file
self.file: FileInput = file
self.entities: Optional[List[DetectEntities]] = entities
self.allow_regex_list: Optional[List[str]] = allow_regex_list
self.restrict_regex_list: Optional[List[str]] = restrict_regex_list
Expand Down
23 changes: 14 additions & 9 deletions skyflow/vault/detect/_deidentify_file_response.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import io
from skyflow.vault.detect._file import File

class DeidentifyFileResponse:
def __init__(
self,
file: str = None,
file_base64: str = None,
file: io.BytesIO = None,
type: str = None,
extension: str = None,
word_count: int = None,
Expand All @@ -13,9 +17,10 @@ def __init__(
entities: list = None, # list of dicts with keys 'file' and 'extension'
run_id: str = None,
status: str = None,
errors: list = [],
errors: list = None,
):
self.file = file
self.file_base64 = file_base64
self.file = File(file) if file else None
self.type = type
self.extension = extension
self.word_count = word_count
Expand All @@ -32,12 +37,12 @@ def __init__(
def __repr__(self):
return (
f"DeidentifyFileResponse("
f"file={self.file!r}, type={self.type!r}, extension={self.extension!r}, "
f"word_count={self.word_count!r}, char_count={self.char_count!r}, "
f"size_in_kb={self.size_in_kb!r}, duration_in_seconds={self.duration_in_seconds!r}, "
f"page_count={self.page_count!r}, slide_count={self.slide_count!r}, "
f"entities={self.entities!r}, run_id={self.run_id!r}, status={self.status!r}),"
f"errors={self.errors!r})"
f"file_base64={self.file_base64!r}, file={self.file!r}, type={self.type!r}, "
f"extension={self.extension!r}, word_count={self.word_count!r}, "
f"char_count={self.char_count!r}, size_in_kb={self.size_in_kb!r}, "
f"duration_in_seconds={self.duration_in_seconds!r}, page_count={self.page_count!r}, "
f"slide_count={self.slide_count!r}, entities={self.entities!r}, "
f"run_id={self.run_id!r}, status={self.status!r}, errors={self.errors!r})"
)

def __str__(self):
Expand Down
Loading
Loading