From dd329e31b56a720317d3d7751a0c335d0e2647ab Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Fri, 24 Oct 2025 17:54:31 +0200 Subject: [PATCH 01/12] Oracle add-on --- .../labs/lakebridge/assessments/_constants.py | 4 +- .../assessments/configure_assessment.py | 32 +++++ .../connections/database_manager.py | 19 +++ .../resources/assessments/oracle/__init__.py | 0 .../assessments/oracle/config_containers.sql | 8 ++ .../assessments/oracle/config_db_features.sql | 35 +++++ .../assessments/oracle/config_instance.sql | 8 ++ .../assessments/oracle/config_memory_evol.sql | 34 +++++ .../assessments/oracle/config_pdb_objects.sql | 16 +++ .../oracle/config_pdb_partitions.sql | 28 ++++ .../assessments/oracle/config_storage.sql | 44 ++++++ .../assessments/oracle/perf_cpu_waits.sql | 29 ++++ .../oracle/perf_fgd_session_evol.sql | 37 +++++ .../resources/assessments/oracle/perf_hm.sql | 132 ++++++++++++++++++ .../assessments/oracle/perf_hm_raw.sql | 26 ++++ .../assessments/oracle/perf_sqltext.sql | 69 +++++++++ .../assessments/oracle/pipeline_config.yml | 12 ++ .../assessments/oracle/spoolhead.sql | 15 ++ 18 files changed, 547 insertions(+), 1 deletion(-) create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/__init__.py create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evol.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage.sql create mode 100644 
src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evol.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm_raw.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/spoolhead.sql diff --git a/src/databricks/labs/lakebridge/assessments/_constants.py b/src/databricks/labs/lakebridge/assessments/_constants.py index 4f81c38859..9ce50c6b72 100644 --- a/src/databricks/labs/lakebridge/assessments/_constants.py +++ b/src/databricks/labs/lakebridge/assessments/_constants.py @@ -5,10 +5,11 @@ PLATFORM_TO_SOURCE_TECHNOLOGY_CFG = { "synapse": "src/databricks/labs/lakebridge/resources/assessments/synapse/pipeline_config.yml", + "oracle": "src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml", } # TODO modify this PLATFORM_TO_SOURCE_TECHNOLOGY.keys() once all platforms are supported -PROFILER_SOURCE_SYSTEM = ["mssql", "synapse"] +PROFILER_SOURCE_SYSTEM = ["mssql", "synapse","oracle"] # This flag indicates whether a connector is required for the source system when pipeline is trigger @@ -18,4 +19,5 @@ CONNECTOR_REQUIRED = { "synapse": False, "mssql": True, + "oracle": True, } diff --git a/src/databricks/labs/lakebridge/assessments/configure_assessment.py b/src/databricks/labs/lakebridge/assessments/configure_assessment.py index 0da1c28efa..cbfbd72df1 100644 --- a/src/databricks/labs/lakebridge/assessments/configure_assessment.py +++ b/src/databricks/labs/lakebridge/assessments/configure_assessment.py @@ -72,6 +72,37 @@ def run(self): self._test_connection(source, cred_manager) 
logger.info(f"{source.capitalize()} Assessment Configuration Completed") +class ConfigureOracleAssessment(AssessmentConfigurator): + """Oracle specific assessment configuration.""" + + def _configure_credentials(self) -> str: + cred_file = self._credential_file + source = self._source_name + + logger.info( + "\n(local | env) \nlocal means values are read as plain text \nenv means values are read " + "from environment variables fall back to plain text if not variable is not found\n", + ) + secret_vault_type = str(self.prompts.choice("Enter secret vault type (local | env)", ["local", "env"])).lower() + secret_vault_name = None + + logger.info("Please refer to the documentation to understand the difference between local and env.") + + credential = { + "secret_vault_type": secret_vault_type, + "secret_vault_name": secret_vault_name, + source: { + "host": self.prompts.question("Enter the host details (Server name, IP address, SCAN Name)"), + "tnsPort": int(self.prompts.question("Enter the TNS Listener port number", default=1521, valid_number=True)), + "tnsService": self.prompts.question("Enter the TNS service name as registered in the Oracle listener", default="orcl"), + "user": self.prompts.question("Enter user name with system privileges", default="SYSTEM"), + "password": self.prompts.password("Enter user password"), + }, + } + + _save_to_disk(credential, cred_file) + logger.info(f"Credential template created for {source}.") + return source class ConfigureSqlServerAssessment(AssessmentConfigurator): """SQL Server specific assessment configuration.""" @@ -186,6 +217,7 @@ def create_assessment_configurator( configurators = { "mssql": ConfigureSqlServerAssessment, "synapse": ConfigureSynapseAssessment, + "oracle": ConfigureOracleAssessment, } if source_system not in configurators: diff --git a/src/databricks/labs/lakebridge/connections/database_manager.py b/src/databricks/labs/lakebridge/connections/database_manager.py index 501adcd866..2dd6193fc5 100644 --- 
a/src/databricks/labs/lakebridge/connections/database_manager.py +++ b/src/databricks/labs/lakebridge/connections/database_manager.py @@ -53,6 +53,7 @@ def _create_connector(db_type: str, config: dict[str, Any]) -> DatabaseConnector "snowflake": SnowflakeConnector, "mssql": MSSQLConnector, "tsql": MSSQLConnector, + "oracle": OracleConnector, } connector_class = connectors.get(db_type.lower()) @@ -67,6 +68,24 @@ class SnowflakeConnector(_BaseConnector): def _connect(self) -> Engine: raise NotImplementedError("Snowflake connector not implemented") +class OracleConnector(_BaseConnector): + + def _connect(self) -> Engine: + def _connect(self) -> Engine: + raise NotImplementedError("Oracle connector not implemented") + + # db_name = self.config.get('tnsService') + # + # connection_string = URL.create( + # drivername="oracle+oracledb", + # username=self.config['user'], + # password=self.config['password'], + # host=self.config['host'], + # port=self.config.get('tnsPort', 1521), + # database=db_name + # ) + # return create_engine(connection_string) + class MSSQLConnector(_BaseConnector): def _connect(self) -> Engine: diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/__init__.py b/src/databricks/labs/lakebridge/resources/assessments/oracle/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql new file mode 100644 index 0000000000..850803214a --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql @@ -0,0 +1,8 @@ +-- Has to be executed on CDB +@spoolhead.sql +-- spool results/config_containers.csv +select listagg(inst_id, ',') within group (order by name) as inst_ids,name,open_mode,pdb_count from gv$containers group by name,open_mode,pdb_count +/ +spool off +exit + diff --git 
a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features.sql new file mode 100644 index 0000000000..8ee0c98ca0 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features.sql @@ -0,0 +1,35 @@ +-- Has to be executed on CDB +@./spoolhead.sql +-- spool results/config_db_features.csv +with t as (select version from product_component_version where product like 'Oracle Database%'), + inst_cnt as (select count(*) as cnt from gv$instance), + pdb_cnt as (select count(distinct con_id ) as cnt from gv$pdbs where name!='PDB$SEED'), + pdb_namelist as (select listagg(name,',') within group (order by name) as pdbnames from (select distinct name from gv$pdbs where name !='PDB$SEED')), +-- select listagg(distinct name,',') as pdbnames from gv$pdbs where name !='PDB$SEED'), + cpu_cores_global as (select 'CLUSTER' as scope, null , stat_name,'CPU GLOBAL (Cluster): '||stat_name as detailed_stat_name,sum(to_number(value)) as value + from gv$osstat + where stat_name in ('NUM_CPUS','NUM_CPU_CORES','NUM_CPU_SOCKETS') + group by stat_name), + cpu_cores_details as (select 'INSTANCE' as scope,inst_id, stat_name,'CPU per Instance Id: '||inst_id||' - '||stat_name as detailed_stat_name, to_number(value) as value + from gv$osstat + where stat_name in ('NUM_CPUS','NUM_CPU_CORES','NUM_CPU_SOCKETS') + order by inst_id,stat_name) +select * from +( +select null as scope, null as inst_id, null as stat_name, 'VERSION' as name,to_char(t.version) as value from t +union +select null as scope, null as inst_id, null as stat_name, 'INSTANCE COUNT', to_char(inst_cnt.cnt) from inst_cnt +union +select null as scope, null as inst_id, null as stat_name, 'PDB COUNT',to_char(pdb_cnt.cnt) from pdb_cnt +union +select null as scope, null as inst_id, null as stat_name, 'PDB LIST', to_char(pdb_namelist.pdbnames) from pdb_namelist +union +select scope, null as inst_id, 
stat_name,detailed_stat_name, to_char(value) from cpu_cores_global +union +select scope, inst_id, stat_name, detailed_stat_name, to_char(value) from cpu_cores_details +) +order by name +/ +spool off +exit + diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance.sql new file mode 100644 index 0000000000..1ae0799609 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance.sql @@ -0,0 +1,8 @@ +-- Has to be executed on CDB +@./spoolhead.sql +-- spool results/config_instance.csv +select inst_id,instance_name,version,database_type from gv$instance +/ +spool off +exit + diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evol.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evol.sql new file mode 100644 index 0000000000..a098aaa235 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evol.sql @@ -0,0 +1,34 @@ +@./spoolhead.sql +-- spool results/config_memory_evolution.csv + +select NVL(con.name,'Entire CDB/Non CDB') con_name, + param.instance_number, + to_char(snap.snap_time,'yyyy-mm-dd HH24:MI:SS') as snap_time, + parameter_name, + value +from cdb_hist_parameter param, +(select con_id,name from v$containers) con, +(select con_id,snap_id,instance_number,begin_interval_time snap_time from cdb_hist_snapshot) snap +where 1=1 +and param.snap_id=snap.snap_id +and param.con_id=con.con_id(+) +and param.instance_number=snap.instance_number +and param.parameter_name in ('sga_target', + 'pga_aggregate_target', + 'db_cache_size', + 'shared_pool_size', + 'large_pool_size', + 'java_pool_size', + 'streams_pool_size', + 'db_16k_cache_size', + 'db_2k_cache_size', + 'db_32k_cache_size', + 'db_4k_cache_size', + 'db_8k_cache_size', + 'memory_target', + 'memory_max_target') +order by 1,3,2 +/ + +spool off +exit diff --git 
a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects.sql new file mode 100644 index 0000000000..1ff1420bd0 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects.sql @@ -0,0 +1,16 @@ +-- Has to be executed on CDB +@./spoolhead.sql +-- spool results/config_pdb_objects.csv + +SELECT cont.NAME as PDB_NAME,OWNER,OBJECT_TYPE,COUNT(*) as CNT +FROM CDB_OBJECTS o, + (select distinct con_id as con_id, name from gv$containers) cont +WHERE o.CON_ID=cont.con_id +AND OWNER in (select username from cdb_users where oracle_maintained='N' and cont.con_id=con_id) +GROUP BY cont.name,OWNER,OBJECT_TYPE +ORDER BY 1,2 +/ + +spool off +exit + diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions.sql new file mode 100644 index 0000000000..451fcfe9d2 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions.sql @@ -0,0 +1,28 @@ +-- Has to be executed on CDB +@./spoolhead.sql +-- spool results/config_pdb_partitions.csv + +SELECT cont.NAME as PDB_NAME,OWNER,OBJECT_TYPE,CNT +from +( + select owner,con_id,'TABLE (NON PARTITIONED)' as OBJECT_TYPE,count(*) as cnt from cdb_tables where partitioned='NO' group by owner,con_id + union + select owner,con_id,'TABLE (PARTITIONED)',count(*) as cnt from cdb_tables where partitioned='YES' group by owner,con_id + union + select owner,con_id,'INDEX (NON PARTITIONED)',count(*) as cnt from cdb_indexes where partitioned='NO' group by owner,con_id + union + select owner,con_id,'INDEX (PARTITIONED)',count(*) as cnt from cdb_indexes where partitioned='YES' group by owner,con_id + union + select owner,con_id,'LOBS (NON PARTITIONED)',count(*) as cnt from cdb_lobs where partitioned='NO' group by owner,con_id + union + select 
owner,con_id,'LOBS (PARTITIONED)',count(*) as cnt from cdb_lobs where partitioned='YES' group by owner,con_id +) u, +(select distinct con_id as con_id, name from gv$containers) cont +WHERE u.CON_ID=cont.con_id +AND OWNER in (select username from cdb_users where oracle_maintained='N' and cont.con_id=u.con_id) +ORDER BY 1,2 +/ + +spool off +exit + diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage.sql new file mode 100644 index 0000000000..4c95bd1a22 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage.sql @@ -0,0 +1,44 @@ +@./spoolhead.sql +-- results/config_storage.csv +col tablespace_type for a20 + +select con_name, +-- sub.tablespace_name, + case + when tablespace_name in ('SYSTEM','SYSAUX') then 'SYSTEM' + when tablespace_name in (select tablespace_name from cdb_tablespaces where contents='UNDO' and con_id=sub.con_id) then 'UNDO' + -- when tablespace_name in (select tablespace_name from cdb_tablespaces where contents='TEMPORARY' and con_id=sub.con_id) then 'TEMP' + ELSE 'USER_DATA' + end as tablespace_type, + sum(gb) gb, + sum(freegb) freegb, + sum(maxgb) maxgb +from +( + select c.name as con_name,c.con_id, + f.tablespace_name, + f.bytes/1024/1024 mb, f.bytes/1024/1024/1024 gb, + t.free_bytes/1024/1024 freemb, t.free_bytes/1024/1024/1024 freegb, + f.maxbytes/1024/1024 maxmb, f.maxbytes/1024/1024/1024 maxgb + from + (select con_id,tablespace_name,bytes,maxbytes from cdb_data_files ) f, + (select con_id,tablespace_name,sum(bytes) free_bytes from cdb_free_space group by con_id,tablespace_name ) t, + (select distinct con_id,name from gv$containers) c + where 1=1 + and t.con_id=f.con_id + and t.con_id=c.con_id + and t.tablespace_name=f.tablespace_name +) sub +group by con_name, +--tablespace_name + case + when tablespace_name in ('SYSTEM','SYSAUX') then 'SYSTEM' + when tablespace_name in (select 
tablespace_name from cdb_tablespaces where contents='UNDO' and con_id=sub.con_id) then 'UNDO' + -- when tablespace_name in (select tablespace_name from cdb_tablespaces where contents='TEMPORARY' and con_id=sub.con_id) then 'TEMP' + ELSE 'USER_DATA' + end +order by 1 +/ +spool off +exit + diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql new file mode 100644 index 0000000000..cdc2442412 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql @@ -0,0 +1,29 @@ + -- Has to be executed on CDB +@./spoolhead.sql +-- spool results/perf_cpu_waits.csv +select cont.name as pdb_name, + ash.instance_number, + ash.mtime, + ash.event, + ash.wait_class, + ash.total_wait_time +from (SELECT instance_number,con_id,CON_DBID, + TO_CHAR(sample_time,'YYYY-MM-DD HH24') mtime, + NVL(a.event, 'ON CPU') AS event, + NVL(a.wait_class, 'ON CPU') AS wait_class, + COUNT(*)*10 AS total_wait_time + FROM cdb_hist_active_sess_history a + GROUP BY instance_number, + con_id, CON_DBID, + TO_CHAR(sample_time,'YYYY-MM-DD HH24'), + a.event, + a.wait_class + ) ash, + (select distinct con_id,name,dbid from gv$containers) cont +where cont.con_id=ash.con_id + and cont.dbid=ash.CON_DBID +ORDER BY pdb_name,mtime +/ +spool off +exit + diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evol.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evol.sql new file mode 100644 index 0000000000..d963877e49 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evol.sql @@ -0,0 +1,37 @@ +@./spoolhead.sql +-- spool results/perf_fgd_session_evol.csv + +select con.name, + sh.instance_number,u.username, + to_char(sh.sample_time,'YYYY-MM-DD HH24:MI') as snap_time, + count(distinct sh.session_id||','||sh.session_serial#) as foregd_session_cnt +from 
cdb_hist_active_sess_history sh, + ( + select con_id, + dbid, + name + from v$containers + where name != 'PDB$SEED' + ) con, + (select distinct user_id,username + from cdb_users + where username not in ('SYS','SYSTEM','XS$NULL','OJVMSYS','LBACSYS','OUTLN','SYS$UMF','DBSNMP','APPQOSSYS','DBSFWUSER','GGSYS','ANONYMOUS','CTXSYS','DVF','DVSYS','GSMADMIN_INTERNAL','MDSYS','OLAPSYS','XDB','WMSYS', +'GSMCATUSER','MDDATA','REMOTE_SCHEDULER_AGENT','SYSBACKUP','GSMUSER','GSMROOTUSER','SYSRAC','SI_INFORMTN_SCHEMA','AUDSYS','DIP','ORDPLUGINS','ORDDATA','SYSKM','ORACLE_OCM','ORDSYS','SYSDG',' +SYS','SYSTEM','XS$NULL','LBACSYS','OUTLN','DBSNMP','APPQOSSYS','DBSFWUSER','GGSYS','ANONYMOUS','CTXSYS','DVF','DVSYS','GSMADMIN_INTERNAL','MDSYS','OLAPSYS','XDB','WMSYS','GSMCATUSER','MDDATA +','REMOTE_SCHEDULER_AGENT','SYSBACKUP','GSMUSER','SYSRAC','OJVMSYS','SI_INFORMTN_SCHEMA','AUDSYS','DIP','ORDPLUGINS','ORDDATA','SYSKM','ORACLE_OCM','SYS$UMF','ORDSYS','SYSDG') + ) u +where 1 = 1 + and sh.con_id = con.con_id + and sh.dbid=con.dbid + and u.user_id=sh.user_id + and sh.session_type = 'FOREGROUND' +group by con.name, + sh.instance_number, + to_char(sh.sample_time,'YYYY-MM-DD HH24:MI'), + u.username +order by 1, 4, 2 +/ + +spool off +exit + diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm.sql new file mode 100644 index 0000000000..1045f88f85 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm.sql @@ -0,0 +1,132 @@ +@./spoolhead.sql +-- spool results/perf_hm.csv + +SELECT TO_CHAR(mtime,'YYYY/MM/DD') mtime, + pdb_name, + instance_number, + TO_CHAR(mtime,'HH24') d, + core_nb, + LOAD AS value + FROM + (SELECT to_date(mtime,'YYYY-MM-DD HH24') mtime, + cont.name as pdb_name, + instance_number , + core_nb, + ROUND(SUM(c1),2) AAS_WAIT, + ROUND(SUM(c2),2) AAS_CPU, + ROUND(SUM(cnt),2) AAS, + ROUND(SUM(load),2) LOAD + FROM + (SELECT ash.instance_number, + 
ash.con_id, + TO_CHAR(sample_time,'YYYY-MM-DD HH24') mtime, + cpu.core_nb, + DECODE(session_state,'WAITING',COUNT(*),0)/360 c1, + DECODE(session_state,'ON CPU',COUNT( *),0) /360 c2, + COUNT( *)/360 cnt, + COUNT( *)/360/cpu.core_nb load + FROM cdb_hist_active_sess_history ash, + (select inst_id,stat_name,to_number(value) as core_nb from gv$osstat where stat_name='NUM_CPUS' ) cpu + WHERE + 1=1 + and ash.instance_number=cpu.inst_id + -- and sample_time > sysdate - 30 + GROUP BY ash.instance_number,ash.con_id,cpu.core_nb, + TO_CHAR(sample_time,'YYYY-MM-DD HH24'), + session_state, + cpu.core_nb + ) q, + (select distinct con_id,name from gv$containers) cont + WHERE cont.con_id=q.con_id + GROUP BY cont.name, mtime,instance_number,core_nb + order by 1,2,3 + ) +/ + +-- col "00-01_ " for 90.99 +-- col "01-02_ " for 90.99 +-- col "02-03_ " for 90.99 +-- col "03-04_ " for 90.99 +-- col "04-05_ " for 90.99 +-- col "05-06_ " for 90.99 +-- col "06-07_ " for 90.99 +-- col "07-08_ " for 90.99 +-- col "08-09_ " for 90.99 +-- col "09-10_ " for 90.99 +-- col "10-11_ " for 90.99 +-- col "11-12_ " for 90.99 +-- col "12-13_ " for 90.99 +-- col "13-14_ " for 90.99 +-- col "14-15_ " for 90.99 +-- col "15-16_ " for 90.99 +-- col "16-17_ " for 90.99 +-- col "17-18_ " for 90.99 +-- col "18-19_ " for 90.99 +-- col "19-20_ " for 90.99 +-- col "20-21_ " for 90.99 +-- col "21-22_ " for 90.99 +-- col "22-23_ " for 90.99 +-- col "23-24_ " for 90.99 + +-- WITH t AS +-- (SELECT TO_CHAR(mtime,'YYYY/MM/DD') mtime, +-- TO_CHAR(mtime,'HH24') d, +-- LOAD AS value +-- FROM +-- (SELECT to_date(mtime,'YYYY-MM-DD HH24') mtime, +-- ROUND(SUM(c1),2) AAS_WAIT, +-- ROUND(SUM(c2),2) AAS_CPU, +-- ROUND(SUM(cnt),2) AAS, +-- ROUND(SUM(load),2) LOAD +-- FROM +-- (SELECT TO_CHAR(sample_time,'YYYY-MM-DD HH24') mtime, +-- DECODE(session_state,'WAITING',COUNT(*),0)/360 c1, +-- DECODE(session_state,'ON CPU',COUNT( *),0) /360 c2, +-- COUNT( *)/360 cnt, +-- COUNT( *)/360/cpu.core_nb load +-- FROM 
cdb_hist_active_sess_history, +-- (select stat_name,sum(to_number(value)) as core_nb from gv$osstat where stat_name='NUM_CPU_CORES' group by stat_name) cpu +-- WHERE +-- sample_time > sysdate - 30 and +-- con_id=(select distinct con_id from gv$containers where name='&1') +-- GROUP BY TO_CHAR(sample_time,'YYYY-MM-DD HH24'), +-- session_state, +-- cpu.core_nb +-- ) +-- GROUP BY mtime +-- ) +-- ) +-- SELECT mtime, +-- NVL("00-01_ ",0) "00-01_ ", +-- NVL("01-02_ ",0) "01-02_ ", +-- NVL("02-03_ ",0) "02-03_ ", +-- NVL("03-04_ ",0) "03-04_ ", +-- NVL("04-05_ ",0) "04-05_ ", +-- NVL("05-06_ ",0) "05-06_ ", +-- NVL("06-07_ ",0) "06-07_ ", +-- NVL("07-08_ ",0) "07-08_ ", +-- NVL("08-09_ ",0) "08-09_ ", +-- NVL("09-10_ ",0) "09-10_ ", +-- NVL("10-11_ ",0) "10-11_ ", +-- NVL("11-12_ ",0) "11-12_ ", +-- NVL("12-13_ ",0) "12-13_ ", +-- NVL("13-14_ ",0) "13-14_ ", +-- NVL("14-15_ ",0) "14-15_ ", +-- NVL("15-16_ ",0) "15-16_ ", +-- NVL("16-17_ ",0) "16-17_ ", +-- NVL("17-18_ ",0) "17-18_ ", +-- NVL("18-19_ ",0) "18-19_ ", +-- NVL("19-20_ ",0) "19-20_ ", +-- NVL("20-21_ ",0) "20-21_ ", +-- NVL("21-22_ ",0) "21-22_ ", +-- NVL("22-23_ ",0) "22-23_ ", +-- NVL("23-24_ ",0) "23-24_ " +-- FROM t pivot( SUM(value) AS " " FOR d IN ('00' AS "00-01",'01' AS "01-02",'02' AS "02-03",'03' AS "03-04",'04' AS "04-05",'05' AS "05-06",'06' AS "06-07",'07' AS "07-08", +-- '08' AS "08-09",'09' AS "09-10",'10' AS "10-11", '11' AS "11-12",'12' AS "12-13",'13' AS "13-14",'14' AS "14-15",'15' AS "15-16", +-- '16' AS "16-17",'17' AS "17-18",'18' AS "18-19",'19' AS "19-20",'20' AS "20-21",'21' AS "21-22", '22' AS "22-23",'23' AS "23-24") +-- ) +-- ORDER BY mtime +-- / +spool off +exit + diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm_raw.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm_raw.sql new file mode 100644 index 0000000000..538d03f539 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm_raw.sql @@ -0,0 
+1,26 @@ +set serveroutput on +@./spoolhead.sql +--set timing off +--set heading off +--set termout off +--set verify off + +-- spool scripts/main/perf_addon/script.sql + +declare + cursor c is select inst_id from gv$instance order by 1; +begin + for i in c + loop + dbms_output.put_line('@./scripts/main/perf_addon/perf_hm_raw.sql '||i.inst_id); + dbms_output.put_line(chr(13)||chr(10)); + end loop; + dbms_output.put_line('exit'); +end; +/ + +spool off + +start ./scripts/main/perf_addon/script.sql +exit + diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql new file mode 100644 index 0000000000..137720fdbb --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql @@ -0,0 +1,69 @@ +-- Uncomment sql_text columns if you want to see sql statement caught and see if the classification is correct +@./spoolhead.sql +-- spool results/perf_sqltext.csv + +select dt + ,cont.name as pdb_name + ,command + ,parsing_schema_name + --,sql_text + ,instance_number + ,count(*) as cnt + ,sum(elapsed_time)/1000000 as total_run_time_secs +from +( select g.con_id,g.instance_number, + to_char(begin_interval_time,'YYYY-MM-DD HH24:MI') dt, + -- to_char(begin_interval_time,'YYYY-MM-DD AMHH:MI:SS') dt, + case WHEN name='CREATE TABLE' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' + WHEN name='INSERT' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' + WHEN name='UPDATE' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' + WHEN name='DELETE' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' + WHEN name='INSERT' THEN 'INSERT' + WHEN name='UPDATE' THEN 'UPDATE' + WHEN name='DELETE' THEN 'DELETE' + WHEN name IN 
('TRUNCATE TABLE','TRUNCATE CLUSTER') THEN 'TRUNCATE' + WHEN name='UPSERT' THEN 'ETL' + WHEN name='SELECT' THEN 'BI/QUERY' + WHEN name='PL/SQL EXECUTE' THEN 'PL/SQL EXECUTE' + WHEN name IN ('CREATE TABLE','CREATE CLUSTER','CREATE INDEX', + 'ALTER TABLE','ALTER CLUSTER','ALTER INDEX', + 'DROP TABLE','DROP CLUSTER','DROP INDEX') THEN 'DDL/SEGMENT' + WHEN name IN ('CREATE MATERIALIZED VIEW','CREATE MATERIALIZED VIEW LOG','CREATE MATERIALIZED ZONEMAP', + 'ALTER MATERIALIZED VIEW','ALTER MATERIALIZED VIEW LOG','ALTER MATERIALIZED ZONEMAP', + 'DROP MATERIALIZED VIEW','DROP MATERIALIZED VIEW LOG','DROP MATERIALIZED ZONEMAP') THEN 'DDL/MVIEW' + WHEN name IN ('CREATE FUNCTION','CREATE PACKAGE','CREATE PACKAGE BODY','CREATE PROCEDURE','CREATE TRIGGER','CREATE TYPE','CREATE TYPE BODY','CREATE VIEW','CREATE SEQUENCE', + 'ALTER FUNCTION','ALTER PACKAGE','ALTER PACKAGE BODY','ALTER PROCEDURE','ALTER TRIGGER','ALTER TYPE','ALTER TYPE BODY','ALTER VIEW','ALTER SEQUENCE', + 'DROP FUNCTION','DROP PACKAGE','DROP PACKAGE BODY','DROP PROCEDURE','DROP TRIGGER','DROP TYPE','DROP TYPE BODY','DROP VIEW','DROP SEQUENCE') THEN 'DDL/PLSQL' + -- WHEN 'GRANT OBJECT' THEN 'DCL' + ELSE 'OTHER' + end as command, + name command_name, + translate(dbms_lob.substr(t.sql_text,2000) ,chr(10)||chr(13),' ') sql_text, + parsing_schema_name, + -- g.elapsed_time_total / nullif(executions_total,0) as elapsed_time + g.elapsed_time_delta as elapsed_time +from CDB_HIST_SQLSTAT g + left join CDB_HIST_SNAPSHOT s + on (g.SNAP_ID=s.SNAP_ID) + inner join CDB_HIST_SQLTEXT t + on (g.SQL_ID=t.sql_id) + inner join audit_actions aa + on (COMMAND_TYPE = aa.ACTION) +where + parsing_schema_name in (select username from CDB_users where oracle_maintained='N') + and g.con_id=t.con_id + and g.snap_id=s.snap_id + -- and s.begin_interval_time > sysdate - 7 +) sub, +(select distinct con_id, name from gv$containers) cont +where cont.con_id=sub.con_id +group by dt +,instance_number +,cont.name +, command +, parsing_schema_name +order
by dt +/ + +spool off +exit diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml new file mode 100644 index 0000000000..9e7532b99a --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml @@ -0,0 +1,12 @@ +name: oracle_assessment +version: "1.0" +extract_folder: "/tmp/data/oracle_assessment" +steps: + - name: workspace_info # to be updated + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql + mode: overwrite + frequency: once + flag: active + + diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/spoolhead.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/spoolhead.sql new file mode 100644 index 0000000000..a80790aa51 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/spoolhead.sql @@ -0,0 +1,15 @@ +WHENEVER SQLERROR EXIT SQL.SQLCODE +WHENEVER OSERROR EXIT +set embedded on +set pagesize 0 +set colsep ';' +set underline off +set echo off +set feedback off +set linesize 5000 +set long 99999 +set trimspool on +set headsep off +set verify off +alter session set NLS_NUMERIC_CHARACTERS='.,'; + From 62770c1a8462b717edbb8ff990f22e4d956bea09 Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Fri, 31 Oct 2025 17:39:08 +0100 Subject: [PATCH 02/12] Oracle add-on --- .gitignore | 4 +- pyproject.toml | 3 +- .../connections/database_manager.py | 43 ++++++++----------- .../assessments/oracle/pipeline_config.yml | 3 +- 4 files changed, 25 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 466c618f0a..1b47fdf1dd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,13 @@ +.venv/ .venv .python-version .sdkmanrc -.DS_Store +.DS_Store/ *.pyc __pycache__ dist .idea +.idea/ /htmlcov/ *.iml target/ diff --git a/pyproject.toml b/pyproject.toml index f61e8abe31..58b0b4aad1 100644 
--- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,8 @@ dependencies = [ "SQLAlchemy~=2.0.40", "pygls~=2.0.0a2", "duckdb~=1.2.2", - "requests>=2.28.1,<3" # Matches databricks-sdk (and 'types-requests' below), to avoid conflicts. + "requests>=2.28.1,<3", # Matches databricks-sdk (and 'types-requests' below), to avoid conflicts. + "oracledb==3.4.0" ] [project.urls] diff --git a/src/databricks/labs/lakebridge/connections/database_manager.py b/src/databricks/labs/lakebridge/connections/database_manager.py index 2dd6193fc5..90b6af2866 100644 --- a/src/databricks/labs/lakebridge/connections/database_manager.py +++ b/src/databricks/labs/lakebridge/connections/database_manager.py @@ -12,8 +12,7 @@ from sqlalchemy.orm.session import Session logger = logging.getLogger(__name__) -logger.setLevel("INFO") - +logger.setLevel(logging.INFO) @dataclasses.dataclass class FetchResult: @@ -30,7 +29,6 @@ def _connect(self) -> Engine: def fetch(self, query: str) -> FetchResult: pass - class _BaseConnector(DatabaseConnector): def __init__(self, config: dict[str, Any]): self.config = config @@ -67,26 +65,6 @@ def _create_connector(db_type: str, config: dict[str, Any]) -> DatabaseConnector class SnowflakeConnector(_BaseConnector): def _connect(self) -> Engine: raise NotImplementedError("Snowflake connector not implemented") - -class OracleConnector(_BaseConnector): - - def _connect(self) -> Engine: - def _connect(self) -> Engine: - raise NotImplementedError("Oracle connector not implemented") - - # db_name = self.config.get('tnsService') - # - # connection_string = URL.create( - # drivername="oracle+oracledb", - # username=self.config['user'], - # password=self.config['password'], - # host=self.config['host'], - # port=self.config.get('tnsPort', 1521), - # database=db_name - # ) - # return create_engine(connection_string) - - class MSSQLConnector(_BaseConnector): def _connect(self) -> Engine: auth_type = self.config.get('auth_type', 'sql_authentication') @@ -117,9 +95,25 @@ def 
_connect(self) -> Engine: return create_engine(connection_string) +class OracleConnector(_BaseConnector): + def _connect(self) -> Engine: + + db_name = self.config.get('tnsService') + connection_string = URL.create( + drivername="oracle+oracledb", + username=self.config['user'], + password=self.config['password'], + host=self.config['host'], + port=self.config.get('tnsPort', 1521), + database=db_name + ) + + return create_engine(connection_string) + class DatabaseManager: def __init__(self, db_type: str, config: dict[str, Any]): self.connector = _create_connector(db_type, config) + self.db_type = db_type def fetch(self, query: str) -> FetchResult: try: @@ -129,7 +123,8 @@ def fetch(self, query: str) -> FetchResult: raise ConnectionError("Error connecting to the database check credentials") from None def check_connection(self) -> bool: - query = "SELECT 101 AS test_column" + query = "SELECT 101 AS test_column from dual" if self.db_type == "oracle" else "SELECT 101 AS test_column" + result = self.fetch(query) if result is None: return False diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml index 9e7532b99a..7a556eedb7 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml @@ -2,11 +2,10 @@ name: oracle_assessment version: "1.0" extract_folder: "/tmp/data/oracle_assessment" steps: - - name: workspace_info # to be updated + - name: containers type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql mode: overwrite frequency: once flag: active - From fffac6aa80badb77cdea4f8fb5674ba917d3337f Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Fri, 7 Nov 2025 17:07:17 +0100 Subject: [PATCH 03/12] Oracle add-on --- .../lakebridge/connections/database_manager.py | 7 ++++--- 
tests/test_blueprint_oracle.py | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 tests/test_blueprint_oracle.py diff --git a/src/databricks/labs/lakebridge/connections/database_manager.py b/src/databricks/labs/lakebridge/connections/database_manager.py index 90b6af2866..a263afd721 100644 --- a/src/databricks/labs/lakebridge/connections/database_manager.py +++ b/src/databricks/labs/lakebridge/connections/database_manager.py @@ -113,7 +113,7 @@ def _connect(self) -> Engine: class DatabaseManager: def __init__(self, db_type: str, config: dict[str, Any]): self.connector = _create_connector(db_type, config) - self.db_type = db_type + self._db_type = db_type def fetch(self, query: str) -> FetchResult: try: @@ -123,8 +123,9 @@ def fetch(self, query: str) -> FetchResult: raise ConnectionError("Error connecting to the database check credentials") from None def check_connection(self) -> bool: - query = "SELECT 101 AS test_column from dual" if self.db_type == "oracle" else "SELECT 101 AS test_column" - + query = "SELECT 101 AS test_column" + if self._db_type.lower() == "Oracle": + query = "SELECT 101 AS test_column FROM dual" result = self.fetch(query) if result is None: return False diff --git a/tests/test_blueprint_oracle.py b/tests/test_blueprint_oracle.py new file mode 100644 index 0000000000..a8dfb2126d --- /dev/null +++ b/tests/test_blueprint_oracle.py @@ -0,0 +1,15 @@ +from pathlib import Path + +from databricks.labs.lakebridge.assessments.pipeline import PipelineClass +from databricks.labs.lakebridge.assessments.profiler import Profiler +from databricks.labs.lakebridge.connections.database_manager import OracleConnector +from databricks.labs.lakebridge.connections.database_manager import DatabaseManager + +pipeline_config_file="../src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml" +config=PipelineClass.load_config_from_yaml(pipeline_config_file) +print(config) + +profiler = Profiler("oracle", None) 
+# supported_platforms = profiler.supported_platforms() +# print(supported_platforms) +profiler.profile(pipeline_config=config) From e8f7bdbc687852798c98583508f87665246e2d57 Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Wed, 12 Nov 2025 18:48:35 +0100 Subject: [PATCH 04/12] Oracle tests. Updates on sql scripts to remove spooling "decoration" --- .../assessments/oracle/config_containers.sql | 5 +- .../assessments/oracle/config_db_features.sql | 6 -- .../assessments/oracle/config_instance.sql | 4 -- ...y_evol.sql => config_memory_evolution.sql} | 7 -- .../assessments/oracle/config_pdb_objects.sql | 6 -- .../oracle/config_pdb_partitions.sql | 7 -- .../assessments/oracle/config_storage.sql | 8 --- .../assessments/oracle/perf_cpu_waits.sql | 6 +- .../oracle/perf_fgd_session_evol.sql | 37 ---------- .../oracle/perf_fgd_session_evolution.sql | 37 ++++++++++ .../oracle/{perf_hm.sql => perf_heatmap.sql} | 7 -- .../{perf_hm_raw.sql => perf_heatmap_raw.sql} | 9 --- .../assessments/oracle/perf_sqltext.sql | 43 +++--------- .../assessments/oracle/pipeline_config.yml | 69 ++++++++++++++++++- .../assessments/oracle/spoolhead.sql | 15 ---- .../integration/assessments/test_profiler.py | 24 +++++++ .../pipeline_oracle_config_main.yml | 11 +++ 17 files changed, 151 insertions(+), 150 deletions(-) rename src/databricks/labs/lakebridge/resources/assessments/oracle/{config_memory_evol.sql => config_memory_evolution.sql} (93%) delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evol.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql rename src/databricks/labs/lakebridge/resources/assessments/oracle/{perf_hm.sql => perf_heatmap.sql} (98%) rename src/databricks/labs/lakebridge/resources/assessments/oracle/{perf_hm_raw.sql => perf_heatmap_raw.sql} (68%) delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/spoolhead.sql create mode 100644 
tests/resources/assessments/pipeline_oracle_config_main.yml diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql index 850803214a..a8b01f41ab 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql @@ -1,8 +1,5 @@ -- Has to be executed on CDB -@spoolhead.sql +-- @spoolhead.sql -- spool results/config_containers.csv select listagg(inst_id, ',') within group (order by name) as inst_ids,name,open_mode,pdb_count from gv$containers group by name,open_mode,pdb_count -/ -spool off -exit diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features.sql index 8ee0c98ca0..d8e1f64393 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features.sql @@ -1,6 +1,4 @@ -- Has to be executed on CDB -@./spoolhead.sql --- spool results/config_db_features.csv with t as (select version from product_component_version where product like 'Oracle Database%'), inst_cnt as (select count(*) as cnt from gv$instance), pdb_cnt as (select count(distinct con_id ) as cnt from gv$pdbs where name!='PDB$SEED'), @@ -29,7 +27,3 @@ union select scope, inst_id, stat_name, detailed_stat_name, to_char(value) from cpu_cores_details ) order by name -/ -spool off -exit - diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance.sql index 1ae0799609..03f3ec1631 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance.sql +++ 
b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance.sql @@ -1,8 +1,4 @@ -- Has to be executed on CDB -@./spoolhead.sql -- spool results/config_instance.csv select inst_id,instance_name,version,database_type from gv$instance -/ -spool off -exit diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evol.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution.sql similarity index 93% rename from src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evol.sql rename to src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution.sql index a098aaa235..0d055f75cd 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evol.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution.sql @@ -1,6 +1,3 @@ -@./spoolhead.sql --- spool results/config_memory_evolution.csv - select NVL(con.name,'Entire CDB/Non CDB') con_name, param.instance_number, to_char(snap.snap_time,'yyyy-mm-dd HH24:MI:SS') as snap_time, @@ -28,7 +25,3 @@ and param.parameter_name in ('sga_target', 'memory_target', 'memory_max_target') order by 1,3,2 -/ - -spool off -exit diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects.sql index 1ff1420bd0..5b5559f626 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects.sql @@ -1,6 +1,4 @@ -- Has to be executed on CDB -@./spoolhead.sql --- spool results/config_pdb_objects.csv SELECT cont.NAME as PDB_NAME,OWNER,OBJECT_TYPE,COUNT(*) as CNT FROM CDB_OBJECTS o, @@ -9,8 +7,4 @@ WHERE o.CON_ID=cont.con_id AND OWNER in (select username from cdb_users where oracle_maintained='N' and cont.con_id=con_id) GROUP BY 
cont.name,OWNER,OBJECT_TYPE ORDER BY 1,2 -/ - -spool off -exit diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions.sql index 451fcfe9d2..3160555056 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions.sql @@ -1,7 +1,4 @@ -- Has to be executed on CDB -@./spoolhead.sql --- spool results/config_pdb_partitions.csv - SELECT cont.NAME as PDB_NAME,OWNER,OBJECT_TYPE,CNT from ( @@ -21,8 +18,4 @@ from WHERE u.CON_ID=cont.con_id AND OWNER in (select username from cdb_users where oracle_maintained='N' and cont.con_id=u.con_id) ORDER BY 1,2 -/ - -spool off -exit diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage.sql index 4c95bd1a22..2ffdb781c8 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage.sql @@ -1,7 +1,3 @@ -@./spoolhead.sql --- results/config_storage.csv -col tablespace_type for a20 - select con_name, -- sub.tablespace_name, case @@ -38,7 +34,3 @@ group by con_name, ELSE 'USER_DATA' end order by 1 -/ -spool off -exit - diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql index cdc2442412..46312392fa 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql @@ -1,6 +1,5 @@ -- Has to be executed on CDB -@./spoolhead.sql --- spool results/perf_cpu_waits.csv + select cont.name as pdb_name, ash.instance_number, ash.mtime, @@ -23,7 +22,4 @@ from 
(SELECT instance_number,con_id,CON_DBID, where cont.con_id=ash.con_id and cont.dbid=ash.CON_DBID ORDER BY pdb_name,mtime -/ -spool off -exit diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evol.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evol.sql deleted file mode 100644 index d963877e49..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evol.sql +++ /dev/null @@ -1,37 +0,0 @@ -@./spoolhead.sql --- spool results/perf_fgd_session_evol.csv - -select con.name, - sh.instance_number,u.username, - to_char(sh.sample_time,'YYYY-MM-DD HH24:MI') as snap_time, - count(distinct sh.session_id||','||sh.session_serial#) as foregd_session_cnt -from cdb_hist_active_sess_history sh, - ( - select con_id, - dbid, - name - from v$containers - where name != 'PDB$SEED' - ) con, - (select distinct user_id,username - from cdb_users - where username not in ('SYS','SYSTEM','XS$NULL','OJVMSYS','LBACSYS','OUTLN','SYS$UMF','DBSNMP','APPQOSSYS','DBSFWUSER','GGSYS','ANONYMOUS','CTXSYS','DVF','DVSYS','GSMADMIN_INTERNAL','MDSYS','OLAPSYS','XDB','WMSYS', -'GSMCATUSER','MDDATA','REMOTE_SCHEDULER_AGENT','SYSBACKUP','GSMUSER','GSMROOTUSER','SYSRAC','SI_INFORMTN_SCHEMA','AUDSYS','DIP','ORDPLUGINS','ORDDATA','SYSKM','ORACLE_OCM','ORDSYS','SYSDG',' -SYS','SYSTEM','XS$NULL','LBACSYS','OUTLN','DBSNMP','APPQOSSYS','DBSFWUSER','GGSYS','ANONYMOUS','CTXSYS','DVF','DVSYS','GSMADMIN_INTERNAL','MDSYS','OLAPSYS','XDB','WMSYS','GSMCATUSER','MDDATA -','REMOTE_SCHEDULER_AGENT','SYSBACKUP','GSMUSER','SYSRAC','OJVMSYS','SI_INFORMTN_SCHEMA','AUDSYS','DIP','ORDPLUGINS','ORDDATA','SYSKM','ORACLE_OCM','SYS$UMF','ORDSYS','SYSDG') - ) u -where 1 = 1 - and sh.con_id = con.con_id - and sh.dbid=con.dbid - and u.user_id=sh.user_id - and sh.session_type = 'FOREGROUND' -group by con.name, - sh.instance_number, - to_char(sh.sample_time,'YYYY-MM-DD HH24:MI'), - u.username -order by 1, 4, 2 -/ - -spool off -exit 
- diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql new file mode 100644 index 0000000000..d58729a039 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql @@ -0,0 +1,37 @@ +select con.name, + sh.instance_number,u.username, + to_char(sh.sample_time,'YYYY-MM-DD HH24:MI') as snap_time, + count(distinct sh.session_id||','||sh.session_serial#) as foregd_session_cnt +from cdb_hist_active_sess_history sh, + ( + select con_id, dbid, name + from v$containers + where name != 'PDB$SEED' + ) con, + (select distinct user_id,username + from cdb_users + where username not in ('SYS','SYSTEM','XS$NULL', +'OJVMSYS','LBACSYS','OUTLN','SYS$UMF','DBSNMP', +'APPQOSSYS','DBSFWUSER','GGSYS','ANONYMOUS','CTXSYS', +'DVF','DVSYS','GSMADMIN_INTERNAL','MDSYS','OLAPSYS', +'XDB','WMSYS','GSMCATUSER','MDDATA','REMOTE_SCHEDULER_AGENT', +'SYSBACKUP','GSMUSER','GSMROOTUSER','SYSRAC', +'SI_INFORMTN_SCHEMA','AUDSYS','DIP','ORDPLUGINS','ORDDATA','SYSKM', +'ORACLE_OCM','ORDSYS','SYSDG','SYS','SYSTEM','XS$NULL','LBACSYS', +'OUTLN','DBSNMP','APPQOSSYS','DBSFWUSER','GGSYS', +'ANONYMOUS','CTXSYS','DVF','DVSYS','GSMADMIN_INTERNAL', +'MDSYS','OLAPSYS','XDB','WMSYS','GSMCATUSER','MDDATA', +'REMOTE_SCHEDULER_AGENT','SYSBACKUP','GSMUSER','SYSRAC', +'OJVMSYS','SI_INFORMTN_SCHEMA','AUDSYS','DIP', +'ORDPLUGINS','ORDDATA','SYSKM','ORACLE_OCM', +'SYS$UMF','ORDSYS','SYSDG') + ) u +where sh.con_id = con.con_id + -- and sh.dbid=con.dbid + and u.user_id=sh.user_id + and sh.session_type = 'FOREGROUND' +group by con.name, + sh.instance_number, + to_char(sh.sample_time,'YYYY-MM-DD HH24:MI'), + u.username +order by 1, 4, 2 diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql similarity index 98% rename from 
src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm.sql rename to src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql index 1045f88f85..ef9c51968c 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql @@ -1,6 +1,3 @@ -@./spoolhead.sql --- spool results/perf_hm.csv - SELECT TO_CHAR(mtime,'YYYY/MM/DD') mtime, pdb_name, instance_number, @@ -41,7 +38,6 @@ SELECT TO_CHAR(mtime,'YYYY/MM/DD') mtime, GROUP BY cont.name, mtime,instance_number,core_nb order by 1,2,3 ) -/ -- col "00-01_ " for 90.99 -- col "01-02_ " for 90.99 @@ -127,6 +123,3 @@ SELECT TO_CHAR(mtime,'YYYY/MM/DD') mtime, -- ) -- ORDER BY mtime -- / -spool off -exit - diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm_raw.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap_raw.sql similarity index 68% rename from src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm_raw.sql rename to src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap_raw.sql index 538d03f539..c0a8175143 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_hm_raw.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap_raw.sql @@ -1,12 +1,3 @@ -set serveroutput on -@./spoolhead.sql ---set timing off ---set heading off ---set termout off ---set verify off - --- spool scripts/main/perf_addon/script.sql - declare cursor c is select inst_id from gv$instance order by 1; begin diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql index 137720fdbb..d489cd60f5 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql @@ -1,19 +1,13 @@ 
--- Uncomment sql_text columns if you want to see sql statement caught and see if the classification is correct -@./spoolhead.sql --- spool results/perf_sqltext.csv - select dt ,cont.name as pdb_name ,command ,parsing_schema_name - --,sql_text ,instance_number ,count(*) as cnt ,sum(elapsed_time)/1000000 as total_run_time_secs from ( select g.con_id,g.instance_number, to_char(begin_interval_time,'YYYY-MM-DD HH24:MI') dt, - -- to_char(begin_interval_time,'YYYY-MM-DD AMHH:MI:SS') dt, case WHEN name='CREATE TABLE' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' WHEN name='INSERT' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' WHEN name='UPDATE' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' @@ -21,39 +15,26 @@ from WHEN name='INSERT' THEN 'INSERT' WHEN name='UPDATE' THEN 'UPDATE' WHEN name='DELETE' THEN 'DELETE' - WHEN name IN ('TRUNCATE TABLE','TRUCATE CLUSTER') THEN 'TRUNCATE' + WHEN name IN ('TRUNCATE TABLE','TRUNCATE CLUSTER') THEN 'TRUNCATE' WHEN name='UPSERT' THEN 'ETL' WHEN name='SELECT' THEN 'BI/QUERY' WHEN name='PL/SQL EXECUTE' THEN 'PL/SQL EXECUTE' - WHEN name IN ('CREATE TABLE','CREATE CLUSTER','CREATE INDEX', - 'ALTER TABLE','ALTER CLUSTER','ALTER INDEX', - 'DROP TABLE','DROP CLUSTER','DROP INDEX') THEN 'DDL/SEGMENT' - WHEN name IN ('CREATE MATERIALIZED VIEW','CREATE MATERIALIZED VIEW LOG','CREATE MATERIALIZED ZONEMAP', - 'ALTER MATERIALIZED VIEW','ALTER MATERIALIZED VIEW LOG','ALTER MATERIALIZED ZONEMAP', - 'DROP MATERIALIZED VIEW','DROP MATERIALIZED VIEW LOG','DROP MATERIALIZED ZONEMAP') THEN 'DDL/MVIEW' - WHEN name IN ('CREATE FUNCTION','CREATE PACKAGE','CREATE PACKAGE BODY','CREATE PROCEDURE','CREATE TRIGGER','CREATE TYPE','CREATE TYPE BODY','CREATE VIEW','CREATE SEQUENCE', - 'ALTER FUNCTION','ALTER PACKAGE','ALTER PACKAGE BODY','ALTER PROCEDURE','ALTER TRIGGER','ALTER 
TYPE','ALTER TYPE BODY','ALTER VIEW','ALTER SEQUENCE', - 'DROP FUNCTION','DROP PACKAGE','DROP PACKAGE BODY','DROP PROCEDURE','DROP TRIGGER','DROP TYPE','DROP TYPE BODY','DROP VIEW','DROP SEQUENCE') THEN 'DDL/PLSQL' - -- WHEN 'GRANT OBJECT' THEN 'DCL' - ELSE 'OTHER' + WHEN name IN ('CREATE TABLE','CREATE CLUSTER','CREATE INDEX', 'ALTER TABLE','ALTER CLUSTER','ALTER INDEX','DROP TABLE','DROP CLUSTER','DROP INDEX') THEN 'DDL/SEGMENT' + WHEN name IN ('CREATE MATERIALIZED VIEW','CREATE MATERIALIZED VIEW LOG','CREATE MATERIALIZED ZONEMAP','ALTER MATERIALIZED VIEW','ALTER MATERIALIZED VIEW LOG','ALTER MATERIALIZED ZONEMAP','DROP MATERIALIZED VIEW','DROP MATERIALIZED VIEW LOG','DROP MATERIALIZED ZONEMAP') THEN 'DDL/MVIEW' + WHEN name IN ('CREATE FUNCTION','CREATE PACKAGE','CREATE PACKAGE BODY','CREATE PROCEDURE','CREATE TRIGGER','CREATE TYPE','CREATE TYPE BODY','CREATE VIEW','CREATE SEQUENCE','ALTER FUNCTION','ALTER PACKAGE','ALTER PACKAGE BODY','ALTER PROCEDURE','ALTER TRIGGER','ALTER TYPE','ALTER TYPE BODY','ALTER VIEW','ALTER SEQUENCE','DROP FUNCTION','DROP PACKAGE','DROP PACKAGE BODY','DROP PROCEDURE','DROP TRIGGER','DROP TYPE','DROP TYPE BODY','DROP VIEW','DROP SEQUENCE') THEN 'DDL/PLSQL' + ELSE 'OTHER' end as command, - name command_name, - translate(dbms_lob.substr(t.sql_text,2000) ,chr(10)||chr(13),' ') sql_text, + name as command_name, + translate(dbms_lob.substr(t.sql_text,2000) ,chr(10)||chr(13),' ') as sql_text, parsing_schema_name, - -- g.elapsed_time_total / nullif(executions_total,0) as elapsed_time g.elapsed_time_delta as elapsed_time -from CDB_HIST_SQLSTAT g - left join CDB_HIST_SNAPSHOT s - on (g.SNAP_ID=s.SNAP_ID) - inner join CDB_HIST_SQLTEXT t - on (g.SQL_ID=t.sql_id) - inner join audit_actions aa - on (COMMAND_TYPE = aa.ACTION) +from CDB_HIST_SQLSTAT g left join CDB_HIST_SNAPSHOT s on (g.SNAP_ID=s.SNAP_ID) + inner join CDB_HIST_SQLTEXT t on (g.SQL_ID=t.sql_id) + inner join audit_actions aa on (COMMAND_TYPE = aa.ACTION) where parsing_schema_name in 
(select username from CDB_users where oracle_maintained='N') and g.con_id=t.con_id and g.snap_id=s.snap_id - -- and s.begin_interval_time > sysdate - 7 ) sub, (select distinct con_id, name from gv$containers) cont where cont.con_id=sub.con_id @@ -63,7 +44,3 @@ group by dt , command , parsing_schema_name order by dt -/ - -spool off -exit diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml index 7a556eedb7..5e0f6eff93 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml @@ -2,10 +2,75 @@ name: oracle_assessment version: "1.0" extract_folder: "/tmp/data/oracle_assessment" steps: - - name: containers + - name: config_containers type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql mode: overwrite frequency: once flag: active - + - name: config_db_features + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features.sql + mode: overwrite + frequency: once + flag: active + - name: config_instance + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance.sql + mode: overwrite + frequency: once + flag: active + - name: config_memory_evolution + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution.sql + mode: overwrite + frequency: once + flag: active + - name: config_pdb_objects + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects.sql + mode: overwrite + frequency: once + flag: active + - name: config_pdb_partitions + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions.sql + mode: overwrite + frequency: once + 
flag: active + - name: config_storage + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage.sql + mode: overwrite + frequency: once + flag: active + - name: perf_cpu_waits + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql + mode: overwrite + frequency: once + flag: active + - name: perf_fgd_session_evolution + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql + mode: overwrite + frequency: once + flag: active + - name: perf_heatmap + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql + mode: overwrite + frequency: once + flag: active + - name: perf_heatmap_raw + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap_raw.sql + mode: overwrite + frequency: once + flag: inactive + - name: perf_sqltext + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql + mode: overwrite + frequency: once + flag: inactive diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/spoolhead.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/spoolhead.sql deleted file mode 100644 index a80790aa51..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/spoolhead.sql +++ /dev/null @@ -1,15 +0,0 @@ -WHENEVER SQLERROR EXIT SQL.SQLCODE -WHENEVER OSERROR EXIT -set embedded on -set pagesize 0 -set colsep ';' -set underline off -set echo off -set feedback off -set linesize 5000 -set long 99999 -set trimspool on -set headsep off -set verify off -alter session set NLS_NUMERIC_CHARACTERS='.,'; - diff --git a/tests/integration/assessments/test_profiler.py b/tests/integration/assessments/test_profiler.py index 19fc8f5d27..c808c793f5 100644 --- a/tests/integration/assessments/test_profiler.py +++ 
b/tests/integration/assessments/test_profiler.py @@ -8,6 +8,30 @@ from databricks.labs.lakebridge.assessments.pipeline import PipelineClass from databricks.labs.lakebridge.assessments.profiler import Profiler +# Tests for Oracle profiler section +def test_Oracle_as_supported_source_technologies() -> None: + """Test that supported source technologies are correctly returned""" + profiler = Profiler("oracle", None) + supported_platforms = profiler.supported_platforms() + assert isinstance(supported_platforms, list) + assert "oracle" in supported_platforms + +def test_Oracle_profile_missing_platform_config() -> None: + """Test that profiling an unsupported platform raises ValueError""" + with pytest.raises(ValueError, match="Cannot Proceed without a valid pipeline configuration for oracle"): + profiler = Profiler("oracle", None) + profiler.profile() + +def test_Oracle_profile_execution() -> None: + """Test successful profiling execution using actual pipeline configuration""" + profiler = Profiler("oracle") + path_prefix = Path(__file__).parent / "../../../" + config_file = path_prefix / "src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml" + config = profiler.path_modifier(config_file=config_file, path_prefix=path_prefix) + profiler.profile(pipeline_config=config) + assert Path("/tmp/profiler_main/profiler_extract.db").exists(), "Profiler extract database should be created" + +# End of Oracle profiler tests section def test_supported_source_technologies() -> None: """Test that supported source technologies are correctly returned""" diff --git a/tests/resources/assessments/pipeline_oracle_config_main.yml b/tests/resources/assessments/pipeline_oracle_config_main.yml new file mode 100644 index 0000000000..7a556eedb7 --- /dev/null +++ b/tests/resources/assessments/pipeline_oracle_config_main.yml @@ -0,0 +1,11 @@ +name: oracle_assessment +version: "1.0" +extract_folder: "/tmp/data/oracle_assessment" +steps: + - name: containers + type: sql + 
extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql + mode: overwrite + frequency: once + flag: active + From 380943f552ab73279803be7c9ac18e3b5fe3dffa Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Wed, 26 Nov 2025 11:03:15 +0100 Subject: [PATCH 05/12] Oracle tests. Updates on sql scripts to remove spooling "decoration" --- .../resources/assessments/oracle/config_containers.sql | 3 +-- .../resources/assessments/oracle/pipeline_config.yml | 8 +++++++- .../resources/assessments/oracle/test_nulloutput.sql | 1 + tests/integration/assessments/test_profiler.py | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput.sql diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql index a8b01f41ab..e5d41ad146 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql @@ -1,5 +1,4 @@ -- Has to be executed on CDB --- @spoolhead.sql --- spool results/config_containers.csv + select listagg(inst_id, ',') within group (order by name) as inst_ids,name,open_mode,pdb_count from gv$containers group by name,open_mode,pdb_count diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml index 5e0f6eff93..2e6939d762 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml @@ -73,4 +73,10 @@ steps: extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql mode: overwrite frequency: once - flag: inactive + flag: active + - name: 
test_nulloutput + type: sql + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput.sql + mode: overwrite + frequency: once + flag: active diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput.sql new file mode 100644 index 0000000000..c399e16ee8 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput.sql @@ -0,0 +1 @@ +select 'value' as X from dual where 1=2 diff --git a/tests/integration/assessments/test_profiler.py b/tests/integration/assessments/test_profiler.py index c808c793f5..aac1166caf 100644 --- a/tests/integration/assessments/test_profiler.py +++ b/tests/integration/assessments/test_profiler.py @@ -29,7 +29,7 @@ def test_Oracle_profile_execution() -> None: config_file = path_prefix / "src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml" config = profiler.path_modifier(config_file=config_file, path_prefix=path_prefix) profiler.profile(pipeline_config=config) - assert Path("/tmp/profiler_main/profiler_extract.db").exists(), "Profiler extract database should be created" + assert Path("/tmp/data/oracle_assessment/profiler_extract.db").exists(), "Profiler extract database should be created" # End of Oracle profiler tests section From 57b207c46b2af52adeea0f94e068af931a38f609 Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Fri, 30 Jan 2026 16:22:00 +0100 Subject: [PATCH 06/12] Oracle tests. 
Updates on sql scripts to remove spooling "decoration" --- .../assessments/oracle/perf_cpu_waits.sql | 2 +- .../oracle/perf_fgd_session_evolution.sql | 4 +- .../assessments/oracle/perf_heatmap.sql | 4 +- .../assessments/oracle/perf_sqltext.sql | 2 +- .../assessments/oracle/pipeline_config.yml | 6 --- .../validation/oracle_extract_schema.yml | 51 +++++++++++++++++++ 6 files changed, 57 insertions(+), 12 deletions(-) create mode 100644 src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql index 46312392fa..029d50160b 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql @@ -7,7 +7,7 @@ select cont.name as pdb_name, ash.wait_class, ash.total_wait_time from (SELECT instance_number,con_id,CON_DBID, - TO_CHAR(sample_time,'YYYY-MM-DD HH24') mtime, + TO_DATE(TO_CHAR(sample_time,'YYYY-MM-DD HH24')) mtime, NVL(a.event, 'ON CPU') AS event, NVL(a.wait_class, 'ON CPU') AS wait_class, COUNT(*)*10 AS total_wait_time diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql index d58729a039..7031aadfc5 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql @@ -1,6 +1,6 @@ select con.name, sh.instance_number,u.username, - to_char(sh.sample_time,'YYYY-MM-DD HH24:MI') as snap_time, + to_date(to_char(sh.sample_time,'YYYY-MM-DD HH24:MI')) as snap_time, count(distinct sh.session_id||','||sh.session_serial#) as foregd_session_cnt from cdb_hist_active_sess_history sh, ( @@ -32,6 
+32,6 @@ where sh.con_id = con.con_id and sh.session_type = 'FOREGROUND' group by con.name, sh.instance_number, - to_char(sh.sample_time,'YYYY-MM-DD HH24:MI'), + to_date(to_char(sh.sample_time,'YYYY-MM-DD HH24:MI')), u.username order by 1, 4, 2 diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql index ef9c51968c..6eafb9c726 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql @@ -1,7 +1,7 @@ -SELECT TO_CHAR(mtime,'YYYY/MM/DD') mtime, +SELECT TO_DATE(TO_CHAR(mtime,'YYYY/MM/DD'), 'YYYY/MM/DD') mtime, pdb_name, instance_number, - TO_CHAR(mtime,'HH24') d, + TO_CHAR(mtime,'HH24') hour, core_nb, LOAD AS value FROM diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql index d489cd60f5..63a8001183 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql @@ -7,7 +7,7 @@ select dt ,sum(elapsed_time)/1000000 as total_run_time_secs from ( select g.con_id,g.instance_number, - to_char(begin_interval_time,'YYYY-MM-DD HH24:MI') dt, + to_date(to_char(begin_interval_time,'YYYY-MM-DD HH24:MI')) dt, case WHEN name='CREATE TABLE' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' WHEN name='INSERT' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' WHEN name='UPDATE' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml 
b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml index 2e6939d762..f61f007457 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml @@ -62,12 +62,6 @@ steps: mode: overwrite frequency: once flag: active - - name: perf_heatmap_raw - type: sql - extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap_raw.sql - mode: overwrite - frequency: once - flag: inactive - name: perf_sqltext type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql diff --git a/src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml b/src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml new file mode 100644 index 0000000000..e562df4d28 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml @@ -0,0 +1,51 @@ +source_tech: synapse +version: 0.1 + +schemas: + main: + tables: + config_containers: + columns: + - name: TABLE_CATALOG + type: VARCHAR + config_db_features: + columns: + - name: + type: + config_instance: + columns: + - name: + type: + config_memory_evolution: + columns: + - name: + type: + config_pdb_objects: + columns: + - name: + type: + config_pdb_partitions: + columns: + - name: + type: + config_storage: + columns: + - name: + type: + perf_cpu_waits: + columns: + - name: + type: + perf_fgd_session_evolution: + columns: + - name: + type: + perf_heatmap: + columns: + - name: + type: + perf_sqltext: + columns: + - name: + type: + From f7325d767c0697a142a4535a92c9952d3d78d9c2 Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Fri, 30 Jan 2026 16:41:59 +0100 Subject: [PATCH 07/12] merge with the nost recent main branch (Jan 30th) --- pyproject.toml | 2 +- src/databricks/labs/lakebridge/connections/database_manager.py | 2 +- 2 files 
changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 94dd9b2b16..773f3f8c22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "SQLAlchemy~=2.0.40", "pygls~=2.0.0", "duckdb~=1.2.2", - "oracledb==3.4.0" + "oracledb==3.4.0", "databricks-switch-plugin~=0.1.6", # Temporary, until Switch is migrated to be a transpiler (LSP) plugin. "requests>=2.28.1,<3", # Matches databricks-sdk (and 'types-requests' below), to avoid conflicts. "pandas~=2.3.1", # Required for new configure assessment diff --git a/src/databricks/labs/lakebridge/connections/database_manager.py b/src/databricks/labs/lakebridge/connections/database_manager.py index 0af5d55336..a5dae139f2 100644 --- a/src/databricks/labs/lakebridge/connections/database_manager.py +++ b/src/databricks/labs/lakebridge/connections/database_manager.py @@ -132,7 +132,7 @@ def fetch(self, query: str) -> FetchResult: def check_connection(self) -> bool: query = "SELECT 101 AS test_column" - if self._db_type.lower() == "Oracle": + if self._db_type.lower() == "oracle": query = "SELECT 101 AS test_column FROM dual" result = self.fetch(query) if result is None: From b9054ea6a50160dfa2cfabd91db3274c925d3b7b Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Fri, 6 Feb 2026 14:49:08 +0100 Subject: [PATCH 08/12] merge with the nost recent main branch (Jan 30th) --- .../labs/lakebridge/connections/database_manager.py | 1 + .../resources/assessments/oracle/perf_cpu_waits.sql | 4 ++-- .../assessments/oracle/perf_fgd_session_evolution.sql | 6 ++++-- .../resources/assessments/oracle/perf_heatmap.sql | 2 +- .../resources/assessments/oracle/perf_sqltext.sql | 3 ++- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/databricks/labs/lakebridge/connections/database_manager.py b/src/databricks/labs/lakebridge/connections/database_manager.py index a5dae139f2..900b0fafc1 100644 --- a/src/databricks/labs/lakebridge/connections/database_manager.py +++ 
b/src/databricks/labs/lakebridge/connections/database_manager.py @@ -73,6 +73,7 @@ def _create_connector(db_type: str, config: dict[str, Any]) -> DatabaseConnector class SnowflakeConnector(_BaseConnector): def _connect(self) -> Engine: raise NotImplementedError("Snowflake connector not implemented") + class MSSQLConnector(_BaseConnector): def _connect(self) -> Engine: auth_type = self.config.get('auth_type', 'sql_authentication') diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql index 029d50160b..e77b5aae9e 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql @@ -2,12 +2,12 @@ select cont.name as pdb_name, ash.instance_number, - ash.mtime, + TO_DATE(ash.mtime,'YYYY-MM-DD HH24') as mtime, ash.event, ash.wait_class, ash.total_wait_time from (SELECT instance_number,con_id,CON_DBID, - TO_DATE(TO_CHAR(sample_time,'YYYY-MM-DD HH24')) mtime, + TO_CHAR(sample_time,'YYYY-MM-DD HH24') mtime, NVL(a.event, 'ON CPU') AS event, NVL(a.wait_class, 'ON CPU') AS wait_class, COUNT(*)*10 AS total_wait_time diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql index 7031aadfc5..829b95f735 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql @@ -1,6 +1,7 @@ select con.name, sh.instance_number,u.username, - to_date(to_char(sh.sample_time,'YYYY-MM-DD HH24:MI')) as snap_time, + trunc(sh.sample_time,'MI') as snap_time, + -- to_date(to_char(sh.sample_time,'YYYY-MM-DD HH24:MI')) as snap_time, count(distinct sh.session_id||','||sh.session_serial#) as 
foregd_session_cnt from cdb_hist_active_sess_history sh, ( @@ -32,6 +33,7 @@ where sh.con_id = con.con_id and sh.session_type = 'FOREGROUND' group by con.name, sh.instance_number, - to_date(to_char(sh.sample_time,'YYYY-MM-DD HH24:MI')), + trunc(sh.sample_time,'MI') , + -- to_date(to_char(sh.sample_time,'YYYY-MM-DD HH24:MI')), u.username order by 1, 4, 2 diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql index 6eafb9c726..ab1fb5f78d 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql @@ -1,4 +1,4 @@ -SELECT TO_DATE(TO_CHAR(mtime,'YYYY/MM/DD'), 'YYYY/MM/DD') mtime, +SELECT TO_CHAR(mtime,'YYYY/MM/DD') as mtime, pdb_name, instance_number, TO_CHAR(mtime,'HH24') hour, diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql index 63a8001183..4fce4507dc 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql @@ -7,7 +7,8 @@ select dt ,sum(elapsed_time)/1000000 as total_run_time_secs from ( select g.con_id,g.instance_number, - to_date(to_char(begin_interval_time,'YYYY-MM-DD HH24:MI')) dt, + trunc(begin_interval_time,'MI') as dt, + -- to_date(to_char(begin_interval_time,'YYYY-MM-DD HH24:MI')) dt, case WHEN name='CREATE TABLE' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' WHEN name='INSERT' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' WHEN name='UPDATE' AND instr(upper(TRANSLATE(dbms_lob.substr(t.SQL_TEXT,2000),chr(10)||chr(13), ' ')), 'SELECT') > 0 THEN 'ETL' From 
3073af03afb52ddde28013d6b332d001b4d508a2 Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Fri, 6 Mar 2026 16:14:16 +0100 Subject: [PATCH 09/12] Add support for ddl scripts --- .../labs/lakebridge/assessments/_constants.py | 2 +- .../assessments/configure_assessment.py | 10 ++- .../connections/database_manager.py | 14 ++-- .../oracle/config_containers_ddl.sql | 6 ++ .../oracle/config_db_features_ddl.sql | 7 ++ .../oracle/config_instance_ddl.sql | 6 ++ .../oracle/config_memory_evolution.sql | 2 +- .../oracle/config_memory_evolution_ddl.sql | 7 ++ .../oracle/config_pdb_objects_ddl.sql | 6 ++ .../oracle/config_pdb_partitions_ddl.sql | 6 ++ .../assessments/oracle/config_storage_ddl.sql | 7 ++ .../assessments/oracle/perf_cpu_waits_ddl.sql | 8 +++ .../oracle/perf_fgd_session_evolution_ddl.sql | 7 ++ .../assessments/oracle/perf_heatmap_ddl.sql | 8 +++ .../assessments/oracle/perf_sqltext_ddl.sql | 9 +++ .../assessments/oracle/pipeline_config.yml | 72 +++++++++++++++++++ .../oracle/test_nulloutput_ddl.sql | 3 + .../integration/assessments/test_profiler.py | 9 ++- tests/test_blueprint_oracle.py | 7 +- 19 files changed, 180 insertions(+), 16 deletions(-) create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers_ddl.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features_ddl.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance_ddl.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution_ddl.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects_ddl.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions_ddl.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage_ddl.sql create mode 100644 
src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits_ddl.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution_ddl.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap_ddl.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext_ddl.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput_ddl.sql diff --git a/src/databricks/labs/lakebridge/assessments/_constants.py b/src/databricks/labs/lakebridge/assessments/_constants.py index 9ce50c6b72..1f5efc0527 100644 --- a/src/databricks/labs/lakebridge/assessments/_constants.py +++ b/src/databricks/labs/lakebridge/assessments/_constants.py @@ -9,7 +9,7 @@ } # TODO modify this PLATFORM_TO_SOURCE_TECHNOLOGY.keys() once all platforms are supported -PROFILER_SOURCE_SYSTEM = ["mssql", "synapse","oracle"] +PROFILER_SOURCE_SYSTEM = ["mssql", "synapse", "oracle"] # This flag indicates whether a connector is required for the source system when pipeline is trigger diff --git a/src/databricks/labs/lakebridge/assessments/configure_assessment.py b/src/databricks/labs/lakebridge/assessments/configure_assessment.py index 7c8f1badbe..64924fe980 100644 --- a/src/databricks/labs/lakebridge/assessments/configure_assessment.py +++ b/src/databricks/labs/lakebridge/assessments/configure_assessment.py @@ -71,6 +71,7 @@ def run(self): self._test_connection(source, cred_manager) logger.info(f"{source.capitalize()} Assessment Configuration Completed") + class ConfigureOracleAssessment(AssessmentConfigurator): """Oracle specific assessment configuration.""" @@ -92,8 +93,12 @@ def _configure_credentials(self) -> str: "secret_vault_name": secret_vault_name, source: { "host": self.prompts.question("Enter the host details (Server name, IP address, SCAN Name)"), - "tnsPort": int(self.prompts.question("Enter the TNS Listener port number", default=1521, 
valid_number=True)), - "tnsService": self.prompts.question("Enter the TNS service name as registered in the Oracle listener", default="orcl"), + "tnsPort": int( + self.prompts.question("Enter the TNS Listener port number", default=1521, valid_number=True) + ), + "tnsService": self.prompts.question( + "Enter the TNS service name as registered in the Oracle listener", default="orcl" + ), "user": self.prompts.question("Enter user name with system privileges", default="SYSTEM"), "password": self.prompts.password("Enter user password"), }, @@ -103,6 +108,7 @@ def _configure_credentials(self) -> str: logger.info(f"Credential template created for {source}.") return source + class ConfigureSqlServerAssessment(AssessmentConfigurator): """SQL Server specific assessment configuration.""" diff --git a/src/databricks/labs/lakebridge/connections/database_manager.py b/src/databricks/labs/lakebridge/connections/database_manager.py index 9741441205..f77b8ebdc5 100644 --- a/src/databricks/labs/lakebridge/connections/database_manager.py +++ b/src/databricks/labs/lakebridge/connections/database_manager.py @@ -45,6 +45,7 @@ def fetch(self, query: str) -> FetchResult: def close(self) -> None: pass + class _BaseConnector(DatabaseConnector): def __init__(self, config: JsonObject): self.config = config @@ -129,16 +130,17 @@ def _connect(self) -> Engine: db_name = self.config.get('tnsService') connection_string = URL.create( - drivername="oracle+oracledb", - username=self.config['user'], - password=self.config['password'], - host=self.config['host'], - port=self.config.get('tnsPort', 1521), - database=db_name + drivername="oracle+oracledb", + username=self.config['user'], + password=self.config['password'], + host=self.config['host'], + port=self.config.get('tnsPort', 1521), + database=db_name, ) return create_engine(connection_string) + class DatabaseManager: def __init__(self, db_type: str, config: JsonObject): self.connector = _create_connector(db_type, config) diff --git 
a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers_ddl.sql new file mode 100644 index 0000000000..19561d65dc --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers_ddl.sql @@ -0,0 +1,6 @@ +CREATE TABLE config_containers ( + inst_ids VARCHAR, + name VARCHAR, + open_mode VARCHAR, + pdb_count INTEGER +); diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features_ddl.sql new file mode 100644 index 0000000000..1b6645bc9e --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features_ddl.sql @@ -0,0 +1,7 @@ +CREATE TABLE config_db_features ( + scope VARCHAR, + inst_id INTEGER, + stat_name VARCHAR, + name VARCHAR, + value VARCHAR +); diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance_ddl.sql new file mode 100644 index 0000000000..c786bb6580 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance_ddl.sql @@ -0,0 +1,6 @@ +CREATE TABLE config_instance ( + inst_id INTEGER, + instance_name VARCHAR, + version VARCHAR, + database_type VARCHAR +); diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution.sql index 0d055f75cd..f9dd12ae77 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution.sql +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution.sql @@ -1,6 +1,6 @@ select NVL(con.name,'Entire CDB/Non CDB') con_name, param.instance_number, - to_char(snap.snap_time,'yyyy-mm-dd HH24:MI:SS') as snap_time, 
+ snap.snap_time, parameter_name, value from cdb_hist_parameter param, diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution_ddl.sql new file mode 100644 index 0000000000..e99ec0297e --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution_ddl.sql @@ -0,0 +1,7 @@ +CREATE TABLE config_memory_evolution ( + con_name VARCHAR, + instance_number INTEGER, + snap_time TIMESTAMP, + parameter_name VARCHAR, + value VARCHAR +); diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects_ddl.sql new file mode 100644 index 0000000000..21f7518920 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects_ddl.sql @@ -0,0 +1,6 @@ +CREATE TABLE config_pdb_objects ( + pdb_name VARCHAR, + owner VARCHAR, + object_type VARCHAR, + cnt INTEGER +); diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions_ddl.sql new file mode 100644 index 0000000000..2c6f2181a5 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions_ddl.sql @@ -0,0 +1,6 @@ +CREATE TABLE config_pdb_partitions ( + pdb_name VARCHAR, + owner VARCHAR, + object_type VARCHAR, + cnt INTEGER +); diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage_ddl.sql new file mode 100644 index 0000000000..0aef526256 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage_ddl.sql @@ -0,0 +1,7 @@ +CREATE TABLE config_storage ( + con_name VARCHAR, + tablespace_type VARCHAR, + gb 
DOUBLE, + freegb DOUBLE, + maxgb DOUBLE +); diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits_ddl.sql new file mode 100644 index 0000000000..844b36d80b --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits_ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE perf_cpu_waits ( + pdb_name VARCHAR, + instance_number INTEGER, + mtime TIMESTAMP, + event VARCHAR, + wait_class VARCHAR, + total_wait_time BIGINT +); diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution_ddl.sql new file mode 100644 index 0000000000..86ad491992 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution_ddl.sql @@ -0,0 +1,7 @@ +CREATE TABLE perf_fgd_session_evolution ( + name VARCHAR, + instance_number INTEGER, + username VARCHAR, + snap_time TIMESTAMP, + foregd_session_cnt INTEGER +); diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap_ddl.sql new file mode 100644 index 0000000000..9580ac4086 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap_ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE perf_heatmap ( + mtime VARCHAR, + pdb_name VARCHAR, + instance_number INTEGER, + hour VARCHAR, + core_nb INTEGER, + value DOUBLE +); diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext_ddl.sql new file mode 100644 index 0000000000..2aeb3d6710 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext_ddl.sql @@ -0,0 +1,9 @@ +CREATE TABLE perf_sqltext ( + dt TIMESTAMP, + pdb_name 
VARCHAR, + command VARCHAR, + parsing_schema_name VARCHAR, + instance_number INTEGER, + cnt INTEGER, + total_run_time_secs DOUBLE +); diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml index f61f007457..db05803c41 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml @@ -2,72 +2,144 @@ name: oracle_assessment version: "1.0" extract_folder: "/tmp/data/oracle_assessment" steps: + - name: config_containers + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers_ddl.sql + mode: overwrite + frequency: once + flag: active - name: config_containers type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_containers.sql mode: overwrite frequency: once flag: active + - name: config_db_features + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features_ddl.sql + mode: overwrite + frequency: once + flag: active - name: config_db_features type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_db_features.sql mode: overwrite frequency: once flag: active + - name: config_instance + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance_ddl.sql + mode: overwrite + frequency: once + flag: active - name: config_instance type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_instance.sql mode: overwrite frequency: once flag: active + - name: config_memory_evolution + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution_ddl.sql + mode: overwrite + frequency: once + flag: active - name: config_memory_evolution type: sql 
extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_memory_evolution.sql mode: overwrite frequency: once flag: active + - name: config_pdb_objects + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects_ddl.sql + mode: overwrite + frequency: once + flag: active - name: config_pdb_objects type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_objects.sql mode: overwrite frequency: once flag: active + - name: config_pdb_partitions + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions_ddl.sql + mode: overwrite + frequency: once + flag: active - name: config_pdb_partitions type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_pdb_partitions.sql mode: overwrite frequency: once flag: active + - name: config_storage + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage_ddl.sql + mode: overwrite + frequency: once + flag: active - name: config_storage type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/config_storage.sql mode: overwrite frequency: once flag: active + - name: perf_cpu_waits + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits_ddl.sql + mode: overwrite + frequency: once + flag: active - name: perf_cpu_waits type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_cpu_waits.sql mode: overwrite frequency: once flag: active + - name: perf_fgd_session_evolution + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution_ddl.sql + mode: overwrite + frequency: once + flag: active - name: perf_fgd_session_evolution type: sql extract_source: 
src/databricks/labs/lakebridge/resources/assessments/oracle/perf_fgd_session_evolution.sql mode: overwrite frequency: once flag: active + - name: perf_heatmap + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap_ddl.sql + mode: overwrite + frequency: once + flag: active - name: perf_heatmap type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_heatmap.sql mode: overwrite frequency: once flag: active + - name: perf_sqltext + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext_ddl.sql + mode: overwrite + frequency: once + flag: active - name: perf_sqltext type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/perf_sqltext.sql mode: overwrite frequency: once flag: active + - name: test_nulloutput + type: ddl + extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput_ddl.sql + mode: overwrite + frequency: once + flag: active - name: test_nulloutput type: sql extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput.sql diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput_ddl.sql b/src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput_ddl.sql new file mode 100644 index 0000000000..8117de5977 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput_ddl.sql @@ -0,0 +1,3 @@ +CREATE TABLE test_nulloutput ( + x VARCHAR +); diff --git a/tests/integration/assessments/test_profiler.py b/tests/integration/assessments/test_profiler.py index 348fffcc52..36d0f2a61f 100644 --- a/tests/integration/assessments/test_profiler.py +++ b/tests/integration/assessments/test_profiler.py @@ -7,6 +7,7 @@ from databricks.labs.lakebridge.assessments.pipeline import PipelineClass from databricks.labs.lakebridge.assessments.profiler import Profiler + # Tests for Oracle 
profiler section def test_Oracle_as_supported_source_technologies() -> None: """Test that supported source technologies are correctly returned""" @@ -15,12 +16,14 @@ def test_Oracle_as_supported_source_technologies() -> None: assert isinstance(supported_platforms, list) assert "oracle" in supported_platforms + def test_Oracle_profile_missing_platform_config() -> None: """Test that profiling an unsupported platform raises ValueError""" with pytest.raises(ValueError, match="Cannot Proceed without a valid pipeline configuration for oracle"): profiler = Profiler("oracle", None) profiler.profile() + def test_Oracle_profile_execution() -> None: """Test successful profiling execution using actual pipeline configuration""" profiler = Profiler("oracle") @@ -28,10 +31,14 @@ def test_Oracle_profile_execution() -> None: config_file = path_prefix / "src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml" config = profiler.path_modifier(config_file=config_file, path_prefix=path_prefix) profiler.profile(pipeline_config=config) - assert Path("/tmp/data/oracle_assessment/profiler_extract.db").exists(), "Profiler extract database should be created" + assert Path( + "/tmp/data/oracle_assessment/profiler_extract.db" + ).exists(), "Profiler extract database should be created" + # End of Oracle profiler tests section + def test_supported_source_technologies() -> None: """Test that supported source technologies are correctly returned""" profiler = Profiler("synapse", None) diff --git a/tests/test_blueprint_oracle.py b/tests/test_blueprint_oracle.py index a8dfb2126d..d6f1690e1a 100644 --- a/tests/test_blueprint_oracle.py +++ b/tests/test_blueprint_oracle.py @@ -1,12 +1,9 @@ -from pathlib import Path from databricks.labs.lakebridge.assessments.pipeline import PipelineClass from databricks.labs.lakebridge.assessments.profiler import Profiler -from databricks.labs.lakebridge.connections.database_manager import OracleConnector -from 
databricks.labs.lakebridge.connections.database_manager import DatabaseManager -pipeline_config_file="../src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml" -config=PipelineClass.load_config_from_yaml(pipeline_config_file) +pipeline_config_file = "../src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml" +config = PipelineClass.load_config_from_yaml(pipeline_config_file) print(config) profiler = Profiler("oracle", None) From 4a01196f5ca8a0c0a75ff4890ec326ae237e97d1 Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Fri, 6 Mar 2026 16:27:09 +0100 Subject: [PATCH 10/12] Add support for ddl scripts --- .../lakebridge/assessments/configure_assessment.py | 2 +- .../labs/lakebridge/connections/database_manager.py | 10 +++++----- tests/test_blueprint_oracle.py | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/databricks/labs/lakebridge/assessments/configure_assessment.py b/src/databricks/labs/lakebridge/assessments/configure_assessment.py index 64924fe980..f3ed40fcc5 100644 --- a/src/databricks/labs/lakebridge/assessments/configure_assessment.py +++ b/src/databricks/labs/lakebridge/assessments/configure_assessment.py @@ -94,7 +94,7 @@ def _configure_credentials(self) -> str: source: { "host": self.prompts.question("Enter the host details (Server name, IP address, SCAN Name)"), "tnsPort": int( - self.prompts.question("Enter the TNS Listener port number", default=1521, valid_number=True) + self.prompts.question("Enter the TNS Listener port number", default=str(1521), valid_number=True) ), "tnsService": self.prompts.question( "Enter the TNS service name as registered in the Oracle listener", default="orcl" diff --git a/src/databricks/labs/lakebridge/connections/database_manager.py b/src/databricks/labs/lakebridge/connections/database_manager.py index f77b8ebdc5..120fd60400 100644 --- a/src/databricks/labs/lakebridge/connections/database_manager.py +++ 
b/src/databricks/labs/lakebridge/connections/database_manager.py @@ -131,11 +131,11 @@ def _connect(self) -> Engine: db_name = self.config.get('tnsService') connection_string = URL.create( drivername="oracle+oracledb", - username=self.config['user'], - password=self.config['password'], - host=self.config['host'], - port=self.config.get('tnsPort', 1521), - database=db_name, + username=str(self.config['user']), + password=str(self.config['password']), + host=str(self.config['host']), + port=int(str(self.config.get('tnsPort', 1521))), + database=str(db_name), ) return create_engine(connection_string) diff --git a/tests/test_blueprint_oracle.py b/tests/test_blueprint_oracle.py index d6f1690e1a..5173b1affe 100644 --- a/tests/test_blueprint_oracle.py +++ b/tests/test_blueprint_oracle.py @@ -1,4 +1,3 @@ - from databricks.labs.lakebridge.assessments.pipeline import PipelineClass from databricks.labs.lakebridge.assessments.profiler import Profiler From f99b785b9fff86c54953ec4cc52899f2d64561fd Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Fri, 6 Mar 2026 16:33:20 +0100 Subject: [PATCH 11/12] schema validator for the Oracle profiler --- .../validation/oracle_extract_schema.yml | 137 +++++++++++++++--- 1 file changed, 114 insertions(+), 23 deletions(-) diff --git a/src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml b/src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml index e562df4d28..13692f35ab 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml +++ b/src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml @@ -1,4 +1,4 @@ -source_tech: synapse +source_tech: oracle version: 0.1 schemas: @@ -6,46 +6,137 @@ schemas: tables: config_containers: columns: - - name: TABLE_CATALOG + - name: inst_ids type: VARCHAR + - name: name + type: VARCHAR + - name: open_mode + type: VARCHAR + - name: pdb_count + type: INTEGER 
config_db_features: columns: - - name: - type: + - name: scope + type: VARCHAR + - name: inst_id + type: INTEGER + - name: stat_name + type: VARCHAR + - name: name + type: VARCHAR + - name: value + type: VARCHAR config_instance: columns: - - name: - type: + - name: inst_id + type: INTEGER + - name: instance_name + type: VARCHAR + - name: version + type: VARCHAR + - name: database_type + type: VARCHAR config_memory_evolution: columns: - - name: - type: + - name: con_name + type: VARCHAR + - name: instance_number + type: INTEGER + - name: snap_time + type: TIMESTAMP + - name: parameter_name + type: VARCHAR + - name: value + type: VARCHAR config_pdb_objects: columns: - - name: - type: + - name: pdb_name + type: VARCHAR + - name: owner + type: VARCHAR + - name: object_type + type: VARCHAR + - name: cnt + type: INTEGER config_pdb_partitions: columns: - - name: - type: + - name: pdb_name + type: VARCHAR + - name: owner + type: VARCHAR + - name: object_type + type: VARCHAR + - name: cnt + type: INTEGER config_storage: columns: - - name: - type: + - name: con_name + type: VARCHAR + - name: tablespace_type + type: VARCHAR + - name: gb + type: DOUBLE + - name: freegb + type: DOUBLE + - name: maxgb + type: DOUBLE perf_cpu_waits: columns: - - name: - type: + - name: pdb_name + type: VARCHAR + - name: instance_number + type: INTEGER + - name: mtime + type: TIMESTAMP + - name: event + type: VARCHAR + - name: wait_class + type: VARCHAR + - name: total_wait_time + type: BIGINT perf_fgd_session_evolution: columns: - - name: - type: + - name: name + type: VARCHAR + - name: instance_number + type: INTEGER + - name: username + type: VARCHAR + - name: snap_time + type: TIMESTAMP + - name: foregd_session_cnt + type: INTEGER perf_heatmap: columns: - - name: - type: + - name: mtime + type: VARCHAR + - name: pdb_name + type: VARCHAR + - name: instance_number + type: INTEGER + - name: hour + type: VARCHAR + - name: core_nb + type: INTEGER + - name: value + type: DOUBLE perf_sqltext: 
columns: - - name: - type: - + - name: dt + type: TIMESTAMP + - name: pdb_name + type: VARCHAR + - name: command + type: VARCHAR + - name: parsing_schema_name + type: VARCHAR + - name: instance_number + type: INTEGER + - name: cnt + type: INTEGER + - name: total_run_time_secs + type: DOUBLE + test_nulloutput: + columns: + - name: x + type: VARCHAR From a6751d35bd74c498e2f0d0567f7bf63a66f232c8 Mon Sep 17 00:00:00 2001 From: Laurent Leturgez Date: Fri, 13 Mar 2026 10:03:39 +0100 Subject: [PATCH 12/12] removed the test null output as it has been fixed --- .../lakebridge/assessments/configure_assessment.py | 3 ++- .../resources/assessments/oracle/pipeline_config.yml | 12 ------------ .../assessments/validation/oracle_extract_schema.yml | 4 ---- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/src/databricks/labs/lakebridge/assessments/configure_assessment.py b/src/databricks/labs/lakebridge/assessments/configure_assessment.py index f3ed40fcc5..1afeede277 100644 --- a/src/databricks/labs/lakebridge/assessments/configure_assessment.py +++ b/src/databricks/labs/lakebridge/assessments/configure_assessment.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod +from collections.abc import Callable from pathlib import Path import logging import shutil @@ -219,7 +220,7 @@ def create_assessment_configurator( source_system: str, product_name: str, prompts: Prompts, credential_file=None ) -> AssessmentConfigurator: """Factory function to create the appropriate assessment configurator.""" - configurators = { + configurators: dict[str, Callable[..., AssessmentConfigurator]] = { "mssql": ConfigureSqlServerAssessment, "synapse": ConfigureSynapseAssessment, "oracle": ConfigureOracleAssessment, diff --git a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml index db05803c41..5da43c5ff1 100644 --- 
a/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml +++ b/src/databricks/labs/lakebridge/resources/assessments/oracle/pipeline_config.yml @@ -134,15 +134,3 @@ steps: mode: overwrite frequency: once flag: active - - name: test_nulloutput - type: ddl - extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput_ddl.sql - mode: overwrite - frequency: once - flag: active - - name: test_nulloutput - type: sql - extract_source: src/databricks/labs/lakebridge/resources/assessments/oracle/test_nulloutput.sql - mode: overwrite - frequency: once - flag: active diff --git a/src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml b/src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml index 13692f35ab..6c2cc48361 100644 --- a/src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml +++ b/src/databricks/labs/lakebridge/resources/assessments/validation/oracle_extract_schema.yml @@ -136,7 +136,3 @@ schemas: type: INTEGER - name: total_run_time_secs type: DOUBLE - test_nulloutput: - columns: - - name: x - type: VARCHAR