-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathscan_with_user_specified_data_source_formats.py
More file actions
66 lines (40 loc) · 1.19 KB
/
scan_with_user_specified_data_source_formats.py
File metadata and controls
66 lines (40 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Databricks notebook source
# MAGIC %md
# MAGIC # Scan Tables with User Specified Data Source Formats
# COMMAND ----------
# MAGIC %md
# MAGIC ### Install discoverx lib
# COMMAND ----------
# %pip install dbl-discoverx
# dbutils.library.restartPython()
# COMMAND ----------
# MAGIC %md
# MAGIC ### Declare Variables
# COMMAND ----------
# One text widget per level of the three-part table name; each defaults to
# the "*" wildcard.
dbutils.widgets.text(name="catalogs", defaultValue="*", label="Catalogs")
dbutils.widgets.text(name="schemas", defaultValue="*", label="Schemas")
dbutils.widgets.text(name="tables", defaultValue="*", label="Tables")
# COMMAND ----------
# Read the current widget values in catalog -> schema -> table order.
catalogs, schemas, tables = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("catalogs", "schemas", "tables")
)
# Dotted "<catalogs>.<schemas>.<tables>" pattern passed to dx.from_tables below.
from_table_statement = ".".join((catalogs, schemas, tables))
# COMMAND ----------
# MAGIC %md
# MAGIC ### Initialize discoverx
# COMMAND ----------
# DX comes from the dbl-discoverx package (see the install cell above, which
# is commented out and must be run first if the library is not yet installed).
from discoverx import DX
# Single DX instance built with default arguments; both scans below reuse it.
dx = DX()
# COMMAND ----------
# MAGIC %md
# MAGIC ### DiscoverX scans all Delta tables by default
# COMMAND ----------
# Scan every table matching the widget-built pattern. No data source formats
# are passed here, so the library default applies (Delta tables, per the
# heading above).
dx.from_tables(from_table_statement).scan()
# COMMAND ----------
# MAGIC %md
# MAGIC ### Users can specify data source formats as follows
# COMMAND ----------
# Same table selection as the default scan, but with the data source formats
# given explicitly (DELTA and JSON) before scanning.
(
    dx.from_tables(from_table_statement)
    .with_data_source_formats(["DELTA", "JSON"])
    .scan()
)
# COMMAND ----------