diff --git a/.assets/Changelog.md b/.assets/Changelog.md
index 1949ae3..8552ad9 100644
--- a/.assets/Changelog.md
+++ b/.assets/Changelog.md
@@ -4,6 +4,7 @@ This page lists all pull requests that made significant changes to bc2adls.
Pull request | Changes
--------------- | ---
+[101](https://github.com/microsoft/bc2adls/pull/101) | It is often desirable to query the data residing in the lake and use it inside Dynamics 365 Business Central (BC). Such data may either have been exported previously out of BC through the `bc2adls` tool, or general tabular data that has been sourced from external systems. This lights up many use cases where the lake becomes the datasource and can be looked up on demand from inside BC through AL language constructs. See [Querying data residing in the lake with bc2adls](/.assets/QueryLakeData.md) to know more.
[79](https://github.com/microsoft/bc2adls/pull/79) | The step to clean up tracked deleted records from the export process has now been removed to make exports more efficient. This clean up step can instead be performed either by clicking on the action **Clear tracked deleted records** on the main setup page, or by invoking the new codeunit **ADLSE Clear Tracked Deletions** through a low-frequency custom job queue entry.
[78](https://github.com/microsoft/bc2adls/pull/78) | Upgrading to new versions may lead the export configuration to enter an incorrect state, say, if a field that was being exported before gets obsoleted in the new version. This fix prevents such an occurrence by raising an error during the upgrade process. If corrective actions, say, disabling such fields are not taken after multiple upgrade attempts, the bc2adls extension is uninstalled and upgrade is forced. A subsequent re-install of the extension will then disable such tables from being exported, so that the user can then react to the change in schema later on.
[56](https://github.com/microsoft/bc2adls/pull/56) | The table ADLSE Run has now been added to the retention policy so that the logs for the executions can be cleared periodically, thus taking up less space in the database.
diff --git a/.assets/QueryDataInTheLake.png b/.assets/QueryDataInTheLake.png
new file mode 100644
index 0000000..c496f2e
Binary files /dev/null and b/.assets/QueryDataInTheLake.png differ
diff --git a/.assets/QueryLakeData.md b/.assets/QueryLakeData.md
new file mode 100644
index 0000000..6430832
--- /dev/null
+++ b/.assets/QueryLakeData.md
@@ -0,0 +1,80 @@
+# Querying data residing in the lake with bc2adls
+
+It is often desirable to query the data residing in the lake and use it inside Dynamics 365 Business Central (BC). Such data may either have been exported previously out of BC through the `bc2adls` tool, or general tabular data that has been sourced from external systems. The following steps help you establish a mechanism to query such data directly inside BC through the AL constructs.
+
+Let's go through a few use cases that are enabled by this feature.
+1. Data from BC that has been previously exported and archived into the lake may need to be looked up by the system or a user to see historical entities.
+1. Data created on the lake by external systems (such as IoT devices or [Azure Synapse Link for Dataverse](https://learn.microsoft.com/en-us/power-apps/maker/data-platform/export-to-data-lake)) need to be looked up in BC to make relevant calculations.
+1. The data lake can now be used as a cheaper single-storage solution for miscellaneous tabular data that can be queried by BC on-demand.
+
+## How it works
+**Note the arrows that point from the lake database into BC in the diagram below.** Using the new façades [`ADLSE Query`](/businessCentral/src/Query/ADLSEQuery.Codeunit.al) and [`ADLSE Query Table`](/businessCentral/src/Query/ADLSEQueryTable.Codeunit.al), the AL developer issues a REST API call to the `AdlsProxy` Azure function app while passing information like the table and specific fields to be queried, filters to be applied, etc. The function app then formulates the request as an SQL query to the lake database, which in turn gets the relevant data from the `data` CDM folder in the storage account. The result is then returned as a Json response to BC so that records and corresponding fields in those records can be individually read via the AL language. Please see the documentation of the above façades for more details.
+
+
+
+Currently the functionality only supports:
+- fetching a specific set (or all) fields in a filtered set of records that is sorted in a certain way, similar to the [Findset](https://learn.microsoft.com/en-us/dynamics365/business-central/dev-itpro/developer/methods-auto/recordref/recordref-findset-method) call.
+- counting the number of records in the lake, similar to the [Count](https://learn.microsoft.com/en-us/dynamics365/business-central/dev-itpro/developer/methods-auto/recordref/recordref-count-method) call.
+- checking if there are any records in the lake, similar to the [IsEmpty](https://learn.microsoft.com/en-us/dynamics365/business-central/dev-itpro/developer/methods-auto/recordref/recordref-isempty-method) call.
+
+> **Note**
+> 1. The approach suggested will **only work for tabular data** that have been structured into shared metadata tables as described in [Creating shared metadata tables](/.assets/SharedMetadataTables.md). For data that was not created through the `bc2adls` export, you may need to create such tables manually, as explained.
+> 1. Since querying from BC requires a number of Azure components to work in tandem, please use this approach only for **non-business-critical** processes that allow for network or process latency.
+> 1. The architecture allows for a limited amount of data to be queried from the serverless SQL endpoint. You may get errors if the response is too large for BC to process. Therefore, it is highly recommended that you apply filtering to narrow the results and only fetch the fields that you require.
+
+## Setting it all up
+
+### Pre-requisites
+- You have configured [shared metadata tables](/.assets/SharedMetadataTables.md) for your data on the lake. This may include tables that are unknown to BC.
+- You have sufficient access to create Azure function apps on your subscription.
+- You have [installed and configured](/.assets/Setup.md) `bc2adls`, and the tables and fields in BC to be queried from the lake have been added as per [these instructions](/.assets/Execution.md#exporting-data-from-bc). For tables that are meant to be imported only (and not exported to the lake), set the `Enabled for export` field to be `false`. This step is, of course, only relevant if you wish to read BC data from the lake via the [`ADLSE Query Table`](/businessCentral/src/Query/ADLSEQueryTable.Codeunit.al) façade.
+
+### Create and deploy function app to Azure
+Start Visual Studio Code and open the folder [`adlsProxy`](/adlsProxy/). Follow the instructions given in [the documentation](https://learn.microsoft.com/en-us/azure/azure-functions/create-first-function-vs-code-csharp?tabs=in-process), choosing **.NET 7 Isolated** as the runtime stack. Let's say you chose to name the Function App as `AdlsProxyX`.
+
+### Take note of the function app URL
+Open the newly created function app `AdlsProxyX` in the Azure portal, under **Overview**, take a note of the value in the **URL** field. This should be in the format `https://adlsproxyx.azurewebsites.net`.
+
+### Add a system managed identity for the Azure function
+In the Azure function app, follow [the instructions](https://learn.microsoft.com/en-us/azure/app-service/overview-managed-identity?tabs=portal%2Chttp#add-a-system-assigned-identity) to add a system managed identity. This would create an identity named (usually) the same as the Function App.
+
+### Protect your function app using new AAD credentials
+In the Azure function app, follow the instructions at [Create a new app registration automatically](https://learn.microsoft.com/en-us/azure/app-service/configure-authentication-provider-aad#--option-1-create-a-new-app-registration-automatically). This should create a brand new App registration that can be used to make requests on the function app. Take a note of the following values as they will be required later on,
+- the `App (Client) ID` field, as well as,
+- the newly created client secret stored as the [application setting](https://learn.microsoft.com/en-us/azure/azure-functions/functions-how-to-use-azure-function-app-settings?tabs=portal#settings) named `MICROSOFT_PROVIDER_AUTHENTICATION_SECRET`. Of course, you may just as well create a new secret on the new app registration and use it instead!
+
+### Take a note of the function keys
+In the Azure function app, under **Functions**, you will notice a few functions that have been created. Go inside each of the functions and under `Function Keys`, make a note of the full text of the respective function key.
+> It is recommended to go through the documentation at [Securing Azure functions](https://learn.microsoft.com/en-us/azure/azure-functions/security-concepts) in order to fully understand the different ways to authenticate and authorize functions. This may be handy if, say, you want only some credentials to access entity A, while some others can access entity B, but everyone can access entity C, etc.
+
+### Authorize the created system managed identity to query the data on the serverless SQL endpoint
+Open the SQL query editor from the lake database in the Synapse studio opened from your Synapse workspace and execute the following query,
+
+ CREATE LOGIN [AdlsProxyX] FROM EXTERNAL PROVIDER;
+ CREATE USER AdlsProxyX FROM LOGIN [AdlsProxyX];
+ ALTER ROLE db_datareader ADD member AdlsProxyX;
+
+This will ensure that the function app has the necessary privileges to run SQL queries in the database. Please make sure that the above query has run in the context of the right database, and that you have replaced the word `AdlsProxyX` with the correct name of the system managed identity of the function app.
+
+### Authorize the created system managed identity to read the data on the lake
+As queries from the Azure function will be executed in the context of the system managed identity of the function app, it needs to be assigned the **Storage Blob Data Reader** role on the storage account with the data files.
+
+### Enable BC to send queries to the function app
+On the main setup page of the `bc2adls` extension, you will note a new fast tab called **Query data in the lake**. Fill out the fields in the following way,
+- **Synapse Serverless SQL endpoint** Locate the Synapse workspace resource on the Azure portal and fill this with the value of the field **Serverless SQL endpoint** under **Overview**.
+- **SQL Database Name** The name of the lake database that was created in the [Creating shared metadata tables](/.assets/SharedMetadataTables.md) step.
+- **Client ID** The value of the app (client) id from the step [Protect your function app using new AAD credentials](#protect-your-function-app-using-new-aad-credentials) above.
+- **Client secret** The value of the client secret from the step [Protect your function app using new AAD credentials](#protect-your-function-app-using-new-aad-credentials) above.
+- **Function app url** The value of the url from the step [Take note of the function app URL](#take-note-of-the-function-app-url) above.
+- **Function key FindSet** The value of the function key for the Findset function gathered at the step [Take a note of the function keys](#take-a-note-of-the-function-keys) above.
+- **Function key IsEmpty** The value of the function key for the IsEmpty function gathered at the step [Take a note of the function keys](#take-a-note-of-the-function-keys) above.
+- **Function key Count** The value of the function key for the Count function gathered at the step [Take a note of the function keys](#take-a-note-of-the-function-keys) above.
+
+
+
+## Making queries in AL
+Phew, that was a lengthy configuration but it is finally time to query the lake! Open Visual Studio Code and go to the place in your AL code where you want to query the lake and follow the examples given in the documentation for the two façades,
+1. [`ADLSE Query`](/businessCentral/src/Query/ADLSEQuery.Codeunit.al) used for any tabular data, and
+1. [`ADLSE Query Table`](/businessCentral/src/Query/ADLSEQueryTable.Codeunit.al) used for BC tables.
+
+Any errors that happen during the course of the REST API call to the function app are thrown up on the AL side. To troubleshoot further on the function app, it is recommended that you follow instructions at [Monitor executions in Azure functions](https://learn.microsoft.com/en-us/azure/azure-functions/functions-monitoring).
\ No newline at end of file
diff --git a/.assets/Setup.md b/.assets/Setup.md
index c9f4f00..3ecac0a 100644
--- a/.assets/Setup.md
+++ b/.assets/Setup.md
@@ -31,6 +31,8 @@ Let us take a look at the settings show in the sample screenshot below,
- **Emit telemetry** The flag to enable or disable operational telemetry from this extension. It is set to True by default.
- **Multi- company export** The flag to allow exporting data from multiple companies at the same time. You should enable this only after the export schema is finalized- in other words, ensure that at least one export for a company has been successful with all the desired tables and the desired fields in those tables. We recommend that the json files are manually checked in the outbound container before enabling this flag. Changes to the export schema (adding or removing tables as well as changing the field set to be exported) are not allowed as long as this flag is checked.
+The fast tab **Query data in the lake** handles configuration in case you want to read (not export) data from the lake. Please refer to [Querying data residing in the lake with bc2adls](/.assets/QueryLakeData.md) for more details.
+

> **Note**
diff --git a/.assets/architecture.png b/.assets/architecture.png
index 6e6fb45..7e05eb8 100644
Binary files a/.assets/architecture.png and b/.assets/architecture.png differ
diff --git a/.assets/bc2adls_data_architecture.vsdx b/.assets/bc2adls_data_architecture.vsdx
index 7807b71..d1af9b5 100644
Binary files a/.assets/bc2adls_data_architecture.vsdx and b/.assets/bc2adls_data_architecture.vsdx differ
diff --git a/.gitignore b/.gitignore
index c0a88fb..c06454a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -349,6 +349,13 @@ MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
+# VSCode related
+**/.vscode/*
+
# AL related
**/*.app
-**/.vscode/*
\ No newline at end of file
+
+# AdlsProxy related
+adlsProxy/bin/
+adlsProxy/obj/
+adlsProxy/local.settings.json
\ No newline at end of file
diff --git a/README.md b/README.md
index 36d85b3..ecb7bc2 100644
--- a/README.md
+++ b/README.md
@@ -2,12 +2,12 @@
# Project
-> **This tool is an experiment on Dynamics 365 Business Central with the sole purpose of discovering the possibilities of having data exported to an Azure Data Lake. To see the details of how this tool is supported, please visit [the Support page](./SUPPORT.md). In case you wish to use this tool for your next project and engage with us, you are welcome to write to bc2adls@microsoft.com. As we are a small team, please expect delays in getting back to you.**
+> **This tool is an experiment on Dynamics 365 Business Central with the sole purpose of discovering the possibilities of having data synced to and from an Azure Data Lake. To see the details of how this tool is supported, please visit [the Support page](./SUPPORT.md). In case you wish to use this tool for your next project and engage with us, you are welcome to write to bc2adls@microsoft.com. As we are a small team, please expect delays in getting back to you.**
## Introduction
-The **bc2adls** tool is used to export data from [Dynamics 365 Business Central](https://dynamics.microsoft.com/en-us/business-central/overview/) (BC) to [Azure Data Lake Storage](https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction) and expose it in the [CDM folder](https://docs.microsoft.com/en-us/common-data-model/data-lake) format. The components involved are the following,
-- the **[businessCentral](/tree/main/businessCentral/)** folder holds a [BC extension](https://docs.microsoft.com/en-gb/dynamics365/business-central/ui-extensions) called `Azure Data Lake Storage Export` (ADLSE) which enables export of incremental data updates to a container on the data lake. The increments are stored in the CDM folder format described by the `deltas.cdm.manifest.json manifest`.
+The **bc2adls** tool is used to exchange data between [Dynamics 365 Business Central](https://dynamics.microsoft.com/en-us/business-central/overview/) (BC) and [Azure Data Lake Storage](https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction) and expose it in the [CDM folder](https://docs.microsoft.com/en-us/common-data-model/data-lake) format in the lake. The components involved are the following,
+- the **[businessCentral](/tree/main/businessCentral/)** folder holds a [BC extension](https://docs.microsoft.com/en-gb/dynamics365/business-central/ui-extensions) called `Azure Data Lake Storage Export` (ADLSE) which enables export of incremental data updates to a container on the data lake. The increments are stored in the CDM folder format described by the `deltas.cdm.manifest.json manifest`. It also provides a library to read the tabular data existing on the lake, including non- BC data.
- the **[synapse](/tree/main/synapse/)** folder holds the templates needed to create an [Azure Synapse](https://azure.microsoft.com/en-gb/services/synapse-analytics/) pipeline that consolidates the increments into a final `data` CDM folder.
The following diagram illustrates the flow of data through a usage scenario- the main points being,
@@ -17,6 +17,7 @@ The following diagram illustrates the flow of data through a usage scenario- the
- CDM: via the `data.cdm.manifest.json manifest`
- CSV/Parquet: via the underlying files for each individual entity inside the `data` folder
- Spark/SQL: via [shared metadata tables](/.assets/SharedMetadataTables.md)
+- The reverse flow is also possible whereby data in the lake can be read into BC via AL constructs.

@@ -24,6 +25,7 @@ More details:
- [Installation and configuration](/.assets/Setup.md)
- [Executing the export and pipeline](/.assets/Execution.md)
- [Creating shared metadata tables](/.assets/SharedMetadataTables.md)
+- [Querying data residing in the lake with bc2adls](/.assets/QueryLakeData.md)
- [Frequently asked questions](/.assets/FAQs.md)
- Webinars
- [[Jan 2022] Webinar introducing bc2adls](https://www.microsoft.com/en-us/videoplayer/embed/RWSHHG)
diff --git a/adlsProxy/AdlsProxy.csproj b/adlsProxy/AdlsProxy.csproj
new file mode 100644
index 0000000..3c94b56
--- /dev/null
+++ b/adlsProxy/AdlsProxy.csproj
@@ -0,0 +1,28 @@
+
+
+ net7.0
+ v4
+ Exe
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
+ PreserveNewest
+
+
+ PreserveNewest
+ Never
+
+
+
+
+
+
\ No newline at end of file
diff --git a/adlsProxy/CreateQuery.cs b/adlsProxy/CreateQuery.cs
new file mode 100644
index 0000000..fe44eab
--- /dev/null
+++ b/adlsProxy/CreateQuery.cs
@@ -0,0 +1,165 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+using Newtonsoft.Json.Linq;
+
+namespace AdlsProxy
+{
+ internal enum FilterType
+ {
+ Equals,
+ NotEquals,
+ GreaterThan,
+ GreaterThanOrEquals,
+ LessThan,
+ LessThanOrEquals
+ }
+
+ ///
+ /// Creates an SQL query based on the JSON input passed. It is expected that the JSON is formatted in the following way,
+ /// {
+ /// "server": "Serverless SQL endpoint",
+ /// "database": "database name",
+ /// "entity": "custledgerentry_21",
+ /// "fields": [ "EntryNo-1", "CustomerNo-3", "PostingDate-4" ], // optional; if blank, return all fields. Only used by FindSet.
+ /// "filters": [
+ /// { "op": "GreaterThanOrEquals", "field": "CustomerNo-3", "value": "40000" },
+ /// { "op": "LessThan", "field": "EntryNo-1", "value": 1559 },
+ /// { "op": "NotEquals", "field": "PostingDate-4", "value": "2021-03-23T00:00:00" }
+ /// ], // optional; if blank, return unfiltered set of all records
+ /// "orderBy": [
+ /// {
+ /// "field": "PostingDate-4",
+ /// "ascending": false
+ /// },
+ /// {
+ /// "field": "EntryNo-1"
+ /// }
+ /// ] // optional. Only used by FindSet.
+ /// }
+ ///
+ /// The SQL query formed as text.
+
+ internal static class CreateQuery
+ {
+ public static string FindSet(JObject body, JToken database, JToken entity)
+ {
+ var selectFields = body["fields"] as JArray;
+ var filters = body["filters"] as JArray;
+ var orderBy = body["orderBy"] as JArray;
+
+ var fieldListExpression = selectFields == null ? "*" : concatenateItems(selectFields, ",", t => $"[{t.ToString()}]");
+ var filterExpression = filters == null ? "" : $" WHERE {concatenateItems(filters, " AND", filterTransformToken)}";
+ var orderByExpression = orderBy == null ? "" : $" ORDER BY {concatenateItems(orderBy, ",", orderByTransformToken)}";
+ return $"SELECT {fieldListExpression} FROM [{database}].[dbo].[{entity}]{filterExpression}{orderByExpression};";
+ }
+
+ public static string Count(JObject body, JToken database, JToken entity)
+ {
+ var filters = body["filters"] as JArray;
+
+ var filterExpression = filters == null ? "" : $" WHERE {concatenateItems(filters, " AND", filterTransformToken)}";
+ return $"SELECT COUNT(*) FROM [{database}].[dbo].[{entity}]{filterExpression};";
+ }
+
+ public static string IsEmpty(JObject body, JToken database, JToken entity)
+ {
+ var filters = body["filters"] as JArray;
+
+ var filterExpression = filters == null ? "" : $" WHERE {concatenateItems(filters, " AND", filterTransformToken)}";
+ return $"IF EXISTS (SELECT TOP 1 1 FROM [{database}].[dbo].[{entity}]{filterExpression}) SELECT 0 ELSE SELECT 1;";
+ }
+
+ private static string concatenateItems(IEnumerable list, string delimiter, Func transform)
+ {
+ string result = "";
+ var counter = 0;
+ if (list != null)
+ {
+ foreach (var item in list)
+ {
+ result += $"{transform(item)}{delimiter} ";
+ counter++;
+ }
+ if (counter > 0)
+ {
+ // remove the last delimiter added
+ result = result.Remove(result.Length - $"{delimiter} ".Length);
+ }
+ }
+ return result;
+ }
+
+ private static string filterTransformToken(JToken token)
+ {
+ var filter = token as JObject;
+ if (filter == null)
+ {
+ throw new ArgumentException($"Bad item {token} in the filters expression.");
+ }
+ var op = filter["op"];
+ if (op == null || op.Type != JTokenType.String)
+ {
+ throw new ArgumentException($"Bad or missing operator in the filter {token}.");
+ }
+ if (!Enum.TryParse((filter["op"] ?? "").ToString(), true, out FilterType filterType))
+ {
+ throw new ArgumentException($"Bad operator passed in the filter {token}.");
+ }
+ var field = filter["field"] as JToken;
+ if (field == null || field.Type != JTokenType.String)
+ {
+ throw new ArgumentException($"Bad or missing field in the expression {token}.");
+ }
+ var value = filter["value"];
+ if (value == null)
+ {
+ throw new ArgumentException($"Missing value in the filter {token}.");
+ }
+ var valueTokenType = (filter["value"] ?? 0).Type;
+ var useQuotes = new[] { JTokenType.String, JTokenType.Date }.Contains(valueTokenType);
+ return $"[{filter["field"]}] {filterOperator(filterType)} {(useQuotes ? "'" : "")}{filter["value"]}{(useQuotes ? "'" : "")}";
+ }
+
+ private static string filterOperator(FilterType op)
+ {
+ switch (op)
+ {
+ case FilterType.Equals:
+ return "=";
+ case FilterType.NotEquals:
+ return "!=";
+ case FilterType.GreaterThan:
+ return ">";
+ case FilterType.GreaterThanOrEquals:
+ return ">=";
+ case FilterType.LessThan:
+ return "<";
+ case FilterType.LessThanOrEquals:
+ return "<=";
+ default:
+ throw new ArgumentException($"The filter operator {op} is not supported.");
+ }
+ }
+
+ private static bool isQuotedValue(JToken value)
+ {
+ return (value.Type == JTokenType.String || value.Type == JTokenType.Date);
+ }
+
+ private static string orderByTransformToken(JToken token)
+ {
+ var orderByItem = token as JObject;
+ if (orderByItem == null)
+ {
+ throw new ArgumentException($"Bad item {token} in the order by expression.");
+ }
+ var field = orderByItem["field"] as JToken;
+ if (field == null || field.Type != JTokenType.String)
+ {
+ throw new ArgumentException($"Bad or missing field in the expression {token} in the order by expression.");
+ }
+ bool orderByAscending = ((bool?)(orderByItem["ascending"] as JToken)) ?? true;
+ return $"[{field}]{(orderByAscending ? " ASC" : " DESC")}";
+ }
+ }
+}
\ No newline at end of file
diff --git a/adlsProxy/CreateResult.cs b/adlsProxy/CreateResult.cs
new file mode 100644
index 0000000..2694739
--- /dev/null
+++ b/adlsProxy/CreateResult.cs
@@ -0,0 +1,62 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+using Newtonsoft.Json.Linq;
+using Microsoft.Data.SqlClient;
+using Microsoft.Extensions.Logging;
+
+namespace AdlsProxy
+{
+ internal static class CreateResult
+ {
+ public static JToken FindSet(ILogger logger, SqlDataReader reader)
+ {
+ IList columnNames = new List();
+ for (int fldIndex = 0; fldIndex <= reader.FieldCount - 1; fldIndex++)
+ {
+ columnNames.Add(reader.GetName(fldIndex));
+ }
+
+ int recordCount = 0;
+ JArray queryResult = new JArray();
+ while (reader.Read())
+ {
+ IList