diff --git a/.editorconfig b/.editorconfig index 535c4f2f4..b8e8e8dc5 100644 --- a/.editorconfig +++ b/.editorconfig @@ -20,7 +20,7 @@ charset = utf-8 end_of_line = lf trim_trailing_whitespace = true -[*.xml] +[*.{xml,sql,ddl}] indent_size = 4 indent_style = space insert_final_newline = true diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index bc0344893..a2f1254c6 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @lokm01 @benedeki @DzMakatun @HuvarVer @dk1844 @AdrianOlosutean +* @benedeki @dk1844 @AdrianOlosutean diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index e4884938c..51f2a7680 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -15,7 +15,7 @@ Steps to reproduce the behavior OR commands run: 3. Enter value '...' 4. See error -## Expected behaviour +## Expected behavior A clear and concise description of what you expected to happen. ## Screenshots diff --git a/.github/ISSUE_TEMPLATE/epic.md b/.github/ISSUE_TEMPLATE/epic.md index 3ec79828b..653b178f4 100644 --- a/.github/ISSUE_TEMPLATE/epic.md +++ b/.github/ISSUE_TEMPLATE/epic.md @@ -16,5 +16,3 @@ A list of expected issues that will be needed to achieve this Epic 1. 2. 3. - -**NB! Don't forget to assign the Epic into a Milestone** diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 64bed30d2..84b87ffa6 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -15,7 +15,7 @@ A description of the requested feature. A simple example if applicable. ## Proposed Solution [Optional] -Solution Ideas +Solution Ideas: 1. 2. 3. diff --git a/.github/ISSUE_TEMPLATE/poc.md b/.github/ISSUE_TEMPLATE/poc.md new file mode 100644 index 000000000..cf9dfe4fc --- /dev/null +++ b/.github/ISSUE_TEMPLATE/poc.md @@ -0,0 +1,18 @@ +--- +name: POC +about: Proof of Concept, usually a middle-sized effort to test some idea +labels: 'poc, under discussion, priority: undecided' + +--- + +## Background +A clear and concise intro into the situation. + +## Goal +The goal that the _Proof of Concept_ wants to test + +## Proposed Approach [Optional] +Approach Ideas: +1. +2. +3. diff --git a/.github/ISSUE_TEMPLATE/release_notes.md b/.github/ISSUE_TEMPLATE/release_notes.md index 9ecf8e06b..bf81ca1f5 100644 --- a/.github/ISSUE_TEMPLATE/release_notes.md +++ b/.github/ISSUE_TEMPLATE/release_notes.md @@ -2,9 +2,9 @@ title: Add release notes for vX.Y.0 name: Release Notes about: Create release notes -labels: 'docs, priority: medium' +labels: 'documentation, priority: medium' --- ## Background -With the release of vX.Y.0, we should add its release notes to the docs. +With the release of vX.Y.0, we should add its release notes to the documentation. 
diff --git a/.github/workflows/license_check.yml b/.github/workflows/license_check.yml index f8c7697bb..916047963 100644 --- a/.github/workflows/license_check.yml +++ b/.github/workflows/license_check.yml @@ -28,4 +28,4 @@ jobs: - uses: actions/setup-java@v1 with: java-version: 1.8 - - run: mvn -Plicense-check apache-rat:check + - run: mvn --no-transfer-progress -Plicense-check apache-rat:check diff --git a/.github/workflows/pr_labels_check.yml b/.github/workflows/pr_labels_check.yml index 2dba51a28..b9257ca76 100644 --- a/.github/workflows/pr_labels_check.yml +++ b/.github/workflows/pr_labels_check.yml @@ -18,20 +18,20 @@ on: pull_request: types: [opened, labeled, unlabeled, synchronize] jobs: - test_approved_or_docs: - name: Test approved or docs + test_approved_or_documentation: + name: Test approved or documentation runs-on: ubuntu-latest steps: - - uses: danielchabr/pr-labels-checker@master + - uses: danielchabr/pr-labels-checker@v3.0 id: checkLabel with: - hasSome: PR:tested,PR:no testing needed,docs + hasSome: PR:tested,PR:no testing needed,documentation,docs githubToken: ${{ secrets.GITHUB_TOKEN }} merge_blocked: name: Merge not blocked runs-on: ubuntu-latest steps: - - uses: danielchabr/pr-labels-checker@master + - uses: danielchabr/pr-labels-checker@v3.0 id: checkLabel with: hasNone: PR:reviewing,work in progress diff --git a/.gitignore b/.gitignore index b217b745f..94ae1cf36 100644 --- a/.gitignore +++ b/.gitignore @@ -55,8 +55,6 @@ build.log # syntax: regexp # ^\.pc/ -build.log - .cache* dependency-reduced-pom.xml diff --git a/README.md b/README.md index 65389098f..81e8e756b 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,9 @@ # Enceladus +### Latest Release +[![Maven Central](https://maven-badges.herokuapp.com/maven-central/za.co.absa.enceladus/parent/badge.png)](https://maven-badges.herokuapp.com/maven-central/za.co.absa.enceladus/parent/) + ### Build Status | master | develop | | ------------- | ------------- | @@ -20,10 +23,15 @@ ### Code Quality Status [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=AbsaOSS_enceladus&metric=alert_status)](https://sonarcloud.io/dashboard?id=AbsaOSS_enceladus) +### Documentation +[![Read the Docs](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://absaoss.github.io/enceladus/) +[![Read the Docs](https://img.shields.io/badge/docs-release%20notes-yellow.svg)](https://absaoss.github.io/enceladus/blog/) +[![Read the Docs](https://img.shields.io/badge/docs-release--1.x-red.svg)](https://absaoss.github.io/enceladus/docs/1.0.0/components) ___ - [What is Enceladus?](#what-is-enceladus) + - [REST API](#rest-api) - [Menas](#menas) - [Standardization](#standardization) - [Conformance](#conformance) @@ -32,20 +40,26 @@ ___ - [Plugins](#plugins) - [Built-in Plugins](#built-in-plugins) - [How to contribute](#how-to-contribute) -- [Documentation](#documentation) ## What is Enceladus? **Enceladus** is a **Dynamic Conformance Engine** which allows data from different formats to be standardized to parquet and conformed to group-accepted common reference (e.g. data for country designation which are **DE** in one source system and **Deutschland** in another, can be conformed to **Germany**). -The project is comprised of three main components: -### Menas -This is the user-facing web client, used to **specify the standardization schema**, and **define the steps required to conform** a dataset. 
-There are three models used to do this: +The project comprises four main components: + +### REST API +The REST API exposes the Enceladus endpoints for creating, reading, updating and deleting the models, as well as other functionalities. +The three main models used are: - **Dataset**: Specifies where the dataset will be read from on HDFS (**RAW**), the conformance rules that will be applied to it, and where it will land on HDFS once it is conformed (**PUBLISH**) - **Schema**: Specifies the schema towards which the dataset will be standardized - **Mapping Table**: Specifies where tables with master reference data can be found (parquet on HDFS), which are used when applying Mapping conformance rules (e.g. the dataset uses **Germany**, which maps to the master reference **DE** in the mapping table) +The REST API exposes a Swagger Documentation UI which documents all the exposed HTTP endpoints. It can be found at **REST_API_HOST/swagger-ui.html** + +### Menas +This is the user-facing web client, used to **specify the standardization schema**, and **define the steps required to conform** a dataset. +The Menas web client is built on top of the REST API and calls it to retrieve the entities it needs. + ### Standardization This is a Spark job which reads an input dataset in any of the supported formats and **produces a parquet dataset with the Menas-specified schema** as output. @@ -69,7 +83,6 @@ Ensure the properties there fit your environment. - Without tests: `mvn clean package -DskipTests ` - With unit tests: `mvn clean package` - With integration tests: `mvn clean package -Pintegration` -- With component preload file generated: `mvn clean package -PgenerateComponentPreload` #### Test coverage: - Test coverage: `mvn scoverage:report` @@ -77,27 +90,33 @@ The coverage reports are written in each module's `target` directory and aggregated in the root `target` directory. ## How to run -#### Menas requirements: +#### REST API requirements: - [**Tomcat 8.5/9.0** installation](https://tomcat.apache.org/download-90.cgi) - [**MongoDB 4.0** installation](https://docs.mongodb.com/manual/administration/install-community/) - [**Spline UI deployment**](https://absaoss.github.io/spline/) - place the [spline.war](https://search.maven.org/remotecontent?filepath=za/co/absa/spline/spline-web/0.3.9/spline-web-0.3.9.war) in your Tomcat webapps directory (rename after downloading to _spline.war_); NB! don't forget to set up the `spline.mongodb.url` configuration for the _war_ - **HADOOP_CONF_DIR** environment variable, pointing to the location of your hadoop configuration (pointing to a hadoop installation) -The _Spline UI_ can be omitted; in such case the **Menas** `spline.urlTemplate` setting should be set to empty string. +The _Spline UI_ can be omitted; in such a case the **REST API** `spline.urlTemplate` setting should be set to an empty string. + +#### Deploying REST API +Simply copy the **rest-api.war** file produced when building the project into Tomcat's webapps directory. +Alternatively, build the Docker image based on the existing Dockerfile and deploy it as a container. #### Deploying Menas -Simply copy the **menas.war** file produced when building the project into Tomcat's webapps directory. +There are several ways of deploying Menas: +- Tomcat deployment: copy the **menas.war** file produced when building the project into Tomcat's webapps directory.
The **"apiUrl"** value in package.json should be set either before building or after building the artifact and modifying it in place +- Docker deployment: build the Docker image based on the existing Dockerfile and deploy it as a container. The **API_URL** environment variable should be provided when running the container +- CDN deployment: copy the built contents in the **dist** directory into your preferred CDN server. The **"apiUrl"** value in package.json in the **dist** directory should be set -#### Speed up initial loading time of menas -- Build the project with the generateComponentPreload profile. Component preload will greatly reduce the number of HTTP requests required for the initial load of Menas +#### Speed up initial loading time of REST API - Enable the HTTP compression -- Configure `spring.resources.cache.cachecontrol.max-age` in `application.properties` of Menas for caching of static resources +- Configure `spring.resources.cache.cachecontrol.max-age` in `application.properties` of REST API for caching of static resources #### Standardization and Conformance requirements: - [**Spark 2.4.4 (Scala 2.11)** installation](https://spark.apache.org/downloads.html) - [**Hadoop 2.7** installation](https://hadoop.apache.org/releases.html) -- **Menas** running instance +- **REST API** running instance - **Menas Credentials File** in your home directory or on HDFS (a configuration file for authenticating the Spark jobs with Menas) - **Use with in-memory authentication** e.g. `~/menas-credential.properties`: @@ -105,7 +124,7 @@ e.g. `~/menas-credential.properties`: username=user password=changeme ``` -- **Menas Keytab File** in your home directory or on HDFS +- **REST API Keytab File** in your home directory or on HDFS - **Use with kerberos authentication**, see [link](https://kb.iu.edu/d/aumh) for details on creating keytab files - **Directory structure** for the **RAW** dataset should follow the convention of `////v`. This date is specified with the `--report-date` option when running the **Standardization** and **Conformance** jobs. - **_INFO file** must be present along with the **RAW** data on HDFS as per the above directory structure. This is a file tracking control measures via [Atum](https://github.com/AbsaOSS/atum), an example can be found [here](examples/data/input/_INFO). @@ -131,7 +150,7 @@ password=changeme --row-tag ``` * Here `row-tag` is a specific option for `raw-format` of type `XML`. For more options for different types please see our WIKI. -* In case Menas is configured for in-memory authentication (e.g. in dev environments), replace `--menas-auth-keytab` with `--menas-credentials-file` +* In case REST API is configured for in-memory authentication (e.g. in dev environments), replace `--menas-auth-keytab` with `--menas-credentials-file` #### Running Conformance ``` @@ -175,7 +194,7 @@ password=changeme --row-tag ``` -* In case Menas is configured for in-memory authentication (e.g. in dev environments), replace `--menas-auth-keytab` with `--menas-credentials-file` +* In case REST API is configured for in-memory authentication (e.g. 
in dev environments), replace `--menas-auth-keytab` with `--menas-credentials-file` #### Helper scripts for running Standardization, Conformance or both together @@ -272,8 +291,8 @@ The list of all options for running Standardization, Conformance and the combine | Option | Description | |---------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| --menas-auth-keytab **filename** | A keytab file used for Kerberized authentication to Menas. Cannot be used together with `--menas-credentials-file`. | -| --menas-credentials-file **filename** | A credentials file containing a login and a password used to authenticate to Menas. Cannot be used together with `--menas-auth-keytab`. | +| --menas-auth-keytab **filename** | A keytab file used for Kerberized authentication to REST API. Cannot be used together with `--menas-credentials-file`. | +| --menas-credentials-file **filename** | A credentials file containing a login and a password used to authenticate to REST API. Cannot be used together with `--menas-auth-keytab`. | | --dataset-name **name** | A dataset name to be standardized or conformed. | | --dataset-version **version** | A version of a dataset to be standardized or conformed. | | --report-date **YYYY-mm-dd** | A date specifying a day for which a raw data is landed. | @@ -336,6 +355,3 @@ A module containing [examples](examples/README.md) of the project usage. ## How to contribute Please see our [**Contribution Guidelines**](CONTRIBUTING.md). - -## Documentation -Please see the [documentation pages](https://absaoss.github.io/enceladus/). diff --git a/dao/pom.xml b/dao/pom.xml index 3cb416326..058197e70 100644 --- a/dao/pom.xml +++ b/dao/pom.xml @@ -21,7 +21,7 @@ za.co.absa.enceladus parent - 2.23.0 + 3.0.0-SNAPSHOT diff --git a/dao/src/main/scala/za/co/absa/enceladus/dao/rest/AuthClient.scala b/dao/src/main/scala/za/co/absa/enceladus/dao/rest/AuthClient.scala index 40bcdef0f..650ccba9d 100644 --- a/dao/src/main/scala/za/co/absa/enceladus/dao/rest/AuthClient.scala +++ b/dao/src/main/scala/za/co/absa/enceladus/dao/rest/AuthClient.scala @@ -71,14 +71,14 @@ sealed abstract class AuthClient(username: String, restTemplate: RestTemplate, a private def getAuthHeaders(response: ResponseEntity[String]): HttpHeaders = { val headers = response.getHeaders - val sessionCookie = headers.get("set-cookie").asScala.head + val jwt = headers.get("JWT").asScala.head val csrfToken = headers.get("X-CSRF-TOKEN").asScala.head - log.info(s"Session Cookie: $sessionCookie") + log.info(s"JWT: $jwt") log.info(s"CSRF Token: $csrfToken") val resultHeaders = new HttpHeaders() - resultHeaders.add("cookie", sessionCookie) + resultHeaders.add("JWT", jwt) resultHeaders.add("X-CSRF-TOKEN", csrfToken) resultHeaders } diff --git a/dao/src/main/scala/za/co/absa/enceladus/dao/rest/CrossHostApiCaller.scala b/dao/src/main/scala/za/co/absa/enceladus/dao/rest/CrossHostApiCaller.scala index fa244d926..8a44119c7 100644 --- a/dao/src/main/scala/za/co/absa/enceladus/dao/rest/CrossHostApiCaller.scala +++ b/dao/src/main/scala/za/co/absa/enceladus/dao/rest/CrossHostApiCaller.scala @@ -18,43 +18,75 @@ package za.co.absa.enceladus.dao.rest import org.apache.commons.lang.exception.ExceptionUtils import org.slf4j.LoggerFactory import org.springframework.web.client.{ResourceAccessException, RestClientException} +import za.co.absa.enceladus.dao.rest.CrossHostApiCaller.logger 
import za.co.absa.enceladus.dao.{DaoException, RetryableException} +import scala.annotation.tailrec import scala.util.{Failure, Random, Try} -protected object CrossHostApiCaller { +object CrossHostApiCaller { - def apply(apiBaseUrls: List[String]): CrossHostApiCaller = { - new CrossHostApiCaller(apiBaseUrls, Random.nextInt(apiBaseUrls.size)) + private val logger = LoggerFactory.getLogger(classOf[CrossHostApiCaller]) + + final val DefaultUrlsRetryCount: Int = 0 + + private def createInstance(apiBaseUrls: Seq[String], urlsRetryCount: Int, startWith: Option[Int]): CrossHostApiCaller = { + val maxTryCount: Int = (if (urlsRetryCount < 0) { + logger.warn(s"Urls retry count cannot be negative ($urlsRetryCount). Using default number of retries instead ($DefaultUrlsRetryCount).") //scalastyle:ignore maxLineLength + DefaultUrlsRetryCount + } else { + urlsRetryCount + }) + 1 + val currentHostIndex = startWith.getOrElse(Random.nextInt(Math.max(apiBaseUrls.size, 1))) + new CrossHostApiCaller(apiBaseUrls.toVector, maxTryCount, currentHostIndex) } + def apply(apiBaseUrls: Seq[String], urlsRetryCount: Int = DefaultUrlsRetryCount, startWith: Option[Int] = None): CrossHostApiCaller = { + createInstance(apiBaseUrls, urlsRetryCount, startWith) + } } -protected class CrossHostApiCaller(apiBaseUrls: List[String], var currentHostIndex: Int) extends ApiCaller { - private val logger = LoggerFactory.getLogger(this.getClass) +protected class CrossHostApiCaller private(apiBaseUrls: Vector[String], maxTryCount: Int, private var currentHostIndex: Int) + extends ApiCaller { + + def baseUrlsCount: Int = apiBaseUrls.size + + def currentBaseUrl: String = apiBaseUrls(currentHostIndex) + + def nextBaseUrl(): String = { + currentHostIndex = (currentHostIndex + 1) % baseUrlsCount + currentBaseUrl + } - private val maxAttempts = apiBaseUrls.size - 1 def call[T](fn: String => T): T = { + def logFailure(error: Throwable, url: String, attemptNumber: Int, nextUrl: Option[String]): Unit = { + val rootCause = ExceptionUtils.getRootCauseMessage(error) + val switching = nextUrl.map(s => s", switching host to $s").getOrElse("") + logger.warn(s"Request failed on host $url (attempt $attemptNumber of $maxTryCount)$switching - $rootCause") + } - def attempt(index: Int, attemptCount: Int = 0): Try[T] = { - currentHostIndex = index - val currentBaseUrl = apiBaseUrls(index) - Try { - fn(currentBaseUrl) + @tailrec + def attempt(url: String, attemptNumber: Int, urlsTried: Int): Try[T] = { + val result = Try { + fn(url) }.recoverWith { case e @ (_: ResourceAccessException | _: RestClientException) => Failure(DaoException("Server non-responsive", e)) - }.recoverWith { - case e: RetryableException if attemptCount < maxAttempts => - val nextIndex = (index + 1) % apiBaseUrls.size - val nextBaseUrl = apiBaseUrls(nextIndex) - val rootCause = ExceptionUtils.getRootCauseMessage(e) - logger.warn(s"Request failed on host $currentBaseUrl, switching host to $nextBaseUrl - $rootCause") - attempt(nextIndex, attemptCount + 1) + } + // using match instead of recoverWith to make the function @tailrec + result match { + case Failure(e: RetryableException) if attemptNumber < maxTryCount => + logFailure(e, url, attemptNumber, None) + attempt(url, attemptNumber + 1, urlsTried) + case Failure(e: RetryableException) if urlsTried < baseUrlsCount => + val nextUrl = nextBaseUrl() + logFailure(e, url, attemptNumber, Option(nextUrl)) + attempt(nextUrl, 1, urlsTried + 1) + case _ => result } } - attempt(currentHostIndex).get + attempt(currentBaseUrl, 1, 1).get } } diff --git
a/dao/src/main/scala/za/co/absa/enceladus/dao/rest/MenasConnectionStringParser.scala b/dao/src/main/scala/za/co/absa/enceladus/dao/rest/MenasConnectionStringParser.scala index a4787d203..719af163f 100644 --- a/dao/src/main/scala/za/co/absa/enceladus/dao/rest/MenasConnectionStringParser.scala +++ b/dao/src/main/scala/za/co/absa/enceladus/dao/rest/MenasConnectionStringParser.scala @@ -29,7 +29,7 @@ object MenasConnectionStringParser { .replaceAll("/$", "") .replaceAll("/api$", "") ) - .toSet + .distinct .toList } diff --git a/dao/src/main/scala/za/co/absa/enceladus/dao/rest/RestDaoFactory.scala b/dao/src/main/scala/za/co/absa/enceladus/dao/rest/RestDaoFactory.scala index 51bd04ecf..5edde52d5 100644 --- a/dao/src/main/scala/za/co/absa/enceladus/dao/rest/RestDaoFactory.scala +++ b/dao/src/main/scala/za/co/absa/enceladus/dao/rest/RestDaoFactory.scala @@ -16,16 +16,33 @@ package za.co.absa.enceladus.dao.rest import za.co.absa.enceladus.dao.auth.MenasCredentials +import za.co.absa.enceladus.dao.rest.RestDaoFactory.AvailabilitySetup.{Fallback, AvailabilitySetup, RoundRobin} object RestDaoFactory { + object AvailabilitySetup extends Enumeration { + final type AvailabilitySetup = Value + + final val RoundRobin = Value("roundrobin") + final val Fallback = Value("fallback") + } + + final val DefaultAvailabilitySetup: AvailabilitySetup = RoundRobin + private val restTemplate = RestTemplateSingleton.instance - def getInstance(authCredentials: MenasCredentials, apiBaseUrls: List[String]): MenasRestDAO = { - val apiCaller = CrossHostApiCaller(apiBaseUrls) + def getInstance(authCredentials: MenasCredentials, + apiBaseUrls: List[String], + urlsRetryCount: Option[Int] = None, + menasSetup: AvailabilitySetup = DefaultAvailabilitySetup): MenasRestDAO = { + val startsWith = if (menasSetup == Fallback) { + Option(0) + } else { + None + } + val apiCaller = CrossHostApiCaller(apiBaseUrls, urlsRetryCount.getOrElse(CrossHostApiCaller.DefaultUrlsRetryCount), startsWith) val authClient = AuthClient(authCredentials, apiCaller) val restClient = new RestClient(authClient, restTemplate) new MenasRestDAO(apiCaller, restClient) } - } diff --git a/dao/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/dao/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker new file mode 100644 index 000000000..1f0955d45 --- /dev/null +++ b/dao/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker @@ -0,0 +1 @@ +mock-maker-inline diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/CrossHostApiCallerSuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/CrossHostApiCallerSuite.scala index 67301e22b..29ae6fc0b 100644 --- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/CrossHostApiCallerSuite.scala +++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/CrossHostApiCallerSuite.scala @@ -17,6 +17,7 @@ package za.co.absa.enceladus.dao.rest import org.mockito.Mockito import org.springframework.web.client.ResourceAccessException +import za.co.absa.enceladus.dao.rest.CrossHostApiCaller.DefaultUrlsRetryCount import za.co.absa.enceladus.dao.{DaoException, UnauthorizedException} class CrossHostApiCallerSuite extends BaseTestSuite { @@ -27,12 +28,23 @@ class CrossHostApiCallerSuite extends BaseTestSuite { Mockito.reset(restClient) } + "CrossHostApiCaller" should { + "cycle through urls" in { + val crossHostApiCaller = CrossHostApiCaller(Vector("a", "b", "c", "d"), DefaultUrlsRetryCount, startWith = Some(1)) + crossHostApiCaller.nextBaseUrl() should be("c") + 
crossHostApiCaller.nextBaseUrl() should be("d") + crossHostApiCaller.nextBaseUrl() should be("a") + crossHostApiCaller.nextBaseUrl() should be("b") + crossHostApiCaller.nextBaseUrl() should be("c") + } + } + "CrossHostApiCaller::call" should { "return the result of the first successful call" when { "there are no failures" in { Mockito.when(restClient.sendGet[String]("a")).thenReturn("success") - val result = new CrossHostApiCaller(List("a", "b", "c"), 0).call { str => + val result = CrossHostApiCaller(Vector("a", "b", "c"), DefaultUrlsRetryCount, startWith = Some(0)).call { str => restClient.sendGet[String](str) } @@ -42,16 +54,33 @@ class CrossHostApiCallerSuite extends BaseTestSuite { "only some calls fail with a retryable exception" in { Mockito.when(restClient.sendGet[String]("a")).thenThrow(DaoException("Something went wrong A")) - Mockito.when(restClient.sendGet[String]("b")).thenReturn("success") + Mockito.when(restClient.sendGet[String]("b")) + .thenThrow(DaoException("Something went wrong B")) + .thenReturn("success") - val result = new CrossHostApiCaller(List("a", "b", "c"), 0).call { str => + val result = CrossHostApiCaller(Vector("a", "b", "c"), 2, Some(0)).call { str => + restClient.sendGet[String](str) + } + + result should be("success") + Mockito.verify(restClient, Mockito.times(3)).sendGet[String]("a") + Mockito.verify(restClient, Mockito.times(2)).sendGet[String]("b") + Mockito.verify(restClient, Mockito.never()).sendGet[String]("c") + } + + "despite retry count is negative" in { + Mockito.when(restClient.sendGet[String]("a")).thenThrow(DaoException("Something went wrong A")) + Mockito.when(restClient.sendGet[String]("b")).thenThrow(DaoException("Something went wrong B")) + Mockito.when(restClient.sendGet[String]("c")).thenReturn("success") + + val result = CrossHostApiCaller(Vector("a", "b", "c"), -2, Some(0)).call { str => restClient.sendGet[String](str) } result should be("success") Mockito.verify(restClient, Mockito.times(1)).sendGet[String]("a") Mockito.verify(restClient, Mockito.times(1)).sendGet[String]("b") - Mockito.verify(restClient, Mockito.never()).sendGet[String]("c") + Mockito.verify(restClient, Mockito.times(1)).sendGet[String]("c") } } @@ -62,7 +91,7 @@ class CrossHostApiCallerSuite extends BaseTestSuite { Mockito.when(restClient.sendGet[String]("c")).thenThrow(DaoException("Something went wrong C")) val exception = intercept[DaoException] { - new CrossHostApiCaller(List("a", "b", "c"), 0).call { str => + CrossHostApiCaller(Vector("a", "b", "c"), 0, Some(0)).call { str => restClient.sendGet[String](str) } } @@ -73,12 +102,29 @@ class CrossHostApiCallerSuite extends BaseTestSuite { Mockito.verify(restClient, Mockito.times(1)).sendGet[String]("c") } + "all calls fail with a retryable exception over multiple attempts" in { + Mockito.when(restClient.sendGet[String]("a")).thenThrow(DaoException("Something went wrong A")) + Mockito.when(restClient.sendGet[String]("b")).thenThrow(DaoException("Something went wrong B")) + Mockito.when(restClient.sendGet[String]("c")).thenThrow(DaoException("Something went wrong C")) + + val exception = intercept[DaoException] { + CrossHostApiCaller(Vector("a", "b", "c"), 1, Some(0)).call { str => + restClient.sendGet[String](str) + } + } + + exception.getMessage should be("Something went wrong C") + Mockito.verify(restClient, Mockito.times(2)).sendGet[String]("a") + Mockito.verify(restClient, Mockito.times(2)).sendGet[String]("b") + Mockito.verify(restClient, Mockito.times(2)).sendGet[String]("c") + } + "any call fails with a 
non-retryable exception" in { Mockito.when(restClient.sendGet[String]("a")).thenThrow(new ResourceAccessException("Something went wrong A")) Mockito.when(restClient.sendGet[String]("b")).thenThrow(UnauthorizedException("Wrong credentials")) val exception = intercept[UnauthorizedException] { - new CrossHostApiCaller(List("a", "b", "c"), 0).call { str => + CrossHostApiCaller(Vector("a", "b", "c"), 0, Some(0)).call { str => restClient.sendGet[String](str) } } @@ -89,6 +135,17 @@ class CrossHostApiCallerSuite extends BaseTestSuite { Mockito.verify(restClient, Mockito.never()).sendGet[String]("c") } } + + "fail on not having Urls" when { + "none are provided" in { + val exception = intercept[IndexOutOfBoundsException] { + CrossHostApiCaller(Vector()).call { str => + restClient.sendGet[String](str) + } + } + exception.getMessage should be ("0") + } + } } } diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/JsonSerializerSuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/JsonSerializerSuite.scala index 443df246c..d4bb8d4fe 100644 --- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/JsonSerializerSuite.scala +++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/JsonSerializerSuite.scala @@ -15,7 +15,12 @@ package za.co.absa.enceladus.dao.rest +import java.time.ZonedDateTime + import org.scalactic.{AbstractStringUniformity, Uniformity} +import za.co.absa.enceladus.model.conformanceRule.{CastingConformanceRule, LiteralConformanceRule, MappingConformanceRule} +import za.co.absa.enceladus.model.dataFrameFilter._ +import za.co.absa.enceladus.model.menas.MenasReference import za.co.absa.enceladus.model.test.VersionedModelMatchers import za.co.absa.enceladus.model.test.factories.{DatasetFactory, MappingTableFactory, RunFactory, SchemaFactory} import za.co.absa.enceladus.model.{Dataset, MappingTable, Run, Schema} @@ -82,7 +87,7 @@ class JsonSerializerSuite extends BaseTestSuite with VersionedModelMatchers { """{ | "name": "Test", | "version": 5, - | "description": "", + | "description": "some description here", | "hdfsPath": "/bigdata/test", | "hdfsPublishPath": "/bigdata/test2", | "schemaName": "Cobol1", @@ -114,11 +119,61 @@ class JsonSerializerSuite extends BaseTestSuite with VersionedModelMatchers { | }, | "targetAttribute": "CCC", | "outputColumn": "ConformedCCC", - | "isNullSafe": true + | "additionalColumns": null, + | "isNullSafe": true, + | "mappingTableFilter": { + | "_t": "AndJoinedFilters", + | "filterItems": [ + | { + | "_t": "OrJoinedFilters", + | "filterItems": [ + | { + | "_t": "EqualsFilter", + | "columnName": "column1", + | "value": "soughtAfterValue", + | "valueType": "string" + | }, + | { + | "_t": "EqualsFilter", + | "columnName": "column1", + | "value": "alternativeSoughtAfterValue", + | "valueType": "string" + | } + | ] + | }, + | { + | "_t": "DiffersFilter", + | "columnName": "column2", + | "value": "anotherValue", + | "valueType": "string" + | }, + | { + | "_t": "NotFilter", + | "inputFilter": { + | "_t": "IsNullFilter", + | "columnName": "col3" + | } + | } + | ] + | }, + | "overrideMappingTableOwnFilter": true + | }, + | { + | "_t": "MappingConformanceRule", + | "order": 2,"controlCheckpoint": true, + | "mappingTable": "CurrencyMappingTable2", + | "mappingTableVersion": 10, + | "attributeMappings": {}, + | "targetAttribute": "CCC", + | "outputColumn": "ConformedCCC", + | "additionalColumns": null, + | "isNullSafe": false, + | "mappingTableFilter": null, + | "overrideMappingTableOwnFilter": false | }, | { | "_t": "LiteralConformanceRule", - | "order": 2, 
+ | "order": 3, | "outputColumn": "ConformedLiteral", | "controlCheckpoint": false, | "value": "AAA" @@ -130,6 +185,8 @@ class JsonSerializerSuite extends BaseTestSuite with VersionedModelMatchers { | "version": 4 | }, | "schedule": null, + | "properties": null, + | "propertiesValidation": null, | "createdMessage": { | "menasRef": { | "collection": null, @@ -143,14 +200,76 @@ class JsonSerializerSuite extends BaseTestSuite with VersionedModelMatchers { | "field": "", | "oldValue": null, | "newValue": null, - | "message": "Test" + | "message": "Dataset Test created." | } | ] | } |}""".stripMargin - "deserializing should not throw" in { - JsonSerializer.fromJson[Dataset](datasetJson) + val dataset: Dataset = DatasetFactory.getDummyDataset( + name = "Test", + version = 5, + description = Some("some description here"), + hdfsPath = "/bigdata/test", + hdfsPublishPath = "/bigdata/test2", + schemaName = "Cobol1", + schemaVersion = 3, + dateCreated = ZonedDateTime.parse("2019-07-22T08:05:57.47Z"), + userCreated = "system", + lastUpdated = ZonedDateTime.parse("2020-04-02T15:53:02.947Z"), + userUpdated = "system", + + conformance = List( + CastingConformanceRule(0, + outputColumn = "ConformedInt", + controlCheckpoint = false, + inputColumn = "STRING_VAL", + outputDataType = "integer" + ), + MappingConformanceRule(1, + controlCheckpoint = true, + mappingTable = "CurrencyMappingTable", + mappingTableVersion = 9, //scalastyle:ignore magic.number + attributeMappings = Map("InputValue" -> "STRING_VAL"), + targetAttribute = "CCC", + outputColumn = "ConformedCCC", + isNullSafe = true, + mappingTableFilter = Some( + AndJoinedFilters(Set( + OrJoinedFilters(Set( + EqualsFilter("column1", "soughtAfterValue"), + EqualsFilter("column1", "alternativeSoughtAfterValue") + )), + DiffersFilter("column2", "anotherValue"), + NotFilter(IsNullFilter("col3")) + )) + ), + overrideMappingTableOwnFilter = Some(true) + ), + MappingConformanceRule(2, + controlCheckpoint = true, + mappingTable = "CurrencyMappingTable2", + mappingTableVersion = 10, //scalastyle:ignore magic.number + attributeMappings = Map(), + targetAttribute = "CCC", + outputColumn = "ConformedCCC" + ), + LiteralConformanceRule(3, + outputColumn = "ConformedLiteral", + controlCheckpoint = false, + value = "AAA" + ) + ), + parent = Some(MenasReference(Some("dataset"),"Test", 4)) // scalastyle:off magic.number + ) + + "serializing" in { + val result = JsonSerializer.toJson(dataset) + result should equal(datasetJson)(after being whiteSpaceNormalised) + } + "deserializing" in { + val result = JsonSerializer.fromJson[Dataset](datasetJson) + result should matchTo(dataset) } } @@ -227,8 +346,7 @@ class JsonSerializerSuite extends BaseTestSuite with VersionedModelMatchers { "handle MappingTables" when { val mappingTableJson = - """ - |{ + """{ | "name": "dummyName", | "version": 1, | "description": null, @@ -277,6 +395,106 @@ class JsonSerializerSuite extends BaseTestSuite with VersionedModelMatchers { } } + "handle MappingTables with filters" when { + val mappingTableJson = + """ + |{ + | "name": "dummyName", + | "version": 1, + | "description": null, + | "hdfsPath": "/dummy/path", + | "schemaName": "dummySchema", + | "schemaVersion": 1, + | "defaultMappingValue": [], + | "dateCreated": "2017-12-04T16:19:17Z", + | "userCreated": "dummyUser", + | "lastUpdated": "2017-12-04T16:19:17Z", + | "userUpdated": "dummyUser", + | "disabled": false, + | "dateDisabled": null, + | "userDisabled": null, + | "parent": null, + | "filter": { + | "_t": "AndJoinedFilters", + | 
"filterItems": [ + | { + | "_t": "OrJoinedFilters", + | "filterItems": [ + | { + | "_t": "EqualsFilter", + | "columnName": "column1", + | "value": "soughtAfterValue", + | "valueType": "string" + | }, + | { + | "_t": "EqualsFilter", + | "columnName": "column1", + | "value": "alternativeSoughtAfterValue", + | "valueType": "string" + | } + | ] + | }, + | { + | "_t": "DiffersFilter", + | "columnName": "column2", + | "value": "anotherValue", + | "valueType": "string" + | }, + | { + | "_t": "NotFilter", + | "inputFilter": { + | "_t": "IsNullFilter", + | "columnName": "col3" + | } + | } + | ] + | }, + | "createdMessage": { + | "menasRef": { + | "collection": null, + | "name": "dummyName", + | "version": 1 + | }, + | "updatedBy": "dummyUser", + | "updated": "2017-12-04T16:19:17Z", + | "changes": [ + | { + | "field": "", + | "oldValue": null, + | "newValue": null, + | "message": "Mapping Table dummyName created." + | } + | ] + | }, + | "defaultMappingValues": {} + |} + |""".stripMargin + + val mappingTable = MappingTableFactory.getDummyMappingTable( + filter = Some( + AndJoinedFilters(Set( + OrJoinedFilters(Set( + EqualsFilter("column1", "soughtAfterValue"), + EqualsFilter("column1", "alternativeSoughtAfterValue") + )), + DiffersFilter("column2", "anotherValue"), + NotFilter( + IsNullFilter("col3") + ) + )) + ) + ) + + "serializing" in { + val result = JsonSerializer.toJson(mappingTable) + result should equal(mappingTableJson)(after being whiteSpaceNormalised) + } + "deserializing" in { + val result = JsonSerializer.fromJson[MappingTable](mappingTableJson) + result should matchTo(mappingTable) + } + } + "handle Schemas" when { val schemaJson = """ diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/MenasConnectionStringParserSuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/MenasConnectionStringParserSuite.scala index 6c41f7bec..fce399bff 100644 --- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/MenasConnectionStringParserSuite.scala +++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/MenasConnectionStringParserSuite.scala @@ -202,6 +202,24 @@ class MenasConnectionStringParserSuite extends BaseTestSuite { exception.getMessage should be("Malformed Menas connection string") } } - } + "keep the order of urls" when { + val expectedList = List( + "http://host1:8080/menas", + "http://host2:9000/menas", + "http://host3:8080/menas", + "http://host4:9000/menas", + "http://localhost:8080/menas", + "http://localhost:8090/menas" + ) + "they are full fledged urls separated by semicolon" in { + val result = MenasConnectionStringParser.parse("http://host1:8080/menas;http://host2:9000/menas;http://host3:8080/menas;http://host4:9000/menas;http://localhost:8080/menas;http://localhost:8090/menas") + result should be(expectedList) + } + "varied hosts separated by comma within one url" in { + val result = MenasConnectionStringParser.parse("http://host1:8080,host2:9000,host3:8080,host4:9000,localhost:8080,localhost:8090/menas") + result should be(expectedList) + } + } + } } diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala index 5d546150e..e1f2042d2 100644 --- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala +++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala @@ -15,12 +15,15 @@ package za.co.absa.enceladus.dao.rest +import org.mockito.MockitoSugar.withObjectMocked +import org.mockito.{ArgumentMatchersSugar, Mockito} 
import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import za.co.absa.enceladus.dao.UnauthorizedException import za.co.absa.enceladus.dao.auth.{InvalidMenasCredentials, MenasKerberosCredentials, MenasPlainCredentials} +import za.co.absa.enceladus.dao.rest.RestDaoFactory.AvailabilitySetup -class RestDaoFactorySuite extends AnyWordSpec with Matchers { +class RestDaoFactorySuite extends AnyWordSpec with Matchers with ArgumentMatchersSugar { private val menasApiBaseUrls = List("http://localhost:8080/menas/api") @@ -47,6 +50,44 @@ exception.getMessage should be("No Menas credentials provided") } } + "properly adjust the starting URL based on the setup type" when { + val fooCrossHostApiCaller = CrossHostApiCaller(Seq.empty) + val plainCredentials = MenasPlainCredentials("user", "changeme") + "it's round-robin" in { + withObjectMocked[CrossHostApiCaller.type] { + Mockito.when(CrossHostApiCaller.apply(any[Seq[String]], any[Int], any[Option[Int]])).thenReturn(fooCrossHostApiCaller) + val restDao = RestDaoFactory.getInstance(plainCredentials, menasApiBaseUrls) + getAuthClient(restDao.restClient).getClass should be(classOf[LdapAuthClient]) + Mockito.verify(CrossHostApiCaller, Mockito.times(1)).apply( + menasApiBaseUrls, + CrossHostApiCaller.DefaultUrlsRetryCount, + None) + } + } + "it's fallback" in { + withObjectMocked[CrossHostApiCaller.type] { + Mockito.when(CrossHostApiCaller.apply(any[Seq[String]], any[Int], any[Option[Int]])).thenReturn(fooCrossHostApiCaller) + val plainCredentials = MenasPlainCredentials("user", "changeme") + val restDao = RestDaoFactory.getInstance(plainCredentials, menasApiBaseUrls, None, AvailabilitySetup.Fallback) + getAuthClient(restDao.restClient).getClass should be(classOf[LdapAuthClient]) + Mockito.verify(CrossHostApiCaller, Mockito.times(1)).apply( + menasApiBaseUrls, + CrossHostApiCaller.DefaultUrlsRetryCount, + Option(0)) + } + } + "the setup type is not specified" in { + withObjectMocked[CrossHostApiCaller.type] { + Mockito.when(CrossHostApiCaller.apply(any[Seq[String]], any[Int], any[Option[Int]])).thenReturn(fooCrossHostApiCaller) + val restDao = RestDaoFactory.getInstance(plainCredentials, menasApiBaseUrls) + getAuthClient(restDao.restClient).getClass should be(classOf[LdapAuthClient]) + Mockito.verify(CrossHostApiCaller, Mockito.times(1)).apply( + menasApiBaseUrls, + CrossHostApiCaller.DefaultUrlsRetryCount, + None) + } + } + } } private def getAuthClient(restClient: RestClient): AuthClient = { @@ -54,5 +95,5 @@ field.setAccessible(true) field.get(restClient).asInstanceOf[AuthClient] } - } + diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala index d6488918f..ac78882a0 100644 --- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala +++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala @@ -44,17 +44,17 @@ abstract class AuthClientSuite() extends AnyWordSpec s"Calling authenticate()" should { "return authentication headers on 200 OK" in { - val sessionCookie = "session-cookie" + val jwt = "jwt" val csrfToken = "csrf-token" val responseHeaders = new LinkedMultiValueMap[String, String] - responseHeaders.add("set-cookie", sessionCookie) + responseHeaders.add("jwt", jwt) responseHeaders.add("x-csrf-token", csrfToken)
setUpSuccessfulAuthRequest(responseHeaders) val expected = new HttpHeaders() - expected.add("cookie", sessionCookie) + expected.add("jwt", jwt) expected.add("x-csrf-token", csrfToken) val response = authClient.authenticate() diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala index e7c8429d9..08bd78531 100644 --- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala +++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala @@ -18,9 +18,9 @@ package za.co.absa.enceladus.dao.rest.auth import org.scalatest.wordspec.AnyWordSpec import za.co.absa.enceladus.dao.auth.MenasPlainCredentials import za.co.absa.enceladus.utils.fs.LocalFsUtils -import za.co.absa.enceladus.utils.testUtils.SparkTestBase +import za.co.absa.enceladus.utils.testUtils.TZNormalizedSparkTestBase -class MenasPlainCredentialsSuite extends AnyWordSpec with SparkTestBase { +class MenasPlainCredentialsSuite extends AnyWordSpec with TZNormalizedSparkTestBase { "MenasPlainCredentials" should { "be read from *.conf" in { diff --git a/data-model/pom.xml b/data-model/pom.xml index 801d69b6f..91a0b7f29 100644 --- a/data-model/pom.xml +++ b/data-model/pom.xml @@ -24,7 +24,7 @@ za.co.absa.enceladus parent - 2.23.0 + 3.0.0-SNAPSHOT diff --git a/data-model/src/main/scala/META-INF/MANIFEST.MF b/data-model/src/main/scala/META-INF/MANIFEST.MF new file mode 100644 index 000000000..254272e1c --- /dev/null +++ b/data-model/src/main/scala/META-INF/MANIFEST.MF @@ -0,0 +1,3 @@ +Manifest-Version: 1.0 +Class-Path: + diff --git a/data-model/src/main/scala/za/co/absa/enceladus/model/ConformedSchema.scala b/data-model/src/main/scala/za/co/absa/enceladus/model/ConformedSchema.scala new file mode 100644 index 000000000..f9fdd76a3 --- /dev/null +++ b/data-model/src/main/scala/za/co/absa/enceladus/model/ConformedSchema.scala @@ -0,0 +1,46 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.enceladus.model + +import org.apache.spark.sql.types.StructField +import za.co.absa.enceladus.model.conformanceRule._ + +case class ConformedSchema(schema: List[StructField], dataset: Dataset) { + def hasField(field: String): Boolean = { + if (schema.exists(_.name == field)) true else { + val ss = dataset.conformance.find { + case MappingConformanceRule(_, _, _, _, _, _, outputColumn, additionalColumns, _, _, _) => + outputColumn == field || additionalColumns.getOrElse(Map()).contains(field) + case SingleColumnConformanceRule(_, _, outputColumn, _, inputColumnAlias) => + outputColumn == field || field == outputColumn + "." 
+ inputColumnAlias + case DropConformanceRule(_, _, _) => false + case c: ConformanceRule => c.outputColumn == field + } + + ss match { + case None => false + case Some(matchedRule: ConformanceRule) => + val maybeRule = dataset.conformance.find { + case DropConformanceRule(_, _, outputCol) => outputCol == matchedRule.outputColumn + case _ => false + } + maybeRule.isEmpty + } + } + } +} + + diff --git a/data-model/src/main/scala/za/co/absa/enceladus/model/SchemaField.scala b/data-model/src/main/scala/za/co/absa/enceladus/model/SchemaField.scala index 02c06063c..a878adae2 100644 --- a/data-model/src/main/scala/za/co/absa/enceladus/model/SchemaField.scala +++ b/data-model/src/main/scala/za/co/absa/enceladus/model/SchemaField.scala @@ -41,4 +41,9 @@ case class SchemaField def getAllChildren: Seq[String] = { children.flatMap(child => child.getAllChildren :+ child.getAbsolutePath) } + + @JsonIgnore + def getAllChildrenBasePath: Seq[String] = { + children.flatMap(child => child.getAllChildrenBasePath :+ child.path) + } } diff --git a/data-model/src/main/scala/za/co/absa/enceladus/model/conformanceRule/package.scala b/data-model/src/main/scala/za/co/absa/enceladus/model/conformanceRule/package.scala index 4415765d1..96185ebb1 100644 --- a/data-model/src/main/scala/za/co/absa/enceladus/model/conformanceRule/package.scala +++ b/data-model/src/main/scala/za/co/absa/enceladus/model/conformanceRule/package.scala @@ -88,9 +88,11 @@ package object conformanceRule { ) extends ConformanceRule { def allOutputColumns(): Map[String, String] = { - additionalColumns.getOrElse(Map()) + (outputColumn -> targetAttribute) + definedAdditionalColumns() + (outputColumn -> targetAttribute) } + def definedAdditionalColumns(): Map[String, String] = additionalColumns.getOrElse(Map()) + override def withUpdatedOrder(newOrder: Int): MappingConformanceRule = copy(order = newOrder) override def connectedEntities: Seq[ConnectedEntity] = Seq( diff --git a/data-model/src/main/scala/za/co/absa/enceladus/model/properties/PropertyDefinitionStats.scala b/data-model/src/main/scala/za/co/absa/enceladus/model/properties/PropertyDefinitionStats.scala new file mode 100644 index 000000000..fa933a5de --- /dev/null +++ b/data-model/src/main/scala/za/co/absa/enceladus/model/properties/PropertyDefinitionStats.scala @@ -0,0 +1,30 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package za.co.absa.enceladus.model.properties + +import za.co.absa.enceladus.model.properties.essentiality.Essentiality + +case class PropertyDefinitionStats(name: String, + version: Int = 1, + essentiality: Essentiality = Essentiality.Optional, + missingInDatasetsCount: Int = 0) + +object PropertyDefinitionStats { + def apply(propertyDefinition: PropertyDefinition, missingCounts: Int): PropertyDefinitionStats = { + PropertyDefinitionStats(propertyDefinition.name, propertyDefinition.version, + propertyDefinition.essentiality, missingCounts) + } +} diff --git a/data-model/src/main/scala/za/co/absa/enceladus/model/test/factories/MappingTableFactory.scala b/data-model/src/main/scala/za/co/absa/enceladus/model/test/factories/MappingTableFactory.scala index d36bc1b9d..984afd357 100644 --- a/data-model/src/main/scala/za/co/absa/enceladus/model/test/factories/MappingTableFactory.scala +++ b/data-model/src/main/scala/za/co/absa/enceladus/model/test/factories/MappingTableFactory.scala @@ -17,6 +17,7 @@ package za.co.absa.enceladus.model.test.factories import java.time.ZonedDateTime +import za.co.absa.enceladus.model.dataFrameFilter.DataFrameFilter import za.co.absa.enceladus.model.menas.MenasReference import za.co.absa.enceladus.model.{DefaultValue, MappingTable, Schema} @@ -38,7 +39,8 @@ object MappingTableFactory extends EntityFactory[Schema] { disabled: Boolean = false, dateDisabled: Option[ZonedDateTime] = None, userDisabled: Option[String] = None, - parent: Option[MenasReference] = None): MappingTable = { + parent: Option[MenasReference] = None, + filter: Option[DataFrameFilter] = None): MappingTable = { MappingTable(name, version, @@ -54,7 +56,9 @@ object MappingTableFactory extends EntityFactory[Schema] { disabled, dateDisabled, userDisabled, - parent) + parent, + filter + ) } def getDummyDefaultValue(columnName: String = "dummyColumnName", diff --git a/data-model/src/test/scala/za/co/absa/enceladus/model/ConformedSchemaTest.scala b/data-model/src/test/scala/za/co/absa/enceladus/model/ConformedSchemaTest.scala new file mode 100644 index 000000000..b87a99664 --- /dev/null +++ b/data-model/src/test/scala/za/co/absa/enceladus/model/ConformedSchemaTest.scala @@ -0,0 +1,80 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package za.co.absa.enceladus.model + +import org.apache.spark.sql.types.{StringType, StructField} +import org.scalatest.funsuite.AnyFunSuite +import za.co.absa.enceladus.model.conformanceRule.{DropConformanceRule, LiteralConformanceRule, MappingConformanceRule, SingleColumnConformanceRule} + +class ConformedSchemaTest extends AnyFunSuite { + private val conformanceRule1 = LiteralConformanceRule( + order = 0, + controlCheckpoint = true, + outputColumn = "something", + value = "1.01" + ) + + private val conformanceRule1d = LiteralConformanceRule( + order = 0, + controlCheckpoint = true, + outputColumn = "fieldToDelete", + value = "1.01" + ) + + private val conformanceRule2 = DropConformanceRule(order = 0, + controlCheckpoint = true, + outputColumn = "fieldToDelete") + + private val conformanceRule3 = MappingConformanceRule(order = 0, + controlCheckpoint = true, + outputColumn = "something3", additionalColumns = Some(Map("newCol" -> "mappedCol")), + mappingTable = "", mappingTableVersion = 1, + attributeMappings = Map(), targetAttribute = "col") + + private val conformanceRule4 = SingleColumnConformanceRule( + order = 0, + outputColumn = "singleCol", inputColumn = "as", + inputColumnAlias = "subCol", controlCheckpoint = false) + + private val dataset = Dataset(name = "Test DS", + version = 1, + hdfsPath = "newPath", + hdfsPublishPath = "newPublishPath", + schemaName = "newSchema", + schemaVersion = 1, + conformance = List(conformanceRule1, conformanceRule1d, conformanceRule2, conformanceRule3, conformanceRule4), + properties = Some(Map( + "property1" -> "value1", + "property2.sub" -> "value2" + ) + )) + + val schemaFields = List(StructField("stdField", StringType)) + + test("conformed schema") { + val conformedSchema = ConformedSchema(schemaFields, dataset) + assertResult(conformedSchema.hasField("stdField"))(true) + assertResult(conformedSchema.hasField("fieldToDelete"))(false) + assertResult(conformedSchema.hasField("something"))(true) + assertResult(conformedSchema.hasField("newCol"))(true) + assertResult(conformedSchema.hasField("newCol1"))(false) + assertResult(conformedSchema.hasField("mappedColCol1"))(false) + assertResult(conformedSchema.hasField("something3"))(true) + assertResult(conformedSchema.hasField("col"))(false) + assertResult(conformedSchema.hasField("singleCol"))(true) + assertResult(conformedSchema.hasField("singleCol.subCol"))(true) + } +} diff --git a/data-model/src/test/scala/za/co/absa/enceladus/model/SchemaFieldTest.scala b/data-model/src/test/scala/za/co/absa/enceladus/model/SchemaFieldTest.scala index 400df960b..d67263976 100644 --- a/data-model/src/test/scala/za/co/absa/enceladus/model/SchemaFieldTest.scala +++ b/data-model/src/test/scala/za/co/absa/enceladus/model/SchemaFieldTest.scala @@ -21,7 +21,7 @@ class SchemaFieldTest extends AnyFunSuite { private val schemaFieldChildSecondLevel = SchemaField( name = "String", `type` = "string", - path = "", + path = "AnyStruct.AnyStruct2.String", elementType = None, containsNull = None, nullable = false, @@ -32,7 +32,7 @@ private val schemaFieldChildOne = SchemaField( name = "AnyStruct2", `type` = "struct", - path = "", + path = "AnyStruct.AnyStruct2", elementType = None, containsNull = None, nullable = true, @@ -43,7 +43,7 @@ private val schemaFieldChildTwo = SchemaField( name = "Number", `type` = "ling", - path = "AnyStruct", + path = "AnyStruct.Number", elementType = None, containsNull = None, nullable = true, @@ -54,7 +54,7 @@
class SchemaFieldTest extends AnyFunSuite { private val schemaFieldRoot = SchemaField( name = "AnyStruct", `type` = "struct", - path = "", + path = "AnyStruct", elementType = None, containsNull = None, nullable = true, @@ -63,8 +63,13 @@ class SchemaFieldTest extends AnyFunSuite { ) test("testGetAllChildren") { - val expectedAllChildren = List("String", "AnyStruct2", "AnyStruct.Number") + val expectedAllChildren = List("AnyStruct.AnyStruct2.String.String", "AnyStruct.AnyStruct2.AnyStruct2", "AnyStruct.Number.Number") assert(schemaFieldRoot.getAllChildren == expectedAllChildren) } + test("testGetAllChildrenBasePath") { + val expectedAllChildren = List("AnyStruct.AnyStruct2.String", "AnyStruct.AnyStruct2", "AnyStruct.Number") + assert(schemaFieldRoot.getAllChildrenBasePath == expectedAllChildren) + } + } diff --git a/data-model/src/test/scala/za/co/absa/enceladus/model/dataFrameFilter/DataFrameFilterSuite.scala b/data-model/src/test/scala/za/co/absa/enceladus/model/dataFrameFilter/DataFrameFilterSuite.scala index 5d8a0379a..793070d47 100644 --- a/data-model/src/test/scala/za/co/absa/enceladus/model/dataFrameFilter/DataFrameFilterSuite.scala +++ b/data-model/src/test/scala/za/co/absa/enceladus/model/dataFrameFilter/DataFrameFilterSuite.scala @@ -56,7 +56,7 @@ class DataFrameFilterSuite extends AnyFunSuite { assert(filterExpr2.semanticEquals(expected)) } - test("Three filters joined with an and condidion") { + test("Three filters joined with an and condition") { val f1 = DiffersFilter("column1", "v1") val f2 = DiffersFilter("column2", "v2") val f3 = DiffersFilter("column3", "v3") diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/explode/Explosion.scala b/database/src/main/01_users.ddl similarity index 59% rename from utils/src/main/scala/za/co/absa/enceladus/utils/explode/Explosion.scala rename to database/src/main/01_users.ddl index 9dd49dc52..2a87613ce 100644 --- a/utils/src/main/scala/za/co/absa/enceladus/utils/explode/Explosion.scala +++ b/database/src/main/01_users.ddl @@ -13,15 +13,18 @@ * limitations under the License. */ -package za.co.absa.enceladus.utils.explode +CREATE ROLE enceladus WITH + LOGIN + NOSUPERUSER + INHERIT + NOCREATEDB + NOCREATEROLE + NOREPLICATION; -/** - * Stores all info needed to revert a single array explosion. - */ -case class Explosion( - arrayFieldName: String, - idFieldName: String, - indexFieldName: String, - sizeFieldName: String, - superTransientFieldName: Option[String] - ) +CREATE ROLE menas WITH + LOGIN + NOSUPERUSER + INHERIT + NOCREATEDB + NOCREATEROLE + NOREPLICATION; diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/testUtils/SparkLocalMaster.scala b/database/src/main/02_databases.ddl similarity index 81% rename from utils/src/main/scala/za/co/absa/enceladus/utils/testUtils/SparkLocalMaster.scala rename to database/src/main/02_databases.ddl index 50063b0f9..d202a9ebb 100644 --- a/utils/src/main/scala/za/co/absa/enceladus/utils/testUtils/SparkLocalMaster.scala +++ b/database/src/main/02_databases.ddl @@ -13,8 +13,8 @@ * limitations under the License. 
*/ -package za.co.absa.enceladus.utils.testUtils +CREATE DATABASE menas_db + WITH + ENCODING = 'UTF8' + CONNECTION LIMIT = -1; -trait SparkLocalMaster { - System.getProperties.setProperty("spark.master", "local[4]") -} diff --git a/database/src/main/03_menas_db.ddl b/database/src/main/03_menas_db.ddl new file mode 100644 index 000000000..4aefafe5e --- /dev/null +++ b/database/src/main/03_menas_db.ddl @@ -0,0 +1,18 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER DATABASE menas_db OWNER TO enceladus; + +CREATE EXTENSION IF NOT EXISTS hstore; diff --git a/database/src/main/dataset/_.ddl b/database/src/main/dataset/_.ddl new file mode 100644 index 000000000..0195bd979 --- /dev/null +++ b/database/src/main/dataset/_.ddl @@ -0,0 +1,19 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE SCHEMA IF NOT EXISTS dataset; +ALTER SCHEMA dataset OWNER TO enceladus; + +GRANT USAGE ON SCHEMA dataset TO menas; diff --git a/database/src/main/dataset/_add.sql b/database/src/main/dataset/_add.sql new file mode 100644 index 000000000..546588dec --- /dev/null +++ b/database/src/main/dataset/_add.sql @@ -0,0 +1,138 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE OR REPLACE FUNCTION dataset._add( + IN i_entity_name TEXT, + IN i_entity_version INTEGER, + IN i_entity_description TEXT, + IN i_source_path TEXT, + IN i_publish_path TEXT, + IN i_key_schema BIGINT, + IN i_conformance JSON[], + IN i_user_name TEXT, + OUT status INTEGER, + OUT status_text TEXT, + OUT key_entity_version BIGINT +) RETURNS record AS +$$ +------------------------------------------------------------------------------- +-- +-- Function: dataset._add(8) +-- Stores a new version of the dataset.
+-- +-- Parameters: +-- i_entity_name - name of the dataset +-- i_entity_version - version of the dataset +-- i_entity_description - description of the dataset +-- i_source_path - source path for the dataset +-- i_publish_path - output path for the dataset +-- i_key_schema - reference to the schema of the dataset +-- i_conformance - array of conformance rules +-- i_user_name - the user who submitted the changes +-- +-- Returns: +-- status - Status code +-- status_text - Status text +-- key_entity_version - id of the newly created dataset record +-- +-- Status codes: +-- 11 - OK +-- 31 - Dataset has been disabled +-- 32 - Dataset is locked +-- 50 - Dataset version wrong +-- 51 - Dataset already exists +-- +------------------------------------------------------------------------------- +DECLARE + _entity_type CHAR := 'D'; + _key_entity BIGINT; + _new_entity BOOLEAN; + _latest_version INTEGER; + _locked BOOLEAN; + _disabled BOOLEAN; +BEGIN + IF i_entity_version = 1 THEN + -- lock on stats to prevent competing inserts of new entity + PERFORM 1 + FROM entity_base.stats S + WHERE S.entity_type = _entity_type + FOR UPDATE; + END IF; + + SELECT E.id_entity, E.entity_latest_version, E.locked_at IS NOT NULL, E.disabled_at IS NOT NULL + FROM dataset.entities E + WHERE E.entity_name = i_entity_name + FOR UPDATE + INTO _key_entity, _latest_version, _locked, _disabled; + + _new_entity := NOT found; + + IF _new_entity THEN + IF i_entity_version != 1 THEN + status := 50; + status_text := 'Dataset version wrong'; + RETURN; + END IF; + + UPDATE entity_base.stats + SET entity_count = stats.entity_count + 1 + WHERE entity_type = _entity_type; + + INSERT INTO dataset.entities(entity_name, entity_latest_version, created_by) + VALUES (i_entity_name, i_entity_version, i_user_name) + RETURNING id_entity + INTO _key_entity; + ELSE + IF _disabled THEN + status := 31; + status_text := 'Dataset has been disabled'; + RETURN ; + ELSIF _locked THEN + status := 32; + status_text := 'Dataset is locked'; + RETURN; + ELSIF _latest_version >= i_entity_version THEN + status := 51; + status_text := 'Dataset already exists'; + RETURN; + ELSIF _latest_version + 1 < i_entity_version THEN + status := 50; + status_text := 'Dataset version wrong'; + RETURN; + END IF; + + END IF; + + INSERT INTO dataset.versions(key_entity, entity_version, entity_description, updated_by, + source_path, publish_path, key_schema, conformance) + VALUES (_key_entity, i_entity_version, i_entity_description, i_user_name, + i_source_path, i_publish_path, i_key_schema, i_conformance) + RETURNING dataset.versions.id_entity_version + INTO key_entity_version; + + IF NOT _new_entity THEN + UPDATE dataset.entities + SET entity_latest_version = i_entity_version + WHERE id_entity = _key_entity; + END IF; + + status := 11; + status_text := 'OK'; + RETURN; +END; +$$ +LANGUAGE plpgsql VOLATILE SECURITY DEFINER; + +ALTER FUNCTION dataset._add(TEXT, INTEGER, TEXT, TEXT, TEXT, BIGINT, JSON[], TEXT) OWNER TO enceladus; diff --git a/database/src/main/dataset/add.sql b/database/src/main/dataset/add.sql new file mode 100644 index 000000000..142021463 --- /dev/null +++ b/database/src/main/dataset/add.sql @@ -0,0 +1,159 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE OR REPLACE FUNCTION dataset.add(
+    IN i_entity_name TEXT,
+    IN i_entity_version INTEGER,
+    IN i_entity_description TEXT,
+    IN i_source_path TEXT,
+    IN i_publish_path TEXT,
+    IN i_key_schema BIGINT,
+    IN i_conformance JSON[],
+    IN i_user_name TEXT,
+    OUT status INTEGER,
+    OUT status_text TEXT,
+    OUT key_entity_version BIGINT
+) RETURNS record AS
+$$
+-------------------------------------------------------------------------------
+--
+-- Function: dataset.add(8)
+-- Stores a new version of the dataset.
+-- The i_entity_version has to be an increment of the latest version of an existing dataset or 1 in the case of a
+-- new one
+--
+-- Parameters:
+--      i_entity_name        - name of the dataset
+--      i_entity_version     - version of the dataset
+--      i_entity_description - description of the dataset
+--      i_source_path        - source path for the dataset
+--      i_publish_path       - output path for the dataset
+--      i_key_schema         - reference to the schema of the dataset
+--      i_conformance        - array of conformance rules
+--      i_user_name          - the user who submitted the changes
+--
+-- Returns:
+--      status               - Status code
+--      status_text          - Status text
+--      key_entity_version   - id of the newly created dataset record
+--
+-- Status codes:
+--      11 - OK
+--      31 - Dataset has been disabled
+--      32 - Dataset is locked
+--      42 - Schema does not exist
+--      50 - Dataset version wrong
+--      51 - Dataset already exists
+--
+-------------------------------------------------------------------------------
+DECLARE
+BEGIN
+    PERFORM 1
+    FROM dataset_schema.versions V
+    WHERE V.id_entity_version = i_key_schema;
+
+    IF NOT found THEN
+        status := 42;
+        status_text := 'Schema does not exist';
+        RETURN;
+    END IF;
+
+    SELECT A.status, A.status_text, A.key_entity_version
+    FROM dataset._add(i_entity_name, i_entity_version, i_entity_description, i_source_path,
+                      i_publish_path, i_key_schema, i_conformance, i_user_name) A
+    INTO status, status_text, key_entity_version;
+
+    RETURN;
+END;
+$$
+    LANGUAGE plpgsql VOLATILE SECURITY DEFINER;
+
+ALTER FUNCTION dataset.add(TEXT, INTEGER, TEXT, TEXT, TEXT, BIGINT, JSON[], TEXT) OWNER TO enceladus;
+GRANT EXECUTE ON FUNCTION dataset.add(TEXT, INTEGER, TEXT, TEXT, TEXT, BIGINT, JSON[], TEXT) TO menas;
+
+CREATE OR REPLACE FUNCTION dataset.add(
+    IN i_entity_name TEXT,
+    IN i_entity_version INTEGER,
+    IN i_entity_description TEXT,
+    IN i_source_path TEXT,
+    IN i_publish_path TEXT,
+    IN i_schema_name TEXT,
+    IN i_schema_version INTEGER,
+    IN i_conformance JSON[],
+    IN i_user_name TEXT,
+    OUT status INTEGER,
+    OUT status_text TEXT,
+    OUT key_entity_version BIGINT
+) RETURNS record AS
+$$
+-------------------------------------------------------------------------------
+--
+-- Function: dataset.add(9)
+-- Stores a new version of the dataset.
+-- The i_entity_version has to be an increment of the latest version of an existing dataset or 1 in the case of a
+-- new one
+--
+-- Parameters:
+--      i_entity_name        - name of the dataset
+--      i_entity_version     - version of the dataset
+--      i_entity_description - description of the dataset
+--      i_source_path        - source path for the dataset
+--      i_publish_path       - output path for the dataset
+--      i_schema_name        - name of the referenced schema of the dataset
+--      i_schema_version     - version of the referenced schema of the dataset
+--      i_conformance        - array of conformance rules
+--      i_user_name          - the user who submitted the changes
+--
+-- Returns:
+--      status               - Status code
+--      status_text          - Status text
+--      key_entity_version   - id of the newly created dataset record
+--
+-- Status codes:
+--      11 - OK
+--      31 - Dataset has been disabled
+--      32 - Dataset is locked
+--      42 - Schema does not exist
+--      50 - Dataset version wrong
+--      51 - Dataset already exists
+--
+-------------------------------------------------------------------------------
+DECLARE
+    _key_schema BIGINT;
+BEGIN
+
+    SELECT G.id_entity_version
+    FROM dataset_schema.get(i_schema_name, i_schema_version) G
+    WHERE G.status = 10
+    INTO _key_schema;
+
+    IF NOT found THEN
+        status := 42;
+        status_text := 'Schema does not exist';
+        RETURN;
+    END IF;
+
+    -- the schema key is resolved, delegate to the dataset-specific _add
+    SELECT A.status, A.status_text, A.key_entity_version
+    FROM dataset._add(i_entity_name, i_entity_version, i_entity_description, i_source_path,
+                      i_publish_path, _key_schema, i_conformance, i_user_name) A
+    INTO status, status_text, key_entity_version;
+
+    RETURN;
+END;
+$$
+    LANGUAGE plpgsql VOLATILE SECURITY DEFINER;
+
+ALTER FUNCTION dataset.add(TEXT, INTEGER, TEXT, TEXT, TEXT, TEXT, INTEGER, JSON[], TEXT) OWNER TO enceladus;
+GRANT EXECUTE ON FUNCTION dataset.add(TEXT, INTEGER, TEXT, TEXT, TEXT, TEXT, INTEGER, JSON[], TEXT) TO menas;
diff --git a/database/src/main/dataset/entities.ddl b/database/src/main/dataset/entities.ddl
new file mode 100644
index 000000000..757c86d39
--- /dev/null
+++ b/database/src/main/dataset/entities.ddl
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- DROP TABLE IF EXISTS dataset.entities;
+
+CREATE TABLE dataset.entities
+(
+    entity_type CHAR NOT NULL DEFAULT 'D',
+    CONSTRAINT entities_pk PRIMARY KEY (id_entity)
+)
+    INHERITS (entity_base.entities);
+
+ALTER TABLE dataset.entities
+    ADD CONSTRAINT entities_unq UNIQUE (entity_name);
+
+ALTER TABLE IF EXISTS dataset.entities
+    ADD CONSTRAINT check_dataset_entity_type CHECK (entity_type = 'D')
+    NOT VALID;
+
+ALTER TABLE dataset.entities OWNER to enceladus;
diff --git a/database/src/main/dataset/get.sql b/database/src/main/dataset/get.sql
new file mode 100644
index 000000000..b524cc92e
--- /dev/null
+++ b/database/src/main/dataset/get.sql
@@ -0,0 +1,242 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE OR REPLACE FUNCTION dataset.get(
+    IN i_entity_name TEXT,
+    IN i_entity_version INTEGER DEFAULT NULL,
+    OUT status INTEGER,
+    OUT status_text TEXT,
+    OUT id_entity_version BIGINT,
+    OUT entity_name TEXT,
+    OUT entity_version INTEGER,
+    OUT entity_description TEXT,
+    OUT created_by TEXT,
+    OUT created_at TIMESTAMP WITH TIME ZONE,
+    OUT updated_by TEXT,
+    OUT updated_at TIMESTAMP WITH TIME ZONE,
+    OUT locked_by TEXT,
+    OUT locked_at TIMESTAMP WITH TIME ZONE,
+    OUT disabled_by TEXT,
+    OUT disabled_at TIMESTAMP WITH TIME ZONE,
+    OUT source_path TEXT,
+    OUT publish_path TEXT,
+    OUT key_schema BIGINT,
+    OUT schema_name TEXT,
+    OUT schema_version INTEGER,
+    OUT schema_fields JSON,
+    OUT conformance JSON[]
+) RETURNS record AS
+$$
+-------------------------------------------------------------------------------
+--
+-- Function: dataset.get(2)
+-- Returns the data of the requested dataset, based on its name and version
+-- If the version is omitted/NULL the latest version data are returned.
+--
+-- Parameters:
+--      i_entity_name      - name of the dataset
+--      i_entity_version   - dataset version to return, latest is taken if NULL
+--
+-- Returns:
+--      status             - Status code
+--      status_text        - Status text
+--      id_entity_version  - id of the dataset
+--      entity_name        - name of the dataset
+--      entity_version     - the version of the dataset
+--      entity_description - description of the dataset
+--      created_by         - user who created the dataset
+--      created_at         - time & date when the dataset was created
+--      updated_by         - user who updated the dataset to this particular version
+--      updated_at         - time & date when this particular version of the dataset was created
+--      locked_by          - if locked, who was the user who locked the dataset
+--      locked_at          - if not NULL the dataset is locked
+--      disabled_by        - if disabled, who was the user who disabled the dataset
+--      disabled_at        - if not NULL the dataset has been disabled
+--      source_path        - source path of the dataset
+--      publish_path       - publish path of the dataset
+--      key_schema         - id of the attached schema
+--      schema_name        - name of the schema
+--      schema_version     - the version of the schema
+--      schema_fields      - the fields the schema consists of
+--      conformance        - conformance rules of the dataset
+--
+-- Status codes:
+--      10 - OK
+--      40 - Dataset does not exist
+--      42 - Schema not found (Should never happen)
+--      43 - Dataset of the given version does not exist
+--
+-------------------------------------------------------------------------------
+DECLARE
+    _key_entity BIGINT;
+    _entity_version INTEGER;
+    _schema_status INTEGER;
+BEGIN
+    SELECT E.id_entity, coalesce(i_entity_version, E.entity_latest_version), E.entity_name,
+           E.created_by, E.created_at, E.locked_by, E.locked_at,
+           E.disabled_by, E.disabled_at
+    FROM dataset.entities E
+    WHERE E.entity_name = i_entity_name
+    INTO _key_entity, _entity_version, get.entity_name,
+         get.created_by, get.created_at, get.locked_by, get.locked_at,
+         get.disabled_by, get.disabled_at;
+
+    IF NOT found THEN
+        status := 40;
+        status_text := 'Dataset does not exist';
+        RETURN;
+    END IF;
+
+    SELECT 10, 'OK', V.id_entity_version, V.entity_version,
+           V.entity_description, V.updated_by, V.updated_at,
+           V.source_path, V.publish_path, V.key_schema, V.conformance
+    FROM dataset.versions V
+    WHERE V.key_entity = _key_entity AND
+          V.entity_version = _entity_version
+    INTO status, status_text, get.id_entity_version, get.entity_version,
+         get.entity_description, get.updated_by, get.updated_at,
+         get.source_path, get.publish_path, get.key_schema, get.conformance;
+
+    IF NOT found THEN
+        status := 43;
+        status_text := 'Dataset of the given version does not exist';
+        RETURN;
+    END IF;
+
+    SELECT G.status, G.entity_name, G.entity_version, G.fields
+    FROM dataset_schema.get(key_schema) G
+    INTO _schema_status, schema_name, schema_version, schema_fields;
+
+    IF _schema_status != 10 THEN
+        status := 42;
+        status_text := 'Schema not found (Should never happen)';
+        RETURN;
+    END IF;
+
+    RETURN;
+END;
+$$
+LANGUAGE plpgsql VOLATILE SECURITY DEFINER;
+
+
+CREATE OR REPLACE FUNCTION dataset.get(
+    IN i_key_entity_version BIGINT,
+    OUT status INTEGER,
+    OUT status_text TEXT,
+    OUT id_entity_version BIGINT,
+    OUT entity_name TEXT,
+    OUT entity_version INTEGER,
+    OUT entity_description TEXT,
+    OUT created_by TEXT,
+    OUT created_at TIMESTAMP WITH TIME ZONE,
+    OUT updated_by TEXT,
+    OUT updated_at TIMESTAMP WITH TIME ZONE,
+    OUT locked_by TEXT,
+    OUT locked_at TIMESTAMP WITH TIME ZONE,
+    OUT disabled_by TEXT,
+    OUT disabled_at TIMESTAMP WITH TIME ZONE,
+    OUT source_path TEXT,
+    OUT publish_path TEXT,
+    OUT key_schema BIGINT,
+    OUT schema_name TEXT,
+    OUT schema_version INTEGER,
+    OUT schema_fields JSON,
+    OUT conformance JSON[]
+) RETURNS record AS
+$$
+-------------------------------------------------------------------------------
+--
+-- Function: dataset.get(1)
+-- Returns the data of the requested dataset, based on its id
+--
+-- Parameters:
+--      i_key_entity_version - id of the dataset version
+--
+-- Returns:
+--      status             - Status code
+--      status_text        - Status text
+--      id_entity_version  - id of the dataset
+--      entity_name        - name of the dataset
+--      entity_version     - the version of the dataset
+--      entity_description - description of the dataset
+--      created_by         - user who created the dataset
+--      created_at         - time & date when the dataset was created
+--      updated_by         - user who updated the dataset to this particular version
+--      updated_at         - time & date when this particular version of the dataset was created
+--      locked_by          - if locked, who was the user who locked the dataset
+--      locked_at          - if not NULL the dataset is locked
+--      disabled_by        - if disabled, who was the user who disabled the dataset
+--      disabled_at        - if not NULL the dataset has been disabled
+--      source_path        - source path of the dataset
+--      publish_path       - publish path of the dataset
+--      key_schema         - id of the attached schema
+--      schema_name        - name of the schema
+--      schema_version     - the version of the schema
+--      schema_fields      - the fields the schema consists of
+--      conformance        - conformance rules of the dataset
+--
+-- Status codes:
+--      10 - OK
+--      40 - Dataset does not exist
+--      42 - Schema not found (Should never happen)
+--
+-------------------------------------------------------------------------------
+DECLARE
+    _key_entity BIGINT;
+    _schema_status INTEGER;
+BEGIN
+
+    SELECT 10, 'OK', V.id_entity_version, V.key_entity, V.entity_version,
+           V.entity_description, V.updated_by, V.updated_at,
+           V.source_path, V.publish_path, V.key_schema, V.conformance
+    FROM dataset.versions V
+    WHERE V.id_entity_version = i_key_entity_version
+    INTO status, status_text, get.id_entity_version, _key_entity, get.entity_version,
+         get.entity_description, get.updated_by, get.updated_at,
+         get.source_path, get.publish_path, get.key_schema, get.conformance;
+
+    IF NOT found THEN
+        status := 40;
+        status_text := 'Dataset does not exist';
+        RETURN;
+    END IF;
+
+    -- fetch the entity header by the key resolved above
+    SELECT E.entity_name, E.created_by, E.created_at,
+           E.locked_by, E.locked_at, E.disabled_by, E.disabled_at
+    FROM dataset.entities E
+    WHERE E.id_entity = _key_entity
+    INTO get.entity_name, get.created_by, get.created_at,
+         get.locked_by, get.locked_at, get.disabled_by, get.disabled_at;
+
+    SELECT G.status, G.entity_name, G.entity_version, G.fields
+    FROM dataset_schema.get(key_schema) G
+    INTO _schema_status, schema_name, schema_version, schema_fields;
+
+    IF _schema_status != 10 THEN
+        status := 42;
+        status_text := 'Schema not found (Should never happen)';
+        RETURN;
+    END IF;
+
+    RETURN;
+END;
+$$
+LANGUAGE plpgsql VOLATILE SECURITY DEFINER;
+
+ALTER FUNCTION dataset.get(TEXT, INTEGER) OWNER TO enceladus;
+ALTER FUNCTION dataset.get(BIGINT) OWNER TO enceladus;
+GRANT EXECUTE ON FUNCTION dataset.get(TEXT, INTEGER) TO menas;
+GRANT EXECUTE ON FUNCTION dataset.get(BIGINT) TO menas;
diff --git a/database/src/main/dataset/list.sql b/database/src/main/dataset/list.sql
new file mode 100644
index 000000000..27cb01670
--- /dev/null
+++ b/database/src/main/dataset/list.sql
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE OR REPLACE FUNCTION dataset.list(
+    IN i_include_disabled BOOLEAN DEFAULT FALSE,
+    OUT entity_name TEXT,
+    OUT entity_latest_version INTEGER,
+    OUT locked BOOLEAN,
+    OUT disabled BOOLEAN
+) RETURNS SETOF record AS
+$$
+-------------------------------------------------------------------------------
+--
+-- Function: dataset.list(1)
+-- Returns a list of datasets with their latest versions
+--
+-- Parameters:
+--      i_include_disabled - flag indicating if to include disabled datasets too
+--
+-- Returns:
+--      entity_name           - name of the dataset
+--      entity_latest_version - the latest version of the dataset
+--      locked                - signals if the dataset is locked or not
+--      disabled              - signals if the dataset is disabled or not
+--
+-------------------------------------------------------------------------------
+DECLARE
+BEGIN
+    RETURN QUERY
+    SELECT E.entity_name, E.entity_latest_version, E.locked_at IS NOT NULL, E.disabled_at IS NOT NULL
+    FROM dataset.entities E
+    WHERE i_include_disabled OR E.disabled_at IS NULL
+    ORDER BY entity_name;
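+    -- Illustrative calls (not part of the original source):
+    --   SELECT * FROM dataset.list();      -- active datasets only
+    --   SELECT * FROM dataset.list(TRUE);  -- disabled datasets included as well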
+END; +$$ +LANGUAGE plpgsql VOLATILE SECURITY DEFINER; + +ALTER FUNCTION dataset.list(BOOLEAN) OWNER TO enceladus; +GRANT EXECUTE ON FUNCTION dataset.list(BOOLEAN) TO menas; diff --git a/database/src/main/dataset/versions.ddl b/database/src/main/dataset/versions.ddl new file mode 100644 index 000000000..ab929b600 --- /dev/null +++ b/database/src/main/dataset/versions.ddl @@ -0,0 +1,31 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- DROP TABLE IF EXISTS dataset.versions; + +CREATE TABLE dataset.versions +( + source_path TEXT NOT NULL, + publish_path TEXT NOT NULL, + key_schema BIGINT NOT NULL, + conformance JSON[] NOT NULL, + CONSTRAINT versions_pk PRIMARY KEY (id_entity_version) +) + INHERITS (entity_base.versions); + +ALTER TABLE dataset.versions + ADD CONSTRAINT versions_unq UNIQUE (key_entity, entity_version); + +ALTER TABLE dataset.versions OWNER to enceladus; diff --git a/database/src/main/dataset_schema/_.ddl b/database/src/main/dataset_schema/_.ddl new file mode 100644 index 000000000..f17e2df86 --- /dev/null +++ b/database/src/main/dataset_schema/_.ddl @@ -0,0 +1,19 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE SCHEMA IF NOT EXISTS dataset_schema; +ALTER SCHEMA dataset_schema OWNER TO enceladus; + +GRANT USAGE ON SCHEMA dataset_schema TO menas; diff --git a/database/src/main/dataset_schema/add.sql b/database/src/main/dataset_schema/add.sql new file mode 100644 index 000000000..aeae8e905 --- /dev/null +++ b/database/src/main/dataset_schema/add.sql @@ -0,0 +1,132 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CREATE OR REPLACE FUNCTION dataset_schema.add( + IN i_entity_name TEXT, + IN i_entity_version INTEGER, + IN i_entity_description TEXT, + IN i_fields JSONB, + IN i_user_name TEXT, + OUT status INTEGER, + OUT status_text TEXT, + OUT id_entity_version BIGINT +) RETURNS record AS +$$ +------------------------------------------------------------------------------- +-- +-- Function: jobs_configuration.add(5) +-- Stores a new version of the schema. +-- The i_entity_version has to be an increment of the latest version of an existing schema or 1 +-- +-- Parameters: +-- i_entity_name - name of the schema +-- i_entity_version - version of the schema +-- i_entity_description - description of the schema +-- i_fields - the fields the schema consist of +-- i_user_name - the user who submitted the changes +-- +-- Returns: +-- status - Status code +-- status_text - Status text +-- id_schema - id of the newly created schema record +-- +-- Status codes: +-- 11 - OK +-- 31 - Schema has been disabled +-- 32 - Schema is locked +-- 50 - Schema version wrong +-- 51 - Schema already exists +-- +------------------------------------------------------------------------------- +DECLARE + _entity_type CHAR := 'S'; + _key_entity BIGINT; + _new_entity BOOLEAN; + _latest_version INTEGER; + _locked BOOLEAN; + _disabled BOOLEAN; +BEGIN + + IF i_entity_version = 1 THEN + -- lock on stats to prevent competing inserts of new entity + PERFORM 1 + FROM entity_base.stats S + WHERE S.entity_type = _entity_type + FOR UPDATE; + END IF; + + SELECT E.id_entity, E.entity_latest_version, E.locked_at IS NOT NULL, E.disabled_at IS NOT NULL + FROM dataset_schema.entities E + WHERE E.entity_name = i_entity_name + FOR UPDATE + INTO _key_entity, _latest_version, _locked, _disabled; + + _new_entity := NOT found; + + IF _new_entity THEN + IF i_entity_version != 1 THEN + status := 50; + status_text := 'Schema version wrong'; + RETURN; + END IF; + + UPDATE entity_base.stats + SET entity_count = stats.entity_count + 1 + WHERE entity_type = _entity_type; + + INSERT INTO dataset_schema.entities (entity_name, entity_latest_version, created_by) + VALUES (i_entity_name, i_entity_version, i_user_name) + RETURNING id_entity + INTO _key_entity; + ELSE + IF _disabled THEN + status := 31; + status_text := 'Schema has been disabled'; + RETURN ; + ELSIF _locked THEN + status := 32; + status_text := 'Schema is locked'; + RETURN; + ELSEIF _latest_version >= i_entity_version THEN + status := 51; + status_text := 'Schema already exists'; + RETURN; + ELSIF _latest_version + 1 < i_entity_version THEN + status := 50; + status_text := 'Schema version wrong'; + RETURN; + END IF; + END IF; + + INSERT INTO dataset_schema.versions (key_entity, entity_version, entity_description, fields, updated_by) + VALUES (_key_entity, i_entity_version, i_entity_description, i_fields, i_user_name) + RETURNING dataset_schema.versions.id_entity_version + INTO id_entity_version; + + IF NOT _new_entity THEN + UPDATE dataset_schema.entities + SET entity_latest_version = i_entity_version + WHERE id_entity = _key_entity; + END IF; + + status := 11; + status_text := 'OK'; + RETURN; +END; +$$ +LANGUAGE plpgsql VOLATILE SECURITY DEFINER; + +ALTER FUNCTION dataset_schema.add(TEXT, INTEGER, TEXT, JSONB, TEXT) OWNER TO enceladus; +GRANT EXECUTE ON FUNCTION dataset_schema.add(TEXT, INTEGER, TEXT, JSONB, TEXT) TO menas; diff --git a/database/src/main/dataset_schema/entities.ddl b/database/src/main/dataset_schema/entities.ddl new file mode 100644 index 000000000..adbf77749 --- /dev/null +++ 
b/database/src/main/dataset_schema/entities.ddl @@ -0,0 +1,33 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- DROP TABLE IF EXISTS dataset_schema.entities; + +CREATE TABLE dataset_schema.entities +( + entity_type CHAR NOT NULL DEFAULT 'S', + CONSTRAINT entities_pk PRIMARY KEY (id_entity) +) + INHERITS (entity_base.entities); + +ALTER TABLE dataset_schema.entities + ADD CONSTRAINT entities_unq UNIQUE (entity_name); + + +ALTER TABLE IF EXISTS dataset_schema.entities + ADD CONSTRAINT check_dataset_schema_entity_type CHECK (entity_type = 'S') + NOT VALID; + +ALTER TABLE dataset_schema.entities OWNER to enceladus; diff --git a/database/src/main/dataset_schema/get.sql b/database/src/main/dataset_schema/get.sql new file mode 100644 index 000000000..9c703e87a --- /dev/null +++ b/database/src/main/dataset_schema/get.sql @@ -0,0 +1,188 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE OR REPLACE FUNCTION dataset_schema.get( + IN i_entity_name TEXT, + IN i_entity_version INTEGER DEFAULT NULL, + OUT status INTEGER, + OUT status_text TEXT, + OUT id_entity_version BIGINT, + OUT entity_name TEXT, + OUT entity_version INTEGER, + OUT entity_description TEXT, + OUT created_by TEXT, + OUT created_at TIMESTAMP WITH TIME ZONE, + OUT updated_by TEXT, + OUT updated_at TIMESTAMP WITH TIME ZONE, + OUT locked_by TEXT, + OUT locked_at TIMESTAMP WITH TIME ZONE, + OUT disabled_by TEXT, + OUT disabled_at TIMESTAMP WITH TIME ZONE, + OUT fields JSON +) RETURNS record AS +$$ +------------------------------------------------------------------------------- +-- +-- Function: jobs_configuration.get(2) +-- Returns the data of the requested schema, based on its name and version +-- If the version is omitted/NULL the latest version data are returned. 
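+-- Illustrative calls (made-up schema name):
+--   SELECT entity_name, entity_version, fields FROM dataset_schema.get('MySchema');     -- latest version
+--   SELECT entity_name, entity_version, fields FROM dataset_schema.get('MySchema', 2);  -- version 2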
+--
+-- Parameters:
+--      i_entity_name      - name of the schema
+--      i_entity_version   - schema version to return, latest is taken if NULL
+--
+-- Returns:
+--      status             - Status code
+--      status_text        - Status text
+--      id_entity_version  - Id of the schema
+--      entity_name        - name of the schema
+--      entity_version     - the version of the schema
+--      entity_description - description of the schema
+--      created_by         - user who created the schema
+--      created_at         - time & date when the schema was created
+--      updated_by         - user who updated the schema to this particular version
+--      updated_at         - time & date when this particular version of the schema was created
+--      locked_by          - if locked, who was the user who locked the schema
+--      locked_at          - if not NULL the schema is locked
+--      disabled_by        - if disabled, who was the user who disabled the schema
+--      disabled_at        - if not NULL the schema has been disabled
+--      fields             - the fields the schema consists of
+--
+-- Status codes:
+--      10 - OK
+--      40 - Schema does not exist
+--      43 - Schema of the given version does not exist
+--
+-------------------------------------------------------------------------------
+DECLARE
+    _key_entity BIGINT;
+    _entity_version INTEGER;
+BEGIN
+    SELECT E.id_entity, coalesce(i_entity_version, E.entity_latest_version), E.entity_name,
+           E.created_by, E.created_at, E.locked_by, E.locked_at, E.disabled_by, E.disabled_at
+    FROM dataset_schema.entities E
+    WHERE E.entity_name = i_entity_name
+    INTO _key_entity, _entity_version, get.entity_name,
+         get.created_by, get.created_at, get.locked_by, get.locked_at, get.disabled_by, get.disabled_at;
+
+    IF NOT found THEN
+        status := 40;
+        status_text := 'Schema does not exist';
+        RETURN;
+    END IF;
+
+    SELECT 10, 'OK', V.id_entity_version, V.entity_version, V.entity_description,
+           V.fields, V.updated_by, V.updated_at
+    FROM dataset_schema.versions V
+    WHERE V.key_entity = _key_entity AND
+          V.entity_version = _entity_version
+    INTO status, status_text, get.id_entity_version, get.entity_version,
+         get.entity_description, get.fields, get.updated_by, get.updated_at;
+
+    IF NOT found THEN
+        status := 43;
+        status_text := 'Schema of the given version does not exist';
+        RETURN;
+    END IF;
+
+    RETURN;
+END;
+$$
+LANGUAGE plpgsql VOLATILE SECURITY DEFINER;
+
+
+CREATE OR REPLACE FUNCTION dataset_schema.get(
+    IN i_key_entity_version BIGINT,
+    OUT status INTEGER,
+    OUT status_text TEXT,
+    OUT id_entity_version BIGINT,
+    OUT entity_name TEXT,
+    OUT entity_version INTEGER,
+    OUT entity_description TEXT,
+    OUT created_by TEXT,
+    OUT created_at TIMESTAMP WITH TIME ZONE,
+    OUT updated_by TEXT,
+    OUT updated_at TIMESTAMP WITH TIME ZONE,
+    OUT locked_by TEXT,
+    OUT locked_at TIMESTAMP WITH TIME ZONE,
+    OUT disabled_by TEXT,
+    OUT disabled_at TIMESTAMP WITH TIME ZONE,
+    OUT fields JSON
+) RETURNS record AS
+$$
+-------------------------------------------------------------------------------
+--
+-- Function: dataset_schema.get(1)
+-- Returns the data of the requested schema, based on its id
+--
+-- Parameters:
+--      i_key_entity_version - id of the schema version
+--
+-- Returns:
+--      status             - Status code
+--      status_text        - Status text
+--      id_entity_version  - Id of the schema
+--      entity_name        - name of the schema
+--      entity_version     - the version of the schema
+--      entity_description - description of the schema
+--      created_by         - user who created the schema
+--      created_at         - time & date when the schema was created
+--      updated_by         - user who updated the schema to this particular version
+--      updated_at         - time & date when this particular version of the schema was created
+--      locked_by          - if locked, who was the user who locked the schema
+--      locked_at          - if not NULL the schema is locked
+--      disabled_by        - if disabled, who was the user who disabled the schema
+--      disabled_at        - if not NULL the schema has been disabled
+--      fields             - the fields the schema consists of
+--
+-- Status codes:
+--      10 - OK
+--      40 - Schema does not exist
+--
+-------------------------------------------------------------------------------
+DECLARE
+    _key_entity BIGINT;
+BEGIN
+
+    SELECT 10, 'OK', V.id_entity_version, V.key_entity, V.entity_version,
+           V.entity_description, V.fields, V.updated_by, V.updated_at
+    FROM dataset_schema.versions V
+    WHERE V.id_entity_version = i_key_entity_version
+    INTO status, status_text, get.id_entity_version, _key_entity, get.entity_version,
+         get.entity_description, get.fields, get.updated_by, get.updated_at;
+
+    IF NOT found THEN
+        status := 40;
+        status_text := 'Schema does not exist';
+        RETURN;
+    END IF;
+
+    SELECT E.entity_name, E.created_by, E.created_at, E.locked_by, E.locked_at,
+           E.disabled_by, E.disabled_at
+    FROM dataset_schema.entities E
+    WHERE E.id_entity = _key_entity
+    INTO get.entity_name, get.created_by, get.created_at, get.locked_by, get.locked_at,
+         get.disabled_by, get.disabled_at;
+
+    RETURN;
+END;
+$$
+LANGUAGE plpgsql VOLATILE SECURITY DEFINER;
+
+ALTER FUNCTION dataset_schema.get(TEXT, INTEGER) OWNER TO enceladus;
+ALTER FUNCTION dataset_schema.get(BIGINT) OWNER TO enceladus;
+GRANT EXECUTE ON FUNCTION dataset_schema.get(TEXT, INTEGER) TO menas;
+GRANT EXECUTE ON FUNCTION dataset_schema.get(BIGINT) TO menas;
diff --git a/database/src/main/dataset_schema/list.sql b/database/src/main/dataset_schema/list.sql
new file mode 100644
index 000000000..5b4186e5b
--- /dev/null
+++ b/database/src/main/dataset_schema/list.sql
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE OR REPLACE FUNCTION dataset_schema.list(
+    IN i_include_disabled BOOLEAN DEFAULT FALSE,
+    OUT entity_name TEXT,
+    OUT entity_latest_version INTEGER,
+    OUT locked BOOLEAN,
+    OUT disabled BOOLEAN
+) RETURNS SETOF record AS
+$$
+-------------------------------------------------------------------------------
+--
+-- Function: dataset_schema.list(1)
+-- Returns a list of schemas with their latest versions
+--
+-- Parameters:
+--      i_include_disabled - flag indicating if to include disabled schemas too
+--
+-- Returns:
+--      entity_name           - name of the schema
+--      entity_latest_version - the latest version of the schema
+--      locked                - signals if the schema is locked or not
+--      disabled              - signals if the schema is disabled or not
+--
+-------------------------------------------------------------------------------
+DECLARE
+BEGIN
+    RETURN QUERY
+    SELECT E.entity_name, E.entity_latest_version, E.locked_at IS NOT NULL, E.disabled_at IS NOT NULL
+    FROM dataset_schema.entities E
+    WHERE i_include_disabled OR E.disabled_at IS NULL
+    ORDER BY entity_name;
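+    -- Illustrative: the locked/disabled flags allow further filtering, e.g.
+    --   SELECT l.entity_name FROM dataset_schema.list(TRUE) l WHERE l.disabled;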
+END; +$$ +LANGUAGE plpgsql VOLATILE SECURITY DEFINER; + +ALTER FUNCTION dataset_schema.list(BOOLEAN) OWNER TO enceladus; +GRANT EXECUTE ON FUNCTION dataset_schema.list(BOOLEAN) TO menas; diff --git a/database/src/main/dataset_schema/versions.ddl b/database/src/main/dataset_schema/versions.ddl new file mode 100644 index 000000000..6dbf422e5 --- /dev/null +++ b/database/src/main/dataset_schema/versions.ddl @@ -0,0 +1,28 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- DROP TABLE IF EXISTS dataset_schema.versions; + +CREATE TABLE dataset_schema.versions +( + fields JSON, + CONSTRAINT versions_pk PRIMARY KEY (id_entity_version) +) + INHERITS (entity_base.versions); + +ALTER TABLE dataset_schema.versions + ADD CONSTRAINT versions_unq UNIQUE (key_entity, entity_version); + +ALTER TABLE dataset_schema.versions OWNER to enceladus; diff --git a/database/src/main/entity_base/_.ddl b/database/src/main/entity_base/_.ddl new file mode 100644 index 000000000..d371cdf73 --- /dev/null +++ b/database/src/main/entity_base/_.ddl @@ -0,0 +1,19 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE SCHEMA IF NOT EXISTS entity_base; +ALTER SCHEMA entity_base OWNER TO enceladus; + +GRANT USAGE ON SCHEMA entity_base TO menas; diff --git a/database/src/main/entity_base/entities.ddl b/database/src/main/entity_base/entities.ddl new file mode 100644 index 000000000..1573c2288 --- /dev/null +++ b/database/src/main/entity_base/entities.ddl @@ -0,0 +1,34 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +-- DROP TABLE IF EXISTS entity_base.entities CASCADE; + +CREATE TABLE entity_base.entities +( + id_entity BIGINT NOT NULL DEFAULT global_id(), + entity_name TEXT NOT NULL, + entity_latest_version INTEGER NOT NULL, + entity_type CHAR NOT NULL, + created_by TEXT NOT NULL, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), + locked_by TEXT, + locked_at TIMESTAMP WITH TIME ZONE, + disabled_by TEXT, + disabled_at TIMESTAMP WITH TIME ZONE +); + +ALTER TABLE entity_base.entities OWNER to enceladus; + +CREATE OR REPLACE RULE entity_base_entities_ins_protect AS ON INSERT TO entity_base.entities DO INSTEAD NOTHING; diff --git a/database/src/main/entity_base/entity_types.ddl b/database/src/main/entity_base/entity_types.ddl new file mode 100644 index 000000000..7e4b3dfee --- /dev/null +++ b/database/src/main/entity_base/entity_types.ddl @@ -0,0 +1,30 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- DROP TABLE IF EXISTS entity_base.entity_type CASCADE; + +CREATE TABLE entity_base.entity_types +( + entity_type CHAR NOT NULL, + entity_type_name TEXT NOT NULL, + CONSTRAINT entity_types_pk PRIMARY KEY (entity_type) +); + +ALTER TABLE entity_base.entity_types OWNER to enceladus; + +INSERT INTO entity_base.entity_types(entity_type, entity_type_name) +VALUES ('S', 'Schema'), + ('M', 'Mapping table'), + ('D', 'Dataset'); diff --git a/database/src/main/entity_base/stats.ddl b/database/src/main/entity_base/stats.ddl new file mode 100644 index 000000000..f018e5bc4 --- /dev/null +++ b/database/src/main/entity_base/stats.ddl @@ -0,0 +1,31 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- DROP TABLE IF EXISTS entity_base.stats + +CREATE TABLE entity_base.stats +( + entity_type CHAR NOT NULL, + entity_count INTEGER NOT NULL DEFAULT 0, + CONSTRAINT stats_pk PRIMARY KEY (entity_type) +); + +ALTER TABLE entity_base.stats + OWNER to enceladus; + +INSERT INTO entity_base.stats(entity_type) +VALUES ('S'), + ('M'), + ('D'); diff --git a/database/src/main/entity_base/versions.ddl b/database/src/main/entity_base/versions.ddl new file mode 100644 index 000000000..afeb1247a --- /dev/null +++ b/database/src/main/entity_base/versions.ddl @@ -0,0 +1,30 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- DROP TABLE IF EXISTS entity_base.versions CASCADE; + +CREATE TABLE entity_base.versions +( + id_entity_version BIGINT NOT NULL DEFAULT global_id(), + key_entity BIGINT NOT NULL, + entity_version INTEGER NOT NULL, + entity_description TEXT, + updated_by TEXT NOT NULL, + updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now() +); + +ALTER TABLE entity_base.versions OWNER to enceladus; + +CREATE OR REPLACE RULE entity_base_versions_ins_protect AS ON INSERT TO entity_base.versions DO INSTEAD NOTHING; diff --git a/database/src/main/mapping_table/_.ddl b/database/src/main/mapping_table/_.ddl new file mode 100644 index 000000000..ab392f3b1 --- /dev/null +++ b/database/src/main/mapping_table/_.ddl @@ -0,0 +1,19 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE SCHEMA IF NOT EXISTS mapping_table; +ALTER SCHEMA mapping_table OWNER TO enceladus; + +GRANT USAGE ON SCHEMA mapping_table TO menas; diff --git a/database/src/main/mapping_table/_add.sql b/database/src/main/mapping_table/_add.sql new file mode 100644 index 000000000..b861fd6da --- /dev/null +++ b/database/src/main/mapping_table/_add.sql @@ -0,0 +1,139 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE OR REPLACE FUNCTION mapping_table._add( + IN i_entity_name TEXT, + IN i_entity_version INTEGER, + IN i_entity_description TEXT, + IN i_table_path TEXT, + IN i_key_schema BIGINT, + IN i_default_mapping_values HSTORE, + IN i_table_filter JSON, + IN i_user_name TEXT, + OUT status INTEGER, + OUT status_text TEXT, + OUT key_entity_version BIGINT +) RETURNS record AS +$$ +------------------------------------------------------------------------------- +-- +-- Function: jobs_configuration._add(8) +-- Stores a new version of the mapping table. 
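+-- Illustrative literals for the mapping-specific inputs (the filter shape below is
+-- an assumption; no particular JSON structure is prescribed here):
+--   i_default_mapping_values: 'country_code=>"ZZ"'::HSTORE
+--   i_table_filter:           '{"columnName": "country_code", "value": "ZA"}'::JSON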
+-- The i_mapping table_version has to be an increment of the latest version of an existing mapping table or 1 +-- +-- Parameters: +-- i_entity_name - name of the mapping table +-- i_entity_version - version of the mapping table +-- i_entity_description - description of the mapping table +-- i_table_path - table_path, where the mapping table data are saved +-- i_key_schema - reference to the schema of the mapping table +-- i_default_mapping_values - default values of the mapping table +-- i_table_filter - filter on the data of the mapping table +-- i_user_name - the user who submitted the changes +-- +-- Returns: +-- status - Status code +-- status_text - Status text +-- key_entity_version - id of the newly created mapping table record +-- +-- Status codes: +-- 11 - OK +-- 31 - Mapping table has been disabled +-- 32 - Mapping table is locked +-- 50 - Mapping table version wrong +-- 51 - Mapping table already exists +-- +------------------------------------------------------------------------------- +DECLARE + _entity_type CHAR := 'M'; + _key_entity BIGINT; + _new_entity BOOLEAN; + _latest_version INTEGER; + _locked BOOLEAN; + _disabled BOOLEAN; +BEGIN + IF i_entity_version = 1 THEN + -- lock on stats to prevent competing inserts of new entity + PERFORM 1 + FROM entity_base.stats S + WHERE S.entity_type = _entity_type + FOR UPDATE; + END IF; + + SELECT E.id_entity, E.entity_latest_version, E.locked_at IS NOT NULL, E.disabled_at IS NOT NULL + FROM mapping_table.entities E + WHERE E.entity_name = i_entity_name + FOR UPDATE + INTO _key_entity, _latest_version, _locked, _disabled; + + _new_entity := NOT found; + + IF _new_entity THEN + IF i_entity_version != 1 THEN + status := 50; + status_text := 'Mapping table version wrong'; + RETURN; + END IF; + + UPDATE entity_base.stats + SET entity_count = stats.entity_count + 1 + WHERE entity_type = _entity_type; + + INSERT INTO mapping_table.entities(entity_name, entity_latest_version, created_by) + VALUES (i_entity_name, i_entity_version, i_user_name) + RETURNING id_entity + INTO _key_entity; + ELSE + IF _disabled THEN + status := 31; + status_text := 'Mapping table has been disabled'; + RETURN ; + ELSIF _locked THEN + status := 32; + status_text := 'Mapping table is locked'; + RETURN; + ELSIF _latest_version >= i_entity_version THEN + status := 51; + status_text := 'Mapping table already exists'; + RETURN; + ELSIF _latest_version + 1 < i_entity_version THEN + status := 50; + status_text := 'Mapping table version wrong'; + RETURN; + END IF; + + END IF; + + INSERT INTO mapping_table.versions(key_entity, entity_version, entity_description, table_path, + key_schema, default_mapping_values, table_filter, updated_by) + VALUES (_key_entity, i_entity_version, i_entity_description, i_table_path, + i_key_schema, i_default_mapping_values, i_table_filter, i_user_name) + RETURNING mapping_table.versions.id_entity_version + INTO key_entity_version; + + IF NOT _new_entity THEN + UPDATE mapping_table.entities + SET entity_latest_version = i_entity_version + WHERE id_entity = _key_entity; + END IF; + + status := 11; + status_text := 'OK'; + RETURN; +END; +$$ +LANGUAGE plpgsql VOLATILE SECURITY DEFINER; + +ALTER FUNCTION mapping_table._add(TEXT, INTEGER, TEXT, TEXT, BIGINT, HSTORE, JSON, TEXT) OWNER TO enceladus; diff --git a/database/src/main/mapping_table/add.sql b/database/src/main/mapping_table/add.sql new file mode 100644 index 000000000..259e0f530 --- /dev/null +++ b/database/src/main/mapping_table/add.sql @@ -0,0 +1,157 @@ +/* + * Copyright 2018 ABSA Group 
Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE OR REPLACE FUNCTION mapping_table.add( + IN i_entity_name TEXT, + IN i_entity_version INTEGER, + IN i_entity_description TEXT, + IN i_table_path TEXT, + IN i_key_schema BIGINT, + IN i_default_mapping_values HSTORE, + IN i_table_filter JSON, + IN i_user_name TEXT, + OUT status INTEGER, + OUT status_text TEXT, + OUT key_entity_version BIGINT +) RETURNS record AS +$$ +------------------------------------------------------------------------------- +-- +-- Function: jobs_configuration.add(8) +-- Stores a new version of the mapping table. +-- The i_entity_version has to be an increment of the latest version of an existing mapping table or 1 +-- +-- Parameters: +-- i_entity_name - name of the mapping table +-- i_entity_version - version of the mapping table +-- i_entity_description - description of the mapping table +-- i_table_path - table_path, where the mapping table data are saved +-- i_key_schema - reference to the schema of the mapping table +-- i_default_mapping_values - default values of the mapping table +-- i_table_filter - filter on the data of the mapping table +-- i_user_name - the user who submitted the changes +-- +-- Returns: +-- status - Status code +-- status_text - Status text +-- key_entity_version - id of the newly created mapping table record +-- +-- Status codes: +-- 11 - OK +-- 31 - Mapping table has been disabled +-- 32 - Mapping table is locked +-- 42 - Schema does not exists +-- 50 - Mapping table version wrong +-- 51 - Mapping table already exists +-- +------------------------------------------------------------------------------- +DECLARE +BEGIN + PERFORM 1 + FROM dataset_schema.versions V + WHERE V.id_entity_version = i_key_schema; + + IF NOT found THEN + status := 42; + status_text := 'Schema does not exists'; + RETURN; + END IF; + + SELECT A.status, A.status_text, A.key_entity_version + FROM mapping_table._add(i_entity_name, i_entity_version, i_entity_description, i_table_path, + i_key_schema, i_default_mapping_values, i_table_filter, i_user_name) A + INTO status, status_text, key_entity_version; + + RETURN; +END; +$$ +LANGUAGE plpgsql VOLATILE SECURITY DEFINER; + +ALTER FUNCTION mapping_table.add(TEXT, INTEGER, TEXT, TEXT, BIGINT, HSTORE, JSON, TEXT) OWNER TO enceladus; +GRANT EXECUTE ON FUNCTION mapping_table.add(TEXT, INTEGER, TEXT, TEXT, BIGINT, HSTORE, JSON, TEXT) TO menas; + +CREATE OR REPLACE FUNCTION mapping_table.add( + IN i_entity_name TEXT, + IN i_entity_version INTEGER, + IN i_entity_description TEXT, + IN i_table_path TEXT, + IN i_schema_name TEXT, + IN i_schema_version INTEGER, + IN i_default_mapping_values HSTORE, + IN i_table_filter JSON, + IN i_user_name TEXT, + OUT status INTEGER, + OUT status_text TEXT, + OUT key_entity_version BIGINT +) RETURNS record AS +$$ +------------------------------------------------------------------------------- +-- +-- Function: jobs_configuration.add(9) +-- Stores a new version of the mapping table. 
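+-- Illustrative call (made-up names; the schema is resolved by name and version):
+--   SELECT status, status_text, key_entity_version
+--   FROM mapping_table.add('CountryMapping', 1, 'country codes', '/publish/mt/country',
+--                          'CountrySchema', 3, NULL, NULL, 'jdoe');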
+-- The i_entity_version has to be an increment of the latest version of an existing mapping table or 1 +-- +-- Parameters: +-- i_entity_name - name of the mapping table +-- i_entity_version - version of the mapping table +-- i_entity_description - description of the mapping table +-- i_table_path - table_path, where the mapping table data are saved +-- i_schema_name - name of the referenced schema of the mapping table +-- i_schema_version - version of the referenced schema of the mapping table +-- i_default_mapping_values - default values of the mapping table +-- i_table_filter - filter on the data of the mapping table +-- i_user_name - the user who submitted the changes +-- +-- Returns: +-- status - Status code +-- status_text - Status text +-- key_entity_version - id of the newly created mapping table record +-- +-- Status codes: +-- 11 - OK +-- 31 - Mapping table has been disabled +-- 32 - Mapping table is locked +-- 42 - Schema does not exists +-- 50 - Mapping table version wrong +-- 51 - Mapping table already exists +-- +------------------------------------------------------------------------------- +DECLARE + _key_schema BIGINT; +BEGIN + + SELECT G.id_entity_version + FROM dataset_schema.get(i_schema_name, i_schema_version) G + WHERE G.status = 10 + INTO _key_schema; + + IF NOT found THEN + status := 42; + status_text := 'Schema does not exists'; + RETURN; + END IF; + + SELECT A.status, A.status_text, A.key_entity_version + FROM mapping_table._add(i_entity_name, i_entity_version, i_entity_description, i_table_path, + _key_schema, i_default_mapping_values, i_table_filter, i_user_name) A + INTO status, status_text, key_entity_version; + + RETURN; +END; +$$ +LANGUAGE plpgsql VOLATILE SECURITY DEFINER; + +ALTER FUNCTION mapping_table.add(TEXT, INTEGER, TEXT, TEXT, TEXT, INTEGER, HSTORE, JSON, TEXT) OWNER TO enceladus; +GRANT EXECUTE ON FUNCTION mapping_table.add(TEXT, INTEGER, TEXT, TEXT, TEXT, INTEGER, HSTORE, JSON, TEXT) TO menas; diff --git a/database/src/main/mapping_table/entities.ddl b/database/src/main/mapping_table/entities.ddl new file mode 100644 index 000000000..dc72de400 --- /dev/null +++ b/database/src/main/mapping_table/entities.ddl @@ -0,0 +1,32 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+-- DROP TABLE IF EXISTS mapping_table.entities;
+
+CREATE TABLE mapping_table.entities
+(
+    entity_type CHAR NOT NULL DEFAULT 'M',
+    CONSTRAINT entities_pk PRIMARY KEY (id_entity)
+)
+    INHERITS (entity_base.entities);
+
+ALTER TABLE mapping_table.entities
+    ADD CONSTRAINT entities_unq UNIQUE (entity_name);
+
+ALTER TABLE IF EXISTS mapping_table.entities
+    ADD CONSTRAINT check_mapping_table_entity_type CHECK (entity_type = 'M')
+    NOT VALID;
+
+ALTER TABLE mapping_table.entities OWNER TO enceladus;
diff --git a/database/src/main/mapping_table/get.sql b/database/src/main/mapping_table/get.sql
new file mode 100644
index 000000000..bff50452c
--- /dev/null
+++ b/database/src/main/mapping_table/get.sql
@@ -0,0 +1,241 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE OR REPLACE FUNCTION mapping_table.get(
+    IN i_entity_name TEXT,
+    IN i_entity_version INTEGER DEFAULT NULL,
+    OUT status INTEGER,
+    OUT status_text TEXT,
+    OUT id_entity_version BIGINT,
+    OUT entity_name TEXT,
+    OUT entity_version INTEGER,
+    OUT entity_description TEXT,
+    OUT created_by TEXT,
+    OUT created_at TIMESTAMP WITH TIME ZONE,
+    OUT updated_by TEXT,
+    OUT updated_at TIMESTAMP WITH TIME ZONE,
+    OUT locked_by TEXT,
+    OUT locked_at TIMESTAMP WITH TIME ZONE,
+    OUT disabled_by TEXT,
+    OUT disabled_at TIMESTAMP WITH TIME ZONE,
+    OUT table_path TEXT,
+    OUT key_schema BIGINT,
+    OUT schema_name TEXT,
+    OUT schema_version INTEGER,
+    OUT schema_fields JSON,
+    OUT default_mapping_values HSTORE,
+    OUT table_filter JSON
+) RETURNS record AS
+$$
+-------------------------------------------------------------------------------
+--
+-- Function: mapping_table.get(2)
+--      Returns the data of the requested mapping table, based on its name and version.
+--      If the version is omitted/NULL, the data of the latest version are returned.
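+--
+-- Example calls (the mapping table name is illustrative only):
+--      SELECT * FROM mapping_table.get('country_codes');      -- latest version
+--      SELECT * FROM mapping_table.get('country_codes', 3);   -- specific version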
+--
+-- Parameters:
+--      i_entity_name - name of the mapping table
+--      i_entity_version - mapping table version to return, latest is taken if NULL
+--
+-- Returns:
+--      status - Status code
+--      status_text - Status text
+--      id_entity_version - id of the mapping table
+--      entity_name - name of the mapping table
+--      entity_version - the version of the mapping table
+--      entity_description - description of the mapping table
+--      created_by - user who created the mapping table
+--      created_at - time & date when the mapping table was created
+--      updated_by - user who updated the mapping table to this particular version
+--      updated_at - time & date when this particular version of the mapping table was created
+--      locked_by - if locked, the user who locked the mapping table
+--      locked_at - if not NULL, the mapping table is locked
+--      disabled_by - if disabled, the user who disabled the mapping table
+--      disabled_at - if not NULL, the mapping table has been disabled
+--      table_path - path where the mapping table data are saved
+--      key_schema - id of the attached schema
+--      schema_name - name of the schema
+--      schema_version - the version of the schema
+--      schema_fields - the fields the schema consists of
+--      default_mapping_values - default values of the mapping table
+--      table_filter - filter on the data of the mapping table
+--
+-- Status codes:
+--      10 - OK
+--      40 - Mapping table does not exist
+--      42 - Schema not found (Should never happen)
+--      43 - Mapping table of the given version does not exist
+--
+-------------------------------------------------------------------------------
+DECLARE
+    _key_entity BIGINT;
+    _entity_version INTEGER;
+    _schema_status INTEGER;
+BEGIN
+    SELECT E.id_entity, coalesce(i_entity_version, E.entity_latest_version), E.entity_name,
+           E.created_by, E.created_at, E.locked_by, E.locked_at, E.disabled_by, E.disabled_at
+    FROM mapping_table.entities E
+    WHERE E.entity_name = i_entity_name
+    INTO _key_entity, _entity_version, get.entity_name,
+         get.created_by, get.created_at, get.locked_by, get.locked_at, get.disabled_by, get.disabled_at;
+
+    IF NOT found THEN
+        status := 40;
+        status_text := 'Mapping table does not exist';
+        RETURN;
+    END IF;
+
+    SELECT 10, 'OK', V.id_entity_version, V.entity_version,
+           V.entity_description, V.updated_by, V.updated_at,
+           V.table_path, V.key_schema, V.default_mapping_values, V.table_filter
+    FROM mapping_table.versions V
+    WHERE V.key_entity = _key_entity AND
+          V.entity_version = _entity_version
+    INTO status, status_text, get.id_entity_version, get.entity_version,
+         get.entity_description, get.updated_by, get.updated_at,
+         get.table_path, get.key_schema, get.default_mapping_values, get.table_filter;
+
+    IF NOT found THEN
+        status := 43;
+        status_text := 'Mapping table of the given version does not exist';
+        RETURN;
+    END IF;
+
+    SELECT G.status, G.entity_name, G.entity_version, G.fields
+    FROM dataset_schema.get(key_schema) G
+    INTO _schema_status, schema_name, schema_version, schema_fields;
+
+    IF _schema_status != 10 THEN
+        status := 42;
+        status_text := 'Schema not found (Should never happen)';
+        RETURN;
+    END IF;
+
+    RETURN;
+END;
+$$
+LANGUAGE plpgsql VOLATILE SECURITY DEFINER;
+
+
+CREATE OR REPLACE FUNCTION mapping_table.get(
+    IN i_key_entity_version BIGINT,
+    OUT status INTEGER,
+    OUT status_text TEXT,
+    OUT id_entity_version BIGINT,
+    OUT entity_name TEXT,
+    OUT entity_version INTEGER,
+    OUT entity_description TEXT,
+    OUT created_by TEXT,
+    OUT created_at TIMESTAMP WITH TIME ZONE,
+    OUT updated_by TEXT,
+    OUT updated_at TIMESTAMP WITH TIME ZONE,
+    OUT locked_by TEXT,
+    OUT locked_at TIMESTAMP WITH TIME ZONE,
+    OUT disabled_by TEXT,
+    OUT disabled_at TIMESTAMP WITH TIME ZONE,
+    OUT table_path TEXT,
+    OUT key_schema BIGINT,
+    OUT schema_name TEXT,
+    OUT schema_version INTEGER,
+    OUT schema_fields JSON,
+    OUT default_mapping_values HSTORE,
+    OUT table_filter JSON
+) RETURNS record AS
+$$
+-------------------------------------------------------------------------------
+--
+-- Function: mapping_table.get(1)
+--      Returns the data of the requested mapping table, based on its id
+--
+-- Parameters:
+--      i_key_entity_version - id of the mapping table
+--
+-- Returns:
+--      status - Status code
+--      status_text - Status text
+--      id_entity_version - id of the mapping table
+--      entity_name - name of the mapping table
+--      entity_version - the version of the mapping table
+--      entity_description - description of the mapping table
+--      created_by - user who created the mapping table
+--      created_at - time & date when the mapping table was created
+--      updated_by - user who updated the mapping table to this particular version
+--      updated_at - time & date when this particular version of the mapping table was created
+--      locked_by - if locked, the user who locked the mapping table
+--      locked_at - if not NULL, the mapping table is locked
+--      disabled_by - if disabled, the user who disabled the mapping table
+--      disabled_at - if not NULL, the mapping table has been disabled
+--      table_path - path where the mapping table data are saved
+--      key_schema - id of the attached schema
+--      schema_name - name of the schema
+--      schema_version - the version of the schema
+--      schema_fields - the fields the schema consists of
+--      default_mapping_values - default values of the mapping table
+--      table_filter - filter on the data of the mapping table
+--
+-- Status codes:
+--      10 - OK
+--      40 - Mapping table does not exist
+--      42 - Schema not found (Should never happen)
+--
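+-- Example call (the id value is illustrative only):
+--      SELECT entity_name, entity_version, table_path
+--      FROM mapping_table.get(3000000000000042::BIGINT);
+--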
+-------------------------------------------------------------------------------
+DECLARE
+    _key_entity BIGINT;
+    _schema_status INTEGER;
+BEGIN
+    SELECT 10, 'OK', V.id_entity_version, V.key_entity, V.entity_version,
+           V.entity_description, V.updated_by, V.updated_at,
+           V.table_path, V.key_schema, V.default_mapping_values, V.table_filter
+    FROM mapping_table.versions V
+    WHERE V.id_entity_version = i_key_entity_version
+    INTO status, status_text, get.id_entity_version, _key_entity, get.entity_version,
+         get.entity_description, get.updated_by, get.updated_at,
+         get.table_path, get.key_schema, get.default_mapping_values, get.table_filter;
+
+    IF NOT found THEN
+        status := 40;
+        status_text := 'Mapping table does not exist';
+        RETURN;
+    END IF;
+
+    SELECT E.entity_name, E.created_by, E.created_at, E.locked_by, E.locked_at,
+           E.disabled_by, E.disabled_at
+    FROM mapping_table.entities E
+    WHERE E.id_entity = _key_entity
+    INTO get.entity_name, get.created_by, get.created_at, get.locked_by, get.locked_at,
+         get.disabled_by, get.disabled_at;
+
+    SELECT G.status, G.entity_name, G.entity_version, G.fields
+    FROM dataset_schema.get(key_schema) G
+    INTO _schema_status, schema_name, schema_version, schema_fields;
+
+    IF _schema_status != 10 THEN
+        status := 42;
+        status_text := 'Schema not found (Should never happen)';
+        RETURN;
+    END IF;
+
+    RETURN;
+END;
+$$
+LANGUAGE plpgsql VOLATILE SECURITY DEFINER;
+
+ALTER FUNCTION mapping_table.get(TEXT, INTEGER) OWNER TO enceladus;
+ALTER FUNCTION mapping_table.get(BIGINT) OWNER TO enceladus;
+GRANT EXECUTE ON FUNCTION mapping_table.get(TEXT, INTEGER) TO menas;
+GRANT EXECUTE ON FUNCTION mapping_table.get(BIGINT) TO menas;
diff --git a/database/src/main/mapping_table/list.sql b/database/src/main/mapping_table/list.sql
new file mode 100644
index 000000000..dbb3dad2b
--- /dev/null
+++ b/database/src/main/mapping_table/list.sql
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE OR REPLACE FUNCTION mapping_table.list(
+    IN i_include_disabled BOOLEAN DEFAULT FALSE,
+    OUT entity_name TEXT,
+    OUT entity_latest_version INTEGER,
+    OUT locked BOOLEAN,
+    OUT disabled BOOLEAN
+) RETURNS SETOF record AS
+$$
+-------------------------------------------------------------------------------
+--
+-- Function: mapping_table.list(1)
+--      Returns a list of mapping tables with their latest versions
+--
+-- Parameters:
+--      i_include_disabled - flag indicating whether to include disabled mapping tables as well
+--
+-- Returns:
+--      entity_name - name of the mapping table
+--      entity_latest_version - the latest version of the mapping table
+--      locked - signals whether the mapping table is locked
+--      disabled - signals whether the mapping table is disabled
+--
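+-- Example calls (a sketch of the intended usage):
+--      SELECT * FROM mapping_table.list();     -- enabled mapping tables only
+--      SELECT * FROM mapping_table.list(TRUE); -- disabled mapping tables included as well
+--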
+-------------------------------------------------------------------------------
+DECLARE
+BEGIN
+    RETURN QUERY
+    SELECT E.entity_name, E.entity_latest_version, E.locked_at IS NOT NULL, E.disabled_at IS NOT NULL
+    FROM mapping_table.entities E
+    WHERE i_include_disabled OR E.disabled_at IS NULL
+    ORDER BY E.entity_name; --TODO Include order by?
+END;
+$$
+LANGUAGE plpgsql VOLATILE SECURITY DEFINER;
+
+ALTER FUNCTION mapping_table.list(BOOLEAN) OWNER TO enceladus;
+GRANT EXECUTE ON FUNCTION mapping_table.list(BOOLEAN) TO menas;
diff --git a/database/src/main/mapping_table/versions.ddl b/database/src/main/mapping_table/versions.ddl
new file mode 100644
index 000000000..f277e4106
--- /dev/null
+++ b/database/src/main/mapping_table/versions.ddl
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- DROP TABLE IF EXISTS mapping_table.versions;
+
+CREATE TABLE mapping_table.versions
+(
+    table_path TEXT NOT NULL,
+    key_schema BIGINT NOT NULL,
+    default_mapping_values HSTORE,
+    table_filter JSON,
+    CONSTRAINT versions_pk PRIMARY KEY (id_entity_version)
+)
+    INHERITS (entity_base.versions);
+
+ALTER TABLE mapping_table.versions
+    ADD CONSTRAINT versions_unq UNIQUE (key_entity, entity_version);
+
+CREATE INDEX versions_idx ON mapping_table.versions (key_schema);
+
+ALTER TABLE mapping_table.versions OWNER TO enceladus;
diff --git a/database/src/main/public/global_id.sql b/database/src/main/public/global_id.sql
new file mode 100644
index 000000000..ac160c3c1
--- /dev/null
+++ b/database/src/main/public/global_id.sql
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- DROP SEQUENCE IF EXISTS public.global_id_seq;
+
+-- DB_ID should be a unique number within your deployment between 0 and 9222,
+-- giving each database its own block of 10^15 ids
+-- (e.g. DB_ID=3 covers 3000000000000001 to 4000000000000000)
+CREATE SEQUENCE IF NOT EXISTS public.global_id_seq
+    INCREMENT 1
+    START [DB_ID]*1000000000000000+1
+    MINVALUE [DB_ID]*1000000000000000+1
+    MAXVALUE ([DB_ID] + 1)*1000000000000000
+    CACHE 1;
+
+CREATE OR REPLACE FUNCTION public.global_id() RETURNS BIGINT AS
+$$
+-------------------------------------------------------------------------------
+--
+-- Function: public.global_id(0)
+--      Generates a unique ID
+--
+-- Returns:
+--      - The next ID to use
+--
+-------------------------------------------------------------------------------
+DECLARE
+BEGIN
+    RETURN nextval('global_id_seq');
+END;
+$$
+LANGUAGE plpgsql VOLATILE SECURITY DEFINER;
+
+GRANT EXECUTE ON FUNCTION public.global_id() TO PUBLIC;
diff --git a/examples/pom.xml b/examples/pom.xml
index 23d93ba72..5c8c0a482 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
         za.co.absa.enceladus
         parent
-        2.23.0
+        3.0.0-SNAPSHOT
diff --git a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala
index 2dd99b3f9..99de25fdd 100644
--- a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala
+++ b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala
@@ -57,7 +57,7 @@ object CustomRuleSample4 extends CustomRuleSampleFs {
     opt[String]("row-tag").optional.action((value, config) => config.copy(rowTag = Some(value))).text("use the specific row tag instead of 'ROW' for XML format")
       .validate(_ =>
-        if (inputFormat.isDefined && inputFormat.get.equalsIgnoreCase("xml")) {
+        if (inputFormat.isDefined && inputFormat.get == "xml") {
           success
         } else {
           failure("The --row-tag option is supported only for XML raw data format")
diff --git a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala
index 5c0ba1fbd..28bf1564e 100644
--- a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala
+++ b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala
@@ -23,8 +23,7 @@ import za.co.absa.enceladus.conformance.config.ConformanceConfig
 import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches}
 import za.co.absa.enceladus.dao.MenasDAO
 import za.co.absa.enceladus.model.Dataset
-import za.co.absa.enceladus.utils.fs.HadoopFsUtils
-import za.co.absa.enceladus.utils.testUtils.{HadoopFsTestBase, SparkTestBase}
+import za.co.absa.enceladus.utils.testUtils.{HadoopFsTestBase, TZNormalizedSparkTestBase}

 case class TestInputRow(id: Int, mandatoryString: String, nullableString: Option[String])

@@ -33,7 +32,7 @@ object TestOutputRow {
   def apply(input: TestInputRow, doneUpper: String): TestOutputRow = TestOutputRow(input.id, input.mandatoryString, input.nullableString, doneUpper)
 }

-class UppercaseCustomConformanceRuleSuite extends AnyFunSuite with SparkTestBase with MockitoSugar with HadoopFsTestBase {
+class UppercaseCustomConformanceRuleSuite extends AnyFunSuite with TZNormalizedSparkTestBase with MockitoSugar with HadoopFsTestBase {
   import spark.implicits._

   implicit val progArgs: ConformanceConfig = ConformanceConfig() // here we may need to specify some parameters (for certain rules)
diff --git a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala
index 78daedc01..584d2f175 100644
--- a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala
+++ b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala
@@ -26,8 +26,7 @@ import za.co.absa.enceladus.dao.MenasDAO
 import za.co.absa.enceladus.dao.auth.MenasKerberosCredentials
 import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory}
 import za.co.absa.enceladus.model.Dataset
-import za.co.absa.enceladus.utils.fs.HadoopFsUtils
-import za.co.absa.enceladus.utils.testUtils.{HadoopFsTestBase, SparkTestBase}
+import za.co.absa.enceladus.utils.testUtils.{HadoopFsTestBase, TZNormalizedSparkTestBase}

 case class XPadTestInputRow(intField: Int, stringField: Option[String])
 case class XPadTestOutputRow(intField: Int, stringField: Option[String], targetField: String)
@@ -35,7 +34,7 @@ object XPadTestOutputRow {
   def apply(input: XPadTestInputRow, targetField: String): XPadTestOutputRow = XPadTestOutputRow(input.intField, input.stringField, targetField)
 }

-class LpadCustomConformanceRuleSuite extends AnyFunSuite with SparkTestBase with MockitoSugar with HadoopFsTestBase {
+class LpadCustomConformanceRuleSuite extends AnyFunSuite with TZNormalizedSparkTestBase with MockitoSugar with HadoopFsTestBase {
   import spark.implicits._

   implicit val progArgs: ConformanceConfig = ConformanceConfig() // here we may need to specify some parameters (for certain rules)
@@ -179,7 +178,7 @@ class LpadCustomConformanceRuleSuite extends AnyFunSuite with SparkTestBase with
 }

-class RpadCustomConformanceRuleSuite extends AnyFunSuite with SparkTestBase with HadoopFsTestBase {
+class RpadCustomConformanceRuleSuite extends AnyFunSuite with TZNormalizedSparkTestBase with HadoopFsTestBase {

   import spark.implicits._
diff --git a/menas/Dockerfile b/menas/Dockerfile
index 2242f20c4..339dd8965 100644
--- a/menas/Dockerfile
+++ b/menas/Dockerfile
@@ -11,16 +11,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-# To build, first build menas and be in enceladus/menas folder. Then run:
-# $> docker build -t menas .
-# Run image using:
-# $> docker run \
-#      -e JAVA_OPTS=' \
-#         -Dmenas.mongo.connection.string=mongodb://host.docker.internal:27017 \
-#         -Dmenas.mongo.connection.database=menas \
-#      -p 8080:8080 \
-#      menas
-FROM tomcat:9-jre8-alpine
+ARG DOCKER_BASE_ARTIFACT=nginx
+ARG DOCKER_BASE_TAG=1-alpine
+
+FROM ${DOCKER_BASE_ARTIFACT}:${DOCKER_BASE_TAG}

 LABEL \
     vendor="AbsaOSS" \
@@ -28,20 +22,8 @@ LABEL \
     license="Apache License, version 2.0" \
     name="Menas"

-ARG WAR_FILE
-ARG PROPERTY_FILE
-
-ENV SPRING_CONFIG_NAME=${PROPERTY_FILE:-"application"}
-
-ADD ./src/main/resources/docker/start_menas.sh start_menas.sh
-RUN chmod +x start_menas.sh && \
-    rm -rf webapps/*
-
-ADD ./target/${WAR_FILE} webapps/ROOT.war
-ADD src/main/resources/docker/server.xml /tmp/server.xml
-
-EXPOSE 8080
-EXPOSE 8443
-EXPOSE 8009
+ADD start_menas.sh /docker-entrypoint.d/start_menas.sh
+COPY nginx.conf nginx.conf
+RUN chmod +x /docker-entrypoint.d/start_menas.sh && rm -rf /usr/share/nginx/html/*

-CMD ["./start_menas.sh"]
+COPY ./ui/dist /usr/share/nginx/html/
diff --git a/menas/nginx.conf b/menas/nginx.conf
new file mode 100644
index 000000000..caf649d41
--- /dev/null
+++ b/menas/nginx.conf
@@ -0,0 +1,27 @@
+# Copyright 2018 ABSA Group Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
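+#
+# Note: ${DNS_NAME} and the /etc/ssl/*.pem paths below are placeholders; when
+# PRIVATE_KEY and CERTIFICATE are provided, the start_menas.sh entrypoint hook
+# renders this template with envsubst and writes out the key and certificate
+# (see menas/start_menas.sh in this change).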
+ +events {} + +http { + include mime.types; + ssl_session_cache shared:SSL:10m; + ssl_session_timeout 10m; + server { + listen 443 ssl http2; + root /usr/share/nginx/html; + server_name ${DNS_NAME}; + ssl_certificate /etc/ssl/certificate.pem; + ssl_certificate_key /etc/ssl/private.pem; + } +} diff --git a/menas/pom.xml b/menas/pom.xml index 1be9866a1..a83472bfc 100644 --- a/menas/pom.xml +++ b/menas/pom.xml @@ -21,271 +21,9 @@ za.co.absa.enceladus parent - 2.23.0 + 3.0.0-SNAPSHOT - - 1.8 - 1.0.4 - ${project.basedir}/ui - ${project.parent.basedir}/scalastyle-config.xml - 2.2.0 - - - - - - org.scala-lang - scala-library - ${scala.version} - - - org.scala-lang.modules - scala-xml_${scala.compat.version} - ${scala.xml.version} - - - org.apache.httpcomponents - httpclient - ${httpclient.version} - - - - org.apache.spark - spark-core_${scala.compat.version} - ${spark.version} - compile - - - org.slf4j - slf4j-log4j12 - - - javax.validation - validation-api - - - - - - org.apache.spark - spark-avro_${scala.compat.version} - ${spark.version} - compile - - - org.apache.spark - spark-sql_${scala.compat.version} - ${spark.version} - compile - - - org.apache.httpcomponents - httpclient - - - - - org.apache.hadoop - hadoop-client - ${hadoop.version} - compile - - - com.fasterxml.jackson.module - jackson-module-scala_${scala.compat.version} - ${jackson.version} - - - com.fasterxml.jackson.datatype - jackson-datatype-jsr310 - ${jackson.version} - - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - - - com.google.code.gson - gson - ${gson.version} - - - - org.mongodb.scala - mongo-scala-driver_${scala.compat.version} - ${mongo.scala.driver.version} - - - io.github.cbartosiak - bson-codecs-jsr310 - ${bson.codec.jsr310.version} - - - - org.springframework.boot - spring-boot-starter-web - ${spring.version} - - - ch.qos.logback - logback-classic - - - - - org.springframework.boot - spring-boot-starter-actuator - ${spring.version} - - - - org.springframework.boot - spring-boot-starter-security - ${spring.version} - - - org.springframework.security - spring-security-ldap - ${spring.ldap.version} - - - org.springframework.security.kerberos - spring-security-kerberos-web - ${spring.kerberos.version} - - - org.springframework.security.kerberos - spring-security-kerberos-client - ${spring.kerberos.version} - - - io.jsonwebtoken - jjwt-api - ${jjwt.version} - - - io.jsonwebtoken - jjwt-impl - ${jjwt.version} - runtime - - - io.jsonwebtoken - jjwt-jackson - ${jjwt.version} - - - - org.apache.htrace - htrace-core - ${htrace.version} - - - - za.co.absa.enceladus - data-model - ${project.version} - - - za.co.absa.enceladus - migrations - ${project.version} - - - za.co.absa.enceladus - utils - ${project.version} - - - org.slf4j - slf4j-log4j12 - - - - - org.scala-lang.modules - scala-java8-compat_${scala.compat.version} - ${scala.java8.compat.version} - - - - org.webjars - openui5 - ${openui5.version} - - - org.webjars.bower - lodash - ${lodash.version} - - - org.webjars - momentjs - ${momentjs.version} - - - org.webjars - webjars-locator-core - ${webjars.locator.version} - - - org.apache.oozie - oozie-client - ${oozie.version} - - - org.apache.oozie - oozie-hadoop-auth - - - - - - org.webjars.bower - chart.js - ${bower.chart.js.version} - - - - za.co.absa.cobrix - spark-cobol_${scala.compat.version} - ${cobrix.version} - - - - com.google.guava - guava - ${guava.version} - - - za.co.absa - atum-model_${scala.compat.version} - ${atum.version} - - - - org.springframework.boot - 
spring-boot-starter-test - ${spring.version} - test - - - - com.github.tomakehurst - wiremock-jre8 - ${wiremock.version} - test - - - de.flapdoodle.embed - de.flapdoodle.embed.mongo - ${embedded.mongo.version} - test - - @@ -293,47 +31,106 @@ dockerfile-maven-plugin ${dockerfile.maven.version} - ${dockerfile.repository} + ${dockerfile.menas.repository} ${dockerfile.tag} - ${project.build.finalName}.war - ${dockerfile.property.file} + ${dockerfile.menas.repository} + ${dockerfile.tag} - - org.scalastyle - scalastyle-maven-plugin + + org.apache.maven.plugins + maven-clean-plugin + 3.1.0 + + + + ${basedir}/ui/dist + ${basedir}/ui/node_modules + + + - org.springframework.boot - spring-boot-maven-plugin - ${spring.version} + com.github.eirslett + frontend-maven-plugin + 1.7.6 - za.co.absa.enceladus.menas.Application + target + ui + v10.16.0 + + + install node and npm + + install-node-and-npm + + + + install + + npm + + + install + + + + sync version to package.json + + npm + + + version ${project.parent.version} --allow-same-version + + + + ui5 build + + npm + + + run ui5-build + + + + clean node_modules + + npm + + + prune --production + + + + - net.alchim31.maven - scala-maven-plugin - ${scala.maven.plugin.version} + org.apache.maven.plugins + maven-antrun-plugin + ${maven.antrun.plugin.version} + Clean up after generating component preload + generate-resources + + + + + + + + + - compile - testCompile + run - - - -Xfatal-warnings - -unchecked - -deprecation - -feature - - + org.apache.maven.plugins maven-war-plugin @@ -342,7 +139,7 @@ false - ui + ui/dist / false @@ -355,115 +152,10 @@ - - org.apache.maven.plugins - maven-resources-plugin - - false - - - - org.apache.maven.plugins - maven-antrun-plugin - ${maven.antrun.plugin.version} - - - copy - validate - - - - - - - - run - - - - + - - generateComponentPreload - - - - com.github.eirslett - frontend-maven-plugin - 1.7.6 - - - install node and npm - - install-node-and-npm - - generate-resources - - - install grunt-openui5 - - npm - - - install grunt grunt-cli grunt-openui5 --save-dev - - - - generate preload - - grunt - - - openui5_preload - - - - - ui - v10.16.0 - - - - org.apache.maven.plugins - maven-antrun-plugin - ${maven.antrun.plugin.version} - - - Clean up after generating component preload - generate-resources - - - - - - - - run - - - - - - org.apache.maven.plugins - maven-jar-plugin - ${maven.jar.plugin.version} - - - - true - - - - - - - license-check diff --git a/menas/src/main/resources/banner.txt b/menas/src/main/resources/banner.txt deleted file mode 100644 index 07089b3e9..000000000 --- a/menas/src/main/resources/banner.txt +++ /dev/null @@ -1,7 +0,0 @@ - __ __ -| \/ | -| \ / | ___ _ __ __ _ ___ -| |\/| |/ _ \ '_ \ / _` / __| -| | | | __/ | | | (_| \__ \ -|_| |_|\___|_| |_|\__,_|___/ -version ${application.version} diff --git a/menas/src/main/scala/za/co/absa/enceladus/menas/controllers/LandingPageController.scala b/menas/src/main/scala/za/co/absa/enceladus/menas/controllers/LandingPageController.scala deleted file mode 100644 index e372b5be8..000000000 --- a/menas/src/main/scala/za/co/absa/enceladus/menas/controllers/LandingPageController.scala +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2018 ABSA Group Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package za.co.absa.enceladus.menas.controllers - -import java.util.concurrent.CompletableFuture - -import scala.concurrent.Future - -import org.springframework.beans.factory.annotation.Autowired -import org.springframework.scheduling.annotation.Async -import org.springframework.scheduling.annotation.Scheduled -import org.springframework.web.bind.annotation.GetMapping -import org.springframework.web.bind.annotation.RequestMapping -import org.springframework.web.bind.annotation.RestController - -import za.co.absa.enceladus.menas.models.LandingPageInformation -import za.co.absa.enceladus.menas.repositories.DatasetMongoRepository -import za.co.absa.enceladus.menas.repositories.LandingPageStatisticsMongoRepository -import za.co.absa.enceladus.menas.repositories.MappingTableMongoRepository -import za.co.absa.enceladus.menas.repositories.SchemaMongoRepository -import za.co.absa.enceladus.menas.services.RunService - -@RestController -@RequestMapping(Array("/api/landing")) -class LandingPageController @Autowired() (datasetRepository: DatasetMongoRepository, - mappingTableRepository: MappingTableMongoRepository, - schemaRepository: SchemaMongoRepository, - runsService: RunService, - landingPageRepository: LandingPageStatisticsMongoRepository) extends BaseController { - - import scala.concurrent.ExecutionContext.Implicits.global - import za.co.absa.enceladus.menas.utils.implicits._ - - @GetMapping(path = Array("/info")) - def retrieveLandingPageInfo(): CompletableFuture[LandingPageInformation] = { - landingPageRepository.get() - } - - def landingPageInfo(): Future[LandingPageInformation] = { - for { - dsCount <- datasetRepository.distinctCount() - mtCount <- mappingTableRepository.distinctCount() - schemaCount <- schemaRepository.distinctCount() - runCount <- runsService.getCount() - todaysStats <- runsService.getTodaysRunsStatistics() - } yield LandingPageInformation(dsCount, mtCount, schemaCount, runCount, todaysStats) - } - - // scalastyle:off magic.number - @Scheduled(initialDelay = 1000, fixedDelay = 300000) - @Async - def scheduledLandingPageStatsRecalc(): CompletableFuture[_] = { - logger.info("Running scheduled landing page statistics recalculation") - for { - newStats <- landingPageInfo() - res <- landingPageRepository.updateStatistics(newStats) - } yield res - } -} diff --git a/menas/start_menas.sh b/menas/start_menas.sh new file mode 100644 index 000000000..bde269cc9 --- /dev/null +++ b/menas/start_menas.sh @@ -0,0 +1,25 @@ +#!/bin/sh + +# Copyright 2018 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
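+
+# Entrypoint hook for the nginx base image (installed into /docker-entrypoint.d
+# by the Dockerfile): when TLS material is supplied through the environment,
+# render the nginx.conf template and write out the key, certificate, and CA
+# chain; the UI's package.json placeholders are always substituted via envsubst.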
+ +if [ -n "${PRIVATE_KEY}" ] && [ -n "${CERTIFICATE}" ]; then + envsubst < ./nginx.conf > /etc/nginx/nginx.conf + echo "${PRIVATE_KEY}" >> /etc/ssl/private.pem + echo "${CERTIFICATE}" >> /etc/ssl/certificate.pem + echo "${CA_CHAIN}" >> /etc/ssl/cachain.pem +fi + +envsubst < /usr/share/nginx/html/package.json > /usr/share/nginx/html/package-new.json +rm /usr/share/nginx/html/package.json +mv /usr/share/nginx/html/package-new.json /usr/share/nginx/html/package.json diff --git a/menas/ui/components/Component.js b/menas/ui/components/Component.js index a6f3278f2..c5ea51927 100644 --- a/menas/ui/components/Component.js +++ b/menas/ui/components/Component.js @@ -70,7 +70,12 @@ sap.ui.define([ name: "mappingTables", pattern: "mapping/:id:/:version:", target: "mappingTable" - } + }, + { + name: "properties", + pattern: "properties/:id:", + target: "property" + }, ], targets: { login: { @@ -102,6 +107,11 @@ sap.ui.define([ viewName: "components.mappingTable.mappingTableDetail", viewLevel: 1, viewId: "mappingTableDetailView" + }, + property: { + viewName: "components.property.datasetPropertyDetail", + viewLevel: 1, + viewId: "datasetPropertyDetailView" } } } diff --git a/menas/ui/components/app.controller.js b/menas/ui/components/app.controller.js index 67c4f7ec9..4f7949ec4 100644 --- a/menas/ui/components/app.controller.js +++ b/menas/ui/components/app.controller.js @@ -129,6 +129,11 @@ sap.ui.define([ this._app.toMaster(this.createId("mappingTablesPage")); }, + onPropertiesPress: function (oEv) { + this._eventBus.publish("properties", "list"); + this._app.toMaster(this.createId("propertiesPage")); + }, + onEntityCreated: function (sTopic, sEvent, oData) { this._router.navTo(sTopic, { id: oData.name, diff --git a/menas/ui/components/app.view.xml b/menas/ui/components/app.view.xml index 46419866c..fc779d087 100644 --- a/menas/ui/components/app.view.xml +++ b/menas/ui/components/app.view.xml @@ -25,6 +25,7 @@ +