diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..2433bb2 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,40 @@ +version: 2 +jobs: + build: + docker: + - image: 218546966473.dkr.ecr.us-east-1.amazonaws.com/circle-ci:stitch-tap-tester + steps: + - checkout + - run: + name: 'Setup virtual env' + command: | + python3 -mvenv /usr/local/share/virtualenvs/tap-slack + source /usr/local/share/virtualenvs/tap-slack/bin/activate + pip install -U 'pip<19.2' setuptools + pip install .[dev] + - add_ssh_keys + - run: + name: 'Integration Tests' + command: | + aws s3 cp s3://com-stitchdata-dev-deployment-assets/environments/tap-tester/tap_tester_sandbox dev_env.sh + source dev_env.sh + source /usr/local/share/virtualenvs/tap-tester/bin/activate + run-test --tap=tap-slack tests + +workflows: + version: 2 + commit: + jobs: + - build: + context: circleci-user + build_daily: + triggers: + - schedule: + cron: "0 19 * * *" + filters: + branches: + only: + - master + jobs: + - build: + context: circleci-user diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..ef49bc0 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,15 @@ +# Description of change +(write a short description or paste a link to JIRA) + +# Manual QA steps + - + +# Risks + - + +# Rollback steps + - revert this branch + +#### AI generated code +https://internal.qlik.dev/general/ways-of-working/code-reviews/#guidelines-for-ai-generated-code +- [ ] this PR has been written with the help of GitHub Copilot or another generative AI tool diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..8d7135b --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,16 @@ +# Changelog + +## 1.1.2 + * Remove unused Pipfile.lock since it is no longer needed. 
[#22](https://github.com/singer-io/tap-slack/pull/22) + +## 1.1.1 + * Fix a logic error [#10](https://github.com/singer-io/tap-slack/pull/10) + +## 1.1.0 + * Adding user's email address to the users schema [#16](https://github.com/singer-io/tap-slack/pull/16) + +## 1.0.0 + * Added some streams, changed the name of some streams [#9](https://github.com/singer-io/tap-slack/pull/9) + +## 0.0.1 + * Initial commit diff --git a/LICENSE b/LICENSE index f288702..753d647 100644 --- a/LICENSE +++ b/LICENSE @@ -1,23 +1,21 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 - Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble - The GNU General Public License is a free, copyleft license for -software and other kinds of works. + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to +our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. +software for all its users. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you @@ -26,44 +24,34 @@ them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. 
If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. 
This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. The precise terms and conditions for copying, distribution and modification follow. @@ -72,7 +60,7 @@ modification follow. 0. Definitions. - "This License" refers to version 3 of the GNU General Public License. + "This License" refers to version 3 of the GNU Affero General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. @@ -549,35 +537,45 @@ to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. - 13. Use with the GNU Affero General Public License. + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single +under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. 
The terms of this License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General +Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published +GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's +versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. @@ -620,55 +618,3 @@ copy of the Program in return for a fee. 
END OF TERMS AND CONDITIONS - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - Copyright (C) - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. 
-For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. diff --git a/Pipfile.lock b/Pipfile.lock deleted file mode 100644 index 545dd8b..0000000 --- a/Pipfile.lock +++ /dev/null @@ -1,255 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "d811da662f5f44376bd13483fbb5b66cb20263f4a2c116a3cd393da49b8ee975" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.7" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.org/simple", - "verify_ssl": true - } - ] - }, - "default": {}, - "develop": { - "aiodns": { - "hashes": [ - "sha256:815fdef4607474295d68da46978a54481dd1e7be153c7d60f9e72773cd38d77d", - "sha256:aaa5ac584f40fe778013df0aa6544bf157799bd3f608364b451840ed2c8688de" - ], - "version": "==2.0.0" - }, - "aiohttp": { - "hashes": [ - "sha256:00d198585474299c9c3b4f1d5de1a576cc230d562abc5e4a0e81d71a20a6ca55", - "sha256:0155af66de8c21b8dba4992aaeeabf55503caefae00067a3b1139f86d0ec50ed", - "sha256:09654a9eca62d1bd6d64aa44db2498f60a5c1e0ac4750953fdd79d5c88955e10", - "sha256:199f1d106e2b44b6dacdf6f9245493c7d716b01d0b7fbe1959318ba4dc64d1f5", - "sha256:296f30dedc9f4b9e7a301e5cc963012264112d78a1d3094cd83ef148fdf33ca1", - "sha256:368ed312550bd663ce84dc4b032a962fcb3c7cae099dbbd48663afc305e3b939", - "sha256:40d7ea570b88db017c51392349cf99b7aefaaddd19d2c78368aeb0bddde9d390", - "sha256:629102a193162e37102c50713e2e31dc9a2fe7ac5e481da83e5bb3c0cee700aa", - "sha256:6d5ec9b8948c3d957e75ea14d41e9330e1ac3fed24ec53766c780f82805140dc", - "sha256:87331d1d6810214085a50749160196391a712a13336cd02ce1c3ea3d05bcf8d5", - "sha256:9a02a04bbe581c8605ac423ba3a74999ec9d8bce7ae37977a3d38680f5780b6d", - 
"sha256:9c4c83f4fa1938377da32bc2d59379025ceeee8e24b89f72fcbccd8ca22dc9bf", - "sha256:9cddaff94c0135ee627213ac6ca6d05724bfe6e7a356e5e09ec57bd3249510f6", - "sha256:a25237abf327530d9561ef751eef9511ab56fd9431023ca6f4803f1994104d72", - "sha256:a5cbd7157b0e383738b8e29d6e556fde8726823dae0e348952a61742b21aeb12", - "sha256:a97a516e02b726e089cffcde2eea0d3258450389bbac48cbe89e0f0b6e7b0366", - "sha256:acc89b29b5f4e2332d65cd1b7d10c609a75b88ef8925d487a611ca788432dfa4", - "sha256:b05bd85cc99b06740aad3629c2585bda7b83bd86e080b44ba47faf905fdf1300", - "sha256:c2bec436a2b5dafe5eaeb297c03711074d46b6eb236d002c13c42f25c4a8ce9d", - "sha256:cc619d974c8c11fe84527e4b5e1c07238799a8c29ea1c1285149170524ba9303", - "sha256:d4392defd4648badaa42b3e101080ae3313e8f4787cb517efd3f5b8157eaefd6", - "sha256:e1c3c582ee11af7f63a34a46f0448fca58e59889396ffdae1f482085061a2889" - ], - "version": "==3.5.4" - }, - "async-timeout": { - "hashes": [ - "sha256:0c3c816a028d47f659d6ff5c745cb2acf1f966da1fe5c19c77a70282b25f4c5f", - "sha256:4291ca197d287d274d0b6cb5d6f8f8f82d434ed288f962539ff18cc9012f9ea3" - ], - "version": "==3.0.1" - }, - "attrs": { - "hashes": [ - "sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", - "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" - ], - "version": "==19.1.0" - }, - "backoff": { - "hashes": [ - "sha256:608cd3c17c7ae541fb45454cf8236271d13f3cdae932e56879f26bd737344aa8" - ], - "version": "==1.3.2" - }, - "cffi": { - "hashes": [ - "sha256:041c81822e9f84b1d9c401182e174996f0bae9991f33725d059b771744290774", - "sha256:046ef9a22f5d3eed06334d01b1e836977eeef500d9b78e9ef693f9380ad0b83d", - "sha256:066bc4c7895c91812eff46f4b1c285220947d4aa46fa0a2651ff85f2afae9c90", - "sha256:066c7ff148ae33040c01058662d6752fd73fbc8e64787229ea8498c7d7f4041b", - "sha256:2444d0c61f03dcd26dbf7600cf64354376ee579acad77aef459e34efcb438c63", - "sha256:300832850b8f7967e278870c5d51e3819b9aad8f0a2c8dbe39ab11f119237f45", - 
"sha256:34c77afe85b6b9e967bd8154e3855e847b70ca42043db6ad17f26899a3df1b25", - "sha256:46de5fa00f7ac09f020729148ff632819649b3e05a007d286242c4882f7b1dc3", - "sha256:4aa8ee7ba27c472d429b980c51e714a24f47ca296d53f4d7868075b175866f4b", - "sha256:4d0004eb4351e35ed950c14c11e734182591465a33e960a4ab5e8d4f04d72647", - "sha256:4e3d3f31a1e202b0f5a35ba3bc4eb41e2fc2b11c1eff38b362de710bcffb5016", - "sha256:50bec6d35e6b1aaeb17f7c4e2b9374ebf95a8975d57863546fa83e8d31bdb8c4", - "sha256:55cad9a6df1e2a1d62063f79d0881a414a906a6962bc160ac968cc03ed3efcfb", - "sha256:5662ad4e4e84f1eaa8efce5da695c5d2e229c563f9d5ce5b0113f71321bcf753", - "sha256:59b4dc008f98fc6ee2bb4fd7fc786a8d70000d058c2bbe2698275bc53a8d3fa7", - "sha256:73e1ffefe05e4ccd7bcea61af76f36077b914f92b76f95ccf00b0c1b9186f3f9", - "sha256:a1f0fd46eba2d71ce1589f7e50a9e2ffaeb739fb2c11e8192aa2b45d5f6cc41f", - "sha256:a2e85dc204556657661051ff4bab75a84e968669765c8a2cd425918699c3d0e8", - "sha256:a5457d47dfff24882a21492e5815f891c0ca35fefae8aa742c6c263dac16ef1f", - "sha256:a8dccd61d52a8dae4a825cdbb7735da530179fea472903eb871a5513b5abbfdc", - "sha256:ae61af521ed676cf16ae94f30fe202781a38d7178b6b4ab622e4eec8cefaff42", - "sha256:b012a5edb48288f77a63dba0840c92d0504aa215612da4541b7b42d849bc83a3", - "sha256:d2c5cfa536227f57f97c92ac30c8109688ace8fa4ac086d19d0af47d134e2909", - "sha256:d42b5796e20aacc9d15e66befb7a345454eef794fdb0737d1af593447c6c8f45", - "sha256:dee54f5d30d775f525894d67b1495625dd9322945e7fee00731952e0368ff42d", - "sha256:e070535507bd6aa07124258171be2ee8dfc19119c28ca94c9dfb7efd23564512", - "sha256:e1ff2748c84d97b065cc95429814cdba39bcbd77c9c85c89344b317dc0d9cbff", - "sha256:ed851c75d1e0e043cbf5ca9a8e1b13c4c90f3fbd863dacb01c0808e2b5204201" - ], - "version": "==1.12.3" - }, - "chardet": { - "hashes": [ - "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", - "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" - ], - "version": "==3.0.4" - }, - "idna": { - "hashes": [ - 
"sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", - "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" - ], - "version": "==2.8" - }, - "jsonschema": { - "hashes": [ - "sha256:000e68abd33c972a5248544925a0cae7d1125f9bf6c58280d37546b946769a08", - "sha256:6ff5f3180870836cae40f06fa10419f557208175f13ad7bc26caa77beb1f6e02" - ], - "version": "==2.6.0" - }, - "multidict": { - "hashes": [ - "sha256:024b8129695a952ebd93373e45b5d341dbb87c17ce49637b34000093f243dd4f", - "sha256:041e9442b11409be5e4fc8b6a97e4bcead758ab1e11768d1e69160bdde18acc3", - "sha256:045b4dd0e5f6121e6f314d81759abd2c257db4634260abcfe0d3f7083c4908ef", - "sha256:047c0a04e382ef8bd74b0de01407e8d8632d7d1b4db6f2561106af812a68741b", - "sha256:068167c2d7bbeebd359665ac4fff756be5ffac9cda02375b5c5a7c4777038e73", - "sha256:148ff60e0fffa2f5fad2eb25aae7bef23d8f3b8bdaf947a65cdbe84a978092bc", - "sha256:1d1c77013a259971a72ddaa83b9f42c80a93ff12df6a4723be99d858fa30bee3", - "sha256:1d48bc124a6b7a55006d97917f695effa9725d05abe8ee78fd60d6588b8344cd", - "sha256:31dfa2fc323097f8ad7acd41aa38d7c614dd1960ac6681745b6da124093dc351", - "sha256:34f82db7f80c49f38b032c5abb605c458bac997a6c3142e0d6c130be6fb2b941", - "sha256:3d5dd8e5998fb4ace04789d1d008e2bb532de501218519d70bb672c4c5a2fc5d", - "sha256:4a6ae52bd3ee41ee0f3acf4c60ceb3f44e0e3bc52ab7da1c2b2aa6703363a3d1", - "sha256:4b02a3b2a2f01d0490dd39321c74273fed0568568ea0e7ea23e02bd1fb10a10b", - "sha256:4b843f8e1dd6a3195679d9838eb4670222e8b8d01bc36c9894d6c3538316fa0a", - "sha256:5de53a28f40ef3c4fd57aeab6b590c2c663de87a5af76136ced519923d3efbb3", - "sha256:61b2b33ede821b94fa99ce0b09c9ece049c7067a33b279f343adfe35108a4ea7", - "sha256:6a3a9b0f45fd75dc05d8e93dc21b18fc1670135ec9544d1ad4acbcf6b86781d0", - "sha256:76ad8e4c69dadbb31bad17c16baee61c0d1a4a73bed2590b741b2e1a46d3edd0", - "sha256:7ba19b777dc00194d1b473180d4ca89a054dd18de27d0ee2e42a103ec9b7d014", - "sha256:7c1b7eab7a49aa96f3db1f716f0113a8a2e93c7375dd3d5d21c4941f1405c9c5", - 
"sha256:7fc0eee3046041387cbace9314926aa48b681202f8897f8bff3809967a049036", - "sha256:8ccd1c5fff1aa1427100ce188557fc31f1e0a383ad8ec42c559aabd4ff08802d", - "sha256:8e08dd76de80539d613654915a2f5196dbccc67448df291e69a88712ea21e24a", - "sha256:c18498c50c59263841862ea0501da9f2b3659c00db54abfbf823a80787fde8ce", - "sha256:c49db89d602c24928e68c0d510f4fcf8989d77defd01c973d6cbe27e684833b1", - "sha256:ce20044d0317649ddbb4e54dab3c1bcc7483c78c27d3f58ab3d0c7e6bc60d26a", - "sha256:d1071414dd06ca2eafa90c85a079169bfeb0e5f57fd0b45d44c092546fcd6fd9", - "sha256:d3be11ac43ab1a3e979dac80843b42226d5d3cccd3986f2e03152720a4297cd7", - "sha256:db603a1c235d110c860d5f39988ebc8218ee028f07a7cbc056ba6424372ca31b" - ], - "version": "==4.5.2" - }, - "pycares": { - "hashes": [ - "sha256:2ca080db265ea238dc45f997f94effb62b979a617569889e265c26a839ed6305", - "sha256:6f79c6afb6ce603009db2042fddc2e348ad093ece9784cbe2daa809499871a23", - "sha256:70918d06eb0603016d37092a5f2c0228509eb4e6c5a3faacb4184f6ab7be7650", - "sha256:755187d28d24a9ea63aa2b4c0638be31d65fbf7f0ce16d41261b9f8cb55a1b99", - "sha256:7baa4b1f2146eb8423ff8303ebde3a20fb444a60db761fba0430d104fe35ddbf", - "sha256:90b27d4df86395f465a171386bc341098d6d47b65944df46518814ae298f6cc6", - "sha256:9e090dd6b2afa65cb51c133883b2bf2240fd0f717b130b0048714b33fb0f47ce", - "sha256:a11b7d63c3718775f6e805d6464cb10943780395ab042c7e5a0a7a9f612735dd", - "sha256:b253f5dcaa0ac7076b79388a3ac80dd8f3bd979108f813baade40d3a9b8bf0bd", - "sha256:c7f4f65e44ba35e35ad3febc844270665bba21cfb0fb7d749434e705b556e087", - "sha256:cdb342e6a254f035bd976d95807a2184038fc088d957a5104dcaab8be602c093", - "sha256:cf08e164f8bfb83b9fe633feb56f2754fae6baefcea663593794fa0518f8f98c", - "sha256:df9bc694cf03673878ea8ce674082c5acd134991d64d6c306d4bd61c0c1df98f" - ], - "version": "==3.0.0" - }, - "pycparser": { - "hashes": [ - "sha256:a988718abfad80b6b157acce7bf130a30876d27603738ac39f140993246b25b3" - ], - "version": "==2.19" - }, - "python-dateutil": { - "hashes": [ - 
"sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", - "sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e" - ], - "version": "==2.8.0" - }, - "pytz": { - "hashes": [ - "sha256:65ae0c8101309c45772196b21b74c46b2e5d11b6275c45d251b150d5da334555", - "sha256:c06425302f2cf668f1bba7a0a03f3c1d34d4ebeef2c72003da308b3947c7f749" - ], - "version": "==2018.4" - }, - "simplejson": { - "hashes": [ - "sha256:01a22d49ddd9a168b136f26cac87d9a335660ce07aa5c630b8e3607d6f4325e7", - "sha256:11d91b88cc1e9645c79f0f6fd2961684249af963e2bbff5a00061ed4bbf55379", - "sha256:36b0de42e3a8a51086c339cc803f6ac7a9d1d5254066d680956a195ca12cf0d8", - "sha256:38c2b563cd03363e7cb2bbba6c20ae4eaafd853a83954c8c8dd345ee391787bf", - "sha256:7df76ae6cac4a62ad5295f9a9131857077d84cb15fad2011acb2ce7410476009", - "sha256:7f53ab6a675594f237ce7372c1edf742a6acb158149ed3259c5fffc5b613dc94", - "sha256:86aa9fd492230c4b8b6814fcf089b36ffba2cec4d0635c8c642135b9067ebbd7", - "sha256:8d73b96a6ee7c81fd49dac7225e3846fd60b54a0b5b93a0aaea04c5a5d2e7bf2", - "sha256:a6939199c30b78ae31e62e6913f0e12cb71a4a5ad67c259e0a98688df027a5de" - ], - "version": "==3.11.1" - }, - "singer-python": { - "hashes": [ - "sha256:a671caba6211bf30c386f23267a4bcfcfe974fe15a322c058fcc8b0942ed186c" - ], - "version": "==5.6.0" - }, - "six": { - "hashes": [ - "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", - "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" - ], - "version": "==1.12.0" - }, - "slackclient": { - "hashes": [ - "sha256:82cef9b4aa4180be440598807903f618f07aebc0f61afe3135b81870edb1b1ad", - "sha256:dccece1609fae52d23af5e9b7418c7b74a3a1040048cf8ffe416521a6f29b8a4" - ], - "version": "==2.0.1" - }, - "tap-slack": { - "editable": true, - "path": "." 
- }, - "yarl": { - "hashes": [ - "sha256:024ecdc12bc02b321bc66b41327f930d1c2c543fa9a561b39861da9388ba7aa9", - "sha256:2f3010703295fbe1aec51023740871e64bb9664c789cba5a6bdf404e93f7568f", - "sha256:3890ab952d508523ef4881457c4099056546593fa05e93da84c7250516e632eb", - "sha256:3e2724eb9af5dc41648e5bb304fcf4891adc33258c6e14e2a7414ea32541e320", - "sha256:5badb97dd0abf26623a9982cd448ff12cb39b8e4c94032ccdedf22ce01a64842", - "sha256:73f447d11b530d860ca1e6b582f947688286ad16ca42256413083d13f260b7a0", - "sha256:7ab825726f2940c16d92aaec7d204cfc34ac26c0040da727cf8ba87255a33829", - "sha256:b25de84a8c20540531526dfbb0e2d2b648c13fd5dd126728c496d7c3fea33310", - "sha256:c6e341f5a6562af74ba55205dbd56d248daf1b5748ec48a0200ba227bb9e33f4", - "sha256:c9bb7c249c4432cd47e75af3864bc02d26c9594f49c82e2a28624417f0ae63b8", - "sha256:e060906c0c585565c718d1c3841747b61c5439af2211e185f6739a9412dfbde1" - ], - "version": "==1.3.0" - } - } -} diff --git a/README.md b/README.md index d32d0d6..9e3f3ea 100644 --- a/README.md +++ b/README.md @@ -17,10 +17,82 @@ deactivate ## Setup -The tap requires a [Slack API token](https://github.com/slackapi/python-slackclient/blob/master/documentation_v2/auth.md#tokens--authentication) to interact with your Slack workspace. You can obtain a token for a single workspace by creating a new [Slack App](https://api.slack.com/apps?new_app=1) in your workspace and assigning it the relevant [scopes](https://api.slack.com/docs/oauth-scopes). As of right now, the minimum required scopes for this App are: - - `channels:read` +The tap requires a [Slack API token](https://slack.dev/python-slack-sdk/installation/index.html#access-tokens) to interact with your Slack workspace. You can obtain a token for a single workspace by creating a new [Slack App](https://api.slack.com/apps?new_app=1) in your workspace and assigning it the relevant [scopes](https://api.slack.com/docs/oauth-scopes). 
As of right now, the minimum required scopes for this App are: - `channels:history` + - `channels:join` + - `channels:read` + - `files:read` + - `groups:read` + - `links:read` + - `reactions:read` + - `remote_files:read` + - `remote_files:write` + - `team:read` + - `usergroups:read` + - `users.profile:read` - `users:read` + - `users:read.email` + +Create a config file containing the API token and a start date, e.g.: + +```json +{ + "token":"xxxx", + "start_date":"2020-05-01T00:00:00" +} +``` + +### Private channels + +Optionally, you can also specify whether you want to sync private channels or not by adding the following to the config: + +```json + "private_channels":"false" +``` + +By default, private channels will be synced. + +### Joining Public Channels + +By adding the following to your config file you can have the tap auto-join all public channels in your organization. +```json +"join_public_channels":"true" +``` +If you do not elect to have the tap join all public channels you must invite the bot to all channels you wish to sync. + +### Specify channels to sync + +By default, the tap will sync all channels it has been invited to. However, you can limit the tap to sync only the channels you specify by adding their IDs to the config file, e.g.: + +```json +"channels":[ + "abc123", + "def345" + ] +``` + +Note this needs to be channel ID, not the name, as [recommended by the Slack API](https://api.slack.com/types/conversation#other_attributes). To get the ID for a channel, either use the Slack API or [find it in the URL](https://www.wikihow.com/Find-a-Channel-ID-on-Slack-on-PC-or-Mac). + +### Archived Channels + +You can control whether or not the tap will sync archived channels by including the following in the tap config: +```json + "exclude_archived": "false" +``` +It's important to note that a bot *CANNOT* join an archived channel, so unless the bot was added to the channel prior to it being archived it will not be able to sync the data from that channel.
+ +### Date Windowing + +Due to the potentially high volume of data when syncing certain streams (messages, files, threads) +this tap implements date windowing based on a configuration parameter. + +Including +```json +"date_window_size": "5" +``` + +will cause the tap to sync 5 days of data per request, for applicable streams. The default value if +one is not defined is to window requests for 7 days at a time. ## Usage @@ -35,36 +107,35 @@ In practice, it will look something like the following: ## Replication The Slack Conversations API does not natively store last updated timestamp information about a Conversation. In addition, Conversation records are mutable. Thus, `tap-slack` requires a `FULL_TABLE` replication strategy to ensure the most up-to-date data in replicated when replicating the following Streams: - `Conversations` - `ConversationMembersStream` - `ConversationHistoryStream` + - `Channels` (Conversations) + - `Channel Members` (Conversation Members) The `Users` stream _does_ store information about when a User record was last updated, so `tap-slack` uses that timestamp as a bookmark value and prefers using an `INCREMENTAL` replication strategy.
## Table Schemas -### Conversations +### Channels (Conversations) - - Table Name: `conversations` + - Table Name: `channels` - Description: - Primary Key Column: `id` - Replication Strategy: `FULL_TABLE` - API Documentation: [Link](https://api.slack.com/methods/conversations.list) -### Conversation Members +### Channel Members (Conversation Members) - - Table Name: `conversation_members` + - Table Name: `channel_members` - Description: - - Primary Key Column: N/A + - Primary Key Columns: `channel_id`, `user_id` - Replication Strategy: `FULL_TABLE` - API Documentation: [Link](https://api.slack.com/methods/conversations.members) -### Conversation History +### Messages (Conversation History) - - Table Name: `conversation_history` + - Table Name: `messages` - Description: - - Primary Key Column: N/A - - Replication Strategy: `FULL_TABLE` + - Primary Key Columns: `channel_id`, `ts` + - Replication Strategy: `INCREMENTAL` - API Documentation: [Link](https://api.slack.com/methods/conversations.history) ### Users @@ -74,3 +145,81 @@ The `Users` stream _does_ store information about when a User record was last up - Primary Key Column: `id` - Replication Strategy: `INCREMENTAL` - API Documentation: [Link](https://api.slack.com/methods/users.list) + +### Threads (Conversation Replies) + + - Table Name: `threads` + - Description: + - Primary Key Columns: `channel_id`, `ts`, `thread_ts` + - Replication Strategy: `FULL_TABLE` for each parent `message` + - API Documentation: [Link](https://api.slack.com/methods/conversations.replies) + +### User Groups + + - Table Name: `user_groups` + - Description: + - Primary Key Column: `id` + - Replication Strategy: `FULL_TABLE` + - API Documentation: [Link](https://api.slack.com/methods/usergroups.list) + +### Files + + - Table Name: `files` + - Description: + - Primary Key Column: `id` + - Replication Strategy: `INCREMENTAL` query filtered using date windows and lookback window + - API Documentation: 
[Link](https://api.slack.com/methods/files.list) + +### Remote Files + + - Table Name: `remote_files` + - Description: + - Primary Key Column: `id` + - Replication Strategy: `INCREMENTAL` query filtered using date windows and lookback window + - API Documentation: [Link](https://api.slack.com/methods/files.remote.list) + +## Testing the Tap + +While developing the Slack tap, the following utilities were run in accordance with Singer.io best practices: +Pylint to improve [code quality](https://github.com/singer-io/getting-started/blob/master/docs/BEST_PRACTICES.md#code-quality): +```bash +> pylint tap_slack -d missing-docstring -d logging-format-interpolation -d too-many-locals -d too-many-arguments +``` +Pylint test resulted in the following score: +```bash +Your code has been rated at 9.72/10 + +``` + +To [check the tap](https://github.com/singer-io/singer-tools#singer-check-tap) and verify working: +```bash +> tap-slack --config tap_config.json --catalog catalog.json | singer-check-tap > state.json +> tail -1 state.json > state.json.tmp && mv state.json.tmp state.json +``` +Check tap resulted in the following: + ```bash +Checking stdin for valid Singer-formatted data +The output is valid. +It contained 3657 messages for 9 streams. 
+ + 581 schema messages + 2393 record messages + 683 state messages + +Details by stream: ++-----------------+---------+---------+ +| stream | records | schemas | ++-----------------+---------+---------+ +| threads | 633 | 573 | +| user_groups | 1 | 1 | +| channel_members | 1049 | 1 | +| users | 22 | 1 | +| channels | 0 | 1 | +| remote_files | 3 | 1 | +| messages | 573 | 1 | +| teams | 1 | 1 | +| files | 111 | 1 | ++-----------------+---------+---------+ +``` +---- +Copyright © 2019 Stitch diff --git a/setup.py b/setup.py index 5915367..7c4d861 100644 --- a/setup.py +++ b/setup.py @@ -3,15 +3,22 @@ from setuptools import setup, find_packages setup(name='tap-slack', - version='0.0.1', + version='1.2.0', description='Singer.io tap for extracting data from the Slack Web API', author='dwallace@envoy.com', classifiers=['Programming Language :: Python :: 3 :: Only'], py_modules=['tap_slack'], install_requires=[ - 'singer-python==5.6.0', - 'slackclient==2.0.1' + 'singer-python==6.1.1', + 'slackclient==3.36.0', ], + extras_require={ + 'dev': [ + 'pylint', + 'ipdb', + 'nose' + ] + }, python_requires='>=3.6', entry_points=''' [console_scripts] diff --git a/tap_slack/__init__.py b/tap_slack/__init__.py index 9694f5a..30aec19 100644 --- a/tap_slack/__init__.py +++ b/tap_slack/__init__.py @@ -2,33 +2,80 @@ import json import singer from slack import WebClient -from .streams import AVAILABLE_STREAMS -from .catalog import generate_catalog + +from tap_slack.client import SlackClient +from tap_slack.streams import AVAILABLE_STREAMS +from tap_slack.catalog import generate_catalog LOGGER = singer.get_logger() -def discover(webclient): + +def auto_join(client, config): + + if "channels" in config: + conversations = config.get("channels") + + for conversation_id in conversations: + join_response = client.join_channel(channel=conversation_id) + if not join_response.get("ok", False): + error = join_response.get("error", "Unspecified Error") + LOGGER.error('Error joining {}, Reason: 
{}'.format(conversation_id, error)) + raise Exception('{}: {}'.format(conversation_id, error)) + else: + response = client.get_all_channels(types="public_channel", exclude_archived="true") + conversations = response.get("channels", []) + + for conversation in conversations: + conversation_id = conversation.get("id", None) + conversation_name = conversation.get("name", None) + join_response = client.join_channel(channel=conversation_id) + if not join_response.get("ok", False): + error = join_response.get("error", "Unspecified Error") + LOGGER.error('Error joining {}, Reason: {}'.format(conversation_name, error)) + raise Exception('{}: {}'.format(conversation_name, error)) + + +def discover(client): LOGGER.info('Starting Discovery..') - streams = [stream(webclient) for stream in AVAILABLE_STREAMS] + streams = [stream_class(client) for _, stream_class in AVAILABLE_STREAMS.items()] catalog = generate_catalog(streams) json.dump(catalog, sys.stdout, indent=2) LOGGER.info("Finished Discovery..") -def sync(webclient, config, catalog, state): - +def sync(client, config, catalog, state): LOGGER.info('Starting Sync..') - streams_to_sync = [] - for catalog_stream in catalog.streams: - for available_stream in AVAILABLE_STREAMS: - if available_stream.name == catalog_stream.stream: - to_sync = available_stream(webclient=webclient, config=config, catalog_stream=catalog_stream, state=state) - streams_to_sync.append(to_sync) - - for stream in streams_to_sync: - stream.write_schema() - stream.sync() - stream.write_state() + selected_streams = catalog.get_selected_streams(state) + + streams = [] + stream_keys = [] + for catalog_entry in selected_streams: + streams.append(catalog_entry) + stream_keys.append(catalog_entry.stream) + + if "threads" in stream_keys and "messages" not in stream_keys: + sync_messages = False + streams.append(catalog.get_stream("messages")) + elif "messages" in stream_keys: + sync_messages = True + else: + sync_messages = False + + for catalog_entry in 
streams: + if "threads" != catalog_entry.stream: + if "messages" == catalog_entry.stream: + stream = AVAILABLE_STREAMS[catalog_entry.stream](client=client, config=config, + catalog=catalog, + state=state, + write_to_singer=sync_messages) + else: + stream = AVAILABLE_STREAMS[catalog_entry.stream](client=client, config=config, + catalog=catalog, + state=state) + LOGGER.info('Syncing stream: %s', catalog_entry.stream) + stream.write_schema() + stream.sync(catalog_entry.metadata) + stream.write_state() LOGGER.info('Finished Sync..') @@ -37,11 +84,15 @@ def main(): args = singer.utils.parse_args(required_config_keys=['token', 'start_date']) webclient = WebClient(token=args.config.get("token")) + client = SlackClient(webclient=webclient, config=args.config) if args.discover: - discover(webclient=webclient) - else: - sync(webclient=webclient, config=args.config, catalog=args.catalog, state=args.state) + discover(client=client) + elif args.catalog: + if args.config.get("join_public_channels", "false") == "true": + auto_join(client=client, config=args.config) + sync(client=client, config=args.config, catalog=args.catalog, state=args.state) + if __name__ == '__main__': main() diff --git a/tap_slack/catalog.py b/tap_slack/catalog.py index 63db2c8..addc689 100644 --- a/tap_slack/catalog.py +++ b/tap_slack/catalog.py @@ -10,10 +10,11 @@ def generate_catalog(streams): 'stream': stream.name, 'tap_stream_id': stream.name, 'schema': schema, - 'metadata': singer.metadata.get_standard_metadata(schema=schema, - key_properties=stream.key_properties, - valid_replication_keys=stream.valid_replication_keys, - replication_method=stream.replication_method) + 'metadata': singer.metadata.get_standard_metadata( + schema=schema, + key_properties=stream.key_properties, + valid_replication_keys=stream.valid_replication_keys, + replication_method=stream.replication_method) } catalog['streams'].append(catalog_entry) diff --git a/tap_slack/client.py b/tap_slack/client.py new file mode 100644 index 
0000000..0662ff4 --- /dev/null +++ b/tap_slack/client.py @@ -0,0 +1,166 @@ +""" +Client wrapping the python Slack SDK. +Handles retry/backoff logic using the Singer framework annotations. +""" +import time + +import backoff +import singer +from slack.errors import SlackApiError + +LOGGER = singer.get_logger() + + +class SlackClient(object): + + def __init__(self, webclient, config): + self.webclient = webclient + self.config = config + + def wait(err=None): + if isinstance(err, SlackApiError): + if err.response.data.get("error", "") == "ratelimited": + delay = int(err.response.headers.get("Retry-After", "0")) + else: + raise err + time.sleep(delay) + + @backoff.on_exception(backoff.constant, + (SlackApiError, TimeoutError), + max_tries=2, + jitter=None, + giveup=wait, + interval=0) + def get_all_channels(self, types, exclude_archived): + + return self.webclient.conversations_list( + exclude_archived=exclude_archived, + types=types) + + @backoff.on_exception(backoff.constant, + (SlackApiError, TimeoutError), + max_tries=2, + jitter=None, + giveup=wait, + interval=0) + def get_channel(self, include_num_members, channel=None): + page = self.webclient.conversations_info(channel=channel, + include_num_members=include_num_members) + yield page.get('channel') + + @backoff.on_exception(backoff.constant, + (SlackApiError, TimeoutError), + max_tries=2, + jitter=None, + giveup=wait, + interval=0) + def get_channel_members(self, channel): + try: + members_cursor = self.webclient.conversations_members(channel=channel) + except SlackApiError as err: + if err.response.data.get("error", "") == "fetch_members_failed": + LOGGER.warning('Failed to fetch members for channel: {}' + .format(channel)) + members_cursor = [] + else: + raise err + + return members_cursor + + @backoff.on_exception(backoff.constant, + (SlackApiError, TimeoutError), + max_tries=2, + jitter=None, + giveup=wait, + interval=0) + def get_messages(self, channel, oldest, latest): + try: + messages = self.webclient \ 
+ .conversations_history(channel=channel, + oldest=oldest, + latest=latest) + except SlackApiError as err: + if err.response.data.get("error", "") == "not_in_channel": + # The tap config might dictate that archived channels should + # be processed, but if the slackbot was not made a member of + # those channels prior to archiving attempting to get the + # messages will throw an error + LOGGER.warning( + 'Attempted to get messages for channel: {} that ' + 'slackbot is not in'.format( + channel + )) + messages = None + else: + raise err + + return messages + + @backoff.on_exception(backoff.constant, + (SlackApiError, TimeoutError), + max_tries=2, + jitter=None, + giveup=wait, + interval=0) + def get_thread(self, channel, ts, inclusive, oldest, latest): + return self.webclient.conversations_replies(channel=channel, + ts=ts, + inclusive=inclusive, + oldest=oldest, + latest=latest) + + @backoff.on_exception(backoff.constant, + (SlackApiError, TimeoutError), + max_tries=2, + jitter=None, + giveup=wait, + interval=0) + def get_users(self, limit): + return self.webclient.users_list(limit=limit) + + @backoff.on_exception(backoff.constant, + (SlackApiError, TimeoutError), + max_tries=2, + jitter=None, + giveup=wait, + interval=0) + def get_user_groups(self, include_count, include_disabled, include_user): + return self.webclient.usergroups_list(include_count=include_count, + include_disabled=include_disabled, + include_user=include_user) + + @backoff.on_exception(backoff.constant, + (SlackApiError, TimeoutError), + max_tries=2, + jitter=None, + giveup=wait, + interval=0) + def get_teams(self): + return self.webclient.team_info() + + @backoff.on_exception(backoff.constant, + (SlackApiError, TimeoutError), + max_tries=2, + jitter=None, + giveup=wait, + interval=0) + def get_files(self, from_ts, to_ts): + return self.webclient.files_list(from_ts=from_ts, to_ts=to_ts) + + @backoff.on_exception(backoff.constant, + (SlackApiError, TimeoutError), + max_tries=2, + jitter=None, + 
giveup=wait, + interval=0) + def get_remote_files(self, from_ts, to_ts): + return self.webclient.files_remote_list(from_ts=from_ts, to_ts=to_ts) + + @backoff.on_exception(backoff.constant, + (SlackApiError, TimeoutError), + max_tries=2, + jitter=None, + giveup=wait, + interval=0) + def join_channel(self, channel): + return self.webclient.conversations_join(channel=channel) diff --git a/tap_slack/schemas/channel_members.json b/tap_slack/schemas/channel_members.json new file mode 100644 index 0000000..57bf513 --- /dev/null +++ b/tap_slack/schemas/channel_members.json @@ -0,0 +1,12 @@ +{ + "type": ["null", "object"], + "additionalProperties": false, + "properties": { + "channel_id": { + "type": ["null", "string"] + }, + "user_id": { + "type": ["null", "string"] + } + } +} diff --git a/tap_slack/schemas/conversations.json b/tap_slack/schemas/channels.json similarity index 83% rename from tap_slack/schemas/conversations.json rename to tap_slack/schemas/channels.json index c3f51e9..cf7c0e8 100644 --- a/tap_slack/schemas/conversations.json +++ b/tap_slack/schemas/channels.json @@ -49,6 +49,27 @@ "is_pending_ext_shared": { "type": ["null", "boolean"] }, + "shared_team_ids": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "pending_shared": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "pending_connected_team_ids": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "parent_conversation": { + "type": ["null", "string"] + }, "is_member": { "type": ["null", "boolean"] }, @@ -103,25 +124,8 @@ "type": ["null", "string"] } }, - "messages": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "type": { - "type": ["null", "string"] - }, - "user": { - "type": ["null", "string"] - }, - "text": { - "type": ["null", "string"] - }, - "ts": { - "type": ["null", "string"] - } - } - } + "channel_id": { + "type": ["null", "string"] } } } diff 
--git a/tap_slack/schemas/conversation_history.json b/tap_slack/schemas/conversation_history.json deleted file mode 100644 index 4c17838..0000000 --- a/tap_slack/schemas/conversation_history.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "type": ["null", "object"], - "additionalProperties": false, - "properties": { - "channel_id": { - "type": ["null", "string"] - }, - "client_msg_id": { - "type": ["null", "string"] - }, - "type": { - "type": ["null", "string"] - }, - "subtype": { - "type": ["null", "string"] - }, - "user": { - "type": ["null", "string"] - }, - "text": { - "type": ["null", "string"] - }, - "ts": { - "type": ["null", "string"] - }, - "edited": { - "type": ["null", "object"], - "properties": { - "user": { - "type": ["null", "string"] - }, - "ts": { - "type": ["null", "string"] - } - } - }, - "bot_id": { - "type": ["null", "string"] - }, - "reactions": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "name": { - "type": ["null", "string"] - }, - "users": { - "type": ["null", "array"], - "items": { - "type": ["null", "string"] - } - }, - "count": { - "type": ["null", "integer"] - } - } - } - }, - "inviter": { - "type": ["null", "string"] - }, - "thread_ts": { - "type": ["null", "string"] - }, - "reply_count": { - "type": ["null", "integer"] - }, - "reply_users_count": { - "type": ["null", "integer"] - }, - "latest_reply": { - "type": ["null", "string"] - }, - "reply_users": { - "type": ["null", "array"], - "items": { - "type": ["null", "string"] - } - }, - "replies": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "user": { - "type": ["null", "string"] - }, - "ts": { - "type": ["null", "string"] - } - } - } - } - } -} diff --git a/tap_slack/schemas/conversation_members.json b/tap_slack/schemas/conversation_members.json deleted file mode 100644 index f8b8e7d..0000000 --- a/tap_slack/schemas/conversation_members.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "type": ["null", 
"object"], - "additionalProperties": false, - "properties": { - "channel_id": { - "type": ["null", "string"] - }, - "user_id": { - "type": ["null", "string"] - } - } -} diff --git a/tap_slack/schemas/files.json b/tap_slack/schemas/files.json new file mode 100644 index 0000000..0cd7b9d --- /dev/null +++ b/tap_slack/schemas/files.json @@ -0,0 +1,461 @@ +{ + "additionalProperties": false, + "properties": { + "channels": { + "type": [ + "null", + "array" + ], + "items": { + "type": [ + "null", + "string" + ] + } + }, + "comments_count": { + "type": [ + "null", + "integer" + ] + }, + "created": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "date_delete": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "display_as_bot": { + "type": [ + "null", + "boolean" + ] + }, + "editable": { + "type": [ + "null", + "boolean" + ] + }, + "editor": { + "type": [ + "null", + "string" + ] + }, + "external_id": { + "type": [ + "null", + "string" + ] + }, + "external_type": { + "type": [ + "null", + "string" + ] + }, + "external_url": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "filetype": { + "type": [ + "null", + "string" + ] + }, + "groups": { + "type": [ + "null", + "array" + ], + "items": { + "type": [ + "null", + "string" + ] + } + }, + "has_rich_preview": { + "type": [ + "null", + "boolean" + ] + }, + "id": { + "type": [ + "null", + "string" + ] + }, + "image_exif_rotation": { + "type": [ + "null", + "integer" + ] + }, + "ims": { + "type": [ + "null", + "array" + ], + "items": { + "type": [ + "null", + "string" + ] + } + }, + "is_external": { + "type": [ + "null", + "boolean" + ] + }, + "is_public": { + "type": [ + "null", + "boolean" + ] + }, + "is_starred": { + "type": [ + "null", + "boolean" + ] + }, + "is_tombstoned": { + "type": [ + "null", + "boolean" + ] + }, + "last_editor": { + "type": [ + "null", + "string" + ] + }, + "mimetype": { + "type": [ + "null", + "string" + ] + }, + "mode": { + "type": [ + 
"null", + "string" + ] + }, + "name": { + "type": [ + "null", + "string" + ] + }, + "non_owner_editable": { + "type": [ + "null", + "boolean" + ] + }, + "num_stars": { + "type": [ + "null", + "integer" + ] + }, + "original_h": { + "type": [ + "null", + "integer" + ] + }, + "original_w": { + "type": [ + "null", + "integer" + ] + }, + "permalink": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "permalink_public": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "pinned_info": { + "type": [ + "null", + "object" + ], + "additionalProperties": true, + "properties": {} + }, + "pinned_to": { + "items": { + "type": [ + "null", + "string" + ] + }, + "type": [ + "null", + "array" + ] + }, + "pretty_type": { + "type": [ + "null", + "string" + ] + }, + "preview": { + "type": [ + "null", + "string" + ] + }, + "public_url_shared": { + "type": [ + "null", + "boolean" + ] + }, + "size": { + "type": [ + "null", + "integer" + ] + }, + "source_team": { + "type": [ + "null", + "string" + ] + }, + "state": { + "type": [ + "null", + "string" + ] + }, + "thumb_1024": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_1024_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_1024_w": { + "type": [ + "null", + "integer" + ] + }, + "thumb_160": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_360": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_360_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_360_w": { + "type": [ + "null", + "integer" + ] + }, + "thumb_480": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_480_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_480_w": { + "type": [ + "null", + "integer" + ] + }, + "thumb_64": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_720": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_720_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_720_w": 
{ + "type": [ + "null", + "integer" + ] + }, + "thumb_80": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_800": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_800_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_800_w": { + "type": [ + "null", + "integer" + ] + }, + "thumb_960": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_960_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_960_w": { + "type": [ + "null", + "integer" + ] + }, + "thumb_tiny": { + "type": [ + "null", + "string" + ] + }, + "timestamp": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "title": { + "type": [ + "null", + "string" + ] + }, + "updated": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "url_private": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "url_private_download": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "user": { + "type": [ + "null", + "string" + ] + }, + "user_team": { + "type": [ + "null", + "string" + ] + }, + "username": { + "type": [ + "null", + "string" + ] + } + }, + "type": [ + "null", + "object" + ] +} \ No newline at end of file diff --git a/tap_slack/schemas/messages.json b/tap_slack/schemas/messages.json new file mode 100644 index 0000000..1b99bfe --- /dev/null +++ b/tap_slack/schemas/messages.json @@ -0,0 +1,358 @@ +{ + "additionalProperties": false, + "properties": { + "channel_id": { + "type": ["null", "string"] + }, + "blocks": { + "items": { + "additionalProperties": true, + "properties": { + "type": { + "type": [ + "null", + "string" + ] + } + }, + "type": [ + "null", + "object" + ] + }, + "type": [ + "null", + "array" + ] + }, + "bot_id": { + "type": [ + "null", + "string" + ] + }, + "bot_profile": { + "additionalProperties": false, + "properties": { + "app_id": { + "type": [ + "null", + "string" + ] + }, + "deleted": { + "type": [ + "null", + "boolean" + ] + }, + "id": { + "type": [ + 
"null", + "string" + ] + }, + "name": { + "type": [ + "null", + "string" + ] + }, + "team_id": { + "type": [ + "null", + "string" + ] + }, + "updated": { + "type": [ + "null", + "string" + ], + "format": "date-time" + } + }, + "type": [ + "null", + "object" + ] + }, + "client_msg_id": { + "type": [ + "null", + "string" + ] + }, + "display_as_bot": { + "type": [ + "null", + "boolean" + ] + }, + "file_id": { + "type": [ + "null", + "string" + ] + }, + "file_ids": { + "items": { + "type": [ + "null", + "string" + ] + }, + "type": [ + "null", + "array" + ] + }, + "icons": { + "additionalProperties": false, + "properties": { + "emoji": { + "type": [ + "null", + "string" + ] + } + }, + "type": [ + "null", + "object" + ] + }, + "inviter": { + "type": [ + "null", + "string" + ] + }, + "is_delayed_message": { + "type": [ + "null", + "boolean" + ] + }, + "is_intro": { + "type": [ + "null", + "boolean" + ] + }, + "is_starred": { + "type": [ + "null", + "boolean" + ] + }, + "last_read": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "latest_reply": { + "type": [ + "null", + "string" + ] + }, + "name": { + "type": [ + "null", + "string" + ] + }, + "old_name": { + "type": [ + "null", + "string" + ] + }, + "parent_user_id": { + "type": [ + "null", + "string" + ] + }, + "permalink": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "pinned_to": { + "items": { + "type": [ + "null", + "string" + ] + }, + "type": [ + "null", + "array" + ] + }, + "purpose": { + "type": [ + "null", + "string" + ] + }, + "reactions": { + "items": { + "additionalProperties": true, + "properties": { + "count": { + "type": [ + "null", + "integer" + ] + }, + "name": { + "type": [ + "null", + "string" + ] + }, + "users": { + "items": { + "type": [ + "null", + "string" + ] + }, + "type": [ + "null", + "array" + ] + } + }, + "type": [ + "null", + "object" + ] + }, + "type": [ + "null", + "array" + ] + }, + "reply_count": { + "type": [ + "null", + "integer" + ] + }, + 
"reply_users": { + "items": { + "type": [ + "null", + "string" + ] + }, + "type": [ + "null", + "array" + ] + }, + "reply_users_count": { + "type": [ + "null", + "integer" + ] + }, + "source_team": { + "type": [ + "null", + "string" + ] + }, + "subscribed": { + "type": [ + "null", + "boolean" + ] + }, + "subtype": { + "type": [ + "null", + "string" + ] + }, + "team": { + "type": [ + "null", + "string" + ] + }, + "text": { + "type": [ + "null", + "string" + ] + }, + "thread_ts": { + "type": [ + "null", + "string" + ] + }, + "topic": { + "type": [ + "null", + "string" + ] + }, + "ts": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "type": { + "type": [ + "null", + "string" + ] + }, + "unread_count": { + "type": [ + "null", + "integer" + ] + }, + "upload": { + "type": [ + "null", + "boolean" + ] + }, + "user": { + "type": [ + "null", + "string" + ] + }, + "user_team": { + "type": [ + "null", + "string" + ] + }, + "username": { + "type": [ + "null", + "string" + ] + } + }, + "type": [ + "null", + "object" + ] +} \ No newline at end of file diff --git a/tap_slack/schemas/remote_files.json b/tap_slack/schemas/remote_files.json new file mode 100644 index 0000000..0cd7b9d --- /dev/null +++ b/tap_slack/schemas/remote_files.json @@ -0,0 +1,461 @@ +{ + "additionalProperties": false, + "properties": { + "channels": { + "type": [ + "null", + "array" + ], + "items": { + "type": [ + "null", + "string" + ] + } + }, + "comments_count": { + "type": [ + "null", + "integer" + ] + }, + "created": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "date_delete": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "display_as_bot": { + "type": [ + "null", + "boolean" + ] + }, + "editable": { + "type": [ + "null", + "boolean" + ] + }, + "editor": { + "type": [ + "null", + "string" + ] + }, + "external_id": { + "type": [ + "null", + "string" + ] + }, + "external_type": { + "type": [ + "null", + "string" + ] + }, + 
"external_url": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "filetype": { + "type": [ + "null", + "string" + ] + }, + "groups": { + "type": [ + "null", + "array" + ], + "items": { + "type": [ + "null", + "string" + ] + } + }, + "has_rich_preview": { + "type": [ + "null", + "boolean" + ] + }, + "id": { + "type": [ + "null", + "string" + ] + }, + "image_exif_rotation": { + "type": [ + "null", + "integer" + ] + }, + "ims": { + "type": [ + "null", + "array" + ], + "items": { + "type": [ + "null", + "string" + ] + } + }, + "is_external": { + "type": [ + "null", + "boolean" + ] + }, + "is_public": { + "type": [ + "null", + "boolean" + ] + }, + "is_starred": { + "type": [ + "null", + "boolean" + ] + }, + "is_tombstoned": { + "type": [ + "null", + "boolean" + ] + }, + "last_editor": { + "type": [ + "null", + "string" + ] + }, + "mimetype": { + "type": [ + "null", + "string" + ] + }, + "mode": { + "type": [ + "null", + "string" + ] + }, + "name": { + "type": [ + "null", + "string" + ] + }, + "non_owner_editable": { + "type": [ + "null", + "boolean" + ] + }, + "num_stars": { + "type": [ + "null", + "integer" + ] + }, + "original_h": { + "type": [ + "null", + "integer" + ] + }, + "original_w": { + "type": [ + "null", + "integer" + ] + }, + "permalink": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "permalink_public": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "pinned_info": { + "type": [ + "null", + "object" + ], + "additionalProperties": true, + "properties": {} + }, + "pinned_to": { + "items": { + "type": [ + "null", + "string" + ] + }, + "type": [ + "null", + "array" + ] + }, + "pretty_type": { + "type": [ + "null", + "string" + ] + }, + "preview": { + "type": [ + "null", + "string" + ] + }, + "public_url_shared": { + "type": [ + "null", + "boolean" + ] + }, + "size": { + "type": [ + "null", + "integer" + ] + }, + "source_team": { + "type": [ + "null", + "string" + ] + }, + "state": { + "type": [ + "null", + 
"string" + ] + }, + "thumb_1024": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_1024_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_1024_w": { + "type": [ + "null", + "integer" + ] + }, + "thumb_160": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_360": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_360_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_360_w": { + "type": [ + "null", + "integer" + ] + }, + "thumb_480": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_480_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_480_w": { + "type": [ + "null", + "integer" + ] + }, + "thumb_64": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_720": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_720_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_720_w": { + "type": [ + "null", + "integer" + ] + }, + "thumb_80": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_800": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_800_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_800_w": { + "type": [ + "null", + "integer" + ] + }, + "thumb_960": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "thumb_960_h": { + "type": [ + "null", + "integer" + ] + }, + "thumb_960_w": { + "type": [ + "null", + "integer" + ] + }, + "thumb_tiny": { + "type": [ + "null", + "string" + ] + }, + "timestamp": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "title": { + "type": [ + "null", + "string" + ] + }, + "updated": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "url_private": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "url_private_download": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "user": { + "type": [ + "null", + "string" + ] + }, + "user_team": { + "type": [ + 
"null", + "string" + ] + }, + "username": { + "type": [ + "null", + "string" + ] + } + }, + "type": [ + "null", + "object" + ] +} \ No newline at end of file diff --git a/tap_slack/schemas/teams.json b/tap_slack/schemas/teams.json new file mode 100644 index 0000000..095cd20 --- /dev/null +++ b/tap_slack/schemas/teams.json @@ -0,0 +1,202 @@ +{ + "additionalProperties": false, + "properties": { + "archived": { + "type": [ + "null", + "boolean" + ] + }, + "avatar_base_url": { + "format": "uri", + "type": [ + "null", + "string" + ] + }, + "created": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "date_create": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "deleted": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "description": { + "type": [ + "null", + "string" + ] + }, + "discoverable": { + "type": [ + "null", + "string" + ] + }, + "domain": { + "type": [ + "null", + "string" + ] + }, + "email_domain": { + "type": [ + "null", + "string" + ] + }, + "enterprise_id": { + "type": [ + "null", + "string" + ] + }, + "enterprise_name": { + "type": [ + "null", + "string" + ] + }, + "has_compliance_export": { + "type": [ + "null", + "boolean" + ] + }, + "icon": { + "properties": { + "image_102": { + "type": [ + "null", + "string" + ] + }, + "image_132": { + "type": [ + "null", + "string" + ] + }, + "image_230": { + "type": [ + "null", + "string" + ] + }, + "image_34": { + "type": [ + "null", + "string" + ] + }, + "image_44": { + "type": [ + "null", + "string" + ] + }, + "image_68": { + "type": [ + "null", + "string" + ] + }, + "image_88": { + "type": [ + "null", + "string" + ] + }, + "image_default": { + "type": [ + "null", + "boolean" + ] + } + }, + "type": [ + "null", + "object" + ] + }, + "id": { + "type": [ + "null", + "string" + ] + }, + "is_assigned": { + "type": [ + "null", + "boolean" + ] + }, + "is_enterprise": { + "type": [ + "null", + "integer" + ] + }, + "messages_count": { + "type": [ + 
"null", + "integer" + ] + }, + "msg_edit_window_mins": { + "type": [ + "null", + "integer" + ] + }, + "name": { + "type": [ + "null", + "string" + ] + }, + "over_integrations_limit": { + "type": [ + "null", + "boolean" + ] + }, + "over_storage_limit": { + "type": [ + "null", + "boolean" + ] + }, + "plan": { + "enum": [ + "", + "std", + "plus", + "compliance", + "enterprise" + ], + "type": [ + "null", + "string" + ] + } + }, + "type": [ + "null", + "object" + ] +} \ No newline at end of file diff --git a/tap_slack/schemas/threads.json b/tap_slack/schemas/threads.json new file mode 100644 index 0000000..c15a08d --- /dev/null +++ b/tap_slack/schemas/threads.json @@ -0,0 +1,109 @@ +{ + "type": [ + "null", + "object" + ], + "additionalProperties": false, + "properties": { + "channel_id": { + "type": ["null", "string"] + }, + "client_msg_id": { + "type": [ + "null", + "string" + ] + }, + "type": { + "type": [ + "null", + "string" + ] + }, + "text": { + "type": [ + "null", + "string" + ] + }, + "user": { + "type": [ + "null", + "string" + ] + }, + "ts": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "team": { + "type": [ + "null", + "string" + ] + }, + "blocks": { + "type": [ + "null", + "array" + ], + "items": { + "type": [ + "null", + "object" + ], + "additionalProperties": true, + "properties": { + "type": { + "type": [ + "null", + "string" + ] + } + } + } + }, + "thread_ts": { + "type": [ + "null", + "string" + ] + }, + "reply_count": { + "type": [ + "null", + "integer" + ] + }, + "reply_users_count": { + "type": [ + "null", + "number" + ] + }, + "latest_reply": { + "type": [ + "null", + "string" + ] + }, + "reply_users": { + "type": [ + "null", + "array" + ], + "items": { + "type": "string" + } + }, + "subscribed": { + "type": [ + "null", + "boolean" + ] + } + } +} \ No newline at end of file diff --git a/tap_slack/schemas/user_groups.json b/tap_slack/schemas/user_groups.json new file mode 100644 index 0000000..839428f --- /dev/null +++ 
b/tap_slack/schemas/user_groups.json @@ -0,0 +1,188 @@ +{ + "type": [ + "null", + "object" + ], + "additionalProperties": false, + "properties": { + "created": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "creator": { + "type": [ + "null", + "string" + ] + }, + "id": { + "type": [ + "null", + "string" + ] + }, + "is_archived": { + "type": [ + "null", + "boolean" + ] + }, + "is_deleted": { + "type": [ + "null", + "boolean" + ] + }, + "is_group": { + "type": [ + "null", + "boolean" + ] + }, + "is_moved": { + "type": [ + "null", + "integer" + ] + }, + "is_mpim": { + "type": [ + "null", + "boolean" + ] + }, + "is_open": { + "type": [ + "null", + "boolean" + ] + }, + "is_pending_ext_shared": { + "type": [ + "null", + "boolean" + ] + }, + "is_read_only": { + "type": [ + "null", + "boolean" + ] + }, + "is_thread_only": { + "type": [ + "null", + "boolean" + ] + }, + "last_read": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "members": { + "items": { + "type": [ + "null", + "string" + ] + }, + "type": [ + "null", + "array" + ] + }, + "name": { + "type": [ + "null", + "string" + ] + }, + "name_normalized": { + "type": [ + "null", + "string" + ] + }, + "num_members": { + "type": [ + "null", + "integer" + ] + }, + "parent_group": { + "type": [ + "null", + "string" + ] + }, + "priority": { + "type": "number" + }, + "purpose": { + "additionalProperties": false, + "properties": { + "creator": { + "type": [ + "null", + "string" + ] + }, + "last_set": { + "type": [ + "null", + "integer" + ] + }, + "value": { + "type": [ + "null", + "string" + ] + } + }, + "type": "object" + }, + "topic": { + "additionalProperties": false, + "properties": { + "creator": { + "type": [ + "null", + "string" + ] + }, + "last_set": { + "type": [ + "null", + "integer" + ] + }, + "value": { + "type": [ + "null", + "string" + ] + } + }, + "type": "object" + }, + "unread_count": { + "type": [ + "null", + "integer" + ] + }, + "unread_count_display": { + 
import os
from datetime import timedelta, datetime
import pytz
import singer
from singer import metadata, utils
from singer.utils import strptime_to_utc
from tap_slack.transform import transform_json

LOGGER = singer.get_logger()
# Bookmarks are persisted as naive timestamps in this format.
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
utc = pytz.UTC


class SlackStream:
    """Base class for all Slack streams: schema/state plumbing and channel iteration."""

    def __init__(self, client, config=None, catalog=None, state=None, write_to_singer=True):
        # client: project Slack API wrapper (get_all_channels, get_messages, ...).
        # write_to_singer: False lets tests run sync() without emitting records.
        self.client = client
        self.config = config
        self.catalog = catalog
        self.state = state
        self.write_to_singer = write_to_singer
        # Size (in days) of each request window for date-windowed streams.
        if config:
            self.date_window_size = int(config.get('date_window_size', '7'))
        else:
            self.date_window_size = 7

    @staticmethod
    def get_abs_path(path):
        """Resolve *path* relative to this package's directory."""
        return os.path.join(os.path.dirname(os.path.realpath(__file__)), path)

    def load_schema(self):
        """Load schemas/<stream name>.json for the concrete stream subclass."""
        schema_path = self.get_abs_path('schemas')
        # pylint: disable=no-member
        return singer.utils.load_json('{}/{}.json'.format(schema_path, self.name))

    def write_schema(self):
        """Emit the SCHEMA message for this stream."""
        schema = self.load_schema()
        # pylint: disable=no-member
        return singer.write_schema(stream_name=self.name, schema=schema,
                                   key_properties=self.key_properties)

    def write_state(self):
        """Emit the current STATE message."""
        return singer.write_state(self.state)

    def update_bookmarks(self, stream, value):
        """Set the bookmark for *stream* and immediately persist state."""
        if 'bookmarks' not in self.state:
            self.state['bookmarks'] = {}
        self.state['bookmarks'][stream] = value
        LOGGER.info('Stream: {} - Write state, bookmark value: {}'.format(stream, value))
        self.write_state()

    def get_bookmark(self, stream, default):
        """Return the saved bookmark for *stream*, or *default* on first sync."""
        # default only populated on initial sync
        if (self.state is None) or ('bookmarks' not in self.state):
            return default
        return self.state.get('bookmarks', {}).get(stream, default)

    def get_absolute_date_range(self, start_date):
        """
        Based on parameters in tap configuration, returns the absolute date range for the sync,
        including the lookback window if applicable.
        :param start_date: The start date in the config, or the last synced date from the bookmark
        :return: the start date and the end date that make up the date range
        """
        lookback_window = self.config.get('lookback_window', '14')
        start_dttm = strptime_to_utc(start_date)
        attribution_window = int(lookback_window)
        now_dttm = utils.now()
        delta_days = (now_dttm - start_dttm).days
        # If the requested start is within the lookback window, push the start back
        # so recently-updated records are re-synced ("attribution window").
        if delta_days < attribution_window:
            start_ddtm = now_dttm - timedelta(days=attribution_window)
        else:
            start_ddtm = start_dttm

        return start_ddtm, now_dttm

    def _all_channels(self):
        """Yield every channel visible to the token, honoring privacy/archive config flags."""
        types = "public_channel"
        enable_private_channels = self.config.get("private_channels", "false")
        exclude_archived = self.config.get("exclude_archived", "false")
        # NOTE: config flags are string "true"/"false", not booleans.
        if enable_private_channels == "true":
            types = "public_channel,private_channel"

        conversations_list = self.client.get_all_channels(types=types,
                                                          exclude_archived=exclude_archived)

        for page in conversations_list:
            channels = page.get('channels')
            for channel in channels:
                yield channel

    def _specified_channels(self):
        """Yield only the channels explicitly listed in config["channels"]."""
        for channel_id in self.config.get("channels"):
            yield self.client.get_channel(include_num_members=0, channel=channel_id)

    def channels(self):
        """Yield the channels to sync: the configured subset, or all of them."""
        if "channels" in self.config:
            yield from self._specified_channels()
        else:
            yield from self._all_channels()


# ConversationsStream = Slack Channels
class ConversationsStream(SlackStream):
    """Full-table sync of Slack channels (public, and private if configured)."""
    name = 'channels'
    key_properties = ['id']
    replication_method = 'FULL_TABLE'
    forced_replication_method = 'FULL_TABLE'
    valid_replication_keys = []
    date_fields = ['created']

    def sync(self, mdata):
        schema = self.load_schema()
        # pylint: disable=unused-variable
        with singer.metrics.job_timer(job_type='list_conversations') as timer:
            with singer.metrics.record_counter(endpoint=self.name) as counter:
                channels = self.channels()
                for channel in channels:
                    # transform_json expects a list; wrap the single record.
                    transformed_channel = transform_json(stream=self.name, data=[channel],
                                                         date_fields=self.date_fields)
                    with singer.Transformer(
                            integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                            as transformer:
                        transformed_record = transformer.transform(data=transformed_channel[0],
                                                                   schema=schema,
                                                                   metadata=metadata.to_map(mdata))
                        if self.write_to_singer:
                            singer.write_record(stream_name=self.name,
                                                time_extracted=singer.utils.now(),
                                                record=transformed_record)
                            counter.increment()
# ConversationsMembersStream = Slack Channel Members (Users)
class ConversationMembersStream(SlackStream):
    """Full-table sync emitting one record per (channel, member) pair."""
    name = 'channel_members'
    key_properties = ['channel_id', 'user_id']
    replication_method = 'FULL_TABLE'
    forced_replication_method = 'FULL_TABLE'
    valid_replication_keys = []
    date_fields = []

    def sync(self, mdata):
        schema = self.load_schema()
        mapped_metadata = metadata.to_map(mdata)
        # pylint: disable=unused-variable
        with singer.metrics.job_timer(job_type='list_conversation_members') as timer:
            with singer.metrics.record_counter(endpoint=self.name) as counter:
                for conversation in self.channels():
                    conversation_id = conversation.get('id')

                    # The client paginates membership; walk every page.
                    for member_page in self.client.get_channel_members(conversation_id):
                        for user_id in member_page.get('members'):
                            row = {'channel_id': conversation_id, 'user_id': user_id}
                            with singer.Transformer() as transformer:
                                record = transformer.transform(data=row, schema=schema,
                                                               metadata=mapped_metadata)
                                if self.write_to_singer:
                                    singer.write_record(stream_name=self.name,
                                                        time_extracted=singer.utils.now(),
                                                        record=record)
                                    counter.increment()
# ConversationsHistoryStream = Slack Messages (not including reply threads)
class ConversationHistoryStream(SlackStream):
    """Incremental, per-channel, date-windowed sync of channel messages."""
    name = 'messages'
    key_properties = ['channel_id', 'ts']
    replication_method = 'INCREMENTAL'
    forced_replication_method = 'INCREMENTAL'
    valid_replication_keys = ['channel_id', 'ts']
    date_fields = ['ts']

    # pylint: disable=arguments-differ
    def update_bookmarks(self, channel_id, value):
        """
        For the messages stream, bookmarks are written per-channel.
        :param channel_id: The channel to bookmark
        :param value: The earliest message date in the window.
        :return: None
        """
        if 'bookmarks' not in self.state:
            self.state['bookmarks'] = {}
        if self.name not in self.state['bookmarks']:
            self.state['bookmarks'][self.name] = {}
        self.state['bookmarks'][self.name][channel_id] = value
        self.write_state()

    # pylint: disable=arguments-differ
    def get_bookmark(self, channel_id, default):
        """
        Gets the channel's bookmark value, if present, otherwise a default value passed in.
        :param channel_id: The channel to retrieve the bookmark for.
        :param default: The default value to return if no bookmark
        :return: The bookmark or default value passed in
        """
        # default only populated on initial sync
        if (self.state is None) or ('bookmarks' not in self.state):
            return default
        return self.state.get('bookmarks', {}).get(self.name, {channel_id: default}) \
            .get(channel_id, default)

    # pylint: disable=too-many-branches,too-many-statements
    def sync(self, mdata):
        schema = self.load_schema()
        threads_stream = None
        threads_mdata = None

        # If threads are also being synced we'll need to do that for each message
        for catalog_entry in self.catalog.get_selected_streams(self.state):
            if catalog_entry.stream == 'threads':
                threads_mdata = catalog_entry.metadata
                threads_stream = ThreadsStream(client=self.client, config=self.config,
                                               catalog=self.catalog, state=self.state)
        # pylint: disable=unused-variable
        with singer.metrics.job_timer(job_type='list_conversation_history') as timer:
            with singer.metrics.record_counter(endpoint=self.name) as counter:
                for channel in self.channels():
                    channel_id = channel.get('id')

                    # Resume from the per-channel bookmark, widened by the lookback window.
                    bookmark_date = self.get_bookmark(channel_id, self.config.get('start_date'))
                    start, end = self.get_absolute_date_range(bookmark_date)

                    # Window the requests based on the tap configuration
                    date_window_start = start
                    date_window_end = start + timedelta(days=int(self.date_window_size))
                    min_bookmark = start
                    max_bookmark = start

                    while date_window_start < date_window_end:

                        messages = self.client \
                            .get_messages(channel=channel_id,
                                          oldest=int(date_window_start.timestamp()),
                                          latest=int(date_window_end.timestamp()))

                        if messages:
                            for page in messages:
                                # NOTE(review): 'messages' is rebound from the page cursor to
                                # the page's record list here — confusing but intentional-looking.
                                messages = page.get('messages')
                                transformed_messages = transform_json(stream=self.name,
                                                                      data=messages,
                                                                      date_fields=self.date_fields,
                                                                      channel_id=channel_id)
                                for message in transformed_messages:
                                    data = {'channel_id': channel_id}
                                    data = {**data, **message}

                                    # If threads are being synced then the message data for the
                                    # message the threaded replies are in response to will be
                                    # synced to the messages table as well as the threads table
                                    if threads_stream:
                                        # If threads is selected we need to sync all the
                                        # threaded replies to this message
                                        # NOTE(review): write_schema() is re-emitted per message;
                                        # presumably once per sync would suffice — confirm.
                                        threads_stream.write_schema()
                                        threads_stream.sync(mdata=threads_mdata,
                                                            channel_id=channel_id,
                                                            ts=data.get('thread_ts'))
                                        threads_stream.write_state()
                                    with singer.Transformer(
                                            integer_datetime_fmt=
                                            "unix-seconds-integer-datetime-parsing"
                                    ) as transformer:
                                        transformed_record = transformer.transform(
                                            data=data,
                                            schema=schema,
                                            metadata=metadata.to_map(mdata)
                                        )
                                        # Whole-seconds part of the message timestamp.
                                        # NOTE(review): int('') raises ValueError if a record
                                        # has no 'thread_ts' — transform_json appears to always
                                        # set it for messages, but verify.
                                        record_timestamp = \
                                            transformed_record.get('thread_ts', '').partition('.')[
                                                0]
                                        record_timestamp_int = int(record_timestamp)
                                        if record_timestamp_int >= start.timestamp():
                                            if self.write_to_singer:
                                                singer.write_record(stream_name=self.name,
                                                                    time_extracted=singer.utils.now(),
                                                                    record=transformed_record)
                                                counter.increment()

                                        if datetime.utcfromtimestamp(
                                                record_timestamp_int).replace(
                                                    tzinfo=utc) > max_bookmark.replace(tzinfo=utc):
                                            # Records are sorted by most recent first, so this
                                            # should only fire once every sync, per channel
                                            max_bookmark = datetime.fromtimestamp(
                                                record_timestamp_int)
                                        elif datetime.utcfromtimestamp(
                                                record_timestamp_int).replace(
                                                    tzinfo=utc) < min_bookmark:
                                            # The min bookmark tracks how far back we've synced
                                            # during the sync, since the records are ordered
                                            # newest -> oldest
                                            min_bookmark = datetime.fromtimestamp(
                                                record_timestamp_int)
                            self.update_bookmarks(channel_id,
                                                  min_bookmark.strftime(DATETIME_FORMAT))
                            # Update the date window
                            date_window_start = date_window_end
                            date_window_end = date_window_start + timedelta(
                                days=self.date_window_size)
                            if date_window_end > end:
                                date_window_end = end
                        else:
                            # NOTE(review): an empty window sets start == end and exits the
                            # while-loop, skipping all remaining windows for this channel —
                            # looks like it should instead advance to the next window; confirm.
                            date_window_start = date_window_end
# UsersStream = Slack Users
class UsersStream(SlackStream):
    """Workspace users, synced incrementally on the 'updated' field."""
    name = 'users'
    key_properties = ['id']
    replication_method = 'INCREMENTAL'
    replication_key = 'updated'
    valid_replication_keys = ['updated_at']
    date_fields = ['updated']

    def sync(self, mdata):
        schema = self.load_schema()
        mapped_metadata = metadata.to_map(mdata)
        bookmark = singer.get_bookmark(state=self.state, tap_stream_id=self.name,
                                       key=self.replication_key)
        if bookmark is None:
            bookmark = self.config.get('start_date')
        # Highest 'updated' value observed during this run.
        new_bookmark = bookmark
        # pylint: disable=unused-variable
        with singer.metrics.job_timer(job_type='list_users') as timer:
            with singer.metrics.record_counter(endpoint=self.name) as counter:
                for page in self.client.get_users(limit=100):
                    members = transform_json(stream=self.name,
                                             data=page.get('members'),
                                             date_fields=self.date_fields)
                    for member in members:
                        with singer.Transformer(
                                integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                                as transformer:
                            record = transformer.transform(data=member, schema=schema,
                                                           metadata=mapped_metadata)
                            updated = record.get('updated')
                            new_bookmark = max(new_bookmark, updated)
                            # Emit only records strictly newer than the saved bookmark.
                            if updated > bookmark:
                                if self.write_to_singer:
                                    singer.write_record(stream_name=self.name,
                                                        time_extracted=singer.utils.now(),
                                                        record=record)
                                    counter.increment()

        self.state = singer.write_bookmark(state=self.state, tap_stream_id=self.name,
                                           key=self.replication_key, val=new_bookmark)
# ThreadsStream = Slack Message Threads (Replies to Slack message)
# The threads stream does a "FULL TABLE" sync using a date window, based on the parent message.
# This means that a thread is only synced if the message it is started on fits within the overall
# sync window. Additionally threaded messages retrieved from the API are only included if they are
# within the overall sync window.
class ThreadsStream(SlackStream):
    """Replies to one parent message; driven per-message by the messages stream."""
    name = 'threads'
    key_properties = ['channel_id', 'ts', 'thread_ts']
    replication_method = 'FULL_TABLE'
    replication_key = 'updated'
    valid_replication_keys = ['updated_at']
    date_fields = ['ts', 'last_read']

    def sync(self, mdata, channel_id, ts):
        schema = self.load_schema()
        mapped_metadata = metadata.to_map(mdata)
        window_start, window_end = self.get_absolute_date_range(self.config.get('start_date'))

        # pylint: disable=unused-variable
        with singer.metrics.job_timer(job_type='list_threads') as timer:
            with singer.metrics.record_counter(endpoint=self.name) as counter:
                reply_pages = self.client.get_thread(channel=channel_id,
                                                     ts=ts,
                                                     inclusive="true",
                                                     oldest=int(window_start.timestamp()),
                                                     latest=int(window_end.timestamp()))

                for reply_page in reply_pages:
                    replies = transform_json(stream=self.name,
                                             data=reply_page.get('messages', []),
                                             date_fields=self.date_fields,
                                             channel_id=channel_id)
                    for reply in replies:
                        with singer.Transformer(
                                integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                                as transformer:
                            record = transformer.transform(data=reply, schema=schema,
                                                           metadata=mapped_metadata)
                            if self.write_to_singer:
                                singer.write_record(stream_name=self.name,
                                                    time_extracted=singer.utils.now(),
                                                    record=record)
                                counter.increment()
# UserGroupsStream = Slack User Groups
class UserGroupsStream(SlackStream):
    """Full-table sync of Slack user groups (including disabled groups and members)."""
    name = 'user_groups'
    key_properties = ['id']
    replication_method = 'FULL_TABLE'
    valid_replication_keys = []

    def sync(self, mdata):
        schema = self.load_schema()

        # pylint: disable=unused-variable
        with singer.metrics.job_timer(job_type='list_user_groups') as timer:
            with singer.metrics.record_counter(endpoint=self.name) as counter:
                # NOTE: flags are passed as string "true", matching the rest of the tap.
                usergroups_list = self.client.get_user_groups(include_count="true",
                                                              include_disabled="true",
                                                              include_user="true")

                for page in usergroups_list:
                    for usergroup in page.get('usergroups'):
                        with singer.Transformer(
                                integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                                as transformer:
                            transformed_record = transformer.transform(data=usergroup,
                                                                       schema=schema,
                                                                       metadata=metadata.to_map(
                                                                           mdata))
                            if self.write_to_singer:
                                singer.write_record(stream_name=self.name,
                                                    time_extracted=singer.utils.now(),
                                                    record=transformed_record)
                                counter.increment()


# TeamsStream = Slack Teams
class TeamsStream(SlackStream):
    """Full-table sync of workspace (team) info."""
    name = 'teams'
    key_properties = ['id']
    replication_method = 'FULL_TABLE'
    replication_key = 'updated'
    valid_replication_keys = ['updated_at']
    date_fields = []

    def sync(self, mdata):
        schema = self.load_schema()

        # pylint: disable=unused-variable
        with singer.metrics.job_timer(job_type='team_info') as timer:
            with singer.metrics.record_counter(endpoint=self.name) as counter:

                team_info = self.client.get_teams()

                # Each page carries a single 'team' object.
                for page in team_info:
                    team = page.get('team')
                    with singer.Transformer(
                            integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                            as transformer:
                        transformed_record = transformer.transform(data=team,
                                                                   schema=schema,
                                                                   metadata=metadata.to_map(
                                                                       mdata))
                        if self.write_to_singer:
                            singer.write_record(stream_name=self.name,
                                                time_extracted=singer.utils.now(),
                                                record=transformed_record)
                            counter.increment()


# FilesStream = Files uploaded/shared to Slack and hosted by Slack
class FilesStream(SlackStream):
    """Incremental, date-windowed sync of Slack-hosted files."""
    name = 'files'
    key_properties = ['id']
    replication_method = 'INCREMENTAL'
    replication_key = 'updated'
    valid_replication_keys = ['updated_at']
    date_fields = []

    def sync(self, mdata):
        schema = self.load_schema()

        # pylint: disable=unused-variable
        with singer.metrics.job_timer(job_type='list_files') as timer:
            with singer.metrics.record_counter(endpoint=self.name) as counter:

                # Resume from the stream-level bookmark, widened by the lookback window.
                bookmark_date = self.get_bookmark(self.name, self.config.get('start_date'))
                start, end = self.get_absolute_date_range(bookmark_date)

                # Window the requests based on the tap configuration
                date_window_start = start
                date_window_end = start + timedelta(days=int(self.date_window_size))
                min_bookmark = start
                max_bookmark = start

                while date_window_start < date_window_end:
                    files_list = self.client.get_files(
                        from_ts=int(date_window_start.timestamp()),
                        to_ts=int(date_window_end.timestamp())
                    )

                    for page in files_list:
                        files = page.get('files')

                        # NOTE(review): unlike remote_files, records here do NOT pass through
                        # transform_json — confirm the asymmetry is intentional.
                        for file in files:
                            with singer.Transformer(
                                    integer_datetime_fmt="unix-seconds-integer-datetime-parsing"
                            ) as transformer:
                                transformed_record = transformer.transform(
                                    data=file,
                                    schema=schema,
                                    metadata=metadata.to_map(mdata)
                                )
                                # NOTE(review): int('') raises ValueError when 'timestamp'
                                # is absent from a file record — confirm the API guarantees it.
                                record_timestamp = \
                                    file.get('timestamp', '')
                                record_timestamp_int = int(record_timestamp)

                                if record_timestamp_int >= start.timestamp():
                                    if self.write_to_singer:
                                        singer.write_record(stream_name=self.name,
                                                            time_extracted=singer.utils.now(),
                                                            record=transformed_record)
                                        counter.increment()

                                if datetime.utcfromtimestamp(
                                        record_timestamp_int).replace(
                                            tzinfo=utc) > max_bookmark.replace(tzinfo=utc):
                                    # Records are sorted by most recent first, so this
                                    # should only fire once every sync, per channel
                                    max_bookmark = datetime.fromtimestamp(
                                        record_timestamp_int)
                                elif datetime.utcfromtimestamp(
                                        record_timestamp_int).replace(
                                            tzinfo=utc) < min_bookmark:
                                    # The min bookmark tracks how far back we've synced
                                    # during the sync, since the records are ordered
                                    # newest -> oldest
                                    min_bookmark = datetime.fromtimestamp(
                                        record_timestamp_int)
                    self.update_bookmarks(self.name, min_bookmark.strftime(DATETIME_FORMAT))
                    # Update the date window
                    date_window_start = date_window_end
                    date_window_end = date_window_start + timedelta(
                        days=self.date_window_size)
                    if date_window_end > end:
                        date_window_end = end
# RemoteFilesStream = Files shared to Slack but not hosted by Slack
class RemoteFilesStream(SlackStream):
    """Incremental, date-windowed sync of externally hosted files shared into Slack."""
    name = 'remote_files'
    key_properties = ['id']
    replication_method = 'INCREMENTAL'
    replication_key = 'updated'
    valid_replication_keys = ['updated_at']
    date_fields = []

    def sync(self, mdata):
        schema = self.load_schema()

        # pylint: disable=unused-variable
        with singer.metrics.job_timer(job_type='list_files') as timer:
            with singer.metrics.record_counter(endpoint=self.name) as counter:

                # Resume from the stream-level bookmark, widened by the lookback window.
                bookmark_date = self.get_bookmark(self.name, self.config.get('start_date'))
                start, end = self.get_absolute_date_range(bookmark_date)

                # Window the requests based on the tap configuration
                date_window_start = start
                date_window_end = start + timedelta(days=int(self.date_window_size))
                min_bookmark = start
                max_bookmark = start

                while date_window_start < date_window_end:
                    remote_files_list = self.client.get_remote_files(
                        from_ts=int(date_window_start.timestamp()),
                        to_ts=int(date_window_end.timestamp())
                    )

                    for page in remote_files_list:
                        remote_files = page.get('files')
                        transformed_files = transform_json(stream=self.name,
                                                           data=remote_files,
                                                           date_fields=self.date_fields)
                        for file in transformed_files:
                            with singer.Transformer(
                                    integer_datetime_fmt="unix-seconds-integer-datetime-parsing"
                            ) as transformer:
                                transformed_record = transformer.transform(
                                    data=file,
                                    schema=schema,
                                    metadata=metadata.to_map(mdata)
                                )
                                # NOTE(review): int('') raises ValueError when 'timestamp'
                                # is absent from a record — confirm the API guarantees it.
                                record_timestamp = \
                                    file.get('timestamp', '')
                                record_timestamp_int = int(record_timestamp)

                                if record_timestamp_int >= start.timestamp():
                                    if self.write_to_singer:
                                        singer.write_record(stream_name=self.name,
                                                            time_extracted=singer.utils.now(),
                                                            record=transformed_record)
                                        counter.increment()

                                if datetime.utcfromtimestamp(
                                        record_timestamp_int).replace(
                                            tzinfo=utc) > max_bookmark.replace(tzinfo=utc):
                                    # Records are sorted by most recent first, so this
                                    # should only fire once every sync, per channel
                                    max_bookmark = datetime.fromtimestamp(
                                        record_timestamp_int)
                                elif datetime.utcfromtimestamp(
                                        record_timestamp_int).replace(
                                            tzinfo=utc) < min_bookmark:
                                    # The min bookmark tracks how far back we've synced
                                    # during the sync, since the records are ordered
                                    # newest -> oldest
                                    min_bookmark = datetime.fromtimestamp(
                                        record_timestamp_int)
                    self.update_bookmarks(self.name, min_bookmark.strftime(DATETIME_FORMAT))
                    # Update the date window
                    date_window_start = date_window_end
                    date_window_end = date_window_start + timedelta(
                        days=self.date_window_size)
                    if date_window_end > end:
                        date_window_end = end
def decimal_timestamp_to_utc_timestamp(timestamp):
    """Return the whole-seconds part of a Slack ``Seconds.Milliseconds`` timestamp string."""
    # Some timestamps returned by the Slack API are in the format of `Seconds.Milliseconds`
    # and this takes only the `Seconds` part of that timestamp.
    return timestamp.partition(".")[0]


def transform_json(stream, data, date_fields, channel_id=None):
    """Normalize raw Slack API records in place for the given stream.

    :param stream: stream name ('messages', 'threads', 'channels', ...)
    :param data: list of raw record dicts (mutated in place); may be None/empty
    :param date_fields: field names holding Slack decimal timestamps to truncate
    :param channel_id: channel to stamp onto messages/threads records
    :return: the (mutated) *data* list, unchanged if falsy
    """
    if data:
        for record in data:
            if stream == "messages":
                # Strip out file info and just keep id
                file_ids = []
                files = record.get("files", [])
                for file in files:
                    file_id = file.get("id", None)
                    if file_id:
                        file_ids.append(file_id)
                record['file_ids'] = file_ids

            if stream in ["messages", "threads"]:
                # add channel_id to the message
                record['channel_id'] = channel_id

            if stream == "channels":
                # These come back on the client response but aren't actually populated
                # Or in Slack's documentation at the time of writing, just remove them
                record.pop("parent_conversation", None)
                record.pop("channel_id", None)

            for date_field in date_fields:
                timestamp = record.get(date_field, None)
                if timestamp and isinstance(timestamp, str):
                    # BUG FIX: the original condition
                    #   stream == 'messages' or stream == "threads" and date_field == 'ts'
                    # parses as "messages OR (threads AND ts)" because `and` binds tighter
                    # than `or`, so ANY date field on a messages record would have set
                    # thread_ts. Only the 'ts' field should seed thread_ts.
                    if stream in ("messages", "threads") and date_field == 'ts':
                        # Preserve the full-precision ts (the thread key) before truncating.
                        record['thread_ts'] = timestamp
                    record[date_field] = decimal_timestamp_to_utc_timestamp(timestamp)
    return data
class TestSyncNonReportStreams(unittest.TestCase):
    """ Test the non-report streams """

    def name(self):
        # Test name reported to tap-tester.
        return configuration['test_name']

    def tap_name(self):
        """The name of the tap"""
        return configuration['tap_name']

    def get_type(self):
        """the expected url route ending"""
        return configuration['type']

    def expected_check_streams(self):
        # Streams expected to appear in discovery (check mode).
        return set(configuration['streams'].keys())

    def expected_sync_streams(self):
        # Streams expected to emit records during sync.
        return set(configuration['streams'].keys())

    def expected_pks(self):
        # Map of stream name -> set of primary-key fields.
        return configuration['streams']

    def get_properties(self):
        """Configuration properties required for the tap."""
        # Each configured property name maps to an environment variable holding its value.
        properties_dict = {
            "date_window_size": "60"
        }
        props = configuration['properties']
        for prop in props:
            properties_dict[prop] = os.getenv(props[prop])

        return properties_dict

    def get_credentials(self):
        """Authentication information for the test account. Username is expected as a property."""
        credentials_dict = {}
        creds = configuration['credentials']
        for cred in creds:
            credentials_dict[cred] = os.getenv(creds[cred])

        return credentials_dict

    def setUp(self):
        # Fail fast with a clear message if any required env var is unset.
        missing_envs = []
        props = configuration['properties']
        creds = configuration['credentials']

        for prop in props:
            if os.getenv(props[prop]) is None:
                missing_envs.append(prop)
        for cred in creds:
            if os.getenv(creds[cred]) is None:
                missing_envs.append(cred)

        if len(missing_envs) != 0:
            raise Exception("set " + ", ".join(missing_envs))

    def test_run(self):
        # End-to-end: discovery (check mode), field selection, full sync, bookmark check.

        conn_id = connections.ensure_connection(self, payload_hook=None)

        # Run the tap in check mode
        check_job_name = runner.run_check_mode(self, conn_id)

        # Verify the check's exit status
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        # Verify that there are catalogs found
        found_catalogs = menagerie.get_catalogs(conn_id)
        self.assertGreater(len(found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id))

        found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))
        subset = self.expected_check_streams().issubset(found_catalog_names)
        self.assertTrue(subset, msg="Expected check streams are not subset of discovered catalog")
        #
        # # Select some catalogs
        our_catalogs = [c for c in found_catalogs if c.get('tap_stream_id') in self.expected_sync_streams()]
        for catalog in our_catalogs:
            schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id'])
            connections.select_catalog_and_fields_via_metadata(conn_id, catalog, schema, [], [])

        # # Verify that all streams sync at least one row for initial sync
        # # This test is also verifying access token expiration handling. If test fails with
        # # authentication error, refresh token was not replaced after expiring.
        menagerie.set_state(conn_id, {})
        sync_job_name = runner.run_sync_mode(self, conn_id)

        # # Verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
        record_count_by_stream = runner.examine_target_output_file(self, conn_id, self.expected_sync_streams(),
                                                                   self.expected_pks())
        zero_count_streams = {k for k, v in record_count_by_stream.items() if v == 0}
        self.assertFalse(zero_count_streams,
                         msg="The following streams did not sync any rows {}".format(zero_count_streams))

        # Verify that bookmark values are correct after incremental sync
        start_date = os.getenv(configuration['properties']['start_date'])
        bookmark_props = configuration['bookmark']
        current_state = menagerie.get_state(conn_id)
        test_bookmark = current_state['bookmarks'][bookmark_props['bookmark_key']]
        print(test_bookmark)
        self.assertTrue(test_bookmark['updated'] > start_date,
                        msg="The bookmark value does not match the expected result")