From 940dca2b837115c6d7d1bb264049d1e483220dc8 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Thu, 19 Dec 2019 14:49:26 +0200 Subject: [PATCH 01/44] CKAN 2.9 support --- ckanext/cloudstorage/cli.py | 188 +++--------------- ckanext/cloudstorage/commands.py | 57 ++++++ ckanext/cloudstorage/controller.py | 51 +---- .../fanstatic/scripts/webassets.yml | 10 + .../cloudstorage/logic/action/multipart.py | 7 +- .../{plugin.py => plugin/__init__.py} | 36 +--- ckanext/cloudstorage/plugin/flask_plugin.py | 20 ++ ckanext/cloudstorage/plugin/pylons_plugin.py | 33 +++ ckanext/cloudstorage/storage.py | 16 +- .../snippets/cloudstorage-js_asset.html | 1 + .../snippets/cloudstorage-js_resource.html | 1 + ckanext/cloudstorage/templates/page.html | 3 +- ckanext/cloudstorage/utils.py | 162 +++++++++++++++ ckanext/cloudstorage/views.py | 23 +++ pyproject.toml | 3 + setup.py | 2 +- 16 files changed, 370 insertions(+), 243 deletions(-) create mode 100644 ckanext/cloudstorage/commands.py create mode 100644 ckanext/cloudstorage/fanstatic/scripts/webassets.yml rename ckanext/cloudstorage/{plugin.py => plugin/__init__.py} (80%) create mode 100644 ckanext/cloudstorage/plugin/flask_plugin.py create mode 100644 ckanext/cloudstorage/plugin/pylons_plugin.py create mode 100644 ckanext/cloudstorage/templates/cloudstorage/snippets/cloudstorage-js_asset.html create mode 100644 ckanext/cloudstorage/templates/cloudstorage/snippets/cloudstorage-js_resource.html create mode 100644 ckanext/cloudstorage/utils.py create mode 100644 ckanext/cloudstorage/views.py create mode 100644 pyproject.toml diff --git a/ckanext/cloudstorage/cli.py b/ckanext/cloudstorage/cli.py index 8488c4e..3ebce02 100644 --- a/ckanext/cloudstorage/cli.py +++ b/ckanext/cloudstorage/cli.py @@ -1,170 +1,40 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -import os -import os.path -import cgi -import tempfile -from docopt import docopt -from ckan.lib.cli import CkanCommand -from ckanapi import LocalCKAN -from ckanext.cloudstorage.storage import ( - CloudStorage, - ResourceCloudStorage -) -from ckanext.cloudstorage.model import ( - create_tables, - drop_tables -) -from ckan.logic import NotFound +import click +import ckanext.cloudstorage.utils as utils -USAGE = """ckanext-cloudstorage +@click.group() +def cloudstorage(): + """CloudStorage management commands. + """ + pass -Commands: - - fix-cors Update CORS rules where possible. - - migrate Upload local storage to the remote. - - initdb Reinitalize database tables. -Usage: - cloudstorage fix-cors ... [--c=] - cloudstorage migrate [] [--c=] - cloudstorage initdb [--c=] +@cloudstorage.command('fix-cors') +@click.argument('domains', nargs=-1) +def fix_cors(domains): + """Update CORS rules where possible. + """ + msg, ok = utils.fix_cors(domains) + click.secho(msg, fg='green' if ok else 'red') -Options: - -c= The CKAN configuration file. -""" +@cloudstorage.command() +@click.argument('path') +@click.argument('resource', required=False) +def migrate(path, resource): + """Upload local storage to the remote. + """ + utils.migrate(path, resource) +@cloudstorage.command() +def initdb(): + """Reinitalize database tables. + """ + utils.initdb() + click.secho("DB tables are reinitialized", fg="green") -class FakeFileStorage(cgi.FieldStorage): - def __init__(self, fp, filename): - self.file = fp - self.filename = filename -class PasterCommand(CkanCommand): - summary = 'ckanext-cloudstorage maintence utilities.' 
- usage = USAGE - - def command(self): - self._load_config() - args = docopt(USAGE, argv=self.args) - - if args['fix-cors']: - _fix_cors(args) - elif args['migrate']: - _migrate(args) - elif args['initdb']: - _initdb() - - -def _migrate(args): - path = args[''] - single_id = args[''] - if not os.path.isdir(path): - print('The storage directory cannot be found.') - return - - lc = LocalCKAN() - resources = {} - failed = [] - - # The resource folder is stuctured like so on disk: - # - storage/ - # - ... - # - resources/ - # - <3 letter prefix> - # - <3 letter prefix> - # - - # ... - # ... - # ... - for root, dirs, files in os.walk(path): - # Only the bottom level of the tree actually contains any files. We - # don't care at all about the overall structure. - if not files: - continue - - split_root = root.split('/') - resource_id = split_root[-2] + split_root[-1] - - for file_ in files: - ckan_res_id = resource_id + file_ - if single_id and ckan_res_id != single_id: - continue - - resources[ckan_res_id] = os.path.join( - root, - file_ - ) - - for i, resource in enumerate(resources.iteritems(), 1): - resource_id, file_path = resource - print('[{i}/{count}] Working on {id}'.format( - i=i, - count=len(resources), - id=resource_id - )) - - try: - resource = lc.action.resource_show(id=resource_id) - except NotFound: - print(u'\tResource not found') - continue - if resource['url_type'] != 'upload': - print(u'\t`url_type` is not `upload`. Skip') - continue - - with open(file_path, 'rb') as fin: - resource['upload'] = FakeFileStorage( - fin, - resource['url'].split('/')[-1] - ) - try: - uploader = ResourceCloudStorage(resource) - uploader.upload(resource['id']) - except Exception as e: - failed.append(resource_id) - print(u'\tError of type {0} during upload: {1}'.format(type(e), e)) - - if failed: - log_file = tempfile.NamedTemporaryFile(delete=False) - log_file.file.writelines(failed) - print(u'ID of all failed uploads are saved to `{0}`'.format(log_file.name)) - - -def _fix_cors(args): - cs = CloudStorage() - - if cs.can_use_advanced_azure: - from azure.storage import blob as azure_blob - from azure.storage import CorsRule - - blob_service = azure_blob.BlockBlobService( - cs.driver_options['key'], - cs.driver_options['secret'] - ) - - blob_service.set_blob_service_properties( - cors=[ - CorsRule( - allowed_origins=args[''], - allowed_methods=['GET'] - ) - ] - ) - print('Done!') - else: - print( - 'The driver {driver_name} being used does not currently' - ' support updating CORS rules through' - ' cloudstorage.'.format( - driver_name=cs.driver_name - ) - ) - - -def _initdb(): - drop_tables() - create_tables() - print("DB tables are reinitialized") +def get_commands(): + return [cloudstorage] diff --git a/ckanext/cloudstorage/commands.py b/ckanext/cloudstorage/commands.py new file mode 100644 index 0000000..dea598f --- /dev/null +++ b/ckanext/cloudstorage/commands.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from ckan.lib.cli import CkanCommand +from docopt import docopt + +import ckanext.cloudstorage.utils as utils + + +USAGE = """ckanext-cloudstorage + +Commands: + - fix-cors Update CORS rules where possible. + - migrate Upload local storage to the remote. + - initdb Reinitalize database tables. + +Usage: + cloudstorage fix-cors ... [--c=] + cloudstorage migrate [] [--c=] + cloudstorage initdb [--c=] + +Options: + -c= The CKAN configuration file. +""" + + + +class PasterCommand(CkanCommand): + summary = 'ckanext-cloudstorage maintence utilities.' 
+ usage = USAGE + + def command(self): + self._load_config() + args = docopt(USAGE, argv=self.args) + + if args['fix-cors']: + _fix_cors(args) + elif args['migrate']: + _migrate(args) + elif args['initdb']: + _initdb() + + +def _migrate(args): + path = args[''] + single_id = args[''] + utils.migrate(path, single_id) + + +def _fix_cors(args): + msg, _ = utils.fix_cors(args['']) + print(msg) + + +def _initdb(): + utils.initdb() + print("DB tables are reinitialized") diff --git a/ckanext/cloudstorage/controller.py b/ckanext/cloudstorage/controller.py index 992b0c1..162489e 100644 --- a/ckanext/cloudstorage/controller.py +++ b/ckanext/cloudstorage/controller.py @@ -8,55 +8,8 @@ from ckan import logic, model from ckan.lib import base, uploader import ckan.lib.helpers as h - +import ckanext.cloudstorage.utils as utils class StorageController(base.BaseController): def resource_download(self, id, resource_id, filename=None): - context = { - 'model': model, - 'session': model.Session, - 'user': c.user or c.author, - 'auth_user_obj': c.userobj - } - - try: - resource = logic.get_action('resource_show')( - context, - { - 'id': resource_id - } - ) - except logic.NotFound: - base.abort(404, _('Resource not found')) - except logic.NotAuthorized: - base.abort(401, _('Unauthorized to read resource {0}'.format(id))) - - # This isn't a file upload, so either redirect to the source - # (if available) or error out. - if resource.get('url_type') != 'upload': - url = resource.get('url') - if not url: - base.abort(404, _('No download is available')) - h.redirect_to(url) - - if filename is None: - # No filename was provided so we'll try to get one from the url. - filename = os.path.basename(resource['url']) - - upload = uploader.get_resource_uploader(resource) - - # if the client requests with a Content-Type header (e.g. Text preview) - # we have to add the header to the signature - try: - content_type = getattr(c.pylons.request, "content_type", None) - except AttributeError: - content_type = None - uploaded_url = upload.get_url_from_filename(resource['id'], filename, - content_type=content_type) - - # The uploaded file is missing for some reason, such as the - # provider being down. 
- if uploaded_url is None: - base.abort(404, _('No download is available')) - - h.redirect_to(uploaded_url) + return utils.resource_download(id, resource_id, filename) diff --git a/ckanext/cloudstorage/fanstatic/scripts/webassets.yml b/ckanext/cloudstorage/fanstatic/scripts/webassets.yml new file mode 100644 index 0000000..73665fd --- /dev/null +++ b/ckanext/cloudstorage/fanstatic/scripts/webassets.yml @@ -0,0 +1,10 @@ +main: + filters: rjsmin + output: ckanext-cloudstorage/%(version)s_main.js + extra: + preload: + - base/main + contents: + - vendor/jquery-widget.js + - vendor/file-upload.js + - cloudstorage-multipart-upload.js diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py index 3af57f8..ae67666 100644 --- a/ckanext/cloudstorage/logic/action/multipart.py +++ b/ckanext/cloudstorage/logic/action/multipart.py @@ -3,7 +3,6 @@ import logging import datetime -from pylons import config from sqlalchemy.orm.exc import NoResultFound import ckan.model as model import ckan.lib.helpers as h @@ -12,6 +11,12 @@ from ckanext.cloudstorage.storage import ResourceCloudStorage from ckanext.cloudstorage.model import MultipartUpload, MultipartPart +if toolkit.check_ckan_version("2.9"): + config = toolkit.config +else: + from pylons import config + + log = logging.getLogger(__name__) diff --git a/ckanext/cloudstorage/plugin.py b/ckanext/cloudstorage/plugin/__init__.py similarity index 80% rename from ckanext/cloudstorage/plugin.py rename to ckanext/cloudstorage/plugin/__init__.py index 5d7a939..c0c736b 100644 --- a/ckanext/cloudstorage/plugin.py +++ b/ckanext/cloudstorage/plugin/__init__.py @@ -1,17 +1,19 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- from ckan import plugins -from routes.mapper import SubMapper import os.path from ckanext.cloudstorage import storage from ckanext.cloudstorage import helpers import ckanext.cloudstorage.logic.action.multipart as m_action import ckanext.cloudstorage.logic.auth.multipart as m_auth +if plugins.toolkit.check_ckan_version("2.9"): + from ckanext.cloudstorage.plugin.flask_plugin import MixinPlugin + # from ckanext.cloudstorage.plugin.pylons_plugin import MixinPlugin +else: + from ckanext.cloudstorage.plugin.pylons_plugin import MixinPlugin -class CloudStoragePlugin(plugins.SingletonPlugin): +class CloudStoragePlugin(MixinPlugin, plugins.SingletonPlugin): plugins.implements(plugins.IUploader) - plugins.implements(plugins.IRoutes, inherit=True) plugins.implements(plugins.IConfigurable) plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IActions) @@ -22,8 +24,8 @@ class CloudStoragePlugin(plugins.SingletonPlugin): # IConfigurer def update_config(self, config): - plugins.toolkit.add_template_directory(config, 'templates') - plugins.toolkit.add_resource('fanstatic/scripts', 'cloudstorage-js') + plugins.toolkit.add_template_directory(config, '../templates') + plugins.toolkit.add_resource('../fanstatic/scripts', 'cloudstorage-js') # ITemplateHelpers @@ -57,28 +59,6 @@ def get_uploader(self, upload_to, old_filename=None): # Returning None here will use the default Uploader. return None - def before_map(self, map): - sm = SubMapper( - map, - controller='ckanext.cloudstorage.controller:StorageController' - ) - - # Override the resource download controllers so we can do our - # lookup with libcloud. 
- with sm: - sm.connect( - 'resource_download', - '/dataset/{id}/resource/{resource_id}/download', - action='resource_download' - ) - sm.connect( - 'resource_download', - '/dataset/{id}/resource/{resource_id}/download/{filename}', - action='resource_download' - ) - - return map - # IActions def get_actions(self): diff --git a/ckanext/cloudstorage/plugin/flask_plugin.py b/ckanext/cloudstorage/plugin/flask_plugin.py new file mode 100644 index 0000000..2d2e64c --- /dev/null +++ b/ckanext/cloudstorage/plugin/flask_plugin.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +import ckan.plugins as p + +from ckanext.cloudstorage.views import get_blueprints +from ckanext.cloudstorage.cli import get_commands + +class MixinPlugin(p.SingletonPlugin): + p.implements(p.IBlueprint) + p.implements(p.IClick) + + # IBlueprint + + def get_blueprint(self): + return get_blueprints() + + # IClick + + def get_commands(self): + return get_commands() diff --git a/ckanext/cloudstorage/plugin/pylons_plugin.py b/ckanext/cloudstorage/plugin/pylons_plugin.py new file mode 100644 index 0000000..d8003d9 --- /dev/null +++ b/ckanext/cloudstorage/plugin/pylons_plugin.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +from routes.mapper import SubMapper + +import ckan.plugins as p + +class MixinPlugin(p.SingletonPlugin): + p.implements(p.IRoutes, inherit=True) + + # IRoutes + + + def before_map(self, map): + sm = SubMapper( + map, + controller='ckanext.cloudstorage.controller:StorageController' + ) + + # Override the resource download controllers so we can do our + # lookup with libcloud. + with sm: + sm.connect( + 'resource_download', + '/dataset/{id}/resource/{resource_id}/download', + action='resource_download' + ) + sm.connect( + 'resource_download', + '/dataset/{id}/resource/{resource_id}/download/{filename}', + action='resource_download' + ) + + return map diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index ac54221..48704d6 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -7,7 +7,6 @@ from ast import literal_eval from datetime import datetime, timedelta -from pylons import config from ckan import model from ckan.lib import munge import ckan.plugins as p @@ -15,6 +14,13 @@ from libcloud.storage.types import Provider, ObjectDoesNotExistError from libcloud.storage.providers import get_driver +if p.toolkit.check_ckan_version("2.9"): + from werkzeug.datastructures import FileStorage as UploadedFileType + config = p.toolkit.config +else: + from pylons import config + UploadedFileType = cgi.FieldStorage + class CloudStorage(object): def __init__(self): @@ -162,9 +168,12 @@ def __init__(self, resource): multipart_name = resource.pop('multipart_name', None) # Check to see if a file has been provided - if isinstance(upload_field_storage, cgi.FieldStorage): + if isinstance(upload_field_storage, UploadedFileType): self.filename = munge.munge_filename(upload_field_storage.filename) - self.file_upload = upload_field_storage.file + if p.toolkit.check_ckan_version("2.9"): + self.file_upload = upload_field_storage.stream + else: + self.file_upload = upload_field_storage.file resource['url'] = self.filename resource['url_type'] = 'upload' elif multipart_name and self.can_use_advanced_aws: @@ -222,7 +231,6 @@ def upload(self, id, max_size=10): content_settings = ContentSettings( content_type=content_type ) - return blob_service.create_blob_from_stream( container_name=self.container_name, blob_name=self.path_from_filename( diff --git 
a/ckanext/cloudstorage/templates/cloudstorage/snippets/cloudstorage-js_asset.html b/ckanext/cloudstorage/templates/cloudstorage/snippets/cloudstorage-js_asset.html new file mode 100644 index 0000000..0727f6b --- /dev/null +++ b/ckanext/cloudstorage/templates/cloudstorage/snippets/cloudstorage-js_asset.html @@ -0,0 +1 @@ +{% asset 'cloudstorage-js/main' %} diff --git a/ckanext/cloudstorage/templates/cloudstorage/snippets/cloudstorage-js_resource.html b/ckanext/cloudstorage/templates/cloudstorage/snippets/cloudstorage-js_resource.html new file mode 100644 index 0000000..e71499a --- /dev/null +++ b/ckanext/cloudstorage/templates/cloudstorage/snippets/cloudstorage-js_resource.html @@ -0,0 +1 @@ +{% resource 'cloudstorage-js/main' %} diff --git a/ckanext/cloudstorage/templates/page.html b/ckanext/cloudstorage/templates/page.html index 3759099..abbbe82 100644 --- a/ckanext/cloudstorage/templates/page.html +++ b/ckanext/cloudstorage/templates/page.html @@ -2,6 +2,7 @@ {% block scripts %} {{ super() }} - {% resource 'cloudstorage-js/main' %} + {% set type = 'asset' if h.ckan_version() > '2.9' else 'resource' %} + {% include 'cloudstorage/snippets/cloudstorage-js_' ~ type ~ '.html' %} {% endblock %} diff --git a/ckanext/cloudstorage/utils.py b/ckanext/cloudstorage/utils.py new file mode 100644 index 0000000..2e4e065 --- /dev/null +++ b/ckanext/cloudstorage/utils.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +import os.path + + +from ckan import logic, model +import ckan.plugins.toolkit as tk +from ckan.lib import base, uploader +import ckan.lib.helpers as h +import cgi +import tempfile +from ckan.logic import NotFound +from ckanapi import LocalCKAN + +from ckanext.cloudstorage.model import (create_tables, drop_tables) +from ckanext.cloudstorage.storage import (CloudStorage, ResourceCloudStorage) + + +class FakeFileStorage(cgi.FieldStorage): + def __init__(self, fp, filename): + self.file = fp + self.filename = filename + + +def initdb(): + drop_tables() + create_tables() + + +def fix_cors(domains): + cs = CloudStorage() + + if cs.can_use_advanced_azure: + from azure.storage import blob as azure_blob + from azure.storage import CorsRule + + blob_service = azure_blob.BlockBlobService(cs.driver_options['key'], + cs.driver_options['secret']) + + blob_service.set_blob_service_properties( + cors=[CorsRule(allowed_origins=domains, allowed_methods=['GET'])]) + return 'Done!', True + else: + return ('The driver {driver_name} being used does not currently' + ' support updating CORS rules through' + ' cloudstorage.'.format(driver_name=cs.driver_name)), False + + +def migrate(path, single_id): + if not os.path.isdir(path): + print('The storage directory cannot be found.') + return + + lc = LocalCKAN() + resources = {} + failed = [] + + # The resource folder is stuctured like so on disk: + # - storage/ + # - ... + # - resources/ + # - <3 letter prefix> + # - <3 letter prefix> + # - + # ... + # ... + # ... + for root, dirs, files in os.walk(path): + # Only the bottom level of the tree actually contains any files. We + # don't care at all about the overall structure. 
+ if not files: + continue + + split_root = root.split('/') + resource_id = split_root[-2] + split_root[-1] + + for file_ in files: + ckan_res_id = resource_id + file_ + if single_id and ckan_res_id != single_id: + continue + + resources[ckan_res_id] = os.path.join(root, file_) + + for i, resource in enumerate(resources.iteritems(), 1): + resource_id, file_path = resource + print('[{i}/{count}] Working on {id}'.format(i=i, + count=len(resources), + id=resource_id)) + + try: + resource = lc.action.resource_show(id=resource_id) + except NotFound: + print(u'\tResource not found') + continue + if resource['url_type'] != 'upload': + print(u'\t`url_type` is not `upload`. Skip') + continue + + with open(file_path, 'rb') as fin: + resource['upload'] = FakeFileStorage( + fin, resource['url'].split('/')[-1]) + try: + uploader = ResourceCloudStorage(resource) + uploader.upload(resource['id']) + except Exception as e: + failed.append(resource_id) + print(u'\tError of type {0} during upload: {1}'.format( + type(e), e)) + + if failed: + log_file = tempfile.NamedTemporaryFile(delete=False) + log_file.file.writelines(failed) + print(u'ID of all failed uploads are saved to `{0}`'.format( + log_file.name)) + + +def resource_download(id, resource_id, filename=None): + context = { + 'model': model, + 'session': model.Session, + 'user': tk.c.user or tk.c.author, + 'auth_user_obj': tk.c.userobj + } + + try: + resource = logic.get_action('resource_show')(context, { + 'id': resource_id + }) + except logic.NotFound: + return base.abort(404, tk._('Resource not found')) + except logic.NotAuthorized: + return base.abort(401, tk._('Unauthorized to read resource {0}'.format(id))) + + # This isn't a file upload, so either redirect to the source + # (if available) or error out. + if resource.get('url_type') != 'upload': + url = resource.get('url') + if not url: + return base.abort(404, tk._('No download is available')) + return h.redirect_to(url) + + if filename is None: + # No filename was provided so we'll try to get one from the url. + filename = os.path.basename(resource['url']) + + upload = uploader.get_resource_uploader(resource) + + # if the client requests with a Content-Type header (e.g. Text preview) + # we have to add the header to the signature + try: + content_type = getattr(tk.request, "content_type", None) + except AttributeError: + content_type = None + uploaded_url = upload.get_url_from_filename(resource['id'], + filename, + content_type=content_type) + + # The uploaded file is missing for some reason, such as the + # provider being down. 
+ if uploaded_url is None: + return base.abort(404, tk._('No download is available')) + + return h.redirect_to(uploaded_url) diff --git a/ckanext/cloudstorage/views.py b/ckanext/cloudstorage/views.py new file mode 100644 index 0000000..f5bf688 --- /dev/null +++ b/ckanext/cloudstorage/views.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- + +from flask import Blueprint +import ckan.views.resource as resource +import ckanext.cloudstorage.utils as utils + +cloudstorage = Blueprint('cloudstorage', __name__) + + +def download(id, resource_id, filename=None, package_type="dataset"): + return utils.resource_download(id, resource_id, filename) + + +cloudstorage.add_url_rule("/dataset//resource//download", + view_func=download) +cloudstorage.add_url_rule( + "/dataset//resource//download/", + view_func=download, +) + + +def get_blueprints(): + return [cloudstorage] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..858d39d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[tool.black] +line-length = 79 +include = '\.py$' diff --git a/setup.py b/setup.py index d7427ee..e3a7f1d 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ cloudstorage=ckanext.cloudstorage.plugin:CloudStoragePlugin [paste.paster_command] - cloudstorage=ckanext.cloudstorage.cli:PasterCommand + cloudstorage=ckanext.cloudstorage.commands:PasterCommand """ ), ) From c33e9a7bfb68a2b68c180b80216b8957135f7a63 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Thu, 19 Dec 2019 14:50:02 +0200 Subject: [PATCH 02/44] futurize --- ckanext/__init__.py | 2 + ckanext/cloudstorage/cli.py | 14 +- ckanext/cloudstorage/commands.py | 16 +- ckanext/cloudstorage/controller.py | 1 + ckanext/cloudstorage/helpers.py | 12 +- .../cloudstorage/logic/action/multipart.py | 143 +++++++++--------- ckanext/cloudstorage/logic/auth/multipart.py | 12 +- ckanext/cloudstorage/model.py | 22 +-- ckanext/cloudstorage/plugin/__init__.py | 59 ++++---- ckanext/cloudstorage/plugin/flask_plugin.py | 1 + ckanext/cloudstorage/plugin/pylons_plugin.py | 17 +-- ckanext/cloudstorage/storage.py | 128 +++++++--------- ckanext/cloudstorage/utils.py | 105 +++++++------ ckanext/cloudstorage/views.py | 7 +- 14 files changed, 276 insertions(+), 263 deletions(-) diff --git a/ckanext/__init__.py b/ckanext/__init__.py index 2e2033b..6d83202 100644 --- a/ckanext/__init__.py +++ b/ckanext/__init__.py @@ -1,7 +1,9 @@ # this is a namespace package try: import pkg_resources + pkg_resources.declare_namespace(__name__) except ImportError: import pkgutil + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/ckanext/cloudstorage/cli.py b/ckanext/cloudstorage/cli.py index 3ebce02..4b8db37 100644 --- a/ckanext/cloudstorage/cli.py +++ b/ckanext/cloudstorage/cli.py @@ -4,6 +4,7 @@ import click import ckanext.cloudstorage.utils as utils + @click.group() def cloudstorage(): """CloudStorage management commands. @@ -11,22 +12,24 @@ def cloudstorage(): pass -@cloudstorage.command('fix-cors') -@click.argument('domains', nargs=-1) +@cloudstorage.command("fix-cors") +@click.argument("domains", nargs=-1) def fix_cors(domains): """Update CORS rules where possible. """ msg, ok = utils.fix_cors(domains) - click.secho(msg, fg='green' if ok else 'red') + click.secho(msg, fg="green" if ok else "red") + @cloudstorage.command() -@click.argument('path') -@click.argument('resource', required=False) +@click.argument("path") +@click.argument("resource", required=False) def migrate(path, resource): """Upload local storage to the remote. 
""" utils.migrate(path, resource) + @cloudstorage.command() def initdb(): """Reinitalize database tables. @@ -35,6 +38,5 @@ def initdb(): click.secho("DB tables are reinitialized", fg="green") - def get_commands(): return [cloudstorage] diff --git a/ckanext/cloudstorage/commands.py b/ckanext/cloudstorage/commands.py index dea598f..c8ab369 100644 --- a/ckanext/cloudstorage/commands.py +++ b/ckanext/cloudstorage/commands.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from __future__ import print_function from ckan.lib.cli import CkanCommand from docopt import docopt @@ -24,31 +25,30 @@ """ - class PasterCommand(CkanCommand): - summary = 'ckanext-cloudstorage maintence utilities.' + summary = "ckanext-cloudstorage maintence utilities." usage = USAGE def command(self): self._load_config() args = docopt(USAGE, argv=self.args) - if args['fix-cors']: + if args["fix-cors"]: _fix_cors(args) - elif args['migrate']: + elif args["migrate"]: _migrate(args) - elif args['initdb']: + elif args["initdb"]: _initdb() def _migrate(args): - path = args[''] - single_id = args[''] + path = args[""] + single_id = args[""] utils.migrate(path, single_id) def _fix_cors(args): - msg, _ = utils.fix_cors(args['']) + msg, _ = utils.fix_cors(args[""]) print(msg) diff --git a/ckanext/cloudstorage/controller.py b/ckanext/cloudstorage/controller.py index 162489e..5105d46 100644 --- a/ckanext/cloudstorage/controller.py +++ b/ckanext/cloudstorage/controller.py @@ -10,6 +10,7 @@ import ckan.lib.helpers as h import ckanext.cloudstorage.utils as utils + class StorageController(base.BaseController): def resource_download(self, id, resource_id, filename=None): return utils.resource_download(id, resource_id, filename) diff --git a/ckanext/cloudstorage/helpers.py b/ckanext/cloudstorage/helpers.py index 1a7ca99..526f060 100644 --- a/ckanext/cloudstorage/helpers.py +++ b/ckanext/cloudstorage/helpers.py @@ -4,8 +4,10 @@ def use_secure_urls(): - return all([ - ResourceCloudStorage.use_secure_urls.fget(None), - # Currently implemented just AWS version - 'S3' in ResourceCloudStorage.driver_name.fget(None) - ]) + return all( + [ + ResourceCloudStorage.use_secure_urls.fget(None), + # Currently implemented just AWS version + "S3" in ResourceCloudStorage.driver_name.fget(None), + ] + ) diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py index ae67666..94f0b61 100644 --- a/ckanext/cloudstorage/logic/action/multipart.py +++ b/ckanext/cloudstorage/logic/action/multipart.py @@ -21,18 +21,18 @@ def _get_max_multipart_lifetime(): - value = float(config.get('ckanext.cloudstorage.max_multipart_lifetime', 7)) + value = float(config.get("ckanext.cloudstorage.max_multipart_lifetime", 7)) return datetime.timedelta(value) def _get_object_url(uploader, name): - return '/' + uploader.container_name + '/' + name + return "/" + uploader.container_name + "/" + name def _delete_multipart(upload, uploader): resp = uploader.driver.connection.request( - _get_object_url(uploader, upload.name) + '?uploadId=' + upload.id, - method='DELETE' + _get_object_url(uploader, upload.name) + "?uploadId=" + upload.id, + method="DELETE", ) if not resp.success(): raise toolkit.ValidationError(resp.error) @@ -44,9 +44,11 @@ def _delete_multipart(upload, uploader): def _save_part_info(n, etag, upload): try: - part = model.Session.query(MultipartPart).filter( - MultipartPart.n == n, - MultipartPart.upload == upload).one() + part = ( + model.Session.query(MultipartPart) + .filter(MultipartPart.n == n, 
MultipartPart.upload == upload) + .one() + ) except NoResultFound: part = MultipartPart(n, etag, upload) else: @@ -65,17 +67,23 @@ def check_multipart(context, data_dict): """ - h.check_access('cloudstorage_check_multipart', data_dict) - id = toolkit.get_or_bust(data_dict, 'id') + h.check_access("cloudstorage_check_multipart", data_dict) + id = toolkit.get_or_bust(data_dict, "id") try: - upload = model.Session.query(MultipartUpload).filter_by( - resource_id=id).one() + upload = ( + model.Session.query(MultipartUpload) + .filter_by(resource_id=id) + .one() + ) except NoResultFound: return upload_dict = upload.as_dict() - upload_dict['parts'] = model.Session.query(MultipartPart).filter( - MultipartPart.upload == upload).count() - return {'upload': upload_dict} + upload_dict["parts"] = ( + model.Session.query(MultipartPart) + .filter(MultipartPart.upload == upload) + .count() + ) + return {"upload": upload_dict} def initiate_multipart(context, data_dict): @@ -92,13 +100,13 @@ def initiate_multipart(context, data_dict): """ - h.check_access('cloudstorage_initiate_multipart', data_dict) - id, name, size = toolkit.get_or_bust(data_dict, ['id', 'name', 'size']) + h.check_access("cloudstorage_initiate_multipart", data_dict) + id, name, size = toolkit.get_or_bust(data_dict, ["id", "name", "size"]) user_id = None - if context['auth_user_obj']: - user_id = context['auth_user_obj'].id + if context["auth_user_obj"]: + user_id = context["auth_user_obj"].id - uploader = ResourceCloudStorage({'multipart_name': name}) + uploader = ResourceCloudStorage({"multipart_name": name}) res_name = uploader.path_from_filename(id, name) upload_object = MultipartUpload.by_name(res_name) @@ -109,64 +117,61 @@ def initiate_multipart(context, data_dict): if upload_object is None: for old_upload in model.Session.query(MultipartUpload).filter_by( - resource_id=id): + resource_id=id + ): _delete_multipart(old_upload, uploader) - _rindex = res_name.rfind('/') + _rindex = res_name.rfind("/") if ~_rindex: try: name_prefix = res_name[:_rindex] for cloud_object in uploader.container.iterate_objects(): if cloud_object.name.startswith(name_prefix): - log.info('Removing cloud object: %s' % cloud_object) + log.info("Removing cloud object: %s" % cloud_object) cloud_object.delete() except Exception as e: - log.exception('[delete from cloud] %s' % e) + log.exception("[delete from cloud] %s" % e) resp = uploader.driver.connection.request( - _get_object_url(uploader, res_name) + '?uploads', - method='POST' + _get_object_url(uploader, res_name) + "?uploads", method="POST" ) if not resp.success(): raise toolkit.ValidationError(resp.error) try: upload_id = resp.object.find( - '{%s}UploadId' % resp.object.nsmap[None]).text + "{%s}UploadId" % resp.object.nsmap[None] + ).text except AttributeError: - upload_id_list = filter( - lambda e: e.tag.endswith('UploadId'), - resp.object.getchildren() - ) + upload_id_list = [e for e in resp.object.getchildren() if e.tag.endswith("UploadId")] upload_id = upload_id_list[0].text - upload_object = MultipartUpload(upload_id, id, res_name, size, name, user_id) + upload_object = MultipartUpload( + upload_id, id, res_name, size, name, user_id + ) upload_object.save() return upload_object.as_dict() def upload_multipart(context, data_dict): - h.check_access('cloudstorage_upload_multipart', data_dict) + h.check_access("cloudstorage_upload_multipart", data_dict) upload_id, part_number, part_content = toolkit.get_or_bust( - data_dict, ['uploadId', 'partNumber', 'upload']) + data_dict, ["uploadId", "partNumber", 
"upload"] + ) uploader = ResourceCloudStorage({}) upload = model.Session.query(MultipartUpload).get(upload_id) resp = uploader.driver.connection.request( - _get_object_url( - uploader, upload.name) + '?partNumber={0}&uploadId={1}'.format( - part_number, upload_id), - method='PUT', - data=bytearray(part_content.file.read()) + _get_object_url(uploader, upload.name) + + "?partNumber={0}&uploadId={1}".format(part_number, upload_id), + method="PUT", + data=bytearray(part_content.file.read()), ) if resp.status != 200: - raise toolkit.ValidationError('Upload failed: part %s' % part_number) + raise toolkit.ValidationError("Upload failed: part %s" % part_number) - _save_part_info(part_number, resp.headers['etag'], upload) - return { - 'partNumber': part_number, - 'ETag': resp.headers['etag'] - } + _save_part_info(part_number, resp.headers["etag"], upload) + return {"partNumber": part_number, "ETag": resp.headers["etag"]} def finish_multipart(context, data_dict): @@ -182,14 +187,15 @@ def finish_multipart(context, data_dict): """ - h.check_access('cloudstorage_finish_multipart', data_dict) - upload_id = toolkit.get_or_bust(data_dict, 'uploadId') - save_action = data_dict.get('save_action', False) + h.check_access("cloudstorage_finish_multipart", data_dict) + upload_id = toolkit.get_or_bust(data_dict, "uploadId") + save_action = data_dict.get("save_action", False) upload = model.Session.query(MultipartUpload).get(upload_id) chunks = [ (part.n, part.etag) - for part in model.Session.query(MultipartPart).filter_by( - upload_id=upload_id).order_by(MultipartPart.n) + for part in model.Session.query(MultipartPart) + .filter_by(upload_id=upload_id) + .order_by(MultipartPart.n) ] uploader = ResourceCloudStorage({}) try: @@ -198,31 +204,32 @@ def finish_multipart(context, data_dict): except Exception: pass uploader.driver._commit_multipart( - _get_object_url(uploader, upload.name), - upload_id, - chunks) + _get_object_url(uploader, upload.name), upload_id, chunks + ) upload.delete() upload.commit() if save_action and save_action == "go-metadata": try: - res_dict = toolkit.get_action('resource_show')( - context.copy(), {'id': data_dict.get('id')}) - pkg_dict = toolkit.get_action('package_show')( - context.copy(), {'id': res_dict['package_id']}) - if pkg_dict['state'] == 'draft': - toolkit.get_action('package_patch')( + res_dict = toolkit.get_action("resource_show")( + context.copy(), {"id": data_dict.get("id")} + ) + pkg_dict = toolkit.get_action("package_show")( + context.copy(), {"id": res_dict["package_id"]} + ) + if pkg_dict["state"] == "draft": + toolkit.get_action("package_patch")( dict(context.copy(), allow_state_change=True), - dict(id=pkg_dict['id'], state='active') + dict(id=pkg_dict["id"], state="active"), ) except Exception as e: log.error(e) - return {'commited': True} + return {"commited": True} def abort_multipart(context, data_dict): - h.check_access('cloudstorage_abort_multipart', data_dict) - id = toolkit.get_or_bust(data_dict, ['id']) + h.check_access("cloudstorage_abort_multipart", data_dict) + id = toolkit.get_or_bust(data_dict, ["id"]) uploader = ResourceCloudStorage({}) resource_uploads = MultipartUpload.resource_uploads(id) @@ -252,7 +259,7 @@ def clean_multipart(context, data_dict): """ - h.check_access('cloudstorage_clean_multipart', data_dict) + h.check_access("cloudstorage_clean_multipart", data_dict) uploader = ResourceCloudStorage({}) delta = _get_max_multipart_lifetime() oldest_allowed = datetime.datetime.utcnow() - delta @@ -261,18 +268,14 @@ def clean_multipart(context, 
data_dict): MultipartUpload.initiated < oldest_allowed ) - result = { - 'removed': 0, - 'total': uploads_to_remove.count(), - 'errors': [] - } + result = {"removed": 0, "total": uploads_to_remove.count(), "errors": []} for upload in uploads_to_remove: try: _delete_multipart(upload, uploader) except toolkit.ValidationError as e: - result['errors'].append(e.error_summary) + result["errors"].append(e.error_summary) else: - result['removed'] += 1 + result["removed"] += 1 return result diff --git a/ckanext/cloudstorage/logic/auth/multipart.py b/ckanext/cloudstorage/logic/auth/multipart.py index 4c0c3ce..9534362 100644 --- a/ckanext/cloudstorage/logic/auth/multipart.py +++ b/ckanext/cloudstorage/logic/auth/multipart.py @@ -4,24 +4,24 @@ def initiate_multipart(context, data_dict): - return {'success': check_access('resource_create', context, data_dict)} + return {"success": check_access("resource_create", context, data_dict)} def upload_multipart(context, data_dict): - return {'success': check_access('resource_create', context, data_dict)} + return {"success": check_access("resource_create", context, data_dict)} def finish_multipart(context, data_dict): - return {'success': check_access('resource_create', context, data_dict)} + return {"success": check_access("resource_create", context, data_dict)} def abort_multipart(context, data_dict): - return {'success': check_access('resource_create', context, data_dict)} + return {"success": check_access("resource_create", context, data_dict)} def check_multipart(context, data_dict): - return {'success': check_access('resource_create', context, data_dict)} + return {"success": check_access("resource_create", context, data_dict)} def clean_multipart(context, data_dict): - return {'success': False} + return {"success": False} diff --git a/ckanext/cloudstorage/model.py b/ckanext/cloudstorage/model.py index 8a33f54..cf541ad 100644 --- a/ckanext/cloudstorage/model.py +++ b/ckanext/cloudstorage/model.py @@ -9,7 +9,7 @@ DateTime, ForeignKey, Integer, - Numeric + Numeric, ) from datetime import datetime import ckan.model.meta as meta @@ -28,7 +28,7 @@ def create_tables(): class MultipartPart(Base, DomainObject): - __tablename__ = 'cloudstorage_multipart_part' + __tablename__ = "cloudstorage_multipart_part" def __init__(self, n, etag, upload): self.n = n @@ -38,17 +38,19 @@ def __init__(self, n, etag, upload): n = Column(Integer, primary_key=True) etag = Column(UnicodeText, primary_key=True) upload_id = Column( - UnicodeText, ForeignKey('cloudstorage_multipart_upload.id'), - primary_key=True + UnicodeText, + ForeignKey("cloudstorage_multipart_upload.id"), + primary_key=True, ) upload = relationship( - 'MultipartUpload', - backref=backref('parts', cascade='delete, delete-orphan'), - single_parent=True) + "MultipartUpload", + backref=backref("parts", cascade="delete, delete-orphan"), + single_parent=True, + ) class MultipartUpload(Base, DomainObject): - __tablename__ = 'cloudstorage_multipart_upload' + __tablename__ = "cloudstorage_multipart_upload" def __init__(self, id, resource_id, name, size, original_name, user_id): self.id = id @@ -60,9 +62,7 @@ def __init__(self, id, resource_id, name, size, original_name, user_id): @classmethod def resource_uploads(cls, resource_id): - query = meta.Session.query(cls).filter_by( - resource_id=resource_id - ) + query = meta.Session.query(cls).filter_by(resource_id=resource_id) return query id = Column(UnicodeText, primary_key=True) diff --git a/ckanext/cloudstorage/plugin/__init__.py b/ckanext/cloudstorage/plugin/__init__.py 
index c0c736b..f6b2522 100644 --- a/ckanext/cloudstorage/plugin/__init__.py +++ b/ckanext/cloudstorage/plugin/__init__.py @@ -8,10 +8,12 @@ if plugins.toolkit.check_ckan_version("2.9"): from ckanext.cloudstorage.plugin.flask_plugin import MixinPlugin + # from ckanext.cloudstorage.plugin.pylons_plugin import MixinPlugin else: from ckanext.cloudstorage.plugin.pylons_plugin import MixinPlugin + class CloudStoragePlugin(MixinPlugin, plugins.SingletonPlugin): plugins.implements(plugins.IUploader) plugins.implements(plugins.IConfigurable) @@ -24,30 +26,26 @@ class CloudStoragePlugin(MixinPlugin, plugins.SingletonPlugin): # IConfigurer def update_config(self, config): - plugins.toolkit.add_template_directory(config, '../templates') - plugins.toolkit.add_resource('../fanstatic/scripts', 'cloudstorage-js') + plugins.toolkit.add_template_directory(config, "../templates") + plugins.toolkit.add_resource("../fanstatic/scripts", "cloudstorage-js") # ITemplateHelpers def get_helpers(self): - return dict( - cloudstorage_use_secure_urls=helpers.use_secure_urls - ) + return dict(cloudstorage_use_secure_urls=helpers.use_secure_urls) def configure(self, config): required_keys = ( - 'ckanext.cloudstorage.driver', - 'ckanext.cloudstorage.driver_options', - 'ckanext.cloudstorage.container_name' + "ckanext.cloudstorage.driver", + "ckanext.cloudstorage.driver_options", + "ckanext.cloudstorage.container_name", ) for rk in required_keys: if config.get(rk) is None: raise RuntimeError( - 'Required configuration option {0} not found.'.format( - rk - ) + "Required configuration option {0} not found.".format(rk) ) def get_resource_uploader(self, data_dict): @@ -63,24 +61,24 @@ def get_uploader(self, upload_to, old_filename=None): def get_actions(self): return { - 'cloudstorage_initiate_multipart': m_action.initiate_multipart, - 'cloudstorage_upload_multipart': m_action.upload_multipart, - 'cloudstorage_finish_multipart': m_action.finish_multipart, - 'cloudstorage_abort_multipart': m_action.abort_multipart, - 'cloudstorage_check_multipart': m_action.check_multipart, - 'cloudstorage_clean_multipart': m_action.clean_multipart, + "cloudstorage_initiate_multipart": m_action.initiate_multipart, + "cloudstorage_upload_multipart": m_action.upload_multipart, + "cloudstorage_finish_multipart": m_action.finish_multipart, + "cloudstorage_abort_multipart": m_action.abort_multipart, + "cloudstorage_check_multipart": m_action.check_multipart, + "cloudstorage_clean_multipart": m_action.clean_multipart, } # IAuthFunctions def get_auth_functions(self): return { - 'cloudstorage_initiate_multipart': m_auth.initiate_multipart, - 'cloudstorage_upload_multipart': m_auth.upload_multipart, - 'cloudstorage_finish_multipart': m_auth.finish_multipart, - 'cloudstorage_abort_multipart': m_auth.abort_multipart, - 'cloudstorage_check_multipart': m_auth.check_multipart, - 'cloudstorage_clean_multipart': m_auth.clean_multipart, + "cloudstorage_initiate_multipart": m_auth.initiate_multipart, + "cloudstorage_upload_multipart": m_auth.upload_multipart, + "cloudstorage_finish_multipart": m_auth.finish_multipart, + "cloudstorage_abort_multipart": m_auth.abort_multipart, + "cloudstorage_check_multipart": m_auth.check_multipart, + "cloudstorage_clean_multipart": m_auth.clean_multipart, } # IResourceController @@ -91,36 +89,33 @@ def before_delete(self, context, resource, resources): # `else` clause for res in resources: - if res['id'] == resource['id']: + if res["id"] == resource["id"]: break else: return # just ignore simple links - if res['url_type'] != 
'upload': + if res["url_type"] != "upload": return # we don't want to change original item from resources, just in case # someone will use it in another `before_delete`. So, let's copy it # and add `clear_upload` flag - res_dict = dict(res.items() + [('clear_upload', True)]) + res_dict = dict(list(res.items()) + [("clear_upload", True)]) uploader = self.get_resource_uploader(res_dict) # to be on the safe side, let's check existence of container - container = getattr(uploader, 'container', None) + container = getattr(uploader, "container", None) if container is None: return # and now uploader removes our file. - uploader.upload(resource['id']) + uploader.upload(resource["id"]) # and all other files linked to this resource if not uploader.leave_files: upload_path = os.path.dirname( - uploader.path_from_filename( - resource['id'], - 'fake-name' - ) + uploader.path_from_filename(resource["id"], "fake-name") ) for old_file in uploader.container.iterate_objects(): diff --git a/ckanext/cloudstorage/plugin/flask_plugin.py b/ckanext/cloudstorage/plugin/flask_plugin.py index 2d2e64c..7514151 100644 --- a/ckanext/cloudstorage/plugin/flask_plugin.py +++ b/ckanext/cloudstorage/plugin/flask_plugin.py @@ -5,6 +5,7 @@ from ckanext.cloudstorage.views import get_blueprints from ckanext.cloudstorage.cli import get_commands + class MixinPlugin(p.SingletonPlugin): p.implements(p.IBlueprint) p.implements(p.IClick) diff --git a/ckanext/cloudstorage/plugin/pylons_plugin.py b/ckanext/cloudstorage/plugin/pylons_plugin.py index d8003d9..93b4e17 100644 --- a/ckanext/cloudstorage/plugin/pylons_plugin.py +++ b/ckanext/cloudstorage/plugin/pylons_plugin.py @@ -4,30 +4,29 @@ import ckan.plugins as p + class MixinPlugin(p.SingletonPlugin): p.implements(p.IRoutes, inherit=True) # IRoutes - def before_map(self, map): sm = SubMapper( - map, - controller='ckanext.cloudstorage.controller:StorageController' + map, controller="ckanext.cloudstorage.controller:StorageController" ) # Override the resource download controllers so we can do our # lookup with libcloud. 
with sm: sm.connect( - 'resource_download', - '/dataset/{id}/resource/{resource_id}/download', - action='resource_download' + "resource_download", + "/dataset/{id}/resource/{resource_id}/download", + action="resource_download", ) sm.connect( - 'resource_download', - '/dataset/{id}/resource/{resource_id}/download/{filename}', - action='resource_download' + "resource_download", + "/dataset/{id}/resource/{resource_id}/download/{filename}", + action="resource_download", ) return map diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 48704d6..e8cf308 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -1,9 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from future import standard_library +standard_library.install_aliases() +from builtins import object import cgi import mimetypes import os.path -import urlparse +import urllib.parse from ast import literal_eval from datetime import datetime, timedelta @@ -16,20 +19,19 @@ if p.toolkit.check_ckan_version("2.9"): from werkzeug.datastructures import FileStorage as UploadedFileType + config = p.toolkit.config else: from pylons import config + UploadedFileType = cgi.FieldStorage class CloudStorage(object): def __init__(self): - self.driver = get_driver( - getattr( - Provider, - self.driver_name - ) - )(**self.driver_options) + self.driver = get_driver(getattr(Provider, self.driver_name))( + **self.driver_options + ) self._container = None def path_from_filename(self, rid, filename): @@ -53,7 +55,7 @@ def driver_options(self): A dictionary of options ckanext-cloudstorage has been configured to pass to the apache-libcloud driver. """ - return literal_eval(config['ckanext.cloudstorage.driver_options']) + return literal_eval(config["ckanext.cloudstorage.driver_options"]) @property def driver_name(self): @@ -67,7 +69,7 @@ def driver_name(self): This value is used to lookup the apache-libcloud driver to use based on the Provider enum. """ - return config['ckanext.cloudstorage.driver'] + return config["ckanext.cloudstorage.driver"] @property def container_name(self): @@ -75,7 +77,7 @@ def container_name(self): The name of the container (also called buckets on some providers) ckanext-cloudstorage is configured to use. """ - return config['ckanext.cloudstorage.container_name'] + return config["ckanext.cloudstorage.container_name"] @property def use_secure_urls(self): @@ -84,7 +86,7 @@ def use_secure_urls(self): one-time URLs to resources, `False` otherwise. """ return p.toolkit.asbool( - config.get('ckanext.cloudstorage.use_secure_urls', False) + config.get("ckanext.cloudstorage.use_secure_urls", False) ) @property @@ -95,7 +97,7 @@ def leave_files(self): otherwise `False`. """ return p.toolkit.asbool( - config.get('ckanext.cloudstorage.leave_files', False) + config.get("ckanext.cloudstorage.leave_files", False) ) @property @@ -106,10 +108,11 @@ def can_use_advanced_azure(self): `False`. """ # Are we even using Azure? - if self.driver_name == 'AZURE_BLOBS': + if self.driver_name == "AZURE_BLOBS": try: # Yes? Is the azure-storage package available? from azure import storage + # Shut the linter up. assert storage return True @@ -125,10 +128,11 @@ def can_use_advanced_aws(self): been configured to use Amazon S3, otherwise `False`. """ # Are we even using AWS? - if 'S3' in self.driver_name: + if "S3" in self.driver_name: try: # Yes? Is the boto package available? import boto + # Shut the linter up. assert boto return True @@ -144,7 +148,7 @@ def guess_mimetype(self): `False` otherwise. 
""" return p.toolkit.asbool( - config.get('ckanext.cloudstorage.guess_mimetype', False) + config.get("ckanext.cloudstorage.guess_mimetype", False) ) @@ -163,9 +167,9 @@ def __init__(self, resource): self.file = None self.resource = resource - upload_field_storage = resource.pop('upload', None) - self._clear = resource.pop('clear_upload', None) - multipart_name = resource.pop('multipart_name', None) + upload_field_storage = resource.pop("upload", None) + self._clear = resource.pop("clear_upload", None) + multipart_name = resource.pop("multipart_name", None) # Check to see if a file has been provided if isinstance(upload_field_storage, UploadedFileType): @@ -174,26 +178,24 @@ def __init__(self, resource): self.file_upload = upload_field_storage.stream else: self.file_upload = upload_field_storage.file - resource['url'] = self.filename - resource['url_type'] = 'upload' + resource["url"] = self.filename + resource["url_type"] = "upload" elif multipart_name and self.can_use_advanced_aws: # This means that file was successfully uploaded and stored # at cloud. # Currently implemented just AWS version - resource['url'] = munge.munge_filename(multipart_name) - resource['url_type'] = 'upload' - elif self._clear and resource.get('id'): + resource["url"] = munge.munge_filename(multipart_name) + resource["url_type"] = "upload" + elif self._clear and resource.get("id"): # Apparently, this is a created-but-not-commited resource whose # file upload has been canceled. We're copying the behaviour of # ckaenxt-s3filestore here. - old_resource = model.Session.query( - model.Resource - ).get( - resource['id'] + old_resource = model.Session.query(model.Resource).get( + resource["id"] ) self.old_filename = old_resource.url - resource['url_type'] = '' + resource["url_type"] = "" def path_from_filename(self, rid, filename): """ @@ -202,11 +204,7 @@ def path_from_filename(self, rid, filename): :param rid: The resource ID. :param filename: The unmunged resource filename. 
""" - return os.path.join( - 'resources', - rid, - munge.munge_filename(filename) - ) + return os.path.join("resources", rid, munge.munge_filename(filename)) def upload(self, id, max_size=10): """ @@ -221,8 +219,7 @@ def upload(self, id, max_size=10): from azure.storage.blob.models import ContentSettings blob_service = azure_blob.BlockBlobService( - self.driver_options['key'], - self.driver_options['secret'] + self.driver_options["key"], self.driver_options["secret"] ) content_settings = None if self.guess_mimetype: @@ -233,20 +230,14 @@ def upload(self, id, max_size=10): ) return blob_service.create_blob_from_stream( container_name=self.container_name, - blob_name=self.path_from_filename( - id, - self.filename - ), + blob_name=self.path_from_filename(id, self.filename), stream=self.file_upload, - content_settings=content_settings + content_settings=content_settings, ) else: self.container.upload_object_via_stream( self.file_upload, - object_name=self.path_from_filename( - id, - self.filename - ) + object_name=self.path_from_filename(id, self.filename), ) elif self._clear and self.old_filename and not self.leave_files: @@ -255,10 +246,7 @@ def upload(self, id, max_size=10): try: self.container.delete_object( self.container.get_object( - self.path_from_filename( - id, - self.old_filename - ) + self.path_from_filename(id, self.old_filename) ) ) except ObjectDoesNotExistError: @@ -292,8 +280,7 @@ def get_url_from_filename(self, rid, filename, content_type=None): from azure.storage import blob as azure_blob blob_service = azure_blob.BlockBlobService( - self.driver_options['key'], - self.driver_options['secret'] + self.driver_options["key"], self.driver_options["secret"] ) return blob_service.make_blob_url( @@ -303,23 +290,25 @@ def get_url_from_filename(self, rid, filename, content_type=None): container_name=self.container_name, blob_name=path, expiry=datetime.utcnow() + timedelta(hours=1), - permission=azure_blob.BlobPermissions.READ - ) + permission=azure_blob.BlobPermissions.READ, + ), ) elif self.can_use_advanced_aws and self.use_secure_urls: from boto.s3.connection import S3Connection + s3_connection = S3Connection( - self.driver_options['key'], - self.driver_options['secret'] + self.driver_options["key"], self.driver_options["secret"] ) - generate_url_params = {"expires_in": 60 * 60, - "method": "GET", - "bucket": self.container_name, - "query_auth": True, - "key": path} + generate_url_params = { + "expires_in": 60 * 60, + "method": "GET", + "bucket": self.container_name, + "query_auth": True, + "key": path, + } if content_type: - generate_url_params['headers'] = {"Content-Type": content_type} + generate_url_params["headers"] = {"Content-Type": content_type} return s3_connection.generate_url(**generate_url_params) @@ -332,20 +321,19 @@ def get_url_from_filename(self, rid, filename, content_type=None): try: return self.driver.get_object_cdn_url(obj) except NotImplementedError: - if 'S3' in self.driver_name: - return urlparse.urljoin( - 'https://' + self.driver.connection.host, - '{container}/{path}'.format( - container=self.container_name, - path=path - ) + if "S3" in self.driver_name: + return urllib.parse.urljoin( + "https://" + self.driver.connection.host, + "{container}/{path}".format( + container=self.container_name, path=path + ), ) # This extra 'url' property isn't documented anywhere, sadly. # See azure_blobs.py:_xml_to_object for more. 
- elif 'url' in obj.extra: - return obj.extra['url'] + elif "url" in obj.extra: + return obj.extra["url"] raise @property def package(self): - return model.Package.get(self.resource['package_id']) + return model.Package.get(self.resource["package_id"]) diff --git a/ckanext/cloudstorage/utils.py b/ckanext/cloudstorage/utils.py index 2e4e065..2829d90 100644 --- a/ckanext/cloudstorage/utils.py +++ b/ckanext/cloudstorage/utils.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import print_function import os.path @@ -11,8 +12,8 @@ from ckan.logic import NotFound from ckanapi import LocalCKAN -from ckanext.cloudstorage.model import (create_tables, drop_tables) -from ckanext.cloudstorage.storage import (CloudStorage, ResourceCloudStorage) +from ckanext.cloudstorage.model import create_tables, drop_tables +from ckanext.cloudstorage.storage import CloudStorage, ResourceCloudStorage class FakeFileStorage(cgi.FieldStorage): @@ -33,21 +34,28 @@ def fix_cors(domains): from azure.storage import blob as azure_blob from azure.storage import CorsRule - blob_service = azure_blob.BlockBlobService(cs.driver_options['key'], - cs.driver_options['secret']) + blob_service = azure_blob.BlockBlobService( + cs.driver_options["key"], cs.driver_options["secret"] + ) blob_service.set_blob_service_properties( - cors=[CorsRule(allowed_origins=domains, allowed_methods=['GET'])]) - return 'Done!', True + cors=[CorsRule(allowed_origins=domains, allowed_methods=["GET"])] + ) + return "Done!", True else: - return ('The driver {driver_name} being used does not currently' - ' support updating CORS rules through' - ' cloudstorage.'.format(driver_name=cs.driver_name)), False + return ( + ( + "The driver {driver_name} being used does not currently" + " support updating CORS rules through" + " cloudstorage.".format(driver_name=cs.driver_name) + ), + False, + ) def migrate(path, single_id): if not os.path.isdir(path): - print('The storage directory cannot be found.') + print("The storage directory cannot be found.") return lc = LocalCKAN() @@ -70,7 +78,7 @@ def migrate(path, single_id): if not files: continue - split_root = root.split('/') + split_root = root.split("/") resource_id = split_root[-2] + split_root[-1] for file_ in files: @@ -80,67 +88,78 @@ def migrate(path, single_id): resources[ckan_res_id] = os.path.join(root, file_) - for i, resource in enumerate(resources.iteritems(), 1): + for i, resource in enumerate(iter(resources.items()), 1): resource_id, file_path = resource - print('[{i}/{count}] Working on {id}'.format(i=i, - count=len(resources), - id=resource_id)) + print( + "[{i}/{count}] Working on {id}".format( + i=i, count=len(resources), id=resource_id + ) + ) try: resource = lc.action.resource_show(id=resource_id) except NotFound: - print(u'\tResource not found') + print(u"\tResource not found") continue - if resource['url_type'] != 'upload': - print(u'\t`url_type` is not `upload`. Skip') + if resource["url_type"] != "upload": + print(u"\t`url_type` is not `upload`. 
Skip") continue - with open(file_path, 'rb') as fin: - resource['upload'] = FakeFileStorage( - fin, resource['url'].split('/')[-1]) + with open(file_path, "rb") as fin: + resource["upload"] = FakeFileStorage( + fin, resource["url"].split("/")[-1] + ) try: uploader = ResourceCloudStorage(resource) - uploader.upload(resource['id']) + uploader.upload(resource["id"]) except Exception as e: failed.append(resource_id) - print(u'\tError of type {0} during upload: {1}'.format( - type(e), e)) + print( + u"\tError of type {0} during upload: {1}".format( + type(e), e + ) + ) if failed: log_file = tempfile.NamedTemporaryFile(delete=False) log_file.file.writelines(failed) - print(u'ID of all failed uploads are saved to `{0}`'.format( - log_file.name)) + print( + u"ID of all failed uploads are saved to `{0}`".format( + log_file.name + ) + ) def resource_download(id, resource_id, filename=None): context = { - 'model': model, - 'session': model.Session, - 'user': tk.c.user or tk.c.author, - 'auth_user_obj': tk.c.userobj + "model": model, + "session": model.Session, + "user": tk.c.user or tk.c.author, + "auth_user_obj": tk.c.userobj, } try: - resource = logic.get_action('resource_show')(context, { - 'id': resource_id - }) + resource = logic.get_action("resource_show")( + context, {"id": resource_id} + ) except logic.NotFound: - return base.abort(404, tk._('Resource not found')) + return base.abort(404, tk._("Resource not found")) except logic.NotAuthorized: - return base.abort(401, tk._('Unauthorized to read resource {0}'.format(id))) + return base.abort( + 401, tk._("Unauthorized to read resource {0}".format(id)) + ) # This isn't a file upload, so either redirect to the source # (if available) or error out. - if resource.get('url_type') != 'upload': - url = resource.get('url') + if resource.get("url_type") != "upload": + url = resource.get("url") if not url: - return base.abort(404, tk._('No download is available')) + return base.abort(404, tk._("No download is available")) return h.redirect_to(url) if filename is None: # No filename was provided so we'll try to get one from the url. - filename = os.path.basename(resource['url']) + filename = os.path.basename(resource["url"]) upload = uploader.get_resource_uploader(resource) @@ -150,13 +169,13 @@ def resource_download(id, resource_id, filename=None): content_type = getattr(tk.request, "content_type", None) except AttributeError: content_type = None - uploaded_url = upload.get_url_from_filename(resource['id'], - filename, - content_type=content_type) + uploaded_url = upload.get_url_from_filename( + resource["id"], filename, content_type=content_type + ) # The uploaded file is missing for some reason, such as the # provider being down. 
if uploaded_url is None: - return base.abort(404, tk._('No download is available')) + return base.abort(404, tk._("No download is available")) return h.redirect_to(uploaded_url) diff --git a/ckanext/cloudstorage/views.py b/ckanext/cloudstorage/views.py index f5bf688..be9c288 100644 --- a/ckanext/cloudstorage/views.py +++ b/ckanext/cloudstorage/views.py @@ -4,15 +4,16 @@ import ckan.views.resource as resource import ckanext.cloudstorage.utils as utils -cloudstorage = Blueprint('cloudstorage', __name__) +cloudstorage = Blueprint("cloudstorage", __name__) def download(id, resource_id, filename=None, package_type="dataset"): return utils.resource_download(id, resource_id, filename) -cloudstorage.add_url_rule("/dataset//resource//download", - view_func=download) +cloudstorage.add_url_rule( + "/dataset//resource//download", view_func=download +) cloudstorage.add_url_rule( "/dataset//resource//download/", view_func=download, From 7808057a174729542e1919587852c05926e227db Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Thu, 19 Dec 2019 15:21:28 +0200 Subject: [PATCH 03/44] Revert "futurize" This reverts commit c33e9a7bfb68a2b68c180b80216b8957135f7a63. --- ckanext/__init__.py | 2 - ckanext/cloudstorage/cli.py | 14 +- ckanext/cloudstorage/commands.py | 16 +- ckanext/cloudstorage/controller.py | 1 - ckanext/cloudstorage/helpers.py | 12 +- .../cloudstorage/logic/action/multipart.py | 143 +++++++++--------- ckanext/cloudstorage/logic/auth/multipart.py | 12 +- ckanext/cloudstorage/model.py | 22 +-- ckanext/cloudstorage/plugin/__init__.py | 59 ++++---- ckanext/cloudstorage/plugin/flask_plugin.py | 1 - ckanext/cloudstorage/plugin/pylons_plugin.py | 17 ++- ckanext/cloudstorage/storage.py | 128 +++++++++------- ckanext/cloudstorage/utils.py | 105 ++++++------- ckanext/cloudstorage/views.py | 7 +- 14 files changed, 263 insertions(+), 276 deletions(-) diff --git a/ckanext/__init__.py b/ckanext/__init__.py index 6d83202..2e2033b 100644 --- a/ckanext/__init__.py +++ b/ckanext/__init__.py @@ -1,9 +1,7 @@ # this is a namespace package try: import pkg_resources - pkg_resources.declare_namespace(__name__) except ImportError: import pkgutil - __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/ckanext/cloudstorage/cli.py b/ckanext/cloudstorage/cli.py index 4b8db37..3ebce02 100644 --- a/ckanext/cloudstorage/cli.py +++ b/ckanext/cloudstorage/cli.py @@ -4,7 +4,6 @@ import click import ckanext.cloudstorage.utils as utils - @click.group() def cloudstorage(): """CloudStorage management commands. @@ -12,24 +11,22 @@ def cloudstorage(): pass -@cloudstorage.command("fix-cors") -@click.argument("domains", nargs=-1) +@cloudstorage.command('fix-cors') +@click.argument('domains', nargs=-1) def fix_cors(domains): """Update CORS rules where possible. """ msg, ok = utils.fix_cors(domains) - click.secho(msg, fg="green" if ok else "red") - + click.secho(msg, fg='green' if ok else 'red') @cloudstorage.command() -@click.argument("path") -@click.argument("resource", required=False) +@click.argument('path') +@click.argument('resource', required=False) def migrate(path, resource): """Upload local storage to the remote. """ utils.migrate(path, resource) - @cloudstorage.command() def initdb(): """Reinitalize database tables. 
@@ -38,5 +35,6 @@ def initdb(): click.secho("DB tables are reinitialized", fg="green") + def get_commands(): return [cloudstorage] diff --git a/ckanext/cloudstorage/commands.py b/ckanext/cloudstorage/commands.py index c8ab369..dea598f 100644 --- a/ckanext/cloudstorage/commands.py +++ b/ckanext/cloudstorage/commands.py @@ -1,7 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import print_function from ckan.lib.cli import CkanCommand from docopt import docopt @@ -25,30 +24,31 @@ """ + class PasterCommand(CkanCommand): - summary = "ckanext-cloudstorage maintence utilities." + summary = 'ckanext-cloudstorage maintence utilities.' usage = USAGE def command(self): self._load_config() args = docopt(USAGE, argv=self.args) - if args["fix-cors"]: + if args['fix-cors']: _fix_cors(args) - elif args["migrate"]: + elif args['migrate']: _migrate(args) - elif args["initdb"]: + elif args['initdb']: _initdb() def _migrate(args): - path = args[""] - single_id = args[""] + path = args[''] + single_id = args[''] utils.migrate(path, single_id) def _fix_cors(args): - msg, _ = utils.fix_cors(args[""]) + msg, _ = utils.fix_cors(args['']) print(msg) diff --git a/ckanext/cloudstorage/controller.py b/ckanext/cloudstorage/controller.py index 5105d46..162489e 100644 --- a/ckanext/cloudstorage/controller.py +++ b/ckanext/cloudstorage/controller.py @@ -10,7 +10,6 @@ import ckan.lib.helpers as h import ckanext.cloudstorage.utils as utils - class StorageController(base.BaseController): def resource_download(self, id, resource_id, filename=None): return utils.resource_download(id, resource_id, filename) diff --git a/ckanext/cloudstorage/helpers.py b/ckanext/cloudstorage/helpers.py index 526f060..1a7ca99 100644 --- a/ckanext/cloudstorage/helpers.py +++ b/ckanext/cloudstorage/helpers.py @@ -4,10 +4,8 @@ def use_secure_urls(): - return all( - [ - ResourceCloudStorage.use_secure_urls.fget(None), - # Currently implemented just AWS version - "S3" in ResourceCloudStorage.driver_name.fget(None), - ] - ) + return all([ + ResourceCloudStorage.use_secure_urls.fget(None), + # Currently implemented just AWS version + 'S3' in ResourceCloudStorage.driver_name.fget(None) + ]) diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py index 94f0b61..ae67666 100644 --- a/ckanext/cloudstorage/logic/action/multipart.py +++ b/ckanext/cloudstorage/logic/action/multipart.py @@ -21,18 +21,18 @@ def _get_max_multipart_lifetime(): - value = float(config.get("ckanext.cloudstorage.max_multipart_lifetime", 7)) + value = float(config.get('ckanext.cloudstorage.max_multipart_lifetime', 7)) return datetime.timedelta(value) def _get_object_url(uploader, name): - return "/" + uploader.container_name + "/" + name + return '/' + uploader.container_name + '/' + name def _delete_multipart(upload, uploader): resp = uploader.driver.connection.request( - _get_object_url(uploader, upload.name) + "?uploadId=" + upload.id, - method="DELETE", + _get_object_url(uploader, upload.name) + '?uploadId=' + upload.id, + method='DELETE' ) if not resp.success(): raise toolkit.ValidationError(resp.error) @@ -44,11 +44,9 @@ def _delete_multipart(upload, uploader): def _save_part_info(n, etag, upload): try: - part = ( - model.Session.query(MultipartPart) - .filter(MultipartPart.n == n, MultipartPart.upload == upload) - .one() - ) + part = model.Session.query(MultipartPart).filter( + MultipartPart.n == n, + MultipartPart.upload == upload).one() except NoResultFound: part = MultipartPart(n, etag, upload) else: @@ 
-67,23 +65,17 @@ def check_multipart(context, data_dict): """ - h.check_access("cloudstorage_check_multipart", data_dict) - id = toolkit.get_or_bust(data_dict, "id") + h.check_access('cloudstorage_check_multipart', data_dict) + id = toolkit.get_or_bust(data_dict, 'id') try: - upload = ( - model.Session.query(MultipartUpload) - .filter_by(resource_id=id) - .one() - ) + upload = model.Session.query(MultipartUpload).filter_by( + resource_id=id).one() except NoResultFound: return upload_dict = upload.as_dict() - upload_dict["parts"] = ( - model.Session.query(MultipartPart) - .filter(MultipartPart.upload == upload) - .count() - ) - return {"upload": upload_dict} + upload_dict['parts'] = model.Session.query(MultipartPart).filter( + MultipartPart.upload == upload).count() + return {'upload': upload_dict} def initiate_multipart(context, data_dict): @@ -100,13 +92,13 @@ def initiate_multipart(context, data_dict): """ - h.check_access("cloudstorage_initiate_multipart", data_dict) - id, name, size = toolkit.get_or_bust(data_dict, ["id", "name", "size"]) + h.check_access('cloudstorage_initiate_multipart', data_dict) + id, name, size = toolkit.get_or_bust(data_dict, ['id', 'name', 'size']) user_id = None - if context["auth_user_obj"]: - user_id = context["auth_user_obj"].id + if context['auth_user_obj']: + user_id = context['auth_user_obj'].id - uploader = ResourceCloudStorage({"multipart_name": name}) + uploader = ResourceCloudStorage({'multipart_name': name}) res_name = uploader.path_from_filename(id, name) upload_object = MultipartUpload.by_name(res_name) @@ -117,61 +109,64 @@ def initiate_multipart(context, data_dict): if upload_object is None: for old_upload in model.Session.query(MultipartUpload).filter_by( - resource_id=id - ): + resource_id=id): _delete_multipart(old_upload, uploader) - _rindex = res_name.rfind("/") + _rindex = res_name.rfind('/') if ~_rindex: try: name_prefix = res_name[:_rindex] for cloud_object in uploader.container.iterate_objects(): if cloud_object.name.startswith(name_prefix): - log.info("Removing cloud object: %s" % cloud_object) + log.info('Removing cloud object: %s' % cloud_object) cloud_object.delete() except Exception as e: - log.exception("[delete from cloud] %s" % e) + log.exception('[delete from cloud] %s' % e) resp = uploader.driver.connection.request( - _get_object_url(uploader, res_name) + "?uploads", method="POST" + _get_object_url(uploader, res_name) + '?uploads', + method='POST' ) if not resp.success(): raise toolkit.ValidationError(resp.error) try: upload_id = resp.object.find( - "{%s}UploadId" % resp.object.nsmap[None] - ).text + '{%s}UploadId' % resp.object.nsmap[None]).text except AttributeError: - upload_id_list = [e for e in resp.object.getchildren() if e.tag.endswith("UploadId")] + upload_id_list = filter( + lambda e: e.tag.endswith('UploadId'), + resp.object.getchildren() + ) upload_id = upload_id_list[0].text - upload_object = MultipartUpload( - upload_id, id, res_name, size, name, user_id - ) + upload_object = MultipartUpload(upload_id, id, res_name, size, name, user_id) upload_object.save() return upload_object.as_dict() def upload_multipart(context, data_dict): - h.check_access("cloudstorage_upload_multipart", data_dict) + h.check_access('cloudstorage_upload_multipart', data_dict) upload_id, part_number, part_content = toolkit.get_or_bust( - data_dict, ["uploadId", "partNumber", "upload"] - ) + data_dict, ['uploadId', 'partNumber', 'upload']) uploader = ResourceCloudStorage({}) upload = model.Session.query(MultipartUpload).get(upload_id) resp = 
uploader.driver.connection.request( - _get_object_url(uploader, upload.name) - + "?partNumber={0}&uploadId={1}".format(part_number, upload_id), - method="PUT", - data=bytearray(part_content.file.read()), + _get_object_url( + uploader, upload.name) + '?partNumber={0}&uploadId={1}'.format( + part_number, upload_id), + method='PUT', + data=bytearray(part_content.file.read()) ) if resp.status != 200: - raise toolkit.ValidationError("Upload failed: part %s" % part_number) + raise toolkit.ValidationError('Upload failed: part %s' % part_number) - _save_part_info(part_number, resp.headers["etag"], upload) - return {"partNumber": part_number, "ETag": resp.headers["etag"]} + _save_part_info(part_number, resp.headers['etag'], upload) + return { + 'partNumber': part_number, + 'ETag': resp.headers['etag'] + } def finish_multipart(context, data_dict): @@ -187,15 +182,14 @@ def finish_multipart(context, data_dict): """ - h.check_access("cloudstorage_finish_multipart", data_dict) - upload_id = toolkit.get_or_bust(data_dict, "uploadId") - save_action = data_dict.get("save_action", False) + h.check_access('cloudstorage_finish_multipart', data_dict) + upload_id = toolkit.get_or_bust(data_dict, 'uploadId') + save_action = data_dict.get('save_action', False) upload = model.Session.query(MultipartUpload).get(upload_id) chunks = [ (part.n, part.etag) - for part in model.Session.query(MultipartPart) - .filter_by(upload_id=upload_id) - .order_by(MultipartPart.n) + for part in model.Session.query(MultipartPart).filter_by( + upload_id=upload_id).order_by(MultipartPart.n) ] uploader = ResourceCloudStorage({}) try: @@ -204,32 +198,31 @@ def finish_multipart(context, data_dict): except Exception: pass uploader.driver._commit_multipart( - _get_object_url(uploader, upload.name), upload_id, chunks - ) + _get_object_url(uploader, upload.name), + upload_id, + chunks) upload.delete() upload.commit() if save_action and save_action == "go-metadata": try: - res_dict = toolkit.get_action("resource_show")( - context.copy(), {"id": data_dict.get("id")} - ) - pkg_dict = toolkit.get_action("package_show")( - context.copy(), {"id": res_dict["package_id"]} - ) - if pkg_dict["state"] == "draft": - toolkit.get_action("package_patch")( + res_dict = toolkit.get_action('resource_show')( + context.copy(), {'id': data_dict.get('id')}) + pkg_dict = toolkit.get_action('package_show')( + context.copy(), {'id': res_dict['package_id']}) + if pkg_dict['state'] == 'draft': + toolkit.get_action('package_patch')( dict(context.copy(), allow_state_change=True), - dict(id=pkg_dict["id"], state="active"), + dict(id=pkg_dict['id'], state='active') ) except Exception as e: log.error(e) - return {"commited": True} + return {'commited': True} def abort_multipart(context, data_dict): - h.check_access("cloudstorage_abort_multipart", data_dict) - id = toolkit.get_or_bust(data_dict, ["id"]) + h.check_access('cloudstorage_abort_multipart', data_dict) + id = toolkit.get_or_bust(data_dict, ['id']) uploader = ResourceCloudStorage({}) resource_uploads = MultipartUpload.resource_uploads(id) @@ -259,7 +252,7 @@ def clean_multipart(context, data_dict): """ - h.check_access("cloudstorage_clean_multipart", data_dict) + h.check_access('cloudstorage_clean_multipart', data_dict) uploader = ResourceCloudStorage({}) delta = _get_max_multipart_lifetime() oldest_allowed = datetime.datetime.utcnow() - delta @@ -268,14 +261,18 @@ def clean_multipart(context, data_dict): MultipartUpload.initiated < oldest_allowed ) - result = {"removed": 0, "total": uploads_to_remove.count(), 
"errors": []} + result = { + 'removed': 0, + 'total': uploads_to_remove.count(), + 'errors': [] + } for upload in uploads_to_remove: try: _delete_multipart(upload, uploader) except toolkit.ValidationError as e: - result["errors"].append(e.error_summary) + result['errors'].append(e.error_summary) else: - result["removed"] += 1 + result['removed'] += 1 return result diff --git a/ckanext/cloudstorage/logic/auth/multipart.py b/ckanext/cloudstorage/logic/auth/multipart.py index 9534362..4c0c3ce 100644 --- a/ckanext/cloudstorage/logic/auth/multipart.py +++ b/ckanext/cloudstorage/logic/auth/multipart.py @@ -4,24 +4,24 @@ def initiate_multipart(context, data_dict): - return {"success": check_access("resource_create", context, data_dict)} + return {'success': check_access('resource_create', context, data_dict)} def upload_multipart(context, data_dict): - return {"success": check_access("resource_create", context, data_dict)} + return {'success': check_access('resource_create', context, data_dict)} def finish_multipart(context, data_dict): - return {"success": check_access("resource_create", context, data_dict)} + return {'success': check_access('resource_create', context, data_dict)} def abort_multipart(context, data_dict): - return {"success": check_access("resource_create", context, data_dict)} + return {'success': check_access('resource_create', context, data_dict)} def check_multipart(context, data_dict): - return {"success": check_access("resource_create", context, data_dict)} + return {'success': check_access('resource_create', context, data_dict)} def clean_multipart(context, data_dict): - return {"success": False} + return {'success': False} diff --git a/ckanext/cloudstorage/model.py b/ckanext/cloudstorage/model.py index cf541ad..8a33f54 100644 --- a/ckanext/cloudstorage/model.py +++ b/ckanext/cloudstorage/model.py @@ -9,7 +9,7 @@ DateTime, ForeignKey, Integer, - Numeric, + Numeric ) from datetime import datetime import ckan.model.meta as meta @@ -28,7 +28,7 @@ def create_tables(): class MultipartPart(Base, DomainObject): - __tablename__ = "cloudstorage_multipart_part" + __tablename__ = 'cloudstorage_multipart_part' def __init__(self, n, etag, upload): self.n = n @@ -38,19 +38,17 @@ def __init__(self, n, etag, upload): n = Column(Integer, primary_key=True) etag = Column(UnicodeText, primary_key=True) upload_id = Column( - UnicodeText, - ForeignKey("cloudstorage_multipart_upload.id"), - primary_key=True, + UnicodeText, ForeignKey('cloudstorage_multipart_upload.id'), + primary_key=True ) upload = relationship( - "MultipartUpload", - backref=backref("parts", cascade="delete, delete-orphan"), - single_parent=True, - ) + 'MultipartUpload', + backref=backref('parts', cascade='delete, delete-orphan'), + single_parent=True) class MultipartUpload(Base, DomainObject): - __tablename__ = "cloudstorage_multipart_upload" + __tablename__ = 'cloudstorage_multipart_upload' def __init__(self, id, resource_id, name, size, original_name, user_id): self.id = id @@ -62,7 +60,9 @@ def __init__(self, id, resource_id, name, size, original_name, user_id): @classmethod def resource_uploads(cls, resource_id): - query = meta.Session.query(cls).filter_by(resource_id=resource_id) + query = meta.Session.query(cls).filter_by( + resource_id=resource_id + ) return query id = Column(UnicodeText, primary_key=True) diff --git a/ckanext/cloudstorage/plugin/__init__.py b/ckanext/cloudstorage/plugin/__init__.py index f6b2522..c0c736b 100644 --- a/ckanext/cloudstorage/plugin/__init__.py +++ b/ckanext/cloudstorage/plugin/__init__.py 
@@ -8,12 +8,10 @@ if plugins.toolkit.check_ckan_version("2.9"): from ckanext.cloudstorage.plugin.flask_plugin import MixinPlugin - # from ckanext.cloudstorage.plugin.pylons_plugin import MixinPlugin else: from ckanext.cloudstorage.plugin.pylons_plugin import MixinPlugin - class CloudStoragePlugin(MixinPlugin, plugins.SingletonPlugin): plugins.implements(plugins.IUploader) plugins.implements(plugins.IConfigurable) @@ -26,26 +24,30 @@ class CloudStoragePlugin(MixinPlugin, plugins.SingletonPlugin): # IConfigurer def update_config(self, config): - plugins.toolkit.add_template_directory(config, "../templates") - plugins.toolkit.add_resource("../fanstatic/scripts", "cloudstorage-js") + plugins.toolkit.add_template_directory(config, '../templates') + plugins.toolkit.add_resource('../fanstatic/scripts', 'cloudstorage-js') # ITemplateHelpers def get_helpers(self): - return dict(cloudstorage_use_secure_urls=helpers.use_secure_urls) + return dict( + cloudstorage_use_secure_urls=helpers.use_secure_urls + ) def configure(self, config): required_keys = ( - "ckanext.cloudstorage.driver", - "ckanext.cloudstorage.driver_options", - "ckanext.cloudstorage.container_name", + 'ckanext.cloudstorage.driver', + 'ckanext.cloudstorage.driver_options', + 'ckanext.cloudstorage.container_name' ) for rk in required_keys: if config.get(rk) is None: raise RuntimeError( - "Required configuration option {0} not found.".format(rk) + 'Required configuration option {0} not found.'.format( + rk + ) ) def get_resource_uploader(self, data_dict): @@ -61,24 +63,24 @@ def get_uploader(self, upload_to, old_filename=None): def get_actions(self): return { - "cloudstorage_initiate_multipart": m_action.initiate_multipart, - "cloudstorage_upload_multipart": m_action.upload_multipart, - "cloudstorage_finish_multipart": m_action.finish_multipart, - "cloudstorage_abort_multipart": m_action.abort_multipart, - "cloudstorage_check_multipart": m_action.check_multipart, - "cloudstorage_clean_multipart": m_action.clean_multipart, + 'cloudstorage_initiate_multipart': m_action.initiate_multipart, + 'cloudstorage_upload_multipart': m_action.upload_multipart, + 'cloudstorage_finish_multipart': m_action.finish_multipart, + 'cloudstorage_abort_multipart': m_action.abort_multipart, + 'cloudstorage_check_multipart': m_action.check_multipart, + 'cloudstorage_clean_multipart': m_action.clean_multipart, } # IAuthFunctions def get_auth_functions(self): return { - "cloudstorage_initiate_multipart": m_auth.initiate_multipart, - "cloudstorage_upload_multipart": m_auth.upload_multipart, - "cloudstorage_finish_multipart": m_auth.finish_multipart, - "cloudstorage_abort_multipart": m_auth.abort_multipart, - "cloudstorage_check_multipart": m_auth.check_multipart, - "cloudstorage_clean_multipart": m_auth.clean_multipart, + 'cloudstorage_initiate_multipart': m_auth.initiate_multipart, + 'cloudstorage_upload_multipart': m_auth.upload_multipart, + 'cloudstorage_finish_multipart': m_auth.finish_multipart, + 'cloudstorage_abort_multipart': m_auth.abort_multipart, + 'cloudstorage_check_multipart': m_auth.check_multipart, + 'cloudstorage_clean_multipart': m_auth.clean_multipart, } # IResourceController @@ -89,33 +91,36 @@ def before_delete(self, context, resource, resources): # `else` clause for res in resources: - if res["id"] == resource["id"]: + if res['id'] == resource['id']: break else: return # just ignore simple links - if res["url_type"] != "upload": + if res['url_type'] != 'upload': return # we don't want to change original item from resources, just in case # 
someone will use it in another `before_delete`. So, let's copy it # and add `clear_upload` flag - res_dict = dict(list(res.items()) + [("clear_upload", True)]) + res_dict = dict(res.items() + [('clear_upload', True)]) uploader = self.get_resource_uploader(res_dict) # to be on the safe side, let's check existence of container - container = getattr(uploader, "container", None) + container = getattr(uploader, 'container', None) if container is None: return # and now uploader removes our file. - uploader.upload(resource["id"]) + uploader.upload(resource['id']) # and all other files linked to this resource if not uploader.leave_files: upload_path = os.path.dirname( - uploader.path_from_filename(resource["id"], "fake-name") + uploader.path_from_filename( + resource['id'], + 'fake-name' + ) ) for old_file in uploader.container.iterate_objects(): diff --git a/ckanext/cloudstorage/plugin/flask_plugin.py b/ckanext/cloudstorage/plugin/flask_plugin.py index 7514151..2d2e64c 100644 --- a/ckanext/cloudstorage/plugin/flask_plugin.py +++ b/ckanext/cloudstorage/plugin/flask_plugin.py @@ -5,7 +5,6 @@ from ckanext.cloudstorage.views import get_blueprints from ckanext.cloudstorage.cli import get_commands - class MixinPlugin(p.SingletonPlugin): p.implements(p.IBlueprint) p.implements(p.IClick) diff --git a/ckanext/cloudstorage/plugin/pylons_plugin.py b/ckanext/cloudstorage/plugin/pylons_plugin.py index 93b4e17..d8003d9 100644 --- a/ckanext/cloudstorage/plugin/pylons_plugin.py +++ b/ckanext/cloudstorage/plugin/pylons_plugin.py @@ -4,29 +4,30 @@ import ckan.plugins as p - class MixinPlugin(p.SingletonPlugin): p.implements(p.IRoutes, inherit=True) # IRoutes + def before_map(self, map): sm = SubMapper( - map, controller="ckanext.cloudstorage.controller:StorageController" + map, + controller='ckanext.cloudstorage.controller:StorageController' ) # Override the resource download controllers so we can do our # lookup with libcloud. 
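# A quick illustration, not part of the patch: the res.items() versus
# list(res.items()) variants toggled in the before_delete hunks are a
# Python 3 concern. dict.items() returns a view there, which cannot be
# concatenated to a list with `+`, while the list() form works on both
# interpreters:
res = {"id": "abc", "url_type": "upload"}  # hypothetical resource dict
res_dict = dict(list(res.items()) + [("clear_upload", True)])
assert res_dict["clear_upload"] is True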
with sm: sm.connect( - "resource_download", - "/dataset/{id}/resource/{resource_id}/download", - action="resource_download", + 'resource_download', + '/dataset/{id}/resource/{resource_id}/download', + action='resource_download' ) sm.connect( - "resource_download", - "/dataset/{id}/resource/{resource_id}/download/{filename}", - action="resource_download", + 'resource_download', + '/dataset/{id}/resource/{resource_id}/download/{filename}', + action='resource_download' ) return map diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index e8cf308..48704d6 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -1,12 +1,9 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from future import standard_library -standard_library.install_aliases() -from builtins import object import cgi import mimetypes import os.path -import urllib.parse +import urlparse from ast import literal_eval from datetime import datetime, timedelta @@ -19,19 +16,20 @@ if p.toolkit.check_ckan_version("2.9"): from werkzeug.datastructures import FileStorage as UploadedFileType - config = p.toolkit.config else: from pylons import config - UploadedFileType = cgi.FieldStorage class CloudStorage(object): def __init__(self): - self.driver = get_driver(getattr(Provider, self.driver_name))( - **self.driver_options - ) + self.driver = get_driver( + getattr( + Provider, + self.driver_name + ) + )(**self.driver_options) self._container = None def path_from_filename(self, rid, filename): @@ -55,7 +53,7 @@ def driver_options(self): A dictionary of options ckanext-cloudstorage has been configured to pass to the apache-libcloud driver. """ - return literal_eval(config["ckanext.cloudstorage.driver_options"]) + return literal_eval(config['ckanext.cloudstorage.driver_options']) @property def driver_name(self): @@ -69,7 +67,7 @@ def driver_name(self): This value is used to lookup the apache-libcloud driver to use based on the Provider enum. """ - return config["ckanext.cloudstorage.driver"] + return config['ckanext.cloudstorage.driver'] @property def container_name(self): @@ -77,7 +75,7 @@ def container_name(self): The name of the container (also called buckets on some providers) ckanext-cloudstorage is configured to use. """ - return config["ckanext.cloudstorage.container_name"] + return config['ckanext.cloudstorage.container_name'] @property def use_secure_urls(self): @@ -86,7 +84,7 @@ def use_secure_urls(self): one-time URLs to resources, `False` otherwise. """ return p.toolkit.asbool( - config.get("ckanext.cloudstorage.use_secure_urls", False) + config.get('ckanext.cloudstorage.use_secure_urls', False) ) @property @@ -97,7 +95,7 @@ def leave_files(self): otherwise `False`. """ return p.toolkit.asbool( - config.get("ckanext.cloudstorage.leave_files", False) + config.get('ckanext.cloudstorage.leave_files', False) ) @property @@ -108,11 +106,10 @@ def can_use_advanced_azure(self): `False`. """ # Are we even using Azure? - if self.driver_name == "AZURE_BLOBS": + if self.driver_name == 'AZURE_BLOBS': try: # Yes? Is the azure-storage package available? from azure import storage - # Shut the linter up. assert storage return True @@ -128,11 +125,10 @@ def can_use_advanced_aws(self): been configured to use Amazon S3, otherwise `False`. """ # Are we even using AWS? - if "S3" in self.driver_name: + if 'S3' in self.driver_name: try: # Yes? Is the boto package available? import boto - # Shut the linter up. assert boto return True @@ -148,7 +144,7 @@ def guess_mimetype(self): `False` otherwise. 
""" return p.toolkit.asbool( - config.get("ckanext.cloudstorage.guess_mimetype", False) + config.get('ckanext.cloudstorage.guess_mimetype', False) ) @@ -167,9 +163,9 @@ def __init__(self, resource): self.file = None self.resource = resource - upload_field_storage = resource.pop("upload", None) - self._clear = resource.pop("clear_upload", None) - multipart_name = resource.pop("multipart_name", None) + upload_field_storage = resource.pop('upload', None) + self._clear = resource.pop('clear_upload', None) + multipart_name = resource.pop('multipart_name', None) # Check to see if a file has been provided if isinstance(upload_field_storage, UploadedFileType): @@ -178,24 +174,26 @@ def __init__(self, resource): self.file_upload = upload_field_storage.stream else: self.file_upload = upload_field_storage.file - resource["url"] = self.filename - resource["url_type"] = "upload" + resource['url'] = self.filename + resource['url_type'] = 'upload' elif multipart_name and self.can_use_advanced_aws: # This means that file was successfully uploaded and stored # at cloud. # Currently implemented just AWS version - resource["url"] = munge.munge_filename(multipart_name) - resource["url_type"] = "upload" - elif self._clear and resource.get("id"): + resource['url'] = munge.munge_filename(multipart_name) + resource['url_type'] = 'upload' + elif self._clear and resource.get('id'): # Apparently, this is a created-but-not-commited resource whose # file upload has been canceled. We're copying the behaviour of # ckaenxt-s3filestore here. - old_resource = model.Session.query(model.Resource).get( - resource["id"] + old_resource = model.Session.query( + model.Resource + ).get( + resource['id'] ) self.old_filename = old_resource.url - resource["url_type"] = "" + resource['url_type'] = '' def path_from_filename(self, rid, filename): """ @@ -204,7 +202,11 @@ def path_from_filename(self, rid, filename): :param rid: The resource ID. :param filename: The unmunged resource filename. 
""" - return os.path.join("resources", rid, munge.munge_filename(filename)) + return os.path.join( + 'resources', + rid, + munge.munge_filename(filename) + ) def upload(self, id, max_size=10): """ @@ -219,7 +221,8 @@ def upload(self, id, max_size=10): from azure.storage.blob.models import ContentSettings blob_service = azure_blob.BlockBlobService( - self.driver_options["key"], self.driver_options["secret"] + self.driver_options['key'], + self.driver_options['secret'] ) content_settings = None if self.guess_mimetype: @@ -230,14 +233,20 @@ def upload(self, id, max_size=10): ) return blob_service.create_blob_from_stream( container_name=self.container_name, - blob_name=self.path_from_filename(id, self.filename), + blob_name=self.path_from_filename( + id, + self.filename + ), stream=self.file_upload, - content_settings=content_settings, + content_settings=content_settings ) else: self.container.upload_object_via_stream( self.file_upload, - object_name=self.path_from_filename(id, self.filename), + object_name=self.path_from_filename( + id, + self.filename + ) ) elif self._clear and self.old_filename and not self.leave_files: @@ -246,7 +255,10 @@ def upload(self, id, max_size=10): try: self.container.delete_object( self.container.get_object( - self.path_from_filename(id, self.old_filename) + self.path_from_filename( + id, + self.old_filename + ) ) ) except ObjectDoesNotExistError: @@ -280,7 +292,8 @@ def get_url_from_filename(self, rid, filename, content_type=None): from azure.storage import blob as azure_blob blob_service = azure_blob.BlockBlobService( - self.driver_options["key"], self.driver_options["secret"] + self.driver_options['key'], + self.driver_options['secret'] ) return blob_service.make_blob_url( @@ -290,25 +303,23 @@ def get_url_from_filename(self, rid, filename, content_type=None): container_name=self.container_name, blob_name=path, expiry=datetime.utcnow() + timedelta(hours=1), - permission=azure_blob.BlobPermissions.READ, - ), + permission=azure_blob.BlobPermissions.READ + ) ) elif self.can_use_advanced_aws and self.use_secure_urls: from boto.s3.connection import S3Connection - s3_connection = S3Connection( - self.driver_options["key"], self.driver_options["secret"] + self.driver_options['key'], + self.driver_options['secret'] ) - generate_url_params = { - "expires_in": 60 * 60, - "method": "GET", - "bucket": self.container_name, - "query_auth": True, - "key": path, - } + generate_url_params = {"expires_in": 60 * 60, + "method": "GET", + "bucket": self.container_name, + "query_auth": True, + "key": path} if content_type: - generate_url_params["headers"] = {"Content-Type": content_type} + generate_url_params['headers'] = {"Content-Type": content_type} return s3_connection.generate_url(**generate_url_params) @@ -321,19 +332,20 @@ def get_url_from_filename(self, rid, filename, content_type=None): try: return self.driver.get_object_cdn_url(obj) except NotImplementedError: - if "S3" in self.driver_name: - return urllib.parse.urljoin( - "https://" + self.driver.connection.host, - "{container}/{path}".format( - container=self.container_name, path=path - ), + if 'S3' in self.driver_name: + return urlparse.urljoin( + 'https://' + self.driver.connection.host, + '{container}/{path}'.format( + container=self.container_name, + path=path + ) ) # This extra 'url' property isn't documented anywhere, sadly. # See azure_blobs.py:_xml_to_object for more. 
- elif "url" in obj.extra: - return obj.extra["url"] + elif 'url' in obj.extra: + return obj.extra['url'] raise @property def package(self): - return model.Package.get(self.resource["package_id"]) + return model.Package.get(self.resource['package_id']) diff --git a/ckanext/cloudstorage/utils.py b/ckanext/cloudstorage/utils.py index 2829d90..2e4e065 100644 --- a/ckanext/cloudstorage/utils.py +++ b/ckanext/cloudstorage/utils.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import print_function import os.path @@ -12,8 +11,8 @@ from ckan.logic import NotFound from ckanapi import LocalCKAN -from ckanext.cloudstorage.model import create_tables, drop_tables -from ckanext.cloudstorage.storage import CloudStorage, ResourceCloudStorage +from ckanext.cloudstorage.model import (create_tables, drop_tables) +from ckanext.cloudstorage.storage import (CloudStorage, ResourceCloudStorage) class FakeFileStorage(cgi.FieldStorage): @@ -34,28 +33,21 @@ def fix_cors(domains): from azure.storage import blob as azure_blob from azure.storage import CorsRule - blob_service = azure_blob.BlockBlobService( - cs.driver_options["key"], cs.driver_options["secret"] - ) + blob_service = azure_blob.BlockBlobService(cs.driver_options['key'], + cs.driver_options['secret']) blob_service.set_blob_service_properties( - cors=[CorsRule(allowed_origins=domains, allowed_methods=["GET"])] - ) - return "Done!", True + cors=[CorsRule(allowed_origins=domains, allowed_methods=['GET'])]) + return 'Done!', True else: - return ( - ( - "The driver {driver_name} being used does not currently" - " support updating CORS rules through" - " cloudstorage.".format(driver_name=cs.driver_name) - ), - False, - ) + return ('The driver {driver_name} being used does not currently' + ' support updating CORS rules through' + ' cloudstorage.'.format(driver_name=cs.driver_name)), False def migrate(path, single_id): if not os.path.isdir(path): - print("The storage directory cannot be found.") + print('The storage directory cannot be found.') return lc = LocalCKAN() @@ -78,7 +70,7 @@ def migrate(path, single_id): if not files: continue - split_root = root.split("/") + split_root = root.split('/') resource_id = split_root[-2] + split_root[-1] for file_ in files: @@ -88,78 +80,67 @@ def migrate(path, single_id): resources[ckan_res_id] = os.path.join(root, file_) - for i, resource in enumerate(iter(resources.items()), 1): + for i, resource in enumerate(resources.iteritems(), 1): resource_id, file_path = resource - print( - "[{i}/{count}] Working on {id}".format( - i=i, count=len(resources), id=resource_id - ) - ) + print('[{i}/{count}] Working on {id}'.format(i=i, + count=len(resources), + id=resource_id)) try: resource = lc.action.resource_show(id=resource_id) except NotFound: - print(u"\tResource not found") + print(u'\tResource not found') continue - if resource["url_type"] != "upload": - print(u"\t`url_type` is not `upload`. Skip") + if resource['url_type'] != 'upload': + print(u'\t`url_type` is not `upload`. 
Skip') continue - with open(file_path, "rb") as fin: - resource["upload"] = FakeFileStorage( - fin, resource["url"].split("/")[-1] - ) + with open(file_path, 'rb') as fin: + resource['upload'] = FakeFileStorage( + fin, resource['url'].split('/')[-1]) try: uploader = ResourceCloudStorage(resource) - uploader.upload(resource["id"]) + uploader.upload(resource['id']) except Exception as e: failed.append(resource_id) - print( - u"\tError of type {0} during upload: {1}".format( - type(e), e - ) - ) + print(u'\tError of type {0} during upload: {1}'.format( + type(e), e)) if failed: log_file = tempfile.NamedTemporaryFile(delete=False) log_file.file.writelines(failed) - print( - u"ID of all failed uploads are saved to `{0}`".format( - log_file.name - ) - ) + print(u'ID of all failed uploads are saved to `{0}`'.format( + log_file.name)) def resource_download(id, resource_id, filename=None): context = { - "model": model, - "session": model.Session, - "user": tk.c.user or tk.c.author, - "auth_user_obj": tk.c.userobj, + 'model': model, + 'session': model.Session, + 'user': tk.c.user or tk.c.author, + 'auth_user_obj': tk.c.userobj } try: - resource = logic.get_action("resource_show")( - context, {"id": resource_id} - ) + resource = logic.get_action('resource_show')(context, { + 'id': resource_id + }) except logic.NotFound: - return base.abort(404, tk._("Resource not found")) + return base.abort(404, tk._('Resource not found')) except logic.NotAuthorized: - return base.abort( - 401, tk._("Unauthorized to read resource {0}".format(id)) - ) + return base.abort(401, tk._('Unauthorized to read resource {0}'.format(id))) # This isn't a file upload, so either redirect to the source # (if available) or error out. - if resource.get("url_type") != "upload": - url = resource.get("url") + if resource.get('url_type') != 'upload': + url = resource.get('url') if not url: - return base.abort(404, tk._("No download is available")) + return base.abort(404, tk._('No download is available')) return h.redirect_to(url) if filename is None: # No filename was provided so we'll try to get one from the url. - filename = os.path.basename(resource["url"]) + filename = os.path.basename(resource['url']) upload = uploader.get_resource_uploader(resource) @@ -169,13 +150,13 @@ def resource_download(id, resource_id, filename=None): content_type = getattr(tk.request, "content_type", None) except AttributeError: content_type = None - uploaded_url = upload.get_url_from_filename( - resource["id"], filename, content_type=content_type - ) + uploaded_url = upload.get_url_from_filename(resource['id'], + filename, + content_type=content_type) # The uploaded file is missing for some reason, such as the # provider being down. 
if uploaded_url is None: - return base.abort(404, tk._("No download is available")) + return base.abort(404, tk._('No download is available')) return h.redirect_to(uploaded_url) diff --git a/ckanext/cloudstorage/views.py b/ckanext/cloudstorage/views.py index be9c288..f5bf688 100644 --- a/ckanext/cloudstorage/views.py +++ b/ckanext/cloudstorage/views.py @@ -4,16 +4,15 @@ import ckan.views.resource as resource import ckanext.cloudstorage.utils as utils -cloudstorage = Blueprint("cloudstorage", __name__) +cloudstorage = Blueprint('cloudstorage', __name__) def download(id, resource_id, filename=None, package_type="dataset"): return utils.resource_download(id, resource_id, filename) -cloudstorage.add_url_rule( - "/dataset//resource//download", view_func=download -) +cloudstorage.add_url_rule("/dataset//resource//download", + view_func=download) cloudstorage.add_url_rule( "/dataset//resource//download/", view_func=download, From 8e1f7cd6dd1a6dc98180fd334db1b1e101040431 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Thu, 19 Dec 2019 15:24:30 +0200 Subject: [PATCH 04/44] Format updated files --- ckanext/cloudstorage/cli.py | 5 +- ckanext/cloudstorage/commands.py | 2 - ckanext/cloudstorage/controller.py | 1 + ckanext/cloudstorage/plugin/__init__.py | 33 +++---- ckanext/cloudstorage/plugin/flask_plugin.py | 1 + ckanext/cloudstorage/plugin/pylons_plugin.py | 16 ++-- ckanext/cloudstorage/storage.py | 96 ++++++-------------- ckanext/cloudstorage/utils.py | 4 +- ckanext/cloudstorage/views.py | 6 +- pyproject.toml | 3 - 10 files changed, 58 insertions(+), 109 deletions(-) delete mode 100644 pyproject.toml diff --git a/ckanext/cloudstorage/cli.py b/ckanext/cloudstorage/cli.py index 3ebce02..4b5a696 100644 --- a/ckanext/cloudstorage/cli.py +++ b/ckanext/cloudstorage/cli.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- - import click import ckanext.cloudstorage.utils as utils + @click.group() def cloudstorage(): """CloudStorage management commands. @@ -19,6 +19,7 @@ def fix_cors(domains): msg, ok = utils.fix_cors(domains) click.secho(msg, fg='green' if ok else 'red') + @cloudstorage.command() @click.argument('path') @click.argument('resource', required=False) @@ -27,6 +28,7 @@ def migrate(path, resource): """ utils.migrate(path, resource) + @cloudstorage.command() def initdb(): """Reinitalize database tables. @@ -35,6 +37,5 @@ def initdb(): click.secho("DB tables are reinitialized", fg="green") - def get_commands(): return [cloudstorage] diff --git a/ckanext/cloudstorage/commands.py b/ckanext/cloudstorage/commands.py index dea598f..0e521db 100644 --- a/ckanext/cloudstorage/commands.py +++ b/ckanext/cloudstorage/commands.py @@ -6,7 +6,6 @@ import ckanext.cloudstorage.utils as utils - USAGE = """ckanext-cloudstorage Commands: @@ -24,7 +23,6 @@ """ - class PasterCommand(CkanCommand): summary = 'ckanext-cloudstorage maintence utilities.' 
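# For illustration only, not part of the patch: on CKAN 2.9 the click group
# in cli.py is registered through IClick, while this CkanCommand keeps a
# paster-style entry point for older releases. The click group can also be
# exercised directly with click's test runner; a working CKAN configuration
# and database are still needed for the command itself to succeed.
from click.testing import CliRunner

from ckanext.cloudstorage.cli import cloudstorage

result = CliRunner().invoke(cloudstorage, ["initdb"])
print(result.output)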
usage = USAGE diff --git a/ckanext/cloudstorage/controller.py b/ckanext/cloudstorage/controller.py index 162489e..5105d46 100644 --- a/ckanext/cloudstorage/controller.py +++ b/ckanext/cloudstorage/controller.py @@ -10,6 +10,7 @@ import ckan.lib.helpers as h import ckanext.cloudstorage.utils as utils + class StorageController(base.BaseController): def resource_download(self, id, resource_id, filename=None): return utils.resource_download(id, resource_id, filename) diff --git a/ckanext/cloudstorage/plugin/__init__.py b/ckanext/cloudstorage/plugin/__init__.py index c0c736b..558a37b 100644 --- a/ckanext/cloudstorage/plugin/__init__.py +++ b/ckanext/cloudstorage/plugin/__init__.py @@ -1,17 +1,21 @@ # -*- coding: utf-8 -*- -from ckan import plugins import os.path -from ckanext.cloudstorage import storage -from ckanext.cloudstorage import helpers + +from ckan import plugins + import ckanext.cloudstorage.logic.action.multipart as m_action import ckanext.cloudstorage.logic.auth.multipart as m_auth +from ckanext.cloudstorage import storage +from ckanext.cloudstorage import helpers + if plugins.toolkit.check_ckan_version("2.9"): from ckanext.cloudstorage.plugin.flask_plugin import MixinPlugin # from ckanext.cloudstorage.plugin.pylons_plugin import MixinPlugin else: from ckanext.cloudstorage.plugin.pylons_plugin import MixinPlugin + class CloudStoragePlugin(MixinPlugin, plugins.SingletonPlugin): plugins.implements(plugins.IUploader) plugins.implements(plugins.IConfigurable) @@ -30,25 +34,18 @@ def update_config(self, config): # ITemplateHelpers def get_helpers(self): - return dict( - cloudstorage_use_secure_urls=helpers.use_secure_urls - ) + return dict(cloudstorage_use_secure_urls=helpers.use_secure_urls) def configure(self, config): - required_keys = ( - 'ckanext.cloudstorage.driver', - 'ckanext.cloudstorage.driver_options', - 'ckanext.cloudstorage.container_name' - ) + required_keys = ('ckanext.cloudstorage.driver', + 'ckanext.cloudstorage.driver_options', + 'ckanext.cloudstorage.container_name') for rk in required_keys: if config.get(rk) is None: raise RuntimeError( - 'Required configuration option {0} not found.'.format( - rk - ) - ) + 'Required configuration option {0} not found.'.format(rk)) def get_resource_uploader(self, data_dict): # We provide a custom Resource uploader. 
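# Illustrative sketch (not part of the patch) of how the three required
# options checked in configure() above reach apache-libcloud. The values are
# hypothetical; an ini file would carry something like:
#
#   ckanext.cloudstorage.driver = S3
#   ckanext.cloudstorage.driver_options = {"key": "AKIA...", "secret": "..."}
#   ckanext.cloudstorage.container_name = my-ckan-bucket
#
from ast import literal_eval

from libcloud.storage.providers import get_driver
from libcloud.storage.types import Provider

options = literal_eval('{"key": "AKIA...", "secret": "..."}')
driver = get_driver(getattr(Provider, "S3"))(**options)
container = driver.get_container(container_name="my-ckan-bucket")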
@@ -117,11 +114,7 @@ def before_delete(self, context, resource, resources): # and all other files linked to this resource if not uploader.leave_files: upload_path = os.path.dirname( - uploader.path_from_filename( - resource['id'], - 'fake-name' - ) - ) + uploader.path_from_filename(resource['id'], 'fake-name')) for old_file in uploader.container.iterate_objects(): if old_file.name.startswith(upload_path): diff --git a/ckanext/cloudstorage/plugin/flask_plugin.py b/ckanext/cloudstorage/plugin/flask_plugin.py index 2d2e64c..7514151 100644 --- a/ckanext/cloudstorage/plugin/flask_plugin.py +++ b/ckanext/cloudstorage/plugin/flask_plugin.py @@ -5,6 +5,7 @@ from ckanext.cloudstorage.views import get_blueprints from ckanext.cloudstorage.cli import get_commands + class MixinPlugin(p.SingletonPlugin): p.implements(p.IBlueprint) p.implements(p.IClick) diff --git a/ckanext/cloudstorage/plugin/pylons_plugin.py b/ckanext/cloudstorage/plugin/pylons_plugin.py index d8003d9..8f35445 100644 --- a/ckanext/cloudstorage/plugin/pylons_plugin.py +++ b/ckanext/cloudstorage/plugin/pylons_plugin.py @@ -4,30 +4,26 @@ import ckan.plugins as p + class MixinPlugin(p.SingletonPlugin): p.implements(p.IRoutes, inherit=True) # IRoutes - def before_map(self, map): sm = SubMapper( map, - controller='ckanext.cloudstorage.controller:StorageController' - ) + controller='ckanext.cloudstorage.controller:StorageController') # Override the resource download controllers so we can do our # lookup with libcloud. with sm: - sm.connect( - 'resource_download', - '/dataset/{id}/resource/{resource_id}/download', - action='resource_download' - ) + sm.connect('resource_download', + '/dataset/{id}/resource/{resource_id}/download', + action='resource_download') sm.connect( 'resource_download', '/dataset/{id}/resource/{resource_id}/download/{filename}', - action='resource_download' - ) + action='resource_download') return map diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 48704d6..e926157 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -24,12 +24,8 @@ class CloudStorage(object): def __init__(self): - self.driver = get_driver( - getattr( - Provider, - self.driver_name - ) - )(**self.driver_options) + self.driver = get_driver(getattr( + Provider, self.driver_name))(**self.driver_options) self._container = None def path_from_filename(self, rid, filename): @@ -42,8 +38,7 @@ def container(self): """ if self._container is None: self._container = self.driver.get_container( - container_name=self.container_name - ) + container_name=self.container_name) return self._container @@ -84,8 +79,7 @@ def use_secure_urls(self): one-time URLs to resources, `False` otherwise. """ return p.toolkit.asbool( - config.get('ckanext.cloudstorage.use_secure_urls', False) - ) + config.get('ckanext.cloudstorage.use_secure_urls', False)) @property def leave_files(self): @@ -95,8 +89,7 @@ def leave_files(self): otherwise `False`. """ return p.toolkit.asbool( - config.get('ckanext.cloudstorage.leave_files', False) - ) + config.get('ckanext.cloudstorage.leave_files', False)) @property def can_use_advanced_azure(self): @@ -144,8 +137,7 @@ def guess_mimetype(self): `False` otherwise. 
""" return p.toolkit.asbool( - config.get('ckanext.cloudstorage.guess_mimetype', False) - ) + config.get('ckanext.cloudstorage.guess_mimetype', False)) class ResourceCloudStorage(CloudStorage): @@ -186,11 +178,8 @@ def __init__(self, resource): # Apparently, this is a created-but-not-commited resource whose # file upload has been canceled. We're copying the behaviour of # ckaenxt-s3filestore here. - old_resource = model.Session.query( - model.Resource - ).get( - resource['id'] - ) + old_resource = model.Session.query(model.Resource).get( + resource['id']) self.old_filename = old_resource.url resource['url_type'] = '' @@ -202,11 +191,7 @@ def path_from_filename(self, rid, filename): :param rid: The resource ID. :param filename: The unmunged resource filename. """ - return os.path.join( - 'resources', - rid, - munge.munge_filename(filename) - ) + return os.path.join('resources', rid, munge.munge_filename(filename)) def upload(self, id, max_size=10): """ @@ -221,33 +206,22 @@ def upload(self, id, max_size=10): from azure.storage.blob.models import ContentSettings blob_service = azure_blob.BlockBlobService( - self.driver_options['key'], - self.driver_options['secret'] - ) + self.driver_options['key'], self.driver_options['secret']) content_settings = None if self.guess_mimetype: content_type, _ = mimetypes.guess_type(self.filename) if content_type: content_settings = ContentSettings( - content_type=content_type - ) + content_type=content_type) return blob_service.create_blob_from_stream( container_name=self.container_name, - blob_name=self.path_from_filename( - id, - self.filename - ), + blob_name=self.path_from_filename(id, self.filename), stream=self.file_upload, - content_settings=content_settings - ) + content_settings=content_settings) else: self.container.upload_object_via_stream( self.file_upload, - object_name=self.path_from_filename( - id, - self.filename - ) - ) + object_name=self.path_from_filename(id, self.filename)) elif self._clear and self.old_filename and not self.leave_files: # This is only set when a previously-uploaded file is replace @@ -255,12 +229,7 @@ def upload(self, id, max_size=10): try: self.container.delete_object( self.container.get_object( - self.path_from_filename( - id, - self.old_filename - ) - ) - ) + self.path_from_filename(id, self.old_filename))) except ObjectDoesNotExistError: # It's possible for the object to have already been deleted, or # for it to not yet exist in a committed state due to an @@ -292,9 +261,7 @@ def get_url_from_filename(self, rid, filename, content_type=None): from azure.storage import blob as azure_blob blob_service = azure_blob.BlockBlobService( - self.driver_options['key'], - self.driver_options['secret'] - ) + self.driver_options['key'], self.driver_options['secret']) return blob_service.make_blob_url( container_name=self.container_name, @@ -303,21 +270,19 @@ def get_url_from_filename(self, rid, filename, content_type=None): container_name=self.container_name, blob_name=path, expiry=datetime.utcnow() + timedelta(hours=1), - permission=azure_blob.BlobPermissions.READ - ) - ) + permission=azure_blob.BlobPermissions.READ)) elif self.can_use_advanced_aws and self.use_secure_urls: from boto.s3.connection import S3Connection - s3_connection = S3Connection( - self.driver_options['key'], - self.driver_options['secret'] - ) - - generate_url_params = {"expires_in": 60 * 60, - "method": "GET", - "bucket": self.container_name, - "query_auth": True, - "key": path} + s3_connection = S3Connection(self.driver_options['key'], + 
self.driver_options['secret']) + + generate_url_params = { + "expires_in": 60 * 60, + "method": "GET", + "bucket": self.container_name, + "query_auth": True, + "key": path + } if content_type: generate_url_params['headers'] = {"Content-Type": content_type} @@ -335,11 +300,8 @@ def get_url_from_filename(self, rid, filename, content_type=None): if 'S3' in self.driver_name: return urlparse.urljoin( 'https://' + self.driver.connection.host, - '{container}/{path}'.format( - container=self.container_name, - path=path - ) - ) + '{container}/{path}'.format(container=self.container_name, + path=path)) # This extra 'url' property isn't documented anywhere, sadly. # See azure_blobs.py:_xml_to_object for more. elif 'url' in obj.extra: diff --git a/ckanext/cloudstorage/utils.py b/ckanext/cloudstorage/utils.py index 2e4e065..6864130 100644 --- a/ckanext/cloudstorage/utils.py +++ b/ckanext/cloudstorage/utils.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import os.path - from ckan import logic, model import ckan.plugins.toolkit as tk from ckan.lib import base, uploader @@ -128,7 +127,8 @@ def resource_download(id, resource_id, filename=None): except logic.NotFound: return base.abort(404, tk._('Resource not found')) except logic.NotAuthorized: - return base.abort(401, tk._('Unauthorized to read resource {0}'.format(id))) + return base.abort(401, + tk._('Unauthorized to read resource {0}'.format(id))) # This isn't a file upload, so either redirect to the source # (if available) or error out. diff --git a/ckanext/cloudstorage/views.py b/ckanext/cloudstorage/views.py index f5bf688..e91c18b 100644 --- a/ckanext/cloudstorage/views.py +++ b/ckanext/cloudstorage/views.py @@ -7,14 +7,14 @@ cloudstorage = Blueprint('cloudstorage', __name__) -def download(id, resource_id, filename=None, package_type="dataset"): +def download(id, resource_id, filename=None, package_type='dataset'): return utils.resource_download(id, resource_id, filename) -cloudstorage.add_url_rule("/dataset//resource//download", +cloudstorage.add_url_rule('/dataset//resource//download', view_func=download) cloudstorage.add_url_rule( - "/dataset//resource//download/", + '/dataset//resource//download/', view_func=download, ) diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 858d39d..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,3 +0,0 @@ -[tool.black] -line-length = 79 -include = '\.py$' From 8730fa783621302fa0f3fcde73cd2a2569580a1c Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Thu, 19 Dec 2019 15:24:39 +0200 Subject: [PATCH 05/44] futurize --- ckanext/cloudstorage/commands.py | 1 + ckanext/cloudstorage/logic/action/multipart.py | 5 +---- ckanext/cloudstorage/plugin/__init__.py | 2 +- ckanext/cloudstorage/storage.py | 7 +++++-- ckanext/cloudstorage/utils.py | 3 ++- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ckanext/cloudstorage/commands.py b/ckanext/cloudstorage/commands.py index 0e521db..2d31c84 100644 --- a/ckanext/cloudstorage/commands.py +++ b/ckanext/cloudstorage/commands.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from __future__ import print_function from ckan.lib.cli import CkanCommand from docopt import docopt diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py index ae67666..b248c2a 100644 --- a/ckanext/cloudstorage/logic/action/multipart.py +++ b/ckanext/cloudstorage/logic/action/multipart.py @@ -133,10 +133,7 @@ def initiate_multipart(context, data_dict): upload_id = resp.object.find( '{%s}UploadId' % 
resp.object.nsmap[None]).text except AttributeError: - upload_id_list = filter( - lambda e: e.tag.endswith('UploadId'), - resp.object.getchildren() - ) + upload_id_list = [e for e in resp.object.getchildren() if e.tag.endswith('UploadId')] upload_id = upload_id_list[0].text upload_object = MultipartUpload(upload_id, id, res_name, size, name, user_id) diff --git a/ckanext/cloudstorage/plugin/__init__.py b/ckanext/cloudstorage/plugin/__init__.py index 558a37b..a07bfd9 100644 --- a/ckanext/cloudstorage/plugin/__init__.py +++ b/ckanext/cloudstorage/plugin/__init__.py @@ -99,7 +99,7 @@ def before_delete(self, context, resource, resources): # we don't want to change original item from resources, just in case # someone will use it in another `before_delete`. So, let's copy it # and add `clear_upload` flag - res_dict = dict(res.items() + [('clear_upload', True)]) + res_dict = dict(list(res.items()) + [('clear_upload', True)]) uploader = self.get_resource_uploader(res_dict) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index e926157..9918a6b 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -1,9 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from future import standard_library +standard_library.install_aliases() +from builtins import object import cgi import mimetypes import os.path -import urlparse +import urllib.parse from ast import literal_eval from datetime import datetime, timedelta @@ -298,7 +301,7 @@ def get_url_from_filename(self, rid, filename, content_type=None): return self.driver.get_object_cdn_url(obj) except NotImplementedError: if 'S3' in self.driver_name: - return urlparse.urljoin( + return urllib.parse.urljoin( 'https://' + self.driver.connection.host, '{container}/{path}'.format(container=self.container_name, path=path)) diff --git a/ckanext/cloudstorage/utils.py b/ckanext/cloudstorage/utils.py index 6864130..a76dd73 100644 --- a/ckanext/cloudstorage/utils.py +++ b/ckanext/cloudstorage/utils.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import print_function import os.path from ckan import logic, model @@ -79,7 +80,7 @@ def migrate(path, single_id): resources[ckan_res_id] = os.path.join(root, file_) - for i, resource in enumerate(resources.iteritems(), 1): + for i, resource in enumerate(iter(resources.items()), 1): resource_id, file_path = resource print('[{i}/{count}] Working on {id}'.format(i=i, count=len(resources), From fe2020aa3cb43a0258d4e6feb8d511cc00007204 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Thu, 19 Dec 2019 15:40:36 +0200 Subject: [PATCH 06/44] Restore `storage.py` formatting` --- ckanext/cloudstorage/storage.py | 96 +++++++++++++++++++++++---------- ckanext/cloudstorage/utils.py | 2 +- 2 files changed, 68 insertions(+), 30 deletions(-) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 9918a6b..c625f2a 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -27,8 +27,12 @@ class CloudStorage(object): def __init__(self): - self.driver = get_driver(getattr( - Provider, self.driver_name))(**self.driver_options) + self.driver = get_driver( + getattr( + Provider, + self.driver_name + ) + )(**self.driver_options) self._container = None def path_from_filename(self, rid, filename): @@ -41,7 +45,8 @@ def container(self): """ if self._container is None: self._container = self.driver.get_container( - container_name=self.container_name) + container_name=self.container_name + ) return self._container @@ -82,7 
+87,8 @@ def use_secure_urls(self): one-time URLs to resources, `False` otherwise. """ return p.toolkit.asbool( - config.get('ckanext.cloudstorage.use_secure_urls', False)) + config.get('ckanext.cloudstorage.use_secure_urls', False) + ) @property def leave_files(self): @@ -92,7 +98,8 @@ def leave_files(self): otherwise `False`. """ return p.toolkit.asbool( - config.get('ckanext.cloudstorage.leave_files', False)) + config.get('ckanext.cloudstorage.leave_files', False) + ) @property def can_use_advanced_azure(self): @@ -140,7 +147,8 @@ def guess_mimetype(self): `False` otherwise. """ return p.toolkit.asbool( - config.get('ckanext.cloudstorage.guess_mimetype', False)) + config.get('ckanext.cloudstorage.guess_mimetype', False) + ) class ResourceCloudStorage(CloudStorage): @@ -181,8 +189,11 @@ def __init__(self, resource): # Apparently, this is a created-but-not-commited resource whose # file upload has been canceled. We're copying the behaviour of # ckaenxt-s3filestore here. - old_resource = model.Session.query(model.Resource).get( - resource['id']) + old_resource = model.Session.query( + model.Resource + ).get( + resource['id'] + ) self.old_filename = old_resource.url resource['url_type'] = '' @@ -194,7 +205,11 @@ def path_from_filename(self, rid, filename): :param rid: The resource ID. :param filename: The unmunged resource filename. """ - return os.path.join('resources', rid, munge.munge_filename(filename)) + return os.path.join( + 'resources', + rid, + munge.munge_filename(filename) + ) def upload(self, id, max_size=10): """ @@ -209,22 +224,33 @@ def upload(self, id, max_size=10): from azure.storage.blob.models import ContentSettings blob_service = azure_blob.BlockBlobService( - self.driver_options['key'], self.driver_options['secret']) + self.driver_options['key'], + self.driver_options['secret'] + ) content_settings = None if self.guess_mimetype: content_type, _ = mimetypes.guess_type(self.filename) if content_type: content_settings = ContentSettings( - content_type=content_type) + content_type=content_type + ) return blob_service.create_blob_from_stream( container_name=self.container_name, - blob_name=self.path_from_filename(id, self.filename), + blob_name=self.path_from_filename( + id, + self.filename + ), stream=self.file_upload, - content_settings=content_settings) + content_settings=content_settings + ) else: self.container.upload_object_via_stream( self.file_upload, - object_name=self.path_from_filename(id, self.filename)) + object_name=self.path_from_filename( + id, + self.filename + ) + ) elif self._clear and self.old_filename and not self.leave_files: # This is only set when a previously-uploaded file is replace @@ -232,7 +258,12 @@ def upload(self, id, max_size=10): try: self.container.delete_object( self.container.get_object( - self.path_from_filename(id, self.old_filename))) + self.path_from_filename( + id, + self.old_filename + ) + ) + ) except ObjectDoesNotExistError: # It's possible for the object to have already been deleted, or # for it to not yet exist in a committed state due to an @@ -264,7 +295,9 @@ def get_url_from_filename(self, rid, filename, content_type=None): from azure.storage import blob as azure_blob blob_service = azure_blob.BlockBlobService( - self.driver_options['key'], self.driver_options['secret']) + self.driver_options['key'], + self.driver_options['secret'] + ) return blob_service.make_blob_url( container_name=self.container_name, @@ -273,19 +306,21 @@ def get_url_from_filename(self, rid, filename, content_type=None): 
container_name=self.container_name, blob_name=path, expiry=datetime.utcnow() + timedelta(hours=1), - permission=azure_blob.BlobPermissions.READ)) + permission=azure_blob.BlobPermissions.READ + ) + ) elif self.can_use_advanced_aws and self.use_secure_urls: from boto.s3.connection import S3Connection - s3_connection = S3Connection(self.driver_options['key'], - self.driver_options['secret']) - - generate_url_params = { - "expires_in": 60 * 60, - "method": "GET", - "bucket": self.container_name, - "query_auth": True, - "key": path - } + s3_connection = S3Connection( + self.driver_options['key'], + self.driver_options['secret'] + ) + + generate_url_params = {"expires_in": 60 * 60, + "method": "GET", + "bucket": self.container_name, + "query_auth": True, + "key": path} if content_type: generate_url_params['headers'] = {"Content-Type": content_type} @@ -303,8 +338,11 @@ def get_url_from_filename(self, rid, filename, content_type=None): if 'S3' in self.driver_name: return urllib.parse.urljoin( 'https://' + self.driver.connection.host, - '{container}/{path}'.format(container=self.container_name, - path=path)) + '{container}/{path}'.format( + container=self.container_name, + path=path + ) + ) # This extra 'url' property isn't documented anywhere, sadly. # See azure_blobs.py:_xml_to_object for more. elif 'url' in obj.extra: diff --git a/ckanext/cloudstorage/utils.py b/ckanext/cloudstorage/utils.py index a76dd73..8fc9539 100644 --- a/ckanext/cloudstorage/utils.py +++ b/ckanext/cloudstorage/utils.py @@ -80,7 +80,7 @@ def migrate(path, single_id): resources[ckan_res_id] = os.path.join(root, file_) - for i, resource in enumerate(iter(resources.items()), 1): + for i, resource in enumerate(iter(list(resources.items())), 1): resource_id, file_path = resource print('[{i}/{count}] Working on {id}'.format(i=i, count=len(resources), From 8e9c26428f0aeadf391e49c5ffcfd518e21bdcbf Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Wed, 15 Jan 2020 12:07:35 +0200 Subject: [PATCH 07/44] Update libcloud version. Handle py3 uploads --- ckanext/cloudstorage/storage.py | 8 +++++++- setup.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index c625f2a..9694396 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -244,8 +244,14 @@ def upload(self, id, max_size=10): content_settings=content_settings ) else: + file_upload = self.file_upload + # in Python3 libcloud iterates over uploaded file, + # while it's wrappend into non-iterator. 
So, pick real + # file-object and give it to cloudstorage + if six.PY3: + file_upload = file_upload._file self.container.upload_object_via_stream( - self.file_upload, + file_upload, object_name=self.path_from_filename( id, self.filename diff --git a/setup.py b/setup.py index e3a7f1d..9745823 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ include_package_data=True, zip_safe=False, install_requires=[ - 'apache-libcloud==1.5' + 'apache-libcloud>=1.5' ], entry_points=( """ From 89633a4af718457cd51539ca2cdd4234cd0546c2 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Wed, 15 Jan 2020 12:10:10 +0200 Subject: [PATCH 08/44] Import six --- ckanext/cloudstorage/storage.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 9694396..d1b0e16 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -10,6 +10,8 @@ from ast import literal_eval from datetime import datetime, timedelta +import six + from ckan import model from ckan.lib import munge import ckan.plugins as p From 7be388b7b7775bc3b9af8f0688d9f4de6ab7b88c Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Wed, 15 Jan 2020 12:11:35 +0200 Subject: [PATCH 09/44] Pin libcloud version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9745823..4e565a5 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ include_package_data=True, zip_safe=False, install_requires=[ - 'apache-libcloud>=1.5' + 'apache-libcloud==2.8' ], entry_points=( """ From 46a8158aa4b09a191b08efe553c3e195bcc38033 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Wed, 15 Jan 2020 15:16:29 +0200 Subject: [PATCH 10/44] Code fixed for libckloud2 --- .../fanstatic/scripts/webassets.yml | 2 +- .../cloudstorage/logic/action/multipart.py | 56 ++++++++++++------- ckanext/cloudstorage/storage.py | 16 +++--- setup.py | 2 +- 4 files changed, 46 insertions(+), 30 deletions(-) diff --git a/ckanext/cloudstorage/fanstatic/scripts/webassets.yml b/ckanext/cloudstorage/fanstatic/scripts/webassets.yml index 73665fd..34b9126 100644 --- a/ckanext/cloudstorage/fanstatic/scripts/webassets.yml +++ b/ckanext/cloudstorage/fanstatic/scripts/webassets.yml @@ -4,7 +4,7 @@ main: extra: preload: - base/main + - vendor/fileupload contents: - vendor/jquery-widget.js - - vendor/file-upload.js - cloudstorage-multipart-upload.js diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py index b248c2a..c17def2 100644 --- a/ckanext/cloudstorage/logic/action/multipart.py +++ b/ckanext/cloudstorage/logic/action/multipart.py @@ -3,6 +3,8 @@ import logging import datetime +import six + from sqlalchemy.orm.exc import NoResultFound import ckan.model as model import ckan.lib.helpers as h @@ -16,6 +18,8 @@ else: from pylons import config +import libcloud.security +libcloud.security.VERIFY_SSL_CERT = True log = logging.getLogger(__name__) @@ -123,19 +127,17 @@ def initiate_multipart(context, data_dict): except Exception as e: log.exception('[delete from cloud] %s' % e) - resp = uploader.driver.connection.request( - _get_object_url(uploader, res_name) + '?uploads', - method='POST' + upload_object = MultipartUpload( + uploader.driver._initiate_multipart( + container=uploader.container, + object_name=res_name + ), + id, + res_name, + size, + name, + user_id ) - if not resp.success(): - raise toolkit.ValidationError(resp.error) - try: - upload_id = resp.object.find( - '{%s}UploadId' % 
resp.object.nsmap[None]).text - except AttributeError: - upload_id_list = [e for e in resp.object.getchildren() if e.tag.endswith('UploadId')] - upload_id = upload_id_list[0].text - upload_object = MultipartUpload(upload_id, id, res_name, size, name, user_id) upload_object.save() return upload_object.as_dict() @@ -144,17 +146,29 @@ def initiate_multipart(context, data_dict): def upload_multipart(context, data_dict): h.check_access('cloudstorage_upload_multipart', data_dict) upload_id, part_number, part_content = toolkit.get_or_bust( - data_dict, ['uploadId', 'partNumber', 'upload']) + data_dict, + ['uploadId', 'partNumber', 'upload'] + ) uploader = ResourceCloudStorage({}) upload = model.Session.query(MultipartUpload).get(upload_id) - + if six.PY2: + data = part_content.file.read() + else: + data = part_content.stream.read() resp = uploader.driver.connection.request( _get_object_url( - uploader, upload.name) + '?partNumber={0}&uploadId={1}'.format( - part_number, upload_id), + uploader, upload.name + ), + params={ + 'uploadId': upload_id, + 'partNumber': part_number + }, method='PUT', - data=bytearray(part_content.file.read()) + data=data, + headers={ + 'Content-Length': len(data) + } ) if resp.status != 200: raise toolkit.ValidationError('Upload failed: part %s' % part_number) @@ -195,9 +209,11 @@ def finish_multipart(context, data_dict): except Exception: pass uploader.driver._commit_multipart( - _get_object_url(uploader, upload.name), - upload_id, - chunks) + container=uploader.container, + object_name=upload.name, + upload_id=upload_id, + chunks=chunks + ) upload.delete() upload.commit() diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index d1b0e16..80f5e14 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -5,13 +5,12 @@ from builtins import object import cgi import mimetypes -import os.path +import os import urllib.parse from ast import literal_eval from datetime import datetime, timedelta import six - from ckan import model from ckan.lib import munge import ckan.plugins as p @@ -250,8 +249,8 @@ def upload(self, id, max_size=10): # in Python3 libcloud iterates over uploaded file, # while it's wrappend into non-iterator. So, pick real # file-object and give it to cloudstorage - if six.PY3: - file_upload = file_upload._file + # if six.PY3: + # file_upload = file_upload._file self.container.upload_object_via_stream( file_upload, object_name=self.path_from_filename( @@ -319,20 +318,21 @@ def get_url_from_filename(self, rid, filename, content_type=None): ) elif self.can_use_advanced_aws and self.use_secure_urls: from boto.s3.connection import S3Connection + os.environ['S3_USE_SIGV4'] = 'True' s3_connection = S3Connection( self.driver_options['key'], - self.driver_options['secret'] + self.driver_options['secret'], + # FIXME: while testing, set to local host + host='s3.ap-southeast-2.amazonaws.com' ) generate_url_params = {"expires_in": 60 * 60, "method": "GET", "bucket": self.container_name, - "query_auth": True, "key": path} if content_type: generate_url_params['headers'] = {"Content-Type": content_type} - - return s3_connection.generate_url(**generate_url_params) + return s3_connection.generate_url_sigv4(**generate_url_params) # Find the object for the given key. 
obj = self.container.get_object(path) diff --git a/setup.py b/setup.py index 4e565a5..11c46f8 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ include_package_data=True, zip_safe=False, install_requires=[ - 'apache-libcloud==2.8' + 'apache-libcloud' ], entry_points=( """ From 501f4a89eafe86a26044b05a4fe601194b149733 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Thu, 6 Feb 2020 10:57:15 +0200 Subject: [PATCH 11/44] Additional check of upload type --- ckanext/cloudstorage/storage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 80f5e14..41c45df 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -172,7 +172,8 @@ def __init__(self, resource): multipart_name = resource.pop('multipart_name', None) # Check to see if a file has been provided - if isinstance(upload_field_storage, UploadedFileType): + if bool(upload_field_storage) and isinstance( + upload_field_storage, UploadedFileType): self.filename = munge.munge_filename(upload_field_storage.filename) if p.toolkit.check_ckan_version("2.9"): self.file_upload = upload_field_storage.stream From 96150f8aea3a8eb10719ff802e2dd00fe0df39d4 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Tue, 5 May 2020 21:38:13 +0300 Subject: [PATCH 12/44] Disable cleanup before multipart upload --- .../cloudstorage/logic/action/multipart.py | 26 +++++++++---------- ckanext/cloudstorage/storage.py | 8 ++---- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py index c17def2..e06b4de 100644 --- a/ckanext/cloudstorage/logic/action/multipart.py +++ b/ckanext/cloudstorage/logic/action/multipart.py @@ -116,16 +116,16 @@ def initiate_multipart(context, data_dict): resource_id=id): _delete_multipart(old_upload, uploader) - _rindex = res_name.rfind('/') - if ~_rindex: - try: - name_prefix = res_name[:_rindex] - for cloud_object in uploader.container.iterate_objects(): - if cloud_object.name.startswith(name_prefix): - log.info('Removing cloud object: %s' % cloud_object) - cloud_object.delete() - except Exception as e: - log.exception('[delete from cloud] %s' % e) + # _rindex = res_name.rfind('/') + # if ~_rindex: + # try: + # name_prefix = res_name[:_rindex] + # for cloud_object in uploader.container.iterate_objects(): + # if cloud_object.name.startswith(name_prefix): + # log.info('Removing cloud object: %s' % cloud_object) + # cloud_object.delete() + # except Exception as e: + # log.exception('[delete from cloud] %s' % e) upload_object = MultipartUpload( uploader.driver._initiate_multipart( @@ -152,10 +152,10 @@ def upload_multipart(context, data_dict): uploader = ResourceCloudStorage({}) upload = model.Session.query(MultipartUpload).get(upload_id) - if six.PY2: - data = part_content.file.read() - else: + if toolkit.check_ckan_version('2.8'): data = part_content.stream.read() + else: + data = part_content.file.read() resp = uploader.driver.connection.request( _get_object_url( uploader, upload.name diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 41c45df..6a7503a 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -1,16 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from future import standard_library -standard_library.install_aliases() -from builtins import object import cgi import mimetypes import os -import urllib.parse +from 
six.moves.urllib.parse import urljoin from ast import literal_eval from datetime import datetime, timedelta -import six from ckan import model from ckan.lib import munge import ckan.plugins as p @@ -345,7 +341,7 @@ def get_url_from_filename(self, rid, filename, content_type=None): return self.driver.get_object_cdn_url(obj) except NotImplementedError: if 'S3' in self.driver_name: - return urllib.parse.urljoin( + return urljoin( 'https://' + self.driver.connection.host, '{container}/{path}'.format( container=self.container_name, From 8436cd597a84046d16284806d7eb32f9615137af Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Wed, 6 May 2020 19:17:52 +0300 Subject: [PATCH 13/44] Remove hardcoded fixes --- .../cloudstorage/logic/action/multipart.py | 26 ++++++++++--------- ckanext/cloudstorage/storage.py | 9 ++++--- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py index e06b4de..a1c3b38 100644 --- a/ckanext/cloudstorage/logic/action/multipart.py +++ b/ckanext/cloudstorage/logic/action/multipart.py @@ -115,17 +115,19 @@ def initiate_multipart(context, data_dict): for old_upload in model.Session.query(MultipartUpload).filter_by( resource_id=id): _delete_multipart(old_upload, uploader) - - # _rindex = res_name.rfind('/') - # if ~_rindex: - # try: - # name_prefix = res_name[:_rindex] - # for cloud_object in uploader.container.iterate_objects(): - # if cloud_object.name.startswith(name_prefix): - # log.info('Removing cloud object: %s' % cloud_object) - # cloud_object.delete() - # except Exception as e: - # log.exception('[delete from cloud] %s' % e) + _rindex = res_name.rfind('/') + if ~_rindex: + try: + name_prefix = res_name[:_rindex] + old_objects = uploader.driver.iterate_container_objects( + uploader.container, + name_prefix + ) + for obj in old_objects: + log.info('Removing cloud object: %s' % obj) + obj.delete() + except Exception as e: + log.exception('[delete from cloud] %s' % e) upload_object = MultipartUpload( uploader.driver._initiate_multipart( @@ -210,7 +212,7 @@ def finish_multipart(context, data_dict): pass uploader.driver._commit_multipart( container=uploader.container, - object_name=upload.name, + object_name=upload.name, upload_id=upload_id, chunks=chunks ) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 6a7503a..6d7640d 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -33,7 +33,7 @@ def __init__(self): self._container = None def path_from_filename(self, rid, filename): - raise NotImplemented + raise NotImplementedError @property def container(self): @@ -126,6 +126,10 @@ def can_use_advanced_aws(self): """ # Are we even using AWS? if 'S3' in self.driver_name: + if 'host' not in self.driver_options: + # newer libcloud versions(must-use for python3) + # requires host for secure URLs + return False try: # Yes? Is the boto package available? 
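                # boto is not the storage backend itself (libcloud is); it is
                # only pulled in for the extra AWS functionality such as
                # presigned download URLs.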
import boto @@ -319,8 +323,7 @@ def get_url_from_filename(self, rid, filename, content_type=None): s3_connection = S3Connection( self.driver_options['key'], self.driver_options['secret'], - # FIXME: while testing, set to local host - host='s3.ap-southeast-2.amazonaws.com' + host=self.driver_options['host'] ) generate_url_params = {"expires_in": 60 * 60, From 9ab02ae0ef80257223799e74e7bbf99ce4d8f8f5 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Wed, 6 May 2020 22:09:11 +0300 Subject: [PATCH 14/44] Initial configuration of pytests --- .gitignore | 1 + ckanext/cloudstorage/logic/action/__init__.py | 14 ++ ckanext/cloudstorage/logic/auth/__init__.py | 14 ++ ckanext/cloudstorage/plugin/__init__.py | 37 ++-- ckanext/cloudstorage/tests/__init__.py | 0 ckanext/cloudstorage/tests/ckan_setup.py | 38 ++++ ckanext/cloudstorage/tests/conftest.py | 21 ++ ckanext/cloudstorage/tests/fixtures.py | 192 ++++++++++++++++++ ckanext/cloudstorage/tests/test_helpers.py | 29 +++ ckanext/cloudstorage/tests/test_plugin.py | 35 ++++ ckanext/cloudstorage/views.py | 11 +- conftest.py | 6 + setup.cfg | 7 + setup.py | 2 +- 14 files changed, 375 insertions(+), 32 deletions(-) create mode 100644 ckanext/cloudstorage/tests/__init__.py create mode 100644 ckanext/cloudstorage/tests/ckan_setup.py create mode 100644 ckanext/cloudstorage/tests/conftest.py create mode 100644 ckanext/cloudstorage/tests/fixtures.py create mode 100644 ckanext/cloudstorage/tests/test_helpers.py create mode 100644 ckanext/cloudstorage/tests/test_plugin.py create mode 100644 conftest.py create mode 100644 setup.cfg diff --git a/.gitignore b/.gitignore index 979398f..ece61fa 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ syntax: glob *.swo .DS_Store ckan.egg-info/* +ckanext_cloudstorage.egg-info/* sandbox/* dist diff --git a/ckanext/cloudstorage/logic/action/__init__.py b/ckanext/cloudstorage/logic/action/__init__.py index e69de29..d9adf7d 100644 --- a/ckanext/cloudstorage/logic/action/__init__.py +++ b/ckanext/cloudstorage/logic/action/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- + +from ckanext.cloudstorage.logic.action import multipart + + +def get_actions(): + return { + 'cloudstorage_initiate_multipart': multipart.initiate_multipart, + 'cloudstorage_upload_multipart': multipart.upload_multipart, + 'cloudstorage_finish_multipart': multipart.finish_multipart, + 'cloudstorage_abort_multipart': multipart.abort_multipart, + 'cloudstorage_check_multipart': multipart.check_multipart, + 'cloudstorage_clean_multipart': multipart.clean_multipart, + } diff --git a/ckanext/cloudstorage/logic/auth/__init__.py b/ckanext/cloudstorage/logic/auth/__init__.py index e69de29..c9e6391 100644 --- a/ckanext/cloudstorage/logic/auth/__init__.py +++ b/ckanext/cloudstorage/logic/auth/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- + +from ckanext.cloudstorage.logic.auth import multipart + + +def get_auth_functions(): + return { + 'cloudstorage_initiate_multipart': multipart.initiate_multipart, + 'cloudstorage_upload_multipart': multipart.upload_multipart, + 'cloudstorage_finish_multipart': multipart.finish_multipart, + 'cloudstorage_abort_multipart': multipart.abort_multipart, + 'cloudstorage_check_multipart': multipart.check_multipart, + 'cloudstorage_clean_multipart': multipart.clean_multipart, + } diff --git a/ckanext/cloudstorage/plugin/__init__.py b/ckanext/cloudstorage/plugin/__init__.py index a07bfd9..89011eb 100644 --- a/ckanext/cloudstorage/plugin/__init__.py +++ b/ckanext/cloudstorage/plugin/__init__.py @@ -3,15 +3,14 @@ from ckan 
import plugins -import ckanext.cloudstorage.logic.action.multipart as m_action -import ckanext.cloudstorage.logic.auth.multipart as m_auth +from ckanext.cloudstorage.logic.action import get_actions +from ckanext.cloudstorage.logic.auth import get_auth_functions from ckanext.cloudstorage import storage from ckanext.cloudstorage import helpers if plugins.toolkit.check_ckan_version("2.9"): from ckanext.cloudstorage.plugin.flask_plugin import MixinPlugin - # from ckanext.cloudstorage.plugin.pylons_plugin import MixinPlugin else: from ckanext.cloudstorage.plugin.pylons_plugin import MixinPlugin @@ -36,6 +35,8 @@ def update_config(self, config): def get_helpers(self): return dict(cloudstorage_use_secure_urls=helpers.use_secure_urls) + # IConfigurable + def configure(self, config): required_keys = ('ckanext.cloudstorage.driver', @@ -47,6 +48,8 @@ def configure(self, config): raise RuntimeError( 'Required configuration option {0} not found.'.format(rk)) + # IUploader + def get_resource_uploader(self, data_dict): # We provide a custom Resource uploader. return storage.ResourceCloudStorage(data_dict) @@ -59,26 +62,12 @@ def get_uploader(self, upload_to, old_filename=None): # IActions def get_actions(self): - return { - 'cloudstorage_initiate_multipart': m_action.initiate_multipart, - 'cloudstorage_upload_multipart': m_action.upload_multipart, - 'cloudstorage_finish_multipart': m_action.finish_multipart, - 'cloudstorage_abort_multipart': m_action.abort_multipart, - 'cloudstorage_check_multipart': m_action.check_multipart, - 'cloudstorage_clean_multipart': m_action.clean_multipart, - } + return get_actions() # IAuthFunctions def get_auth_functions(self): - return { - 'cloudstorage_initiate_multipart': m_auth.initiate_multipart, - 'cloudstorage_upload_multipart': m_auth.upload_multipart, - 'cloudstorage_finish_multipart': m_auth.finish_multipart, - 'cloudstorage_abort_multipart': m_auth.abort_multipart, - 'cloudstorage_check_multipart': m_auth.check_multipart, - 'cloudstorage_clean_multipart': m_auth.clean_multipart, - } + return get_auth_functions() # IResourceController @@ -116,6 +105,10 @@ def before_delete(self, context, resource, resources): upload_path = os.path.dirname( uploader.path_from_filename(resource['id'], 'fake-name')) - for old_file in uploader.container.iterate_objects(): - if old_file.name.startswith(upload_path): - old_file.delete() + old_files = uploader.driver.iterate_container_objects( + uploader.container, + upload_path + ) + + for old_file in old_files: + old_file.delete() diff --git a/ckanext/cloudstorage/tests/__init__.py b/ckanext/cloudstorage/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ckanext/cloudstorage/tests/ckan_setup.py b/ckanext/cloudstorage/tests/ckan_setup.py new file mode 100644 index 0000000..be2d438 --- /dev/null +++ b/ckanext/cloudstorage/tests/ckan_setup.py @@ -0,0 +1,38 @@ +try: + from ckan.tests.pytest_ckan.ckan_setup import * # NOQA +except ImportError: + from ckan.config.middleware import make_app # NOQA + from ckan.common import config # NOQA + + import pkg_resources + from paste.deploy import loadapp + import sys + import os + + import pylons + from pylons.i18n.translation import _get_translator + + def pytest_addoption(parser): + """Allow using custom config file during tests. + """ + parser.addoption(u"--ckan-ini", action=u"store") + + def pytest_sessionstart(session): + """Initialize CKAN environment. 
+ """ + global pylonsapp + path = os.getcwd() + sys.path.insert(0, path) + pkg_resources.working_set.add_entry(path) + pylonsapp = loadapp( + 'config:' + session.config.option.ckan_ini, + relative_to=path, + ) + + # Initialize a translator for tests that utilize i18n + translator = _get_translator(pylons.config.get('lang')) + pylons.translator._push_object(translator) + + class FakeResponse: + headers = {} # because render wants to delete Pragma + pylons.response._push_object(FakeResponse) diff --git a/ckanext/cloudstorage/tests/conftest.py b/ckanext/cloudstorage/tests/conftest.py new file mode 100644 index 0000000..af157e4 --- /dev/null +++ b/ckanext/cloudstorage/tests/conftest.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +import os + +import pytest + + +@pytest.fixture +def with_driver_options(ckan_config, monkeypatch): + monkeypatch.setitem( + ckan_config, + 'ckanext.cloudstorage.driver', + os.getenv('TEST_DRIVER', 'S3')) + monkeypatch.setitem( + ckan_config, + 'ckanext.cloudstorage.container_name', + os.getenv('TEST_CONTAINER', 'cloudstorage-test')) + + monkeypatch.setitem( + ckan_config, + 'ckanext.cloudstorage.driver_options', + os.getenv('TEST_DRIVER_OPTIONS', '{}')) diff --git a/ckanext/cloudstorage/tests/fixtures.py b/ckanext/cloudstorage/tests/fixtures.py new file mode 100644 index 0000000..6d0e86c --- /dev/null +++ b/ckanext/cloudstorage/tests/fixtures.py @@ -0,0 +1,192 @@ +# -*- coding: utf-8 -*- + +try: + from ckan.tests.pytest_ckan.fixtures import * # NOQA + +except ImportError: + import pytest + import six + import ckan.tests.helpers as test_helpers + import ckan.tests.factories as factories + import ckan.plugins + import ckan.lib.search as search + + from ckan.common import config + + from ckanext.cloudstorage.utils import FakeFileStorage + + @pytest.fixture + def ckan_config(request, monkeypatch): + """Allows to override the configuration object used by tests + + Takes into account config patches introduced by the ``ckan_config`` + mark. + + If you just want to set one or more configuration options for the + scope of a test (or a test class), use the ``ckan_config`` mark:: + + @pytest.mark.ckan_config('ckan.auth.create_unowned_dataset', True) + def test_auth_create_unowned_dataset(): + + # ... + + To use the custom config inside a test, apply the + ``ckan_config`` mark to it and inject the ``ckan_config`` fixture: + + .. literalinclude:: /../ckan/tests/pytest_ckan/test_fixtures.py + :start-after: # START-CONFIG-OVERRIDE + :end-before: # END-CONFIG-OVERRIDE + + If the change only needs to be applied locally, use the + ``monkeypatch`` fixture + + .. literalinclude:: /../ckan/tests/test_common.py + :start-after: # START-CONFIG-OVERRIDE + :end-before: # END-CONFIG-OVERRIDE + + """ + _original = config.copy() + for mark in request.node.iter_markers(u"ckan_config"): + monkeypatch.setitem(config, *mark.args) + yield config + config.clear() + config.update(_original) + + @pytest.fixture + def make_app(ckan_config): + """Factory for client app instances. + + Unless you need to create app instances lazily for some reason, + use the ``app`` fixture instead. 
+ """ + return test_helpers._get_test_app + + @pytest.fixture + def app(make_app): + """Returns a client app instance to use in functional tests + + To use it, just add the ``app`` parameter to your test function signature:: + + def test_dataset_search(self, app): + + url = h.url_for('dataset.search') + + response = app.get(url) + + + """ + return make_app() + + @pytest.fixture(scope=u"session") + def reset_db(): + """Callable for resetting the database to the initial state. + + If possible use the ``clean_db`` fixture instead. + + """ + return test_helpers.reset_db + + @pytest.fixture(scope=u"session") + def reset_index(): + """Callable for cleaning search index. + + If possible use the ``clean_index`` fixture instead. + """ + return search.clear_all + + @pytest.fixture + def clean_db(reset_db): + """Resets the database to the initial state. + + This can be used either for all tests in a class:: + + @pytest.mark.usefixtures("clean_db") + class TestExample(object): + + def test_example(self): + + or for a single test:: + + class TestExample(object): + + @pytest.mark.usefixtures("clean_db") + def test_example(self): + + """ + reset_db() + + @pytest.fixture + def clean_index(reset_index): + """Clear search index before starting the test. + """ + reset_index() + + @pytest.fixture + def with_plugins(ckan_config): + """Load all plugins specified by the ``ckan.plugins`` config option + at the beginning of the test. When the test ends (even it fails), it will + unload all the plugins in the reverse order. + + .. literalinclude:: /../ckan/tests/test_factories.py + :start-after: # START-CONFIG-OVERRIDE + :end-before: # END-CONFIG-OVERRIDE + + """ + plugins = ckan_config["ckan.plugins"].split() + for plugin in plugins: + if not ckan.plugins.plugin_loaded(plugin): + ckan.plugins.load(plugin) + yield + for plugin in reversed(plugins): + if ckan.plugins.plugin_loaded(plugin): + ckan.plugins.unload(plugin) + + @pytest.fixture + def test_request_context(app): + """Provide function for creating Flask request context. + """ + return app.flask_app.test_request_context + + @pytest.fixture + def with_request_context(test_request_context): + """Execute test inside requests context + """ + with test_request_context(): + yield + + @pytest.fixture + def make_resource(clean_db, ckan_config, monkeypatch, tmpdir): + """Shortcut for creating uploaded resource. + Requires content and name for newly created resource. By default + is using `resource_create` action, but it can be changed by + passing named argument `action`. + In addition, accepts named argument `context` that will be passed + to `ckan.tests.helpers.call_action` and arbitary number of + additional named arguments, that will be used as resource + properties. 
+ Example:: + def test_uploaded_resource(make_resource): + resource = make_resource("hello world", "file.txt") + assert resource["url_type"] == "upload" + assert resource["format"] == "TXT" + assert resource["size"] == 11 + """ + monkeypatch.setitem(ckan_config, u'ckan.storage_path', str(tmpdir)) + monkeypatch.setattr(ckan.lib.uploader, u'_storage_path', str(tmpdir)) + + def factory(data, filename, context={}, **kwargs): + action = kwargs.pop(u"action", u"resource_create") + test_file = six.BytesIO() + test_file.write(six.ensure_binary(data)) + test_file.seek(0) + test_resource = FakeFileStorage(test_file, filename) + + params = { + u"url": u"http://data", + u"upload": test_resource, + } + params.update(kwargs) + if u'package_id' not in params: + params[u'package_id'] = factories.Dataset()[u"id"] + return test_helpers.call_action(action, context, **params) + return factory diff --git a/ckanext/cloudstorage/tests/test_helpers.py b/ckanext/cloudstorage/tests/test_helpers.py new file mode 100644 index 0000000..c5f92e9 --- /dev/null +++ b/ckanext/cloudstorage/tests/test_helpers.py @@ -0,0 +1,29 @@ +import pytest + +import ckan.plugins.toolkit as tk + + +@pytest.mark.ckan_config('ckanext.cloudstorage.container_name', 'test') +@pytest.mark.ckan_config('ckanext.cloudstorage.driver_options', '{}') +@pytest.mark.ckan_config('ckan.plugins', 'cloudstorage') +@pytest.mark.usefixtures('with_plugins') +class TestUseSecureUrls(object): + @pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', 'true') + @pytest.mark.ckan_config('ckanext.cloudstorage.driver', 'AZURE_BLOBS') + def test_unsupported_provider_enabled(self): + assert not tk.h.cloudstorage_use_secure_urls() + + @pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', 'false') + @pytest.mark.ckan_config('ckanext.cloudstorage.driver', 'AZURE_BLOBS') + def test_unsupported_provider_disabled(self): + assert not tk.h.cloudstorage_use_secure_urls() + + @pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', 'true') + @pytest.mark.ckan_config('ckanext.cloudstorage.driver', 'S3_US_WEST') + def test_supported_provider_enabled(self): + assert tk.h.cloudstorage_use_secure_urls() + + @pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', 'false') + @pytest.mark.ckan_config('ckanext.cloudstorage.driver', 'S3_US_WEST') + def test_supported_provider_disabled(self): + assert not tk.h.cloudstorage_use_secure_urls() diff --git a/ckanext/cloudstorage/tests/test_plugin.py b/ckanext/cloudstorage/tests/test_plugin.py new file mode 100644 index 0000000..4c50772 --- /dev/null +++ b/ckanext/cloudstorage/tests/test_plugin.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +import pytest + +from libcloud.storage.types import ObjectDoesNotExistError + +import ckan.plugins as p + +from ckan.tests import helpers + + +@pytest.mark.ckan_config('ckan.plugins', 'cloudstorage') +@pytest.mark.usefixtures('with_driver_options', 'with_plugins') +class TestCloudstoragePlugin(object): + + @pytest.mark.parametrize('option', ( + 'ckanext.cloudstorage.driver', + 'ckanext.cloudstorage.driver_options', + 'ckanext.cloudstorage.container_name')) + def test_required_config(self, ckan_config, monkeypatch, option): + monkeypatch.delitem(ckan_config, option) + plugin = p.get_plugin('cloudstorage') + with pytest.raises(RuntimeError, match='configuration option'): + plugin.configure(ckan_config) + + @pytest.mark.usefixtures('clean_db') + def test_before_delete(self, make_resource): + name = 'test.txt' + resource = make_resource('hello world', name, name=name) + 
plugin = p.get_plugin('cloudstorage') + uploader = plugin.get_resource_uploader(resource) + assert uploader.get_url_from_filename(resource['id'], name) + helpers.call_action('resource_delete', id=resource['id']) + with pytest.raises(ObjectDoesNotExistError): + assert uploader.get_url_from_filename(resource['id'], name) diff --git a/ckanext/cloudstorage/views.py b/ckanext/cloudstorage/views.py index e91c18b..2f18a80 100644 --- a/ckanext/cloudstorage/views.py +++ b/ckanext/cloudstorage/views.py @@ -1,23 +1,16 @@ # -*- coding: utf-8 -*- from flask import Blueprint -import ckan.views.resource as resource import ckanext.cloudstorage.utils as utils cloudstorage = Blueprint('cloudstorage', __name__) +@cloudstorage.route('/dataset//resource//download') +@cloudstorage.route('/dataset//resource//download/') def download(id, resource_id, filename=None, package_type='dataset'): return utils.resource_download(id, resource_id, filename) -cloudstorage.add_url_rule('/dataset//resource//download', - view_func=download) -cloudstorage.add_url_rule( - '/dataset//resource//download/', - view_func=download, -) - - def get_blueprints(): return [cloudstorage] diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..fe491a4 --- /dev/null +++ b/conftest.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- + +pytest_plugins = [ + u'ckanext.cloudstorage.tests.ckan_setup', + u'ckanext.cloudstorage.tests.fixtures', +] diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..0e8287b --- /dev/null +++ b/setup.cfg @@ -0,0 +1,7 @@ +[tool:pytest] + +filterwarnings = + ignore::sqlalchemy.exc.SADeprecationWarning + ignore::sqlalchemy.exc.SAWarning + ignore::DeprecationWarning +addopts = --pdbcls=IPython.terminal.debugger:TerminalPdb \ No newline at end of file diff --git a/setup.py b/setup.py index 11c46f8..c259de4 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ include_package_data=True, zip_safe=False, install_requires=[ - 'apache-libcloud' + 'apache-libcloud~=2.8.2' ], entry_points=( """ From be1bc26e0727c1e8505480ab61c869dd273c3b41 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Sat, 9 May 2020 00:20:50 +0300 Subject: [PATCH 15/44] Quick tests for mait points --- README.md | 2 +- ckanext/cloudstorage/controller.py | 8 +-- ckanext/cloudstorage/helpers.py | 3 +- .../cloudstorage/logic/action/multipart.py | 10 ++-- ckanext/cloudstorage/tests/conftest.py | 44 ++++++++++----- ckanext/cloudstorage/tests/logic/__init__.py | 0 .../tests/logic/action/__init__.py | 0 .../tests/logic/action/test_multipart.py | 56 +++++++++++++++++++ ckanext/cloudstorage/tests/test_helpers.py | 33 +++++++---- ckanext/cloudstorage/tests/test_plugin.py | 19 ++++++- ckanext/cloudstorage/tests/test_storage.py | 42 ++++++++++++++ ckanext/cloudstorage/tests/test_utils.py | 53 ++++++++++++++++++ ckanext/cloudstorage/utils.py | 1 + 13 files changed, 233 insertions(+), 38 deletions(-) create mode 100644 ckanext/cloudstorage/tests/logic/__init__.py create mode 100644 ckanext/cloudstorage/tests/logic/action/__init__.py create mode 100644 ckanext/cloudstorage/tests/logic/action/test_multipart.py create mode 100644 ckanext/cloudstorage/tests/test_storage.py create mode 100644 ckanext/cloudstorage/tests/test_utils.py diff --git a/README.md b/README.md index f5308ff..7847eb7 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ below have been tested: | Provider | Uploads | Downloads | Secure URLs (private resources) | | --- | --- | --- | --- | | Azure | YES | YES | YES (if `azure-storage` is installed) | -| AWS S3 | YES | YES 
| YES (if `boto` is installed) | +| AWS S3 | YES | YES | YES (if `boto` is installed and `host` key added to `driver_options`) | | Rackspace | YES | YES | No | # What are "Secure URLs"? diff --git a/ckanext/cloudstorage/controller.py b/ckanext/cloudstorage/controller.py index 5105d46..46569cb 100644 --- a/ckanext/cloudstorage/controller.py +++ b/ckanext/cloudstorage/controller.py @@ -1,13 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import os.path -from pylons import c -from pylons.i18n import _ - -from ckan import logic, model -from ckan.lib import base, uploader -import ckan.lib.helpers as h +from ckan.lib import base import ckanext.cloudstorage.utils as utils diff --git a/ckanext/cloudstorage/helpers.py b/ckanext/cloudstorage/helpers.py index 1a7ca99..a5042eb 100644 --- a/ckanext/cloudstorage/helpers.py +++ b/ckanext/cloudstorage/helpers.py @@ -7,5 +7,6 @@ def use_secure_urls(): return all([ ResourceCloudStorage.use_secure_urls.fget(None), # Currently implemented just AWS version - 'S3' in ResourceCloudStorage.driver_name.fget(None) + 'S3' in ResourceCloudStorage.driver_name.fget(None), + 'host' in ResourceCloudStorage.driver_options.fget(None), ]) diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py index a1c3b38..3570c3f 100644 --- a/ckanext/cloudstorage/logic/action/multipart.py +++ b/ckanext/cloudstorage/logic/action/multipart.py @@ -3,7 +3,7 @@ import logging import datetime -import six +import libcloud.security from sqlalchemy.orm.exc import NoResultFound import ckan.model as model @@ -18,7 +18,6 @@ else: from pylons import config -import libcloud.security libcloud.security.VERIFY_SSL_CERT = True log = logging.getLogger(__name__) @@ -98,9 +97,8 @@ def initiate_multipart(context, data_dict): h.check_access('cloudstorage_initiate_multipart', data_dict) id, name, size = toolkit.get_or_bust(data_dict, ['id', 'name', 'size']) - user_id = None - if context['auth_user_obj']: - user_id = context['auth_user_obj'].id + user_obj = model.User.get(context['user']) + user_id = user_obj.id if user_obj else None uploader = ResourceCloudStorage({'multipart_name': name}) res_name = uploader.path_from_filename(id, name) @@ -115,6 +113,8 @@ def initiate_multipart(context, data_dict): for old_upload in model.Session.query(MultipartUpload).filter_by( resource_id=id): _delete_multipart(old_upload, uploader) + + # Find and remove previous file from this resourve _rindex = res_name.rfind('/') if ~_rindex: try: diff --git a/ckanext/cloudstorage/tests/conftest.py b/ckanext/cloudstorage/tests/conftest.py index af157e4..5fcfd5f 100644 --- a/ckanext/cloudstorage/tests/conftest.py +++ b/ckanext/cloudstorage/tests/conftest.py @@ -2,20 +2,38 @@ import os import pytest +from ckanext.cloudstorage import utils @pytest.fixture def with_driver_options(ckan_config, monkeypatch): - monkeypatch.setitem( - ckan_config, - 'ckanext.cloudstorage.driver', - os.getenv('TEST_DRIVER', 'S3')) - monkeypatch.setitem( - ckan_config, - 'ckanext.cloudstorage.container_name', - os.getenv('TEST_CONTAINER', 'cloudstorage-test')) - - monkeypatch.setitem( - ckan_config, - 'ckanext.cloudstorage.driver_options', - os.getenv('TEST_DRIVER_OPTIONS', '{}')) + """Apply config from env variablies - thus you won't have unstaged + changes in config file and won't accidentally commit your cloud + credentials. 
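+
+    Example (a sketch; the values below are placeholders, not working
+    credentials. Add a "host" entry to the driver options if secure URLs
+    should be exercised as well)::
+
+        # TEST_DRIVER=S3
+        # TEST_CONTAINER=cloudstorage-test
+        # TEST_DRIVER_OPTIONS='{"key": "...", "secret": "..."}'
+
+        @pytest.mark.usefixtures('with_driver_options', 'with_plugins')
+        def test_with_real_cloud(app):
+            ...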
+ + """ + driver = os.getenv('TEST_DRIVER') + + if not driver: + pytest.skip('TEST_DRIVER is not set') + monkeypatch.setitem(ckan_config, 'ckanext.cloudstorage.driver', driver) + + container = os.getenv('TEST_CONTAINER') + if not container: + pytest.skip('TEST_CONTAINER is not set') + monkeypatch.setitem(ckan_config, + 'ckanext.cloudstorage.container_name', container) + + options = os.getenv('TEST_DRIVER_OPTIONS') + if not options: + pytest.skip('TEST_DRIVER_OPTIONS is not set') + monkeypatch.setitem(ckan_config, + 'ckanext.cloudstorage.driver_options', options) + + +@pytest.fixture +def clean_db(reset_db): + """Initialize extension's tables. + """ + reset_db() + utils.initdb() diff --git a/ckanext/cloudstorage/tests/logic/__init__.py b/ckanext/cloudstorage/tests/logic/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ckanext/cloudstorage/tests/logic/action/__init__.py b/ckanext/cloudstorage/tests/logic/action/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ckanext/cloudstorage/tests/logic/action/test_multipart.py b/ckanext/cloudstorage/tests/logic/action/test_multipart.py new file mode 100644 index 0000000..9d5537b --- /dev/null +++ b/ckanext/cloudstorage/tests/logic/action/test_multipart.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- + +import pytest +import six + +from libcloud.storage.types import ObjectDoesNotExistError + +from ckan.tests import factories, helpers +from ckanext.cloudstorage.storage import ResourceCloudStorage +from ckanext.cloudstorage.utils import FakeFileStorage + + +@pytest.mark.ckan_config('ckan.plugins', 'cloudstorage') +@pytest.mark.usefixtures( + 'with_driver_options', 'with_plugins', + 'with_request_context', 'clean_db') +class TestMultipartUpload(object): + + def test_upload(self): + filename = 'file.txt' + res = factories.Resource() + multipart = helpers.call_action( + 'cloudstorage_initiate_multipart', + id=res['id'], name='file.txt', size=1024 * 1024 * 5 * 2) + storage = ResourceCloudStorage(res) + assert storage.path_from_filename( + res['id'], filename) == multipart['name'] + with pytest.raises(ObjectDoesNotExistError): + storage.get_url_from_filename(res['id'], filename) + + fp = six.BytesIO(b'b' * 1024 * 1024 * 5) + fp.seek(0) + helpers.call_action( + 'cloudstorage_upload_multipart', + uploadId=multipart['id'], + partNumber=1, + upload=FakeFileStorage(fp, filename)) + + with pytest.raises(ObjectDoesNotExistError): + storage.get_url_from_filename(res['id'], filename) + + fp = six.BytesIO(b'a' * 1024 * 1024 * 5) + fp.seek(0) + helpers.call_action( + 'cloudstorage_upload_multipart', + uploadId=multipart['id'], + partNumber=2, + upload=FakeFileStorage(fp, filename)) + + with pytest.raises(ObjectDoesNotExistError): + storage.get_url_from_filename(res['id'], filename) + + result = helpers.call_action( + 'cloudstorage_finish_multipart', uploadId=multipart['id']) + assert result['commited'] + assert storage.get_url_from_filename(res['id'], filename) diff --git a/ckanext/cloudstorage/tests/test_helpers.py b/ckanext/cloudstorage/tests/test_helpers.py index c5f92e9..f0a4557 100644 --- a/ckanext/cloudstorage/tests/test_helpers.py +++ b/ckanext/cloudstorage/tests/test_helpers.py @@ -2,28 +2,41 @@ import ckan.plugins.toolkit as tk +_secure_urls = 'ckanext.cloudstorage.use_secure_urls' +_driver = 'ckanext.cloudstorage.driver' +_options = 'ckanext.cloudstorage.driver_options' + @pytest.mark.ckan_config('ckanext.cloudstorage.container_name', 'test') -@pytest.mark.ckan_config('ckanext.cloudstorage.driver_options', '{}') 
@pytest.mark.ckan_config('ckan.plugins', 'cloudstorage') @pytest.mark.usefixtures('with_plugins') class TestUseSecureUrls(object): - @pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', 'true') - @pytest.mark.ckan_config('ckanext.cloudstorage.driver', 'AZURE_BLOBS') + @pytest.mark.ckan_config(_secure_urls, 'true') + @pytest.mark.ckan_config(_driver, 'AZURE_BLOBS') + @pytest.mark.ckan_config(_options, '{}') def test_unsupported_provider_enabled(self): assert not tk.h.cloudstorage_use_secure_urls() - @pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', 'false') - @pytest.mark.ckan_config('ckanext.cloudstorage.driver', 'AZURE_BLOBS') + @pytest.mark.ckan_config(_secure_urls, 'false') + @pytest.mark.ckan_config(_driver, 'AZURE_BLOBS') + @pytest.mark.ckan_config(_options, '{}') def test_unsupported_provider_disabled(self): assert not tk.h.cloudstorage_use_secure_urls() - @pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', 'true') - @pytest.mark.ckan_config('ckanext.cloudstorage.driver', 'S3_US_WEST') - def test_supported_provider_enabled(self): + @pytest.mark.ckan_config(_secure_urls, 'true') + @pytest.mark.ckan_config(_driver, 'S3_US_WEST') + @pytest.mark.ckan_config(_options, '{}') + def test_supported_provider_enabled_withoug_host(self): + assert not tk.h.cloudstorage_use_secure_urls() + + @pytest.mark.ckan_config(_secure_urls, 'true') + @pytest.mark.ckan_config(_driver, 'S3_US_WEST') + @pytest.mark.ckan_config(_options, '{"host": "x"}') + def test_supported_provider_enabled_with_host(self): assert tk.h.cloudstorage_use_secure_urls() - @pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', 'false') - @pytest.mark.ckan_config('ckanext.cloudstorage.driver', 'S3_US_WEST') + @pytest.mark.ckan_config(_secure_urls, 'false') + @pytest.mark.ckan_config(_driver, 'S3_US_WEST') + @pytest.mark.ckan_config(_options, '{}') def test_supported_provider_disabled(self): assert not tk.h.cloudstorage_use_secure_urls() diff --git a/ckanext/cloudstorage/tests/test_plugin.py b/ckanext/cloudstorage/tests/test_plugin.py index 4c50772..03d226d 100644 --- a/ckanext/cloudstorage/tests/test_plugin.py +++ b/ckanext/cloudstorage/tests/test_plugin.py @@ -6,7 +6,7 @@ import ckan.plugins as p -from ckan.tests import helpers +from ckan.tests import helpers, factories @pytest.mark.ckan_config('ckan.plugins', 'cloudstorage') @@ -18,6 +18,10 @@ class TestCloudstoragePlugin(object): 'ckanext.cloudstorage.driver_options', 'ckanext.cloudstorage.container_name')) def test_required_config(self, ckan_config, monkeypatch, option): + """All those config options are essential and cloudstorage will + prevent application from start if any of them is missing. + + """ monkeypatch.delitem(ckan_config, option) plugin = p.get_plugin('cloudstorage') with pytest.raises(RuntimeError, match='configuration option'): @@ -25,11 +29,24 @@ def test_required_config(self, ckan_config, monkeypatch, option): @pytest.mark.usefixtures('clean_db') def test_before_delete(self, make_resource): + """When resource deleted, we must remove corresponding file from S3. 
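+
+        Rough sketch of what the plugin's ``before_delete`` hook does with
+        the uploaded object (not the literal implementation)::
+
+            prefix = os.path.dirname(
+                uploader.path_from_filename(resource['id'], 'fake-name'))
+            for obj in uploader.driver.iterate_container_objects(
+                    uploader.container, prefix):
+                obj.delete()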
+ + """ name = 'test.txt' resource = make_resource('hello world', name, name=name) plugin = p.get_plugin('cloudstorage') uploader = plugin.get_resource_uploader(resource) assert uploader.get_url_from_filename(resource['id'], name) + helpers.call_action('resource_delete', id=resource['id']) with pytest.raises(ObjectDoesNotExistError): assert uploader.get_url_from_filename(resource['id'], name) + + @pytest.mark.usefixtures('clean_db') + def test_before_delete_for_linked_resource(self): + """Non-uploads don't raise exceptions. + """ + resource = factories.Resource() + helpers.call_action('resource_delete', id=resource['id']) + with pytest.raises(p.toolkit.ObjectNotFound): + helpers.call_action('resource_show', id=resource['id']) diff --git a/ckanext/cloudstorage/tests/test_storage.py b/ckanext/cloudstorage/tests/test_storage.py new file mode 100644 index 0000000..b69054e --- /dev/null +++ b/ckanext/cloudstorage/tests/test_storage.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +import pytest + +from six.moves.urllib.parse import urlparse + +from ckanext.cloudstorage.storage import CloudStorage, ResourceCloudStorage + + +@pytest.mark.ckan_config('ckan.plugins', 'cloudstorage') +@pytest.mark.usefixtures('with_driver_options', 'with_plugins') +class TestCloudStorage(object): + def test_props(self): + storage = CloudStorage() + assert storage.driver_options + assert storage.driver_name + assert storage.container_name + assert storage.container + assert not storage.leave_files + assert not storage.use_secure_urls + assert not storage.guess_mimetype + + +@pytest.mark.ckan_config('ckan.plugins', 'cloudstorage') +@pytest.mark.usefixtures('with_driver_options', 'with_plugins') +class TestResourceCloudStorage(object): + def test_not_secure_url_from_filename(self, make_resource): + filename = 'file.txt' + resource = make_resource('test', filename) + storage = ResourceCloudStorage(resource) + url = storage.get_url_from_filename(resource['id'], filename) + assert storage.container_name in url + assert not urlparse(url).query + + @pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', True) + def test_secure_url_from_filename(self, make_resource): + filename = 'file.txt' + resource = make_resource('test', filename) + storage = ResourceCloudStorage(resource) + if not storage.can_use_advanced_aws or not storage.use_secure_urls: + pytest.skip('SecureURL not supported') + url = storage.get_url_from_filename(resource['id'], filename) + assert urlparse(url).query diff --git a/ckanext/cloudstorage/tests/test_utils.py b/ckanext/cloudstorage/tests/test_utils.py new file mode 100644 index 0000000..0c058ae --- /dev/null +++ b/ckanext/cloudstorage/tests/test_utils.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +import pytest +import mock + +import ckan.plugins.toolkit as tk + +from ckan.tests import factories, helpers +from ckanext.cloudstorage import utils, storage + + +@pytest.mark.ckan_config('ckan.plugins', 'cloudstorage') +@pytest.mark.usefixtures('with_driver_options', 'with_plugins') +class TestResourceDownload(object): + def test_utils_used_by_download_route(self, app, monkeypatch): + url = tk.url_for('resource.download', id='a', resource_id='b') + func = mock.Mock(return_value='') + monkeypatch.setattr(utils, 'resource_download', func) + app.get(url) + func.assert_called_once_with('a', 'b', None) + + @pytest.mark.usefixtures('clean_db') + def test_status_codes(self, app): + user = factories.User() + org = factories.Organization() + dataset = factories.Dataset(private=True, owner_org=org['id']) + resource 
= factories.Resource(package_id=dataset['id']) + + env = {'REMOTE_USER': user['name']} + url = tk.url_for( + 'resource.download', id='a', resource_id='b') + app.get(url, status=404, extra_environ=env) + + url = tk.url_for( + 'resource.download', id=dataset['id'], resource_id=resource['id']) + app.get(url, status=401, extra_environ=env) + + helpers.call_action('package_patch', id=dataset['id'], private=False) + app.get(url, status=302, extra_environ=env, follow_redirects=False) + + @pytest.mark.usefixtures('clean_db') + def test_download(self, make_resource, app): + filename = 'file.txt' + resource = make_resource('hello world', filename) + url = tk.url_for( + 'resource.download', + id=resource['package_id'], + resource_id=resource['id']) + resp = app.get(url, status=302, follow_redirects=False) + + uploader = storage.ResourceCloudStorage(resource) + expected_url = uploader.get_url_from_filename(resource['id'], + filename) + assert resp.headers['location'] == expected_url diff --git a/ckanext/cloudstorage/utils.py b/ckanext/cloudstorage/utils.py index 8fc9539..3d2d129 100644 --- a/ckanext/cloudstorage/utils.py +++ b/ckanext/cloudstorage/utils.py @@ -18,6 +18,7 @@ class FakeFileStorage(cgi.FieldStorage): def __init__(self, fp, filename): self.file = fp + self.stream = fp self.filename = filename From f567be54f46ecc50fc740885e9675c6ff20be12f Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Tue, 26 May 2020 06:02:30 +0300 Subject: [PATCH 16/44] Allow multipart upload without secure urls --- .../cloudstorage/logic/action/multipart.py | 7 +-- ckanext/cloudstorage/storage.py | 5 +- .../snippets/multipart_module.html | 8 ++-- test.ini | 48 +++++++++++++++++++ 4 files changed, 54 insertions(+), 14 deletions(-) create mode 100644 test.ini diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py index 6e6320d..33ea469 100644 --- a/ckanext/cloudstorage/logic/action/multipart.py +++ b/ckanext/cloudstorage/logic/action/multipart.py @@ -161,10 +161,7 @@ def upload_multipart(context, data_dict): uploader = ResourceCloudStorage({}) upload = model.Session.query(MultipartUpload).get(upload_id) - if toolkit.check_ckan_version('2.8'): - data = part_content.stream.read() - else: - data = part_content.file.read() + data = _get_underlying_file(part_content).read() resp = uploader.driver.connection.request( _get_object_url( uploader, upload.name @@ -177,7 +174,7 @@ def upload_multipart(context, data_dict): headers={ 'Content-Length': len(data) }, - data=bytearray(_get_underlying_file(part_content).read()) + data=data ) if resp.status != 200: raise toolkit.ValidationError('Upload failed: part %s' % part_number) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 0319923..ca26539 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -184,10 +184,7 @@ def __init__(self, resource): # Check to see if a file has been provided if isinstance(upload_field_storage, (ALLOWED_UPLOAD_TYPES)): self.filename = munge.munge_filename(upload_field_storage.filename) - if p.toolkit.check_ckan_version("2.9"): - self.file_upload = upload_field_storage.stream - else: - self.file_upload = upload_field_storage.file + self.file_upload = _get_underlying_file(upload_field_storage) resource['url'] = self.filename resource['url_type'] = 'upload' elif multipart_name and self.can_use_advanced_aws: diff --git a/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html 
b/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html index 2568614..607228b 100644 --- a/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html +++ b/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html @@ -1,9 +1,7 @@
{{ parent() }} diff --git a/test.ini b/test.ini new file mode 100644 index 0000000..1f310e3 --- /dev/null +++ b/test.ini @@ -0,0 +1,48 @@ +[DEFAULT] +debug = false +smtp_server = localhost +error_email_from = paste@localhost + +[server:main] +use = egg:Paste#http +host = 0.0.0.0 +port = 5000 + +[app:main] +use = config:../ckan/test-core.ini + +# Insert any custom config settings to be used when running your extension's +# tests here. + +# Logging configuration +[loggers] +keys = root, ckan, sqlalchemy + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console + +[logger_ckan] +qualname = ckan +handlers = +level = INFO + +[logger_sqlalchemy] +handlers = +qualname = sqlalchemy.engine +level = WARN + +[handler_console] +class = StreamHandler +args = (sys.stdout,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s \ No newline at end of file From 46e03c53d9a4c2986bcb71c3727742263ce778b2 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Thu, 23 Jul 2020 03:09:52 +0300 Subject: [PATCH 17/44] Fix bug with resource-links --- ckanext/cloudstorage/storage.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index ca26539..81aff5e 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -182,8 +182,10 @@ def __init__(self, resource): multipart_name = resource.pop('multipart_name', None) # Check to see if a file has been provided - if isinstance(upload_field_storage, (ALLOWED_UPLOAD_TYPES)): + if isinstance(upload_field_storage, (ALLOWED_UPLOAD_TYPES)) and \ + upload_field_storage.filename: self.filename = munge.munge_filename(upload_field_storage.filename) + self.file_upload = _get_underlying_file(upload_field_storage) resource['url'] = self.filename resource['url_type'] = 'upload' From 51f0d6e0adea99c4a07296180ed18ffac8d1d7f9 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Fri, 2 Oct 2020 17:11:56 +0300 Subject: [PATCH 18/44] Avoid 500 error when object does not exist --- ckanext/cloudstorage/storage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 81aff5e..3d0b85e 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -343,7 +343,10 @@ def get_url_from_filename(self, rid, filename, content_type=None): return s3_connection.generate_url_sigv4(**generate_url_params) # Find the object for the given key. 
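        # get_object() raises ObjectDoesNotExistError when the key is missing;
        # the change below swallows that and returns None so a missing upload
        # no longer bubbles up as a 500 error.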
- obj = self.container.get_object(path) + try: + obj = self.container.get_object(path) + except ObjectDoesNotExistError: + return if obj is None: return From 469e8f89a7665f8680444c92c25853fe767df6ef Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Wed, 21 Oct 2020 23:40:08 +0300 Subject: [PATCH 19/44] Handle unicode path --- ckanext/cloudstorage/storage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 3d0b85e..abd2fde 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -3,6 +3,7 @@ import cgi import mimetypes import os +import six from six.moves.urllib.parse import urljoin from ast import literal_eval from datetime import datetime, timedelta @@ -359,7 +360,7 @@ def get_url_from_filename(self, rid, filename, content_type=None): 'https://' + self.driver.connection.host, '{container}/{path}'.format( container=self.container_name, - path=path + path=six.ensure_str(path) ) ) # This extra 'url' property isn't documented anywhere, sadly. From d0ad841f2422716f1db92bf2e8b2b73c083fd764 Mon Sep 17 00:00:00 2001 From: espona Date: Tue, 1 Dec 2020 20:47:35 +0100 Subject: [PATCH 20/44] add the region_name as driver option --- ckanext/cloudstorage/storage.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index abd2fde..bd26c07 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -335,6 +335,9 @@ def get_url_from_filename(self, rid, filename, content_type=None): host=self.driver_options['host'] ) + if 'region_name' in self.driver_options.keys(): + s3_connection.auth_region_name = self.driver_options['region_name'] + generate_url_params = {"expires_in": 60 * 60, "method": "GET", "bucket": self.container_name, From fd802f9ac31c5c47e9dc6bed3cf5b894ebe0e3c8 Mon Sep 17 00:00:00 2001 From: espona Date: Thu, 3 Dec 2020 16:55:07 +0100 Subject: [PATCH 21/44] Fixes on the JS to visualize the progress bar --- .../scripts/cloudstorage-multipart-upload.js | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js index cf0d083..b3052db 100644 --- a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js +++ b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js @@ -33,12 +33,28 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { this._file = $('#field-image-upload'); this._url = $('#field-image-url'); this._save = $('[name=save]'); + this + Remove this._id = $('input[name=id]'); this._progress = $('
', { class: 'hide controls progress progress-striped active' }); + + this._upload_message = $('
Please wait until upload finishes
'); + this._upload_message.addClass("upload-message") + this._upload_message.css('margin-top', '10px'); + this._upload_message.css('line-height', '0px'); + this._upload_message.css('text-align', 'center'); + this._upload_message.css('text-align', 'center'); + this._progress.append(this._upload_message); + this._bar = $('
', {class:'bar'}); + this._bar.css('height', '100%'); + this._bar.css('width', '0%'); + this._bar.css('margin-top', '-10px'); + this._bar.css('background-color', '#30b9ba'); this._progress.append(this._bar); + this._progress.insertAfter(this._url.parent().parent()); this._resumeBtn = $('', {class: 'hide btn btn-info controls'}).insertAfter( this._progress).text('Resume Upload'); @@ -62,6 +78,9 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { this._save.on('click', this._onSaveClick); this._onCheckExistingMultipart('choose'); + + (function blink() { $('.upload-message').fadeOut(500).fadeIn(500, blink); })(); + }, _onChunkUploaded: function () { @@ -143,6 +162,7 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onUploadFileSubmit: function (event, data) { + console.log("_onUploadFileSubmit") if (!this._uploadId) { this._onDisableSave(false); this.sandbox.notify( @@ -155,6 +175,8 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { this._setProgressType('info', this._progress); this._progress.show('slow'); + $(window).scrollTop(this._form.scrollTop); + this._progress.removeClass('hide'); }, _onGenerateAdditionalData: function (form) { @@ -185,6 +207,7 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onFileUploadAdd: function (event, data) { + console.log("_onFileUploadAdd") this._setProgress(0, this._bar); var file = data.files[0]; var target = $(event.target); @@ -214,6 +237,8 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }); this._progress.show('slow'); + this._progress.removeClass('hide'); + $(window).scrollTop(this._form.scrollTop); this._onDisableResumeBtn(); this._save.trigger('click'); @@ -238,6 +263,8 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onSaveClick: function(event, pass) { + console.log("_onSaveClick") + if (pass || !window.FileList || !this._file || !this._file.val()) { return; } @@ -264,6 +291,7 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onSaveForm: function() { + console.log("_onSaveForm") var file = this._file[0].files[0]; var self = this; var formData = this._form.serializeArray().reduce( @@ -310,6 +338,8 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { _onPerformUpload: function(file) { + console.log("_onPerformUpload") + console.log("_onPerformUpload") var id = this._id.val(); var self = this; if (this._uploadId === null) @@ -329,7 +359,7 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onPrepareUpload: function(file, id) { - + console.log("_onPrepareUpload") return $.ajax({ method: 'POST', url: this.sandbox.client.url('/api/action/cloudstorage_initiate_multipart'), @@ -427,6 +457,7 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onHandleError: function (msg) { + console.log("_onHandleError: " + msg) this.sandbox.notify( 'Error', msg, From 8d71db7b3765a6740c1925744b1588b82125fc3d Mon Sep 17 00:00:00 2001 From: espona Date: Thu, 3 Dec 2020 20:29:59 +0100 Subject: [PATCH 22/44] fixed resume uploads --- .../scripts/cloudstorage-multipart-upload.js | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js index b3052db..a7cc713 100644 --- a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js +++ b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js @@ -33,8 +33,6 @@ 
ckan.module('cloudstorage-multipart-upload', function($, _) { this._file = $('#field-image-upload'); this._url = $('#field-image-url'); this._save = $('[name=save]'); - this - Remove this._id = $('input[name=id]'); this._progress = $('
', { class: 'hide controls progress progress-striped active' @@ -150,10 +148,13 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { } }) .show(); + this._resumeBtn.removeClass('hide') + }, _onDisableResumeBtn: function () { this._resumeBtn.hide(); + this._resumeBtn.addClass('hide') }, _onUploadFail: function (e, data) { @@ -251,6 +252,7 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { target.fileupload('option', 'maxChunkSize', chunkSize); + this.el.off('multipartstarted.cloudstorage'); this.el.on('multipartstarted.cloudstorage', function () { data.submit(); @@ -262,8 +264,14 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { this._setProgress(progress, this._bar); }, + _onRemoveClick: function(event, pass) { + console.log("_onRemoveClick"); + //this._progress.hide('fast'); + //this._progress.addClass('hide'); + }, + _onSaveClick: function(event, pass) { - console.log("_onSaveClick") + console.log("_onSaveClick"); if (pass || !window.FileList || !this._file || !this._file.val()) { return; @@ -280,10 +288,12 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { } else { try{ this._onDisableSave(true); + this._onDisableRemove(true); this._onSaveForm(); } catch(error){ console.log(error); this._onDisableSave(false); + this._onDisableRemove(false); } } @@ -339,7 +349,9 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { _onPerformUpload: function(file) { console.log("_onPerformUpload") - console.log("_onPerformUpload") + + $('.btn-remove-url').on('click', this._onRemoveClick); + var id = this._id.val(); var self = this; if (this._uploadId === null) @@ -373,7 +385,8 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onAbortUpload: function(id) { - var self = this; + console.log("_onAbortUpload") + var self = this; this.sandbox.client.call( 'POST', 'cloudstorage_abort_multipart', @@ -392,6 +405,7 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onFinishUpload: function() { + console.log("_onFinishUpload") var self = this; var data_dict = { 'uploadId': this._uploadId, @@ -405,6 +419,7 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { function (data) { self._progress.hide('fast'); + self._progress.addClass('hide'); self._onDisableSave(false); if (self._resourceId && self._packageId){ @@ -443,9 +458,20 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onDisableSave: function (value) { + console.log("_onDisableSave: " + value); this._save.attr('disabled', value); }, + _onDisableRemove: function(value) { + console.log("_onDisableRemove: " + value); + $('.btn-remove-url').attr('disabled', value); + if (value) { + $('.btn-remove-url').off(); + } else { + $('.btn-remove-url').on(); + } + }, + _setProgress: function (progress, bar) { bar.css('width', progress + '%'); }, From 471016086d0912ba87ec96e97a6a5bdf8d69bd7c Mon Sep 17 00:00:00 2001 From: espona Date: Thu, 3 Dec 2020 20:35:13 +0100 Subject: [PATCH 23/44] Fixed error message on resume upload --- .../fanstatic/scripts/cloudstorage-multipart-upload.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js index a7cc713..8e21cfa 100644 --- a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js +++ b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js @@ -221,14 +221,13 @@ ckan.module('cloudstorage-multipart-upload', 
function($, _) { this._onCleanUpload(); this.sandbox.notify( 'Mismatch file', - 'You are trying to upload wrong file. Cancel previous upload first.', + 'You are trying to upload wrong file. Select '+ this._uploadName + ' or delete this resource and create a new one.', 'error' ); event.preventDefault(); throw 'Wrong file'; } - var loaded = chunkSize * this._uploadedParts; // target.fileupload('option', 'uploadedBytes', loaded); From edfcf0639cdf17e9e7b20f2aa32bc237b177b6fa Mon Sep 17 00:00:00 2001 From: espona Date: Fri, 4 Dec 2020 12:26:33 +0100 Subject: [PATCH 24/44] code cleaning --- .../scripts/cloudstorage-multipart-upload.js | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js index 8e21cfa..8b86206 100644 --- a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js +++ b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js @@ -163,7 +163,6 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onUploadFileSubmit: function (event, data) { - console.log("_onUploadFileSubmit") if (!this._uploadId) { this._onDisableSave(false); this.sandbox.notify( @@ -208,7 +207,6 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onFileUploadAdd: function (event, data) { - console.log("_onFileUploadAdd") this._setProgress(0, this._bar); var file = data.files[0]; var target = $(event.target); @@ -263,15 +261,7 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { this._setProgress(progress, this._bar); }, - _onRemoveClick: function(event, pass) { - console.log("_onRemoveClick"); - //this._progress.hide('fast'); - //this._progress.addClass('hide'); - }, - _onSaveClick: function(event, pass) { - console.log("_onSaveClick"); - if (pass || !window.FileList || !this._file || !this._file.val()) { return; } @@ -300,7 +290,6 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onSaveForm: function() { - console.log("_onSaveForm") var file = this._file[0].files[0]; var self = this; var formData = this._form.serializeArray().reduce( @@ -347,10 +336,6 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { _onPerformUpload: function(file) { - console.log("_onPerformUpload") - - $('.btn-remove-url').on('click', this._onRemoveClick); - var id = this._id.val(); var self = this; if (this._uploadId === null) @@ -370,7 +355,6 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onPrepareUpload: function(file, id) { - console.log("_onPrepareUpload") return $.ajax({ method: 'POST', url: this.sandbox.client.url('/api/action/cloudstorage_initiate_multipart'), @@ -384,7 +368,6 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onAbortUpload: function(id) { - console.log("_onAbortUpload") var self = this; this.sandbox.client.call( 'POST', @@ -404,7 +387,6 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onFinishUpload: function() { - console.log("_onFinishUpload") var self = this; var data_dict = { 'uploadId': this._uploadId, @@ -457,12 +439,10 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onDisableSave: function (value) { - console.log("_onDisableSave: " + value); this._save.attr('disabled', value); }, _onDisableRemove: function(value) { - console.log("_onDisableRemove: " + value); $('.btn-remove-url').attr('disabled', value); if (value) { $('.btn-remove-url').off(); @@ -482,7 +462,6 @@ 
ckan.module('cloudstorage-multipart-upload', function($, _) { }, _onHandleError: function (msg) { - console.log("_onHandleError: " + msg) this.sandbox.notify( 'Error', msg, From a256d59b311243a512dbe7d51f753f471fa277c2 Mon Sep 17 00:00:00 2001 From: espona Date: Fri, 4 Dec 2020 16:04:35 +0100 Subject: [PATCH 25/44] Added file size limitation --- .../scripts/cloudstorage-multipart-upload.js | 18 ++++++++++++++++-- .../snippets/multipart_module.html | 1 + .../templates/package/new_resource.html | 4 ++-- .../package/new_resource_not_draft.html | 4 ++-- .../templates/package/resource_edit.html | 4 ++-- 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js index 8b86206..1830349 100644 --- a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js +++ b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js @@ -211,6 +211,22 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { var file = data.files[0]; var target = $(event.target); + if (this.options.maxSize && (! isNaN(parseInt(this.options.maxSize)))){ + var max_size = parseInt(this.options.maxSize); + var file_size_gb = file.size/1073741824 + if (file_size_gb > max_size) { + this._file.val(''); + this._onCleanUpload(); + this.sandbox.notify( + 'Too large file:', + 'You selected a file larger than '+ max_size + 'GB. Contact support for an alternative upload method or select a smaller one.', + 'error' + ); + event.preventDefault(); + throw 'Too large file'; + } + } + var chunkSize = this._countChunkSize(file.size, target.fileupload('option', 'maxChunkSize')); if (this._uploadName && this._uploadSize && this._uploadedParts !== null) { @@ -246,10 +262,8 @@ ckan.module('cloudstorage-multipart-upload', function($, _) { } - target.fileupload('option', 'maxChunkSize', chunkSize); - this.el.off('multipartstarted.cloudstorage'); this.el.on('multipartstarted.cloudstorage', function () { data.submit(); diff --git a/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html b/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html index 607228b..78f120b 100644 --- a/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html +++ b/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html @@ -2,6 +2,7 @@ data-module="cloudstorage-multipart-upload" data-module-cloud='S3' data-module-package-id="_{{ pkg_name }}" + data-module-max-size="{{ max_size }}" > {{ parent() }} diff --git a/ckanext/cloudstorage/templates/package/new_resource.html b/ckanext/cloudstorage/templates/package/new_resource.html index 0505bca..4f76aff 100644 --- a/ckanext/cloudstorage/templates/package/new_resource.html +++ b/ckanext/cloudstorage/templates/package/new_resource.html @@ -2,7 +2,7 @@ {% block form %} - - {% snippet 'cloudstorage/snippets/multipart_module.html', pkg_name=pkg_name, parent=super %} + {% set max_size = config.get('ckanext.cloudstorage.max_upload_size_gb') %} + {% snippet 'cloudstorage/snippets/multipart_module.html', pkg_name=pkg_name, parent=super, max_size=max_size %} {% endblock %} diff --git a/ckanext/cloudstorage/templates/package/new_resource_not_draft.html b/ckanext/cloudstorage/templates/package/new_resource_not_draft.html index fae3e21..9c575ae 100644 --- a/ckanext/cloudstorage/templates/package/new_resource_not_draft.html +++ b/ckanext/cloudstorage/templates/package/new_resource_not_draft.html 
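The size guard added above takes its threshold from the `data-module-max-size` attribute, which the template hunks in this patch fill from the `ckanext.cloudstorage.max_upload_size_gb` config option; a missing or non-numeric value disables the check. A small Python-side helper expressing the same parsing rule (hypothetical, not part of the patch):

    from ckan.plugins import toolkit

    def max_upload_size_gb():
        # Mirrors the JS guard: no option or a non-numeric option means no limit.
        raw = toolkit.config.get('ckanext.cloudstorage.max_upload_size_gb')
        try:
            return int(raw)
        except (TypeError, ValueError):
            return None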
@@ -2,6 +2,6 @@ {% block form %} - - {% snippet 'cloudstorage/snippets/multipart_module.html', pkg_name=pkg_name, parent=super %} + {% set max_size = config.get('ckanext.cloudstorage.max_upload_size_gb') %} + {% snippet 'cloudstorage/snippets/multipart_module.html', pkg_name=pkg_name, parent=super, max_size=max_size %} {% endblock %} diff --git a/ckanext/cloudstorage/templates/package/resource_edit.html b/ckanext/cloudstorage/templates/package/resource_edit.html index 2e74235..d05eb09 100644 --- a/ckanext/cloudstorage/templates/package/resource_edit.html +++ b/ckanext/cloudstorage/templates/package/resource_edit.html @@ -2,7 +2,7 @@ {% block form %} - - {% snippet 'cloudstorage/snippets/multipart_module.html', pkg_name=pkg.name, parent=super %} + {% set max_size = config.get('ckanext.cloudstorage.max_upload_size_gb') %} + {% snippet 'cloudstorage/snippets/multipart_module.html', pkg_name=pkg.name, parent=super, max_size=max_size %} {% endblock %} From 96ff0022530a4b815e2a4561624b0bb43f3094c1 Mon Sep 17 00:00:00 2001 From: espona Date: Mon, 7 Dec 2020 17:35:34 +0100 Subject: [PATCH 26/44] Remove write to log, not working --- ckanext/cloudstorage/utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ckanext/cloudstorage/utils.py b/ckanext/cloudstorage/utils.py index 3d2d129..7f25bff 100644 --- a/ckanext/cloudstorage/utils.py +++ b/ckanext/cloudstorage/utils.py @@ -86,7 +86,6 @@ def migrate(path, single_id): print('[{i}/{count}] Working on {id}'.format(i=i, count=len(resources), id=resource_id)) - try: resource = lc.action.resource_show(id=resource_id) except NotFound: @@ -109,9 +108,9 @@ def migrate(path, single_id): if failed: log_file = tempfile.NamedTemporaryFile(delete=False) - log_file.file.writelines(failed) - print(u'ID of all failed uploads are saved to `{0}`'.format( - log_file.name)) + #log_file.file.writelines(failed) + print(u'ID of all failed uploads are saved to `{0}`: {1}'.format( + log_file.name, failed)) def resource_download(id, resource_id, filename=None): From ab99751e71c00fb0571ce95e8b5fef8bf2776c7f Mon Sep 17 00:00:00 2001 From: espona Date: Mon, 7 Dec 2020 17:35:50 +0100 Subject: [PATCH 27/44] simplified upload --- ckanext/cloudstorage/storage.py | 43 +++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index bd26c07..56da0c9 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -7,13 +7,14 @@ from six.moves.urllib.parse import urljoin from ast import literal_eval from datetime import datetime, timedelta - +import traceback from ckan import model from ckan.lib import munge import ckan.plugins as p from libcloud.storage.types import Provider, ObjectDoesNotExistError from libcloud.storage.providers import get_driver +import libcloud.common.types as types if p.toolkit.check_ckan_version("2.9"): from werkzeug.datastructures import FileStorage as UploadedFileType @@ -186,7 +187,6 @@ def __init__(self, resource): if isinstance(upload_field_storage, (ALLOWED_UPLOAD_TYPES)) and \ upload_field_storage.filename: self.filename = munge.munge_filename(upload_field_storage.filename) - self.file_upload = _get_underlying_file(upload_field_storage) resource['url'] = self.filename resource['url_type'] = 'upload' @@ -255,19 +255,32 @@ def upload(self, id, max_size=10): content_settings=content_settings ) else: - file_upload = self.file_upload - # in Python3 libcloud iterates over uploaded file, - # while it's wrappend 
into non-iterator. So, pick real - # file-object and give it to cloudstorage - # if six.PY3: - # file_upload = file_upload._file - self.container.upload_object_via_stream( - file_upload, - object_name=self.path_from_filename( - id, - self.filename - ) - ) + try: + file_upload = self.file_upload + # in Python3 libcloud iterates over uploaded file, + # while it's wrappend into non-iterator. So, pick real + # file-object and give it to cloudstorage + #if six.PY3: + # file_upload = file_upload._file + + # self.container.upload_object_via_stream( + # file_upload, + # object_name=self.path_from_filename( + # id, + # self.filename + # ) + # ) + + # FIX: replaced call with a simpler version + with open(file_upload.name, 'rb') as iterator: + self.container.upload_object_via_stream(iterator=iterator, + object_name=self.path_from_filename(id, self.filename)) + except ValueError as v: + print(traceback.format_exc()) + raise v + except types.InvalidCredsError as err: + print('EXCEPTION: {0}'.format(err)) + raise err elif self._clear and self.old_filename and not self.leave_files: # This is only set when a previously-uploaded file is replace From cb16d52ecf0d174d9b1a099ad3015824d9741587 Mon Sep 17 00:00:00 2001 From: espona Date: Mon, 7 Dec 2020 17:38:12 +0100 Subject: [PATCH 28/44] improved error handling --- ckanext/cloudstorage/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 56da0c9..18ced84 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -279,7 +279,7 @@ def upload(self, id, max_size=10): print(traceback.format_exc()) raise v except types.InvalidCredsError as err: - print('EXCEPTION: {0}'.format(err)) + print(traceback.format_exc()) raise err elif self._clear and self.old_filename and not self.leave_files: From 2bdefeb5b2e8b13fa54fdebcd6b2840b9102eefc Mon Sep 17 00:00:00 2001 From: espona Date: Tue, 8 Dec 2020 19:33:45 +0100 Subject: [PATCH 29/44] Improved migration --- ckanext/cloudstorage/storage.py | 59 ++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 18ced84..a1520d3 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -11,6 +11,8 @@ from ckan import model from ckan.lib import munge import ckan.plugins as p +import hashlib +import binascii from libcloud.storage.types import Provider, ObjectDoesNotExistError from libcloud.storage.providers import get_driver @@ -18,14 +20,17 @@ if p.toolkit.check_ckan_version("2.9"): from werkzeug.datastructures import FileStorage as UploadedFileType + config = p.toolkit.config else: from pylons import config - UploadedFileType = cgi.FieldStorage + UploadedFileType = cgi.FieldStorage from werkzeug.datastructures import FileStorage as FlaskFileStorage + ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage) +AWS_UPLOAD_PART_SIZE = 5 * 1024 * 1024 def _get_underlying_file(wrapper): @@ -34,6 +39,25 @@ def _get_underlying_file(wrapper): return wrapper.file +def _md5sum(source_path): + block_count = 0 + block = True + md5string = b'' + with open(source_path, "rb") as f: + while block: + block = f.read(AWS_UPLOAD_PART_SIZE) + if block: + block_count += 1 + hash_obj = hashlib.md5() + hash_obj.update(block) + md5string = md5string + binascii.unhexlify(hash_obj.hexdigest()) + else: + break + hash_obj = hashlib.md5() + hash_obj.update(md5string) + return hash_obj.hexdigest() + "-" + 
str(block_count) + + class CloudStorage(object): def __init__(self): self.driver = get_driver( @@ -185,7 +209,7 @@ def __init__(self, resource): # Check to see if a file has been provided if isinstance(upload_field_storage, (ALLOWED_UPLOAD_TYPES)) and \ - upload_field_storage.filename: + upload_field_storage.filename: self.filename = munge.munge_filename(upload_field_storage.filename) self.file_upload = _get_underlying_file(upload_field_storage) resource['url'] = self.filename @@ -260,7 +284,7 @@ def upload(self, id, max_size=10): # in Python3 libcloud iterates over uploaded file, # while it's wrappend into non-iterator. So, pick real # file-object and give it to cloudstorage - #if six.PY3: + # if six.PY3: # file_upload = file_upload._file # self.container.upload_object_via_stream( @@ -271,10 +295,35 @@ def upload(self, id, max_size=10): # ) # ) + # check if already uploaded + object_name = self.path_from_filename(id, self.filename) + try: + cloud_object = self.container.get_object(object_name=object_name) + print("\t Object found, checking size {0}: {1}".format(object_name, cloud_object.size)) + file_size = os.path.getsize(file_upload.name) + print("\t - File size {0}: {1}".format(file_upload.name, file_size)) + if file_size == int(cloud_object.size): + print("\t Size fits, checking hash {0}: {1}".format(object_name, cloud_object.hash)) + hash_file = hashlib.md5(open(file_upload.name, 'rb').read()).hexdigest() + print("\t - File hash {0}: {1}".format(file_upload.name, hash_file)) + # basic hash + if hash_file == cloud_object.hash: + print("\t => File found, matching hash, skipping upload") + return + # multipart hash + multi_hash_file = _md5sum(file_upload.name) + print("\t - File multi hash {0}: {1}".format(file_upload.name, multi_hash_file)) + if multi_hash_file == cloud_object.hash: + print("\t => File found, matching hash, skipping upload") + return + print("\t Resource found in the cloud but outdated, uploading") + except ObjectDoesNotExistError: + print("\t Resource not found in the cloud, uploading") + # FIX: replaced call with a simpler version with open(file_upload.name, 'rb') as iterator: - self.container.upload_object_via_stream(iterator=iterator, - object_name=self.path_from_filename(id, self.filename)) + self.container.upload_object_via_stream(iterator=iterator, object_name=object_name) + print("\t => UPLOADED {0}: {1}".format(file_upload.name, object_name)) except ValueError as v: print(traceback.format_exc()) raise v From 2ec6206599f7ed3237633722d37e136c43a3f870 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 28 Dec 2020 22:52:40 +1100 Subject: [PATCH 30/44] Fix file migration script --- ckanext/cloudstorage/utils.py | 16 ++++++++++------ setup.py | 3 ++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/ckanext/cloudstorage/utils.py b/ckanext/cloudstorage/utils.py index 3d2d129..53dc086 100644 --- a/ckanext/cloudstorage/utils.py +++ b/ckanext/cloudstorage/utils.py @@ -2,6 +2,7 @@ from __future__ import print_function import os.path +import six from ckan import logic, model import ckan.plugins.toolkit as tk from ckan.lib import base, uploader @@ -15,11 +16,14 @@ from ckanext.cloudstorage.storage import (CloudStorage, ResourceCloudStorage) -class FakeFileStorage(cgi.FieldStorage): - def __init__(self, fp, filename): - self.file = fp - self.stream = fp - self.filename = filename +if tk.check_ckan_version("2.9"): + from werkzeug.datastructures import FileStorage as FakeFileStorage +else: + class FakeFileStorage(cgi.FieldStorage): + def __init__(self, fp, filename): 
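The skip-if-already-uploaded check introduced in the migration patch above compares the remote object with the local file three ways: size, a plain md5, and the S3-style multipart ETag that `_md5sum()` rebuilds (the md5 of the concatenated per-part md5 digests, suffixed with `-<part count>`). A condensed sketch of that decision, assuming `_md5sum()` is in scope and `cloud_object` is a libcloud object exposing `size` and `hash`:

    import hashlib
    import os

    def already_uploaded(local_path, cloud_object):
        # A size mismatch means the remote copy is stale and must be replaced.
        if os.path.getsize(local_path) != int(cloud_object.size):
            return False
        with open(local_path, 'rb') as f:
            plain_md5 = hashlib.md5(f.read()).hexdigest()
        # Single-part uploads report a plain md5 ETag; multipart uploads report
        # the composite "<md5 of part md5s>-<parts>" form built by _md5sum().
        return cloud_object.hash in (plain_md5, _md5sum(local_path))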
+ self.file = fp + self.stream = fp + self.filename = filename def initdb(): @@ -109,7 +113,7 @@ def migrate(path, single_id): if failed: log_file = tempfile.NamedTemporaryFile(delete=False) - log_file.file.writelines(failed) + log_file.file.writelines([six.ensure_binary(l) for l in failed]) print(u'ID of all failed uploads are saved to `{0}`'.format( log_file.name)) diff --git a/setup.py b/setup.py index c259de4..176929f 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,8 @@ include_package_data=True, zip_safe=False, install_requires=[ - 'apache-libcloud~=2.8.2' + 'apache-libcloud~=2.8.2', + 'six>=1.12.0', ], entry_points=( """ From 26d1a2b4917fd6828d3be70a9fb8bdad24b4d007 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Tue, 12 Jan 2021 16:18:46 +0200 Subject: [PATCH 31/44] Add migration script --- README.md | 7 +- ckanext/cloudstorage/cli.py | 9 --- .../migration/cloudstorage/README | 1 + .../migration/cloudstorage/env.py | 81 +++++++++++++++++++ .../migration/cloudstorage/script.py.mako | 24 ++++++ .../472b797d58d7_create_multipart_tables.py | 51 ++++++++++++ 6 files changed, 163 insertions(+), 10 deletions(-) create mode 100644 ckanext/cloudstorage/migration/cloudstorage/README create mode 100644 ckanext/cloudstorage/migration/cloudstorage/env.py create mode 100644 ckanext/cloudstorage/migration/cloudstorage/script.py.mako create mode 100644 ckanext/cloudstorage/migration/cloudstorage/versions/472b797d58d7_create_multipart_tables.py diff --git a/README.md b/README.md index 5aa4990..6c7e73c 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,12 @@ benefits of your CDN/blob storage. This option also enables multipart uploads, but you need to create database tables first. Run next command from extension folder: - `paster cloudstorage initdb -c /etc/ckan/default/production.ini ` + + paster cloudstorage initdb -c /etc/ckan/default/production.ini + +For CKAN>=2.9 use the following command instead: + + ckan -c /etc/ckan/default/production.ini ` db upgrade -p cloudstorage With that feature you can use `cloudstorage_clean_multipart` action, which is available only for sysadmins. After executing, all unfinished multipart uploads, older than 7 days, diff --git a/ckanext/cloudstorage/cli.py b/ckanext/cloudstorage/cli.py index 4b5a696..80a2812 100644 --- a/ckanext/cloudstorage/cli.py +++ b/ckanext/cloudstorage/cli.py @@ -28,14 +28,5 @@ def migrate(path, resource): """ utils.migrate(path, resource) - -@cloudstorage.command() -def initdb(): - """Reinitalize database tables. - """ - utils.initdb() - click.secho("DB tables are reinitialized", fg="green") - - def get_commands(): return [cloudstorage] diff --git a/ckanext/cloudstorage/migration/cloudstorage/README b/ckanext/cloudstorage/migration/cloudstorage/README new file mode 100644 index 0000000..98e4f9c --- /dev/null +++ b/ckanext/cloudstorage/migration/cloudstorage/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/ckanext/cloudstorage/migration/cloudstorage/env.py b/ckanext/cloudstorage/migration/cloudstorage/env.py new file mode 100644 index 0000000..0093682 --- /dev/null +++ b/ckanext/cloudstorage/migration/cloudstorage/env.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +from __future__ import with_statement +from alembic import context +from sqlalchemy import engine_from_config, pool +from logging.config import fileConfig + +import os + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. 
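The migration-script fix above converts each failed ID to bytes with `six.ensure_binary` before writing, which is also why `setup.py` now requires `six>=1.12.0`, the release that introduced `ensure_binary`. A variant of the same write that additionally newline-separates the entries, purely for illustration:

    import tempfile
    import six

    def write_failed_ids(failed):
        # NamedTemporaryFile is opened in binary mode, so every ID is encoded;
        # the trailing newline keeps one ID per line in the log.
        log_file = tempfile.NamedTemporaryFile(delete=False)
        log_file.file.writelines(six.ensure_binary(i) + b'\n' for i in failed)
        log_file.close()
        return log_file.name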
+config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + +name = os.path.basename(os.path.dirname(__file__)) + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + + url = config.get_main_option(u"sqlalchemy.url") + context.configure( + url=url, target_metadata=target_metadata, literal_binds=True, + version_table=u'{}_alembic_version'.format(name) + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section), + prefix=u'sqlalchemy.', + poolclass=pool.NullPool) + + with connectable.connect() as connection: + context.configure( + connection=connection, + target_metadata=target_metadata, + version_table=u'{}_alembic_version'.format(name) + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/ckanext/cloudstorage/migration/cloudstorage/script.py.mako b/ckanext/cloudstorage/migration/cloudstorage/script.py.mako new file mode 100644 index 0000000..2c01563 --- /dev/null +++ b/ckanext/cloudstorage/migration/cloudstorage/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/ckanext/cloudstorage/migration/cloudstorage/versions/472b797d58d7_create_multipart_tables.py b/ckanext/cloudstorage/migration/cloudstorage/versions/472b797d58d7_create_multipart_tables.py new file mode 100644 index 0000000..98ef835 --- /dev/null +++ b/ckanext/cloudstorage/migration/cloudstorage/versions/472b797d58d7_create_multipart_tables.py @@ -0,0 +1,51 @@ +"""Create multipart tables + +Revision ID: 472b797d58d7 +Revises: +Create Date: 2021-01-12 14:24:02.227319 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.engine.reflection import Inspector + +# revision identifiers, used by Alembic. 
+revision = "472b797d58d7" +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + conn = op.get_bind() + inspector = Inspector.from_engine(conn) + tables = inspector.get_table_names() + if "cloudstorage_multipart_upload" not in tables: + op.create_table( + "cloudstorage_multipart_upload", + sa.Column("id", sa.UnicodeText, primary_key=True), + sa.Column("resource_id", sa.UnicodeText), + sa.Column("name", sa.UnicodeText), + sa.Column("initiated", sa.DateTime), + sa.Column("size", sa.Numeric), + sa.Column("original_name", sa.UnicodeText), + sa.Column("user_id", sa.UnicodeText), + ) + + if "cloudstorage_multipart_part" not in tables: + op.create_table( + "cloudstorage_multipart_part", + sa.Column("n", sa.Integer, primary_key=True), + sa.Column("etag", sa.UnicodeText, primary_key=True), + sa.Column( + "upload_id", + sa.UnicodeText, + sa.ForeignKey("cloudstorage_multipart_upload.id"), + primary_key=True, + ), + ) + + +def downgrade(): + op.drop_table("cloudstorage_multipart_part") + op.drop_table("cloudstorage_multipart_upload") From cd6fbf6efecc7513929a58f11f075de2bc848381 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Tue, 12 Jan 2021 16:53:21 +0200 Subject: [PATCH 32/44] Fix progressbar --- .../scripts/cloudstorage-multipart-upload.js | 850 +++++++++--------- .../snippets/multipart_module.html | 1 + ckanext/cloudstorage/templates/page.html | 7 +- 3 files changed, 428 insertions(+), 430 deletions(-) diff --git a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js index cf0d083..df296be 100644 --- a/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js +++ b/ckanext/cloudstorage/fanstatic/scripts/cloudstorage-multipart-upload.js @@ -1,443 +1,437 @@ -ckan.module('cloudstorage-multipart-upload', function($, _) { - 'use strict'; - - return { - options: { - cloud: 'S3', - i18n: { - resource_create: _('Resource has been created.'), - resource_update: _('Resource has been updated.'), - undefined_upload_id: _('Undefined uploadId.'), - upload_completed: _('Upload completed. You will be redirected in few seconds...'), - unable_to_finish: _('Unable to finish multipart upload') - } - }, - - _partNumber: 1, - - _uploadId: null, - _packageId: null, - _resourceId: null, - _uploadSize: null, - _uploadName: null, - _uploadedParts: null, - _clickedBtn: null, - _redirect_url: null, - - initialize: function() { - $.proxyAll(this, /_on/); - this.options.packageId = this.options.packageId.slice(1); - this._form = this.$('form'); - // this._origin = $('#field-image-upload'); - // this._file = this._origin.clone() - this._file = $('#field-image-upload'); - this._url = $('#field-image-url'); - this._save = $('[name=save]'); - this._id = $('input[name=id]'); - this._progress = $('
', { - class: 'hide controls progress progress-striped active' - }); - this._bar = $('
', {class:'bar'}); - this._progress.append(this._bar); - this._progress.insertAfter(this._url.parent().parent()); - this._resumeBtn = $('', {class: 'hide btn btn-info controls'}).insertAfter( - this._progress).text('Resume Upload'); - - var self = this; - - this._file.fileupload({ - url: this.sandbox.client.url('/api/action/cloudstorage_upload_multipart'), - maxChunkSize: 5 * 1024 * 1024, - replaceFileInput: false, - formData: this._onGenerateAdditionalData, - submit: this._onUploadFileSubmit, - chunkdone: this._onChunkUploaded, - add: this._onFileUploadAdd, - progressall: this._onFileUploadProgress, - done: this._onFinishUpload, - fail: this._onUploadFail, - always: this._onAnyEndedUpload - }); - - this._save.on('click', this._onSaveClick); - - this._onCheckExistingMultipart('choose'); - }, - - _onChunkUploaded: function () { - this._uploadedParts = this._partNumber++; - }, - - _onCheckExistingMultipart: function (operation) { - var self = this; - var id = this._id.val(); - if (!id) return; - this.sandbox.client.call( - 'POST', - 'cloudstorage_check_multipart', - {id: id}, - function (data) { - if (!data.result) return; - var upload = data.result.upload; - - var name = upload.name.slice(upload.name.lastIndexOf('/')+1); - self._uploadId = upload.id; - self._uploadSize = upload.size; - self._uploadedParts = upload.parts; - self._uploadName = upload.original_name; - self._partNumber = self._uploadedParts + 1; - - - var current_chunk_size = self._file.fileupload('option', 'maxChunkSize'); - var uploaded_bytes = current_chunk_size * upload.parts; - self._file.fileupload('option', 'uploadedBytes', uploaded_bytes); - - self.sandbox.notify( - 'Incomplete upload', - 'File: ' + upload.original_name + - '; Size: ' + self._uploadSize, - 'warning'); - self._onEnableResumeBtn(operation); - }, - function (error) { - console.log(error); - setTimeout(function() { - self._onCheckExistingMultipart(operation); - }, 2000); - } - - ); - }, - - _onEnableResumeBtn: function (operation) { - var self = this; - this.$('.btn-remove-url').remove(); - if (operation === 'choose'){ - self._onDisableSave(true); - - } - this._resumeBtn - .off('click') - .on('click', function (event) { - switch (operation) { - case 'resume': - self._save.trigger('click'); - self._onDisableResumeBtn(); - break; - case 'choose': - default: - self._file.trigger('click'); - break; - } - }) - .show(); - }, - - _onDisableResumeBtn: function () { - this._resumeBtn.hide(); - }, - - _onUploadFail: function (e, data) { - this._onHandleError('Upload fail'); - this._onCheckExistingMultipart('resume'); - }, - - _onUploadFileSubmit: function (event, data) { - if (!this._uploadId) { - this._onDisableSave(false); - this.sandbox.notify( - 'Upload error', - this.i18n('undefined_upload_id'), - 'error' - ); - return false; - } - - this._setProgressType('info', this._progress); - this._progress.show('slow'); - }, - - _onGenerateAdditionalData: function (form) { - return [ - { - name: 'partNumber', - value: this._partNumber - }, - { - name: 'uploadId', - value: this._uploadId - }, - { - name: 'id', - value: this._resourceId - } - - ]; - }, - - _onAnyEndedUpload: function () { - this._partNumber = 1; - }, - - _countChunkSize: function (size, chunk) { - while (size / chunk > 10000) chunk *= 2; - return chunk; +ckan.module("cloudstorage-multipart-upload", function($, _) { + "use strict"; + + return { + options: { + cloud: "S3", + i18n: { + resource_create: _("Resource has been created."), + resource_update: _("Resource has been updated."), + undefined_upload_id: 
_("Undefined uploadId."), + upload_completed: _( + "Upload completed. You will be redirected in few seconds..." + ), + unable_to_finish: _("Unable to finish multipart upload") + } + }, + + _partNumber: 1, + + _uploadId: null, + _packageId: null, + _resourceId: null, + _uploadSize: null, + _uploadName: null, + _uploadedParts: null, + _clickedBtn: null, + _redirect_url: null, + + initialize: function() { + $.proxyAll(this, /_on/); + // There is an undescore as a prefix added to package ID in + // order to prevent type-coercion, so we have to strip it + this.options.packageId = this.options.packageId.slice(1); + this._form = this.$("form"); + this._file = $("#field-image-upload"); + this._url = $("#field-image-url"); + this._save = $("[name=save]"); + this._id = $("input[name=id]"); + this._progress = $("
", { + class: "progress" + }); + this._bar = $("
", { + class: "progress-bar progress-bar-striped progress-bar-animated active" + }); + this._progress.append(this._bar); + this._progress.insertAfter(this._url.parent().parent()); + this._progress.hide(); + + this._resumeBtn = $("", { class: "btn btn-info controls" }) + .insertAfter(this._progress) + .text("Resume Upload"); + this._resumeBtn.hide(); + + var self = this; + + this._file.fileupload({ + url: this.sandbox.client.url( + "/api/action/cloudstorage_upload_multipart" + ), + maxChunkSize: 5 * 1024 * 1024, + replaceFileInput: false, + formData: this._onGenerateAdditionalData, + submit: this._onUploadFileSubmit, + chunkdone: this._onChunkUploaded, + add: this._onFileUploadAdd, + progressall: this._onFileUploadProgress, + done: this._onFinishUpload, + fail: this._onUploadFail, + always: this._onAnyEndedUpload + }); + + this._save.on("click", this._onSaveClick); + + this._onCheckExistingMultipart("choose"); + }, + + _onChunkUploaded: function() { + this._uploadedParts = this._partNumber++; + }, + + _onCheckExistingMultipart: function(operation) { + var self = this; + var id = this._id.val(); + if (!id) return; + this.sandbox.client.call( + "POST", + "cloudstorage_check_multipart", + { id: id }, + function(data) { + if (!data.result) return; + var upload = data.result.upload; + + var name = upload.name.slice(upload.name.lastIndexOf("/") + 1); + self._uploadId = upload.id; + self._uploadSize = upload.size; + self._uploadedParts = upload.parts; + self._uploadName = upload.original_name; + self._partNumber = self._uploadedParts + 1; + + var current_chunk_size = self._file.fileupload( + "option", + "maxChunkSize" + ); + var uploaded_bytes = current_chunk_size * upload.parts; + self._file.fileupload("option", "uploadedBytes", uploaded_bytes); + + self.sandbox.notify( + "Incomplete upload", + "File: " + upload.original_name + "; Size: " + self._uploadSize, + "warning" + ); + self._onEnableResumeBtn(operation); }, - - _onFileUploadAdd: function (event, data) { - this._setProgress(0, this._bar); - var file = data.files[0]; - var target = $(event.target); - - var chunkSize = this._countChunkSize(file.size, target.fileupload('option', 'maxChunkSize')); - - if (this._uploadName && this._uploadSize && this._uploadedParts !== null) { - if (this._uploadSize !== file.size || this._uploadName !== file.name){ - this._file.val(''); - this._onCleanUpload(); - this.sandbox.notify( - 'Mismatch file', - 'You are trying to upload wrong file. 
Cancel previous upload first.', - 'error' - ); - event.preventDefault(); - throw 'Wrong file'; - } - - - var loaded = chunkSize * this._uploadedParts; - - // target.fileupload('option', 'uploadedBytes', loaded); - this._onFileUploadProgress(event, { - total: file.size, - loaded: loaded - }); - - this._progress.show('slow'); - this._onDisableResumeBtn(); - this._save.trigger('click'); - - if (loaded >= file.size){ - this._onFinishUpload(); - } - - } - - - target.fileupload('option', 'maxChunkSize', chunkSize); - - this.el.off('multipartstarted.cloudstorage'); - this.el.on('multipartstarted.cloudstorage', function () { - data.submit(); - }); + function(error) { + console.error(error); + setTimeout(function() { + self._onCheckExistingMultipart(operation); + }, 2000); + } + ); + }, + + _onEnableResumeBtn: function(operation) { + var self = this; + this.$(".btn-remove-url").remove(); + if (operation === "choose") { + self._onDisableSave(true); + } + this._resumeBtn + .off("click") + .on("click", function(event) { + switch (operation) { + case "resume": + self._save.trigger("click"); + self._onDisableResumeBtn(); + break; + case "choose": + default: + self._file.trigger("click"); + break; + } + }) + .show(); + }, + + _onDisableResumeBtn: function() { + this._resumeBtn.hide(); + }, + + _onUploadFail: function(e, data) { + this._onHandleError("Upload fail"); + this._onCheckExistingMultipart("resume"); + }, + + _onUploadFileSubmit: function(event, data) { + if (!this._uploadId) { + this._onDisableSave(false); + this.sandbox.notify( + "Upload error", + this.i18n("undefined_upload_id"), + "error" + ); + return false; + } + + this._setProgressType("info", this._bar); + this._progress.show("slow"); + }, + + _onGenerateAdditionalData: function(form) { + return [ + { + name: "partNumber", + value: this._partNumber }, - - _onFileUploadProgress: function (event, data) { - var progress = 100 / (data.total / data.loaded); - this._setProgress(progress, this._bar); + { + name: "uploadId", + value: this._uploadId }, + { + name: "id", + value: this._resourceId + } + ]; + }, + + _onAnyEndedUpload: function() { + this._partNumber = 1; + }, + + _countChunkSize: function(size, chunk) { + while (size / chunk > 10000) chunk *= 2; + return chunk; + }, + + _onFileUploadAdd: function(event, data) { + this._setProgress(0, this._bar); + var file = data.files[0]; + var target = $(event.target); + + var chunkSize = this._countChunkSize( + file.size, + target.fileupload("option", "maxChunkSize") + ); + + if ( + this._uploadName && + this._uploadSize && + this._uploadedParts !== null + ) { + if (this._uploadSize !== file.size || this._uploadName !== file.name) { + this._file.val(""); + this._onCleanUpload(); + this.sandbox.notify( + "Mismatch file", + "You are trying to upload wrong file. 
Cancel previous upload first.", + "error" + ); + event.preventDefault(); + throw "Wrong file"; + } - _onSaveClick: function(event, pass) { - if (pass || !window.FileList || !this._file || !this._file.val()) { - return; - } - event.preventDefault(); - var dataset_id = this.options.packageId; - this._clickedBtn = $(event.target).attr('value'); - if (this._clickedBtn == 'go-dataset') { - this._onDisableSave(false); - this._redirect_url = this.sandbox.url( - '/dataset/edit/' + - dataset_id); - window.location = this._redirect_url; - } else { - try{ - this._onDisableSave(true); - this._onSaveForm(); - } catch(error){ - console.log(error); - this._onDisableSave(false); - } - } + var loaded = chunkSize * this._uploadedParts; - // this._form.trigger('submit', true); - }, + // target.fileupload('option', 'uploadedBytes', loaded); + this._onFileUploadProgress(event, { + total: file.size, + loaded: loaded + }); - _onSaveForm: function() { - var file = this._file[0].files[0]; - var self = this; - var formData = this._form.serializeArray().reduce( - function (result, item) { - result[item.name] = item.value; - return result; - }, {}); - - formData.multipart_name = file.name; - formData.url = file.name; - formData.package_id = this.options.packageId; - formData.size = file.size; - formData.url_type = 'upload'; - var action = formData.id ? 'resource_update' : 'resource_create'; - var url = this._form.attr('action') || window.location.href; - this.sandbox.client.call( - 'POST', - action, - formData, - function (data) { - var result = data.result; - self._packageId = result.package_id; - self._resourceId = result.id; - - self._id.val(result.id); - self.sandbox.notify( - result.id, - self.i18n(action, {id: result.id}), - 'success' - ); - self._onPerformUpload(file); - }, - function (err, st, msg) { - self.sandbox.notify( - 'Error', - msg, - 'error' - ); - self._onHandleError('Unable to save resource'); - } - ); + this._progress.show("slow"); + this._onDisableResumeBtn(); + this._save.trigger("click"); + if (loaded >= file.size) { + this._onFinishUpload(); + } + } + + target.fileupload("option", "maxChunkSize", chunkSize); + + this.el.off("multipartstarted.cloudstorage"); + this.el.on("multipartstarted.cloudstorage", function() { + data.submit(); + }); + }, + + _onFileUploadProgress: function(event, data) { + var progress = 100 / (data.total / data.loaded); + this._setProgress(progress, this._bar); + }, + + _onSaveClick: function(event, pass) { + if (pass || !window.FileList || !this._file || !this._file.val()) { + return; + } + event.preventDefault(); + + var dataset_id = this.options.packageId; + this._clickedBtn = $(event.target).attr("value"); + if (this._clickedBtn == "go-dataset") { + this._onDisableSave(false); + this._redirect_url = this.sandbox.url("/dataset/edit/" + dataset_id); + window.location = this._redirect_url; + } else { + try { + $("html, body").animate({ scrollTop: 0 }, "50"); + this._onDisableSave(true); + this._onSaveForm(); + } catch (error) { + console.error(error); + this._onDisableSave(false); + } + } + + // this._form.trigger('submit', true); + }, + + _onSaveForm: function() { + var file = this._file[0].files[0]; + var self = this; + var formData = this._form.serializeArray().reduce(function(result, item) { + result[item.name] = item.value; + return result; + }, {}); + + formData.multipart_name = file.name; + formData.url = file.name; + formData.package_id = this.options.packageId; + formData.size = file.size; + formData.url_type = "upload"; + var action = formData.id ? 
"resource_update" : "resource_create"; + var url = this._form.attr("action") || window.location.href; + this.sandbox.client.call( + "POST", + action, + formData, + function(data) { + var result = data.result; + self._packageId = result.package_id; + self._resourceId = result.id; + + self._id.val(result.id); + self.sandbox.notify( + result.id, + self.i18n(action, { id: result.id }), + "success" + ); + self._onPerformUpload(file); }, - - - _onPerformUpload: function(file) { - var id = this._id.val(); - var self = this; - if (this._uploadId === null) - this._onPrepareUpload(file, id).then( - function (data) { - self._uploadId = data.result.id; - self.el.trigger('multipartstarted.cloudstorage'); - }, - function (err) { - console.log(err); - self._onHandleError('Unable to initiate multipart upload'); - } - ); - else - this.el.trigger('multipartstarted.cloudstorage'); - + function(err, st, msg) { + self.sandbox.notify("Error", msg, "error"); + self._onHandleError("Unable to save resource"); + } + ); + }, + + _onPerformUpload: function(file) { + var id = this._id.val(); + var self = this; + if (this._uploadId === null) + this._onPrepareUpload(file, id).then( + function(data) { + self._uploadId = data.result.id; + self.el.trigger("multipartstarted.cloudstorage"); + }, + function(err) { + console.error(err); + self._onHandleError("Unable to initiate multipart upload"); + } + ); + else this.el.trigger("multipartstarted.cloudstorage"); + }, + + _onPrepareUpload: function(file, id) { + return $.ajax({ + method: "POST", + url: this.sandbox.client.url( + "/api/action/cloudstorage_initiate_multipart" + ), + data: JSON.stringify({ + id: id, + name: encodeURIComponent(file.name), + size: file.size + }) + }); + }, + + _onAbortUpload: function(id) { + var self = this; + this.sandbox.client.call( + "POST", + "cloudstorage_abort_multipart", + { + id: id }, - - _onPrepareUpload: function(file, id) { - - return $.ajax({ - method: 'POST', - url: this.sandbox.client.url('/api/action/cloudstorage_initiate_multipart'), - data: JSON.stringify({ - id: id, - name: encodeURIComponent(file.name), - size: file.size - }) - }); - + function(data) { + console.log(data); }, - - _onAbortUpload: function(id) { - var self = this; - this.sandbox.client.call( - 'POST', - 'cloudstorage_abort_multipart', - { - id: id - }, - function (data) { - console.log(data); - }, - function (err) { - console.log(err); - self._onHandleError('Unable to abort multipart upload'); - } + function(err) { + console.error(err); + self._onHandleError("Unable to abort multipart upload"); + } + ); + }, + + _onFinishUpload: function() { + var self = this; + var data_dict = { + uploadId: this._uploadId, + id: this._resourceId, + save_action: this._clickedBtn + }; + this.sandbox.client.call( + "POST", + "cloudstorage_finish_multipart", + data_dict, + function(data) { + self._progress.hide("fast"); + self._onDisableSave(false); + + if (self._resourceId && self._packageId) { + self.sandbox.notify( + "Success", + self.i18n("upload_completed"), + "success" ); - - }, - - _onFinishUpload: function() { - var self = this; - var data_dict = { - 'uploadId': this._uploadId, - 'id': this._resourceId, - 'save_action': this._clickedBtn + // self._form.remove(); + if (self._clickedBtn == "again") { + this._redirect_url = self.sandbox.url( + "/dataset/new_resource/" + self._packageId + ); + } else { + this._redirect_url = self.sandbox.url( + "/dataset/" + self._packageId + ); } - this.sandbox.client.call( - 'POST', - 'cloudstorage_finish_multipart', - data_dict, - function 
(data) { - - self._progress.hide('fast'); - self._onDisableSave(false); - - if (self._resourceId && self._packageId){ - self.sandbox.notify( - 'Success', - self.i18n('upload_completed'), - 'success' - ); - // self._form.remove(); - if (self._clickedBtn == 'again') { - this._redirect_url = self.sandbox.url( - '/dataset/new_resource/' + - self._packageId - ); - } else { - this._redirect_url = self.sandbox.url( - '/dataset/' + - self._packageId - ); - } - self._form.attr('action', this._redirect_url); - self._form.attr('method', 'GET'); - self.$('[name]').attr('name', null); - setTimeout(function(){ - self._form.submit(); - }, 3000); - - } - }, - function (err) { - console.log(err); - self._onHandleError(self.i18n('unable_to_finish')); - } - ); - this._setProgressType('success', this._progress); - }, - - _onDisableSave: function (value) { - this._save.attr('disabled', value); + self._form.attr("action", this._redirect_url); + self._form.attr("method", "GET"); + self.$("[name]").attr("name", null); + setTimeout(function() { + self._form.submit(); + }, 3000); + } }, - - _setProgress: function (progress, bar) { - bar.css('width', progress + '%'); - }, - - _setProgressType: function (type, progress) { - progress - .removeClass('progress-success progress-danger progress-info') - .addClass('progress-' + type); - }, - - _onHandleError: function (msg) { - this.sandbox.notify( - 'Error', - msg, - 'error' - ); - this._onDisableSave(false); - }, - - _onCleanUpload: function () { - this.$('.btn-remove-url').trigger('click'); + function(err) { + console.error(err); + self._onHandleError(self.i18n("unable_to_finish")); } - - }; + ); + this._setProgressType("success", this._bar); + }, + + _onDisableSave: function(value) { + this._save.attr("disabled", value); + }, + + _setProgress: function(progress, bar) { + bar.css("width", progress + "%"); + }, + + _setProgressType: function(type, bar) { + bar + .removeClass("bg-success bg-info bg-warning bg-danger") + .addClass("bg-" + type); + }, + + _onHandleError: function(msg) { + this.sandbox.notify("Error", msg, "error"); + this._onDisableSave(false); + }, + + _onCleanUpload: function() { + this.$(".btn-remove-url").trigger("click"); + } + }; }); diff --git a/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html b/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html index 607228b..1fc6458 100644 --- a/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html +++ b/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html @@ -1,6 +1,7 @@
{{ parent() }} diff --git a/ckanext/cloudstorage/templates/page.html b/ckanext/cloudstorage/templates/page.html index abbbe82..acd49f3 100644 --- a/ckanext/cloudstorage/templates/page.html +++ b/ckanext/cloudstorage/templates/page.html @@ -2,7 +2,10 @@ {% block scripts %} {{ super() }} - {% set type = 'asset' if h.ckan_version() > '2.9' else 'resource' %} - {% include 'cloudstorage/snippets/cloudstorage-js_' ~ type ~ '.html' %} + {% with version = h.ckan_version() %} + {# version < '2.2' means CKAN>=2.10, because cloudstorage just won't work with CKAN==2.1.* #} + {% set type = 'asset' if version > '2.9' or version < '2.2' else 'resource' %} + {% include 'cloudstorage/snippets/cloudstorage-js_' ~ type ~ '.html' %} + {% endwith %} {% endblock %} From 5239c9a5656308863e60c3990fecef94cda3aa46 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Tue, 19 Jan 2021 14:21:55 +0200 Subject: [PATCH 33/44] Add alembic to git --- .gitignore | 1 + .../migration/cloudstorage/alembic.ini | 74 +++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 ckanext/cloudstorage/migration/cloudstorage/alembic.ini diff --git a/.gitignore b/.gitignore index ece61fa..302e5d9 100644 --- a/.gitignore +++ b/.gitignore @@ -17,5 +17,6 @@ tmp/* solr_runtime/* fl_notes.txt *.ini +!alembic.ini .noseids *~ diff --git a/ckanext/cloudstorage/migration/cloudstorage/alembic.ini b/ckanext/cloudstorage/migration/cloudstorage/alembic.ini new file mode 100644 index 0000000..47fc754 --- /dev/null +++ b/ckanext/cloudstorage/migration/cloudstorage/alembic.ini @@ -0,0 +1,74 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = %(here)s + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# timezone to use when rendering the date +# within the migration file as well as the filename. +# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +#truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to /home/sergey/Projects/core/ckanext-cloudstorage/ckanext/cloudstorage/migration/cloudstorage/versions. 
When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat /home/sergey/Projects/core/ckanext-cloudstorage/ckanext/cloudstorage/migration/cloudstorage/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = driver://user:pass@localhost/dbname + + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S From 4d430e6d06a860cd7a5da7328edc1ac4a816b391 Mon Sep 17 00:00:00 2001 From: Oleksandr Styhar Date: Mon, 1 Feb 2021 19:02:38 +0200 Subject: [PATCH 34/44] Fix resource_create, resource_patch API calls --- README.md | 2 +- ckanext/cloudstorage/helpers.py | 6 +- ckanext/cloudstorage/plugin/__init__.py | 5 +- ckanext/cloudstorage/storage.py | 71 ++++++++++--------- .../snippets/multipart_module.html | 14 ++-- ckanext/cloudstorage/tests/fixtures.py | 8 +-- .../tests/logic/action/test_multipart.py | 9 +-- ckanext/cloudstorage/tests/test_plugin.py | 7 +- ckanext/cloudstorage/tests/test_storage.py | 21 ++++-- ckanext/cloudstorage/tests/test_utils.py | 4 +- test.ini | 10 ++- 11 files changed, 92 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 6c7e73c..080764e 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ first. Run next command from extension folder: For CKAN>=2.9 use the following command instead: - ckan -c /etc/ckan/default/production.ini ` db upgrade -p cloudstorage + ckan -c /etc/ckan/default/production.ini db upgrade -p cloudstorage With that feature you can use `cloudstorage_clean_multipart` action, which is available only for sysadmins. 
After executing, all unfinished multipart uploads, older than 7 days, diff --git a/ckanext/cloudstorage/helpers.py b/ckanext/cloudstorage/helpers.py index a5042eb..830665a 100644 --- a/ckanext/cloudstorage/helpers.py +++ b/ckanext/cloudstorage/helpers.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- from ckanext.cloudstorage.storage import ResourceCloudStorage - +import ckan.plugins.toolkit as tk def use_secure_urls(): return all([ @@ -10,3 +10,7 @@ def use_secure_urls(): 'S3' in ResourceCloudStorage.driver_name.fget(None), 'host' in ResourceCloudStorage.driver_options.fget(None), ]) + + +def use_multipart_upload(): + return use_secure_urls() diff --git a/ckanext/cloudstorage/plugin/__init__.py b/ckanext/cloudstorage/plugin/__init__.py index 89011eb..6f3b4aa 100644 --- a/ckanext/cloudstorage/plugin/__init__.py +++ b/ckanext/cloudstorage/plugin/__init__.py @@ -33,7 +33,10 @@ def update_config(self, config): # ITemplateHelpers def get_helpers(self): - return dict(cloudstorage_use_secure_urls=helpers.use_secure_urls) + return dict( + cloudstorage_use_secure_urls=helpers.use_secure_urls, + cloudstorage_use_multipart_upload=helpers.use_multipart_upload, + ) # IConfigurable diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index a1520d3..4396e65 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import cgi import mimetypes +import logging import os import six from six.moves.urllib.parse import urljoin @@ -29,6 +30,8 @@ from werkzeug.datastructures import FileStorage as FlaskFileStorage +log = logging.getLogger(__name__) + ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage) AWS_UPLOAD_PART_SIZE = 5 * 1024 * 1024 @@ -39,20 +42,20 @@ def _get_underlying_file(wrapper): return wrapper.file -def _md5sum(source_path): +def _md5sum(fobj): block_count = 0 block = True md5string = b'' - with open(source_path, "rb") as f: - while block: - block = f.read(AWS_UPLOAD_PART_SIZE) - if block: - block_count += 1 - hash_obj = hashlib.md5() - hash_obj.update(block) - md5string = md5string + binascii.unhexlify(hash_obj.hexdigest()) - else: - break + while block: + block = fobj.read(AWS_UPLOAD_PART_SIZE) + if block: + block_count += 1 + hash_obj = hashlib.md5() + hash_obj.update(block) + md5string = md5string + binascii.unhexlify(hash_obj.hexdigest()) + else: + break + fobj.seek(0, os.SEEK_SET) hash_obj = hashlib.md5() hash_obj.update(md5string) return hash_obj.hexdigest() + "-" + str(block_count) @@ -299,36 +302,38 @@ def upload(self, id, max_size=10): object_name = self.path_from_filename(id, self.filename) try: cloud_object = self.container.get_object(object_name=object_name) - print("\t Object found, checking size {0}: {1}".format(object_name, cloud_object.size)) - file_size = os.path.getsize(file_upload.name) - print("\t - File size {0}: {1}".format(file_upload.name, file_size)) + log.debug("\t Object found, checking size %s: %s", object_name, cloud_object.size) + if os.path.isfile(self.filename): + file_size = os.path.getsize(self.filename) + else: + self.file_upload.seek(0, os.SEEK_END) + file_size = self.file_upload.tell() + self.file_upload.seek(0, os.SEEK_SET) + + log.debug("\t - File size %s: %s", self.filename, file_size) if file_size == int(cloud_object.size): - print("\t Size fits, checking hash {0}: {1}".format(object_name, cloud_object.hash)) - hash_file = hashlib.md5(open(file_upload.name, 'rb').read()).hexdigest() - print("\t - File hash {0}: {1}".format(file_upload.name, 
hash_file)) + log.debug("\t Size fits, checking hash %s: %s", object_name, cloud_object.hash) + hash_file = hashlib.md5(self.file_upload.read()).hexdigest() + self.file_upload.seek(0, os.SEEK_SET) + log.debug("\t - File hash %s: %s", self.filename, hash_file) # basic hash if hash_file == cloud_object.hash: - print("\t => File found, matching hash, skipping upload") + log.debug("\t => File found, matching hash, skipping upload") return # multipart hash - multi_hash_file = _md5sum(file_upload.name) - print("\t - File multi hash {0}: {1}".format(file_upload.name, multi_hash_file)) + multi_hash_file = _md5sum(self.file_upload) + log.debug("\t - File multi hash %s: %s", self.filename, multi_hash_file) if multi_hash_file == cloud_object.hash: - print("\t => File found, matching hash, skipping upload") + log.debug("\t => File found, matching hash, skipping upload") return - print("\t Resource found in the cloud but outdated, uploading") + log.debug("\t Resource found in the cloud but outdated, uploading") except ObjectDoesNotExistError: - print("\t Resource not found in the cloud, uploading") - - # FIX: replaced call with a simpler version - with open(file_upload.name, 'rb') as iterator: - self.container.upload_object_via_stream(iterator=iterator, object_name=object_name) - print("\t => UPLOADED {0}: {1}".format(file_upload.name, object_name)) - except ValueError as v: - print(traceback.format_exc()) - raise v - except types.InvalidCredsError as err: - print(traceback.format_exc()) + log.debug("\t Resource not found in the cloud, uploading") + + self.container.upload_object_via_stream(iterator=iter(file_upload), object_name=object_name) + log.debug("\t => UPLOADED %s: %s", self.filename, object_name) + except (ValueError, types.InvalidCredsError) as err: + log.error(traceback.format_exc()) raise err elif self._clear and self.old_filename and not self.leave_files: diff --git a/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html b/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html index 932efdc..48bedf1 100644 --- a/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html +++ b/ckanext/cloudstorage/templates/cloudstorage/snippets/multipart_module.html @@ -1,10 +1,12 @@
+ {% if h.cloudstorage_use_multipart_upload() %} + data-module="cloudstorage-multipart-upload" + data-module-cloud='S3' + {# prevent type-guessing inside JS module by prefixing ID with underscore #} + data-module-package-id="_{{ pkg_name }}" + data-module-max-size="{{ max_size }}" + {% endif %} +> {{ parent() }}
diff --git a/ckanext/cloudstorage/tests/fixtures.py b/ckanext/cloudstorage/tests/fixtures.py index 6d0e86c..0ea2268 100644 --- a/ckanext/cloudstorage/tests/fixtures.py +++ b/ckanext/cloudstorage/tests/fixtures.py @@ -155,7 +155,7 @@ def with_request_context(test_request_context): yield @pytest.fixture - def make_resource(clean_db, ckan_config, monkeypatch, tmpdir): + def create_with_upload(clean_db, ckan_config, monkeypatch, tmpdir): """Shortcut for creating uploaded resource. Requires content and name for newly created resource. By default is using `resource_create` action, but it can be changed by @@ -165,8 +165,8 @@ def make_resource(clean_db, ckan_config, monkeypatch, tmpdir): additional named arguments, that will be used as resource properties. Example:: - def test_uploaded_resource(make_resource): - resource = make_resource("hello world", "file.txt") + def test_uploaded_resource(create_with_upload): + resource = create_with_upload("hello world", "file.txt", package_id=factories.Dataset()['id']) assert resource["url_type"] == "upload" assert resource["format"] == "TXT" assert resource["size"] == 11 @@ -186,7 +186,5 @@ def factory(data, filename, context={}, **kwargs): u"upload": test_resource, } params.update(kwargs) - if u'package_id' not in params: - params[u'package_id'] = factories.Dataset()[u"id"] return test_helpers.call_action(action, context, **params) return factory diff --git a/ckanext/cloudstorage/tests/logic/action/test_multipart.py b/ckanext/cloudstorage/tests/logic/action/test_multipart.py index 9d5537b..1b1383b 100644 --- a/ckanext/cloudstorage/tests/logic/action/test_multipart.py +++ b/ckanext/cloudstorage/tests/logic/action/test_multipart.py @@ -25,8 +25,7 @@ def test_upload(self): storage = ResourceCloudStorage(res) assert storage.path_from_filename( res['id'], filename) == multipart['name'] - with pytest.raises(ObjectDoesNotExistError): - storage.get_url_from_filename(res['id'], filename) + assert storage.get_url_from_filename(res['id'], filename) is None fp = six.BytesIO(b'b' * 1024 * 1024 * 5) fp.seek(0) @@ -36,8 +35,7 @@ def test_upload(self): partNumber=1, upload=FakeFileStorage(fp, filename)) - with pytest.raises(ObjectDoesNotExistError): - storage.get_url_from_filename(res['id'], filename) + assert storage.get_url_from_filename(res['id'], filename) is None fp = six.BytesIO(b'a' * 1024 * 1024 * 5) fp.seek(0) @@ -47,8 +45,7 @@ def test_upload(self): partNumber=2, upload=FakeFileStorage(fp, filename)) - with pytest.raises(ObjectDoesNotExistError): - storage.get_url_from_filename(res['id'], filename) + assert storage.get_url_from_filename(res['id'], filename) is None result = helpers.call_action( 'cloudstorage_finish_multipart', uploadId=multipart['id']) diff --git a/ckanext/cloudstorage/tests/test_plugin.py b/ckanext/cloudstorage/tests/test_plugin.py index 03d226d..7242783 100644 --- a/ckanext/cloudstorage/tests/test_plugin.py +++ b/ckanext/cloudstorage/tests/test_plugin.py @@ -28,19 +28,18 @@ def test_required_config(self, ckan_config, monkeypatch, option): plugin.configure(ckan_config) @pytest.mark.usefixtures('clean_db') - def test_before_delete(self, make_resource): + def test_before_delete(self, create_with_upload): """When resource deleted, we must remove corresponding file from S3. 
""" name = 'test.txt' - resource = make_resource('hello world', name, name=name) + resource = create_with_upload('hello world', name, name=name, package_id=factories.Dataset()['id']) plugin = p.get_plugin('cloudstorage') uploader = plugin.get_resource_uploader(resource) assert uploader.get_url_from_filename(resource['id'], name) helpers.call_action('resource_delete', id=resource['id']) - with pytest.raises(ObjectDoesNotExistError): - assert uploader.get_url_from_filename(resource['id'], name) + assert uploader.get_url_from_filename(resource['id'], name) is None @pytest.mark.usefixtures('clean_db') def test_before_delete_for_linked_resource(self): diff --git a/ckanext/cloudstorage/tests/test_storage.py b/ckanext/cloudstorage/tests/test_storage.py index b69054e..bfe47e1 100644 --- a/ckanext/cloudstorage/tests/test_storage.py +++ b/ckanext/cloudstorage/tests/test_storage.py @@ -2,6 +2,7 @@ import pytest from six.moves.urllib.parse import urlparse +from ckan.tests import factories from ckanext.cloudstorage.storage import CloudStorage, ResourceCloudStorage @@ -23,20 +24,32 @@ def test_props(self): @pytest.mark.ckan_config('ckan.plugins', 'cloudstorage') @pytest.mark.usefixtures('with_driver_options', 'with_plugins') class TestResourceCloudStorage(object): - def test_not_secure_url_from_filename(self, make_resource): + def test_not_secure_url_from_filename(self, create_with_upload): filename = 'file.txt' - resource = make_resource('test', filename) + resource = create_with_upload('test', filename, package_id=factories.Dataset()['id']) storage = ResourceCloudStorage(resource) url = storage.get_url_from_filename(resource['id'], filename) assert storage.container_name in url assert not urlparse(url).query @pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', True) - def test_secure_url_from_filename(self, make_resource): + def test_secure_url_from_filename(self, create_with_upload): filename = 'file.txt' - resource = make_resource('test', filename) + resource = create_with_upload('test', filename, package_id=factories.Dataset()['id']) storage = ResourceCloudStorage(resource) if not storage.can_use_advanced_aws or not storage.use_secure_urls: pytest.skip('SecureURL not supported') url = storage.get_url_from_filename(resource['id'], filename) assert urlparse(url).query + + @pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', True) + def test_hash_check(self, create_with_upload): + filename = 'file.txt' + resource = create_with_upload('test', filename, package_id=factories.Dataset()['id']) + storage = ResourceCloudStorage(resource) + if not storage.can_use_advanced_aws or not storage.use_secure_urls: + pytest.skip('SecureURL not supported') + url = storage.get_url_from_filename(resource['id'], filename) + resource = create_with_upload('test', filename, action='resource_update', id=resource['id']) + + assert urlparse(url).query diff --git a/ckanext/cloudstorage/tests/test_utils.py b/ckanext/cloudstorage/tests/test_utils.py index 0c058ae..c6fd155 100644 --- a/ckanext/cloudstorage/tests/test_utils.py +++ b/ckanext/cloudstorage/tests/test_utils.py @@ -38,9 +38,9 @@ def test_status_codes(self, app): app.get(url, status=302, extra_environ=env, follow_redirects=False) @pytest.mark.usefixtures('clean_db') - def test_download(self, make_resource, app): + def test_download(self, create_with_upload, app): filename = 'file.txt' - resource = make_resource('hello world', filename) + resource = create_with_upload('hello world', filename, package_id=factories.Dataset()['id']) url = tk.url_for( 
             'resource.download',
             id=resource['package_id'],
diff --git a/test.ini b/test.ini
index 1f310e3..12ace2e 100644
--- a/test.ini
+++ b/test.ini
@@ -16,7 +16,7 @@ use = config:../ckan/test-core.ini
 
 # Logging configuration
 [loggers]
-keys = root, ckan, sqlalchemy
+keys = root, ckan, sqlalchemy, ckanext
 
 [handlers]
 keys = console
@@ -45,4 +45,10 @@ level = NOTSET
 formatter = generic
 
 [formatter_generic]
-format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s
\ No newline at end of file
+format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s
+
+[logger_ckanext]
+level = DEBUG
+handlers = console
+qualname = ckanext
+propagate = 0

From 88984c97d82ae3dd02074a1c6ae0d411150ba927 Mon Sep 17 00:00:00 2001
From: Sergey Motornyuk
Date: Thu, 4 Feb 2021 15:11:26 +0200
Subject: [PATCH 35/44] Increase upload speed for multiline files

---
 ckanext/cloudstorage/storage.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py
index 4396e65..d8695e6 100644
--- a/ckanext/cloudstorage/storage.py
+++ b/ckanext/cloudstorage/storage.py
@@ -5,6 +5,7 @@
 import logging
 import os
 import six
+import tempfile
 from six.moves.urllib.parse import urljoin
 from ast import literal_eval
 from datetime import datetime, timedelta
@@ -330,7 +331,22 @@ def upload(self, id, max_size=10):
         except ObjectDoesNotExistError:
             log.debug("\t Resource not found in the cloud, uploading")
 
-            self.container.upload_object_via_stream(iterator=iter(file_upload), object_name=object_name)
+            # If it's a temporary file, we'd better convert it
+            # into FileIO. Otherwise libcloud will iterate
+            # over lines, not over chunks, and it will really
+            # slow down the process for files that consist of
+            # millions of short lines
+            if isinstance(file_upload, tempfile.SpooledTemporaryFile):
+                file_upload.rollover()
+                try:
+                    # extract underlying file
+                    file_upload_iter = file_upload._file.detach()
+                except AttributeError:
+                    # It's python2
+                    file_upload_iter = file_upload._file
+            else:
+                file_upload_iter = iter(file_upload)
+            self.container.upload_object_via_stream(iterator=file_upload_iter, object_name=object_name)
             log.debug("\t => UPLOADED %s: %s", self.filename, object_name)
         except (ValueError, types.InvalidCredsError) as err:
             log.error(traceback.format_exc())
From 51f5ff209d0141e81b79a95764c22c0563ab78d7 Mon Sep 17 00:00:00 2001
From: Sergey Motornyuk
Date: Wed, 10 Feb 2021 09:41:03 +0700
Subject: [PATCH 36/44] Standard version check

---
 ckanext/cloudstorage/templates/page.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ckanext/cloudstorage/templates/page.html b/ckanext/cloudstorage/templates/page.html
index acd49f3..4ade9fe 100644
--- a/ckanext/cloudstorage/templates/page.html
+++ b/ckanext/cloudstorage/templates/page.html
@@ -4,7 +4,7 @@
   {{ super() }}
   {% with version = h.ckan_version() %}
     {# version < '2.2' means CKAN>=2.10, because cloudstorage just won't work with CKAN==2.1.* #}
-    {% set type = 'asset' if version > '2.9' or version < '2.2' else 'resource' %}
+    {% set type = 'asset' if h.ckan_version().split('.')[1] | int >= 9 else 'resource' %}
     {% include 'cloudstorage/snippets/cloudstorage-js_' ~ type ~ '.html' %}
   {% endwith %}
 
From 93c2e0b94ca7f563a713d3d1e75053b4c29ad8fd Mon Sep 17 00:00:00 2001
From: Sergey Motornyuk
Date: Wed, 24 Feb 2021 09:30:37 +0700
Subject: [PATCH 37/44] Py2 support of max upload size

---
 ckanext/cloudstorage/helpers.py | 4 ++++
 ckanext/cloudstorage/plugin/__init__.py | 1 +
ckanext/cloudstorage/templates/package/new_resource.html | 2 +- .../templates/package/new_resource_not_draft.html | 4 ++-- ckanext/cloudstorage/templates/package/resource_edit.html | 2 +- 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ckanext/cloudstorage/helpers.py b/ckanext/cloudstorage/helpers.py index 830665a..cc8ef0b 100644 --- a/ckanext/cloudstorage/helpers.py +++ b/ckanext/cloudstorage/helpers.py @@ -14,3 +14,7 @@ def use_secure_urls(): def use_multipart_upload(): return use_secure_urls() + + +def max_upload_size(): + return tk.config.get('ckanext.cloudstorage.max_upload_size_gb') diff --git a/ckanext/cloudstorage/plugin/__init__.py b/ckanext/cloudstorage/plugin/__init__.py index 6f3b4aa..7e94f09 100644 --- a/ckanext/cloudstorage/plugin/__init__.py +++ b/ckanext/cloudstorage/plugin/__init__.py @@ -36,6 +36,7 @@ def get_helpers(self): return dict( cloudstorage_use_secure_urls=helpers.use_secure_urls, cloudstorage_use_multipart_upload=helpers.use_multipart_upload, + cloudstorage_max_upload_size=helpers.max_upload_size, ) # IConfigurable diff --git a/ckanext/cloudstorage/templates/package/new_resource.html b/ckanext/cloudstorage/templates/package/new_resource.html index 4f76aff..f99bec1 100644 --- a/ckanext/cloudstorage/templates/package/new_resource.html +++ b/ckanext/cloudstorage/templates/package/new_resource.html @@ -2,7 +2,7 @@ {% block form %} - {% set max_size = config.get('ckanext.cloudstorage.max_upload_size_gb') %} + {% set max_size = h.cloudstorage_max_upload_size() %} {% snippet 'cloudstorage/snippets/multipart_module.html', pkg_name=pkg_name, parent=super, max_size=max_size %} {% endblock %} diff --git a/ckanext/cloudstorage/templates/package/new_resource_not_draft.html b/ckanext/cloudstorage/templates/package/new_resource_not_draft.html index 9c575ae..415570f 100644 --- a/ckanext/cloudstorage/templates/package/new_resource_not_draft.html +++ b/ckanext/cloudstorage/templates/package/new_resource_not_draft.html @@ -2,6 +2,6 @@ {% block form %} - {% set max_size = config.get('ckanext.cloudstorage.max_upload_size_gb') %} - {% snippet 'cloudstorage/snippets/multipart_module.html', pkg_name=pkg_name, parent=super, max_size=max_size %} + {% set max_size = h.cloudstorage_max_upload_size() %} + {% snippet 'cloudstorage/snippets/multipart_module.html', pkg_name=pkg_name, parent=super, max_size=max_size %} {% endblock %} diff --git a/ckanext/cloudstorage/templates/package/resource_edit.html b/ckanext/cloudstorage/templates/package/resource_edit.html index d05eb09..137a45c 100644 --- a/ckanext/cloudstorage/templates/package/resource_edit.html +++ b/ckanext/cloudstorage/templates/package/resource_edit.html @@ -2,7 +2,7 @@ {% block form %} - {% set max_size = config.get('ckanext.cloudstorage.max_upload_size_gb') %} + {% set max_size = h.cloudstorage_max_upload_size() %} {% snippet 'cloudstorage/snippets/multipart_module.html', pkg_name=pkg.name, parent=super, max_size=max_size %} {% endblock %} From 2274def0e84e9be92cc403c1c6e4875f4118bacc Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Fri, 3 Sep 2021 11:37:22 +0300 Subject: [PATCH 38/44] Update last_modified --- ckanext/cloudstorage/storage.py | 2 ++ ckanext/cloudstorage/tests/test_utils.py | 8 +++----- dev-requirements.txt | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 dev-requirements.txt diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index d8695e6..da4ebbd 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -218,12 
+218,14 @@ def __init__(self, resource):
             self.file_upload = _get_underlying_file(upload_field_storage)
             resource['url'] = self.filename
             resource['url_type'] = 'upload'
+            resource['last_modified'] = datetime.utcnow()
         elif multipart_name and self.can_use_advanced_aws:
             # This means that file was successfully uploaded and stored
             # at cloud.
             # Currently implemented just AWS version
             resource['url'] = munge.munge_filename(multipart_name)
             resource['url_type'] = 'upload'
+            resource['last_modified'] = datetime.utcnow()
         elif self._clear and resource.get('id'):
             # Apparently, this is a created-but-not-commited resource whose
             # file upload has been canceled. We're copying the behaviour of
diff --git a/ckanext/cloudstorage/tests/test_utils.py b/ckanext/cloudstorage/tests/test_utils.py
index c6fd155..b990b5d 100644
--- a/ckanext/cloudstorage/tests/test_utils.py
+++ b/ckanext/cloudstorage/tests/test_utils.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 import pytest
-import mock
 
 import ckan.plugins.toolkit as tk
 
@@ -11,12 +10,11 @@
 @pytest.mark.ckan_config('ckan.plugins', 'cloudstorage')
 @pytest.mark.usefixtures('with_driver_options', 'with_plugins')
 class TestResourceDownload(object):
-    def test_utils_used_by_download_route(self, app, monkeypatch):
+    def test_utils_used_by_download_route(self, app, mocker):
         url = tk.url_for('resource.download', id='a', resource_id='b')
-        func = mock.Mock(return_value='')
-        monkeypatch.setattr(utils, 'resource_download', func)
+        mocker.patch('ckanext.cloudstorage.utils.resource_download', return_value='')
         app.get(url)
-        func.assert_called_once_with('a', 'b', None)
+        utils.resource_download.assert_called_once_with('a', 'b', None)
 
     @pytest.mark.usefixtures('clean_db')
     def test_status_codes(self, app):
diff --git a/dev-requirements.txt b/dev-requirements.txt
new file mode 100644
index 0000000..4ba02b4
--- /dev/null
+++ b/dev-requirements.txt
@@ -0,0 +1 @@
+pytest-mock==3.6.1

From 08858c123c12a7c3cedf31bd4f55dfe1917f1846 Mon Sep 17 00:00:00 2001
From: Sergey Motornyuk
Date: Tue, 26 Oct 2021 13:27:51 +0300
Subject: [PATCH 39/44] Add ckanapi to requirements

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 176929f..8716d3a 100644
--- a/setup.py
+++ b/setup.py
@@ -19,6 +19,7 @@
     install_requires=[
         'apache-libcloud~=2.8.2',
         'six>=1.12.0',
+        'ckanapi',
     ],
     entry_points=(
         """
From de394aaf6c063da162370d719f0122d7e330bab0 Mon Sep 17 00:00:00 2001
From: Sergey Motornyuk
Date: Tue, 2 Nov 2021 12:28:26 +0200
Subject: [PATCH 40/44] Set content-type for multipart uploads

---
 ckanext/cloudstorage/logic/action/multipart.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py
index 33ea469..5e455a4 100644
--- a/ckanext/cloudstorage/logic/action/multipart.py
+++ b/ckanext/cloudstorage/logic/action/multipart.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 import logging
 import datetime
-
+import mimetypes
 import libcloud.security
 from sqlalchemy.orm.exc import NoResultFound
 
@@ -136,10 +136,15 @@ def initiate_multipart(context, data_dict):
         except Exception as e:
             log.exception('[delete from cloud] %s' % e)
 
+    headers = None
+    content_type, _ = mimetypes.guess_type(res_name)
+    if content_type:
+        headers = {"Content-type": content_type}
     upload_object = MultipartUpload(
         uploader.driver._initiate_multipart(
             container=uploader.container,
-            object_name=res_name
+            object_name=res_name,
+            headers=headers
         ),
         id,
         res_name,
From 4b643bd5ffe4b0bc616dd86aad295f7573c8e82d Mon Sep 17 
00:00:00 2001
From: Sergey Motornyuk
Date: Thu, 10 Feb 2022 11:02:33 +0200
Subject: [PATCH 41/44] Multipart allows subclassing of uploader

---
 ckanext/cloudstorage/logic/action/multipart.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py
index 5e455a4..f40975d 100644
--- a/ckanext/cloudstorage/logic/action/multipart.py
+++ b/ckanext/cloudstorage/logic/action/multipart.py
@@ -9,6 +9,7 @@
 import ckan.model as model
 import ckan.lib.helpers as h
 import ckan.plugins.toolkit as toolkit
+from ckan.lib.uploader import get_resource_uploader
 
 from ckanext.cloudstorage.storage import ResourceCloudStorage
 from ckanext.cloudstorage.model import MultipartUpload, MultipartPart
@@ -107,7 +108,11 @@ def initiate_multipart(context, data_dict):
     user_obj = model.User.get(context['user'])
     user_id = user_obj.id if user_obj else None
 
-    uploader = ResourceCloudStorage({'multipart_name': name})
+    uploader = get_resource_uploader({'multipart_name': name})
+    if not isinstance(uploader, ResourceCloudStorage):
+        raise toolkit.ValidationError({
+            "uploader": [f"Must be ResourceCloudStorage or its subclass, not {type(uploader)}"]
+        })
     res_name = uploader.path_from_filename(id, name)
 
     upload_object = MultipartUpload.by_name(res_name)
From b0aa5c0f8ee8c7599b206eb39d9edd5ef339e965 Mon Sep 17 00:00:00 2001
From: Sergey Motornyuk
Date: Thu, 3 Mar 2022 15:54:52 +0200
Subject: [PATCH 42/44] better multipart removal

---
 ckanext/cloudstorage/logic/action/multipart.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py
index f40975d..afd0c83 100644
--- a/ckanext/cloudstorage/logic/action/multipart.py
+++ b/ckanext/cloudstorage/logic/action/multipart.py
@@ -135,9 +135,15 @@ def initiate_multipart(context, data_dict):
             uploader.container, name_prefix
         )
+
         for obj in old_objects:
-            log.info('Removing cloud object: %s' % obj)
-            obj.delete()
+            for similar in model.Session.query(model.Resource).filter_by(url=obj.name[len(name_prefix)+1:]):
+                if obj.name == uploader.path_from_filename(similar.id, similar.url):
+                    log.info('Leave cloud object because it is referenced by resource %s: %s', similar.id, obj)
+                    break
+            else:
+                log.info('Removing cloud object: %s' % obj)
+                obj.delete()
 
     except Exception as e:
         log.exception('[delete from cloud] %s' % e)
 
From f2cf270d3049f821da654b4a9521a02763cacd6a Mon Sep 17 00:00:00 2001
From: Sergey Motornyuk
Date: Thu, 17 Mar 2022 10:14:52 +0200
Subject: [PATCH 43/44] Update test suite

---
 ckanext/cloudstorage/storage.py | 1 +
 ckanext/cloudstorage/tests/fixtures.py | 190 -------------------------
 conftest.py | 6 -
 dev-requirements.txt | 1 +
 setup.cfg | 2 +-
 5 files changed, 3 insertions(+), 197 deletions(-)
 delete mode 100644 ckanext/cloudstorage/tests/fixtures.py
 delete mode 100644 conftest.py

diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py
index da4ebbd..cb93771 100644
--- a/ckanext/cloudstorage/storage.py
+++ b/ckanext/cloudstorage/storage.py
@@ -10,6 +10,7 @@
 from ast import literal_eval
 from datetime import datetime, timedelta
 import traceback
+
 from ckan import model
 from ckan.lib import munge
 import ckan.plugins as p
diff --git a/ckanext/cloudstorage/tests/fixtures.py b/ckanext/cloudstorage/tests/fixtures.py
deleted file mode 100644
index 0ea2268..0000000
--- a/ckanext/cloudstorage/tests/fixtures.py
+++ /dev/null
@@ -1,190 +0,0 @@
-# -*- 
coding: utf-8 -*- - -try: - from ckan.tests.pytest_ckan.fixtures import * # NOQA - -except ImportError: - import pytest - import six - import ckan.tests.helpers as test_helpers - import ckan.tests.factories as factories - import ckan.plugins - import ckan.lib.search as search - - from ckan.common import config - - from ckanext.cloudstorage.utils import FakeFileStorage - - @pytest.fixture - def ckan_config(request, monkeypatch): - """Allows to override the configuration object used by tests - - Takes into account config patches introduced by the ``ckan_config`` - mark. - - If you just want to set one or more configuration options for the - scope of a test (or a test class), use the ``ckan_config`` mark:: - - @pytest.mark.ckan_config('ckan.auth.create_unowned_dataset', True) - def test_auth_create_unowned_dataset(): - - # ... - - To use the custom config inside a test, apply the - ``ckan_config`` mark to it and inject the ``ckan_config`` fixture: - - .. literalinclude:: /../ckan/tests/pytest_ckan/test_fixtures.py - :start-after: # START-CONFIG-OVERRIDE - :end-before: # END-CONFIG-OVERRIDE - - If the change only needs to be applied locally, use the - ``monkeypatch`` fixture - - .. literalinclude:: /../ckan/tests/test_common.py - :start-after: # START-CONFIG-OVERRIDE - :end-before: # END-CONFIG-OVERRIDE - - """ - _original = config.copy() - for mark in request.node.iter_markers(u"ckan_config"): - monkeypatch.setitem(config, *mark.args) - yield config - config.clear() - config.update(_original) - - @pytest.fixture - def make_app(ckan_config): - """Factory for client app instances. - - Unless you need to create app instances lazily for some reason, - use the ``app`` fixture instead. - """ - return test_helpers._get_test_app - - @pytest.fixture - def app(make_app): - """Returns a client app instance to use in functional tests - - To use it, just add the ``app`` parameter to your test function signature:: - - def test_dataset_search(self, app): - - url = h.url_for('dataset.search') - - response = app.get(url) - - - """ - return make_app() - - @pytest.fixture(scope=u"session") - def reset_db(): - """Callable for resetting the database to the initial state. - - If possible use the ``clean_db`` fixture instead. - - """ - return test_helpers.reset_db - - @pytest.fixture(scope=u"session") - def reset_index(): - """Callable for cleaning search index. - - If possible use the ``clean_index`` fixture instead. - """ - return search.clear_all - - @pytest.fixture - def clean_db(reset_db): - """Resets the database to the initial state. - - This can be used either for all tests in a class:: - - @pytest.mark.usefixtures("clean_db") - class TestExample(object): - - def test_example(self): - - or for a single test:: - - class TestExample(object): - - @pytest.mark.usefixtures("clean_db") - def test_example(self): - - """ - reset_db() - - @pytest.fixture - def clean_index(reset_index): - """Clear search index before starting the test. - """ - reset_index() - - @pytest.fixture - def with_plugins(ckan_config): - """Load all plugins specified by the ``ckan.plugins`` config option - at the beginning of the test. When the test ends (even it fails), it will - unload all the plugins in the reverse order. - - .. 
literalinclude:: /../ckan/tests/test_factories.py - :start-after: # START-CONFIG-OVERRIDE - :end-before: # END-CONFIG-OVERRIDE - - """ - plugins = ckan_config["ckan.plugins"].split() - for plugin in plugins: - if not ckan.plugins.plugin_loaded(plugin): - ckan.plugins.load(plugin) - yield - for plugin in reversed(plugins): - if ckan.plugins.plugin_loaded(plugin): - ckan.plugins.unload(plugin) - - @pytest.fixture - def test_request_context(app): - """Provide function for creating Flask request context. - """ - return app.flask_app.test_request_context - - @pytest.fixture - def with_request_context(test_request_context): - """Execute test inside requests context - """ - with test_request_context(): - yield - - @pytest.fixture - def create_with_upload(clean_db, ckan_config, monkeypatch, tmpdir): - """Shortcut for creating uploaded resource. - Requires content and name for newly created resource. By default - is using `resource_create` action, but it can be changed by - passing named argument `action`. - In addition, accepts named argument `context` that will be passed - to `ckan.tests.helpers.call_action` and arbitary number of - additional named arguments, that will be used as resource - properties. - Example:: - def test_uploaded_resource(create_with_upload): - resource = create_with_upload("hello world", "file.txt", package_id=factories.Dataset()['id']) - assert resource["url_type"] == "upload" - assert resource["format"] == "TXT" - assert resource["size"] == 11 - """ - monkeypatch.setitem(ckan_config, u'ckan.storage_path', str(tmpdir)) - monkeypatch.setattr(ckan.lib.uploader, u'_storage_path', str(tmpdir)) - - def factory(data, filename, context={}, **kwargs): - action = kwargs.pop(u"action", u"resource_create") - test_file = six.BytesIO() - test_file.write(six.ensure_binary(data)) - test_file.seek(0) - test_resource = FakeFileStorage(test_file, filename) - - params = { - u"url": u"http://data", - u"upload": test_resource, - } - params.update(kwargs) - return test_helpers.call_action(action, context, **params) - return factory diff --git a/conftest.py b/conftest.py deleted file mode 100644 index fe491a4..0000000 --- a/conftest.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: utf-8 -*- - -pytest_plugins = [ - u'ckanext.cloudstorage.tests.ckan_setup', - u'ckanext.cloudstorage.tests.fixtures', -] diff --git a/dev-requirements.txt b/dev-requirements.txt index 4ba02b4..846e10f 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1 +1,2 @@ +pytest-ckan pytest-mock==3.6.1 diff --git a/setup.cfg b/setup.cfg index 0e8287b..7e01bee 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,4 +4,4 @@ filterwarnings = ignore::sqlalchemy.exc.SADeprecationWarning ignore::sqlalchemy.exc.SAWarning ignore::DeprecationWarning -addopts = --pdbcls=IPython.terminal.debugger:TerminalPdb \ No newline at end of file +addopts = --ckan-ini test.ini \ No newline at end of file From ed86723394f6cdbf9510d31418da677851c7eb8e Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Mon, 2 May 2022 21:03:47 +0300 Subject: [PATCH 44/44] Use custom uploader all the time --- ckanext/cloudstorage/logic/action/multipart.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ckanext/cloudstorage/logic/action/multipart.py b/ckanext/cloudstorage/logic/action/multipart.py index afd0c83..aee0cbc 100644 --- a/ckanext/cloudstorage/logic/action/multipart.py +++ b/ckanext/cloudstorage/logic/action/multipart.py @@ -108,7 +108,7 @@ def initiate_multipart(context, data_dict): user_obj = 
model.User.get(context['user'])
     user_id = user_obj.id if user_obj else None
 
-    uploader = get_resource_uploader({'multipart_name': name})
+    uploader = get_resource_uploader({'multipart_name': name, "id": id})
     if not isinstance(uploader, ResourceCloudStorage):
         raise toolkit.ValidationError({
             "uploader": [f"Must be ResourceCloudStorage or its subclass, not {type(uploader)}"]
@@ -175,8 +175,9 @@ def upload_multipart(context, data_dict):
         ['uploadId', 'partNumber', 'upload']
     )
 
-    uploader = ResourceCloudStorage({})
     upload = model.Session.query(MultipartUpload).get(upload_id)
+    uploader = get_resource_uploader({"id": upload.resource_id})
+
     data = _get_underlying_file(part_content).read()
     resp = uploader.driver.connection.request(
         _get_object_url(
@@ -224,7 +225,8 @@ def finish_multipart(context, data_dict):
         for part in model.Session.query(MultipartPart).filter_by(
             upload_id=upload_id).order_by(MultipartPart.n)
     ]
-    uploader = ResourceCloudStorage({})
+    uploader = get_resource_uploader({"id": upload.resource_id})
+
     try:
         obj = uploader.container.get_object(upload.name)
         obj.delete()
@@ -258,8 +260,8 @@ def finish_multipart(context, data_dict):
 def abort_multipart(context, data_dict):
     h.check_access('cloudstorage_abort_multipart', data_dict)
     id = toolkit.get_or_bust(data_dict, ['id'])
-    uploader = ResourceCloudStorage({})
 
+    uploader = get_resource_uploader({"id": id})
     resource_uploads = MultipartUpload.resource_uploads(id)
 
     aborted = []
@@ -288,7 +290,6 @@ def clean_multipart(context, data_dict):
     """
     h.check_access('cloudstorage_clean_multipart', data_dict)
-    uploader = ResourceCloudStorage({})
 
     delta = _get_max_multipart_lifetime()
     oldest_allowed = datetime.datetime.utcnow() - delta
@@ -303,6 +304,8 @@
     }
 
     for upload in uploads_to_remove:
+        uploader = get_resource_uploader({"id": upload.resource_id})
+
         try:
             _delete_multipart(upload, uploader)
         except toolkit.ValidationError as e:
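
Usage note on [PATCH 41/44] and [PATCH 44/44]: because the multipart actions now resolve the uploader through `get_resource_uploader()` instead of instantiating `ResourceCloudStorage` directly, a site-specific plugin can supply its own uploader as long as it subclasses `ResourceCloudStorage` (anything else makes `cloudstorage_initiate_multipart` raise a ValidationError). A minimal sketch of such a plugin follows; the names `ExampleResourceCloudStorage` and `ExampleStoragePlugin` are hypothetical, not part of these patches, and the plugin would have to be loaded so that its IUploader implementation takes precedence over the stock one.

    # hypothetical example, not included in these patches
    import ckan.plugins as plugins

    from ckanext.cloudstorage.storage import ResourceCloudStorage


    class ExampleResourceCloudStorage(ResourceCloudStorage):
        def path_from_filename(self, rid, filename):
            # e.g. keep every resource's uploads under a custom prefix
            return "example-prefix/{0}/{1}".format(rid, filename)


    class ExampleStoragePlugin(plugins.SingletonPlugin):
        plugins.implements(plugins.IUploader, inherit=True)

        def get_resource_uploader(self, data_dict):
            # after patches 41 and 44 this object is what the
            # initiate/upload/finish/abort multipart actions use
            return ExampleResourceCloudStorage(data_dict)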