diff --git a/changes/220.backport.feature b/changes/220.backport.feature new file mode 100644 index 00000000000..3cea48e4ca6 --- /dev/null +++ b/changes/220.backport.feature @@ -0,0 +1 @@ +Backported [#8421](https://github.com/ckan/ckan/pull/8421) for faster metadata updates. Comes with the `metadata_modified` and `metadata_created` schemas, allowing for site package migrations with correct metadata for these fields. \ No newline at end of file diff --git a/changes/8407.feature b/changes/8407.feature new file mode 100644 index 00000000000..7b4be91bf14 --- /dev/null +++ b/changes/8407.feature @@ -0,0 +1,15 @@ +Faster dataset metadata updates by detecting changes and only updating resource +metadata when dataset fields defined by IDatasetForm.resource_validation_dependencies +have changed (default: none) + +Now activities are created and metadata_modified updated only if there is a real change. + +metadata_modified may now be set by sysadmins which is useful for harvesting or mirroring. + +The allow_partial_update context parameter has been removed, now normal API +users may call package_update without passing resources. In this case the existing +resources will remain untouched instead of being deleted. + +package_update and actions that call it now report whether there was a real change by +adding the package id to a new changed_entities context value or changed_entities +envelope value for API calls. diff --git a/ckan/lib/dictization/__init__.py b/ckan/lib/dictization/__init__.py index f2fc4e8b565..4484d6a27de 100644 --- a/ckan/lib/dictization/__init__.py +++ b/ckan/lib/dictization/__init__.py @@ -2,7 +2,7 @@ from __future__ import annotations import datetime -from typing import Any, Callable, Iterable +from typing import Any, Callable, Iterable, Literal import sqlalchemy from sqlalchemy import Table @@ -109,13 +109,21 @@ def get_unique_constraints(table: Table, context: Context) -> list[list[str]]: return list_of_constraints -def table_dict_save(table_dict: dict[str, Any], - ModelClass: Any, - context: Context, - extra_attrs: Iterable[str] = ()) -> Any: +def table_dict_save( + table_dict: dict[str, Any], + ModelClass: Any, + context: Context, + extra_attrs: Iterable[str] = () + ) -> tuple[Any, Literal['create', 'update', None]]: '''Given a dict and a model class, update or create a sqlalchemy object. This will use an existing object if "id" is supplied OR if any unique - constraints are met. e.g supplying just a tag name will get out that tag obj. + constraints are met. e.g supplying just a tag name will get out that tag + obj. 
+ + Returns (obj, change) where change is: + - 'create' if this is a new object + - 'update' if any fields were changed or extra_attrs passed + - None if no change for an existing object ''' session = context["session"] @@ -128,7 +136,8 @@ def table_dict_save(table_dict: dict[str, Any], if id: obj = session.query(ModelClass).get(id) - if not obj: + new = not obj + if new: unique_constraints = get_unique_constraints(table, context) for constraint in unique_constraints: params = dict((key, table_dict.get(key)) for key in constraint) @@ -144,11 +153,13 @@ def table_dict_save(table_dict: dict[str, Any], if not obj: obj = ModelClass() - obj.from_dict(table_dict) + changed, _skipped = obj.from_dict(table_dict) for a in extra_attrs: if a in table_dict: setattr(obj, a, table_dict[a]) session.add(obj) - return obj + return ( + obj, 'create' if new else 'update' if changed or extra_attrs else None + ) diff --git a/ckan/lib/dictization/model_save.py b/ckan/lib/dictization/model_save.py index 24b17e71620..5f7ae356b49 100644 --- a/ckan/lib/dictization/model_save.py +++ b/ckan/lib/dictization/model_save.py @@ -6,7 +6,8 @@ import uuid import logging from typing import ( - Any, Collection, Optional, TYPE_CHECKING, Type, Union, cast, overload + Any, Collection, Optional, TYPE_CHECKING, Type, Union, cast, overload, + Literal, ) import ckan.lib.dictization as d @@ -21,8 +22,15 @@ log = logging.getLogger(__name__) -def resource_dict_save(res_dict: dict[str, Any], - context: Context) -> 'model.Resource': +def resource_dict_save( + res_dict: dict[str, Any], context: Context) \ + -> tuple['model.Resource', Literal['create', 'update', None]]: + ''' + Returns (resource_object, change) where change is: + - 'create' if this is a new resource object + - 'update' if any core fields or extras were changed + - None if no change for an existing resource object + ''' model = context["model"] session = context["session"] @@ -50,21 +58,38 @@ def resource_dict_save(res_dict: dict[str, Any], if 'url' in changed or ('last_modified' in changed and not new): obj.url_changed = True - if changed or obj.extras != skipped: + any_change = changed or obj.extras != skipped + # (canada fork only): metadata_modified for data migrations + # TODO: upstream contrib!! + if res_dict.get('metadata_modified'): + obj.metadata_modified = res_dict['metadata_modified'] + elif any_change: obj.metadata_modified = datetime.datetime.utcnow() obj.state = u'active' obj.extras = skipped - session.add(obj) - return obj + return obj, 'create' if new else 'update' if any_change else None def package_resource_list_save( res_dicts: Optional[list[dict[str, Any]]], - package: 'model.Package', context: Context) -> None: - allow_partial_update = context.get("allow_partial_update", False) - if res_dicts is None and allow_partial_update: - return + package: 'model.Package', context: Context, + copy_resources: dict[int, int] | tuple[()]) -> bool: + """ + Store a list of resources in the database. Returns True if any resources + were changed. + + :param res_dicts: List of resource dictionaries to store + :type res_dicts: list of dicts + :param package: The package model object that resources belong to + :type package: model.Package + :param context: A context dict with extra information + :type context: dict + :param copy_resources: A dictionary with resource indexes that should be copied from the existing resource list rather than creating new models for them.
It should have the format `{<new_resource_index>: <existing_resource_index>, ...}` + :type copy_resources: dict + """ + if res_dicts is None: + return False session = context['session'] model = context['model'] @@ -82,11 +107,22 @@ def package_resource_list_save( .filter(model.Resource.state == 'deleted')[:] obj_list = [] - for res_dict in res_dicts or []: + resources_changed = False + for i, res_dict in enumerate(res_dicts or []): + if i in copy_resources: + obj_list.append(old_list[copy_resources[i]]) + if i != copy_resources[i]: + resources_changed = True + continue if not u'package_id' in res_dict or not res_dict[u'package_id']: res_dict[u'package_id'] = package.id - obj = resource_dict_save(res_dict, context) + obj, change = resource_dict_save(res_dict, context) obj_list.append(obj) + if change: + resources_changed = True + + if old_list == obj_list: + return resources_changed # Set the package's resources. resource_list is an ORM relation - the # package's resources. If we didn't have the slice operator "[:]" then it @@ -107,13 +143,16 @@ resource.state = 'deleted' resource_list.append(resource) + return True + def package_extras_save( extra_dicts: Optional[list[dict[str, Any]]], pkg: 'model.Package', context: Context) -> None: - allow_partial_update = context.get("allow_partial_update", False) - if extra_dicts is None and allow_partial_update: - return + # (canada fork only): from backport PR#8407, support old package_extras table + extras_changed = False + if extra_dicts is None: + return extras_changed session = context["session"] @@ -132,6 +171,7 @@ #new for key in set(new_extras.keys()) - set(old_extras.keys()): pkg.extras[key] = new_extras[key] + extras_changed = True #changed for key in set(new_extras.keys()) & set(old_extras.keys()): extra = old_extras[key] @@ -139,18 +179,22 @@ continue extra.value = new_extras[key] session.add(extra) + extras_changed = True #deleted for key in set(old_extras.keys()) - set(new_extras.keys()): extra = old_extras[key] session.delete(extra) + extras_changed = True + return extras_changed -def package_tag_list_save(tag_dicts: Optional[list[dict[str, Any]]], - package: 'model.Package', context: Context) -> None: - allow_partial_update = context.get("allow_partial_update", False) - if tag_dicts is None and allow_partial_update: - return +def package_tag_list_save(tag_dicts: Optional[list[dict[str, Any]]], + package: 'model.Package', context: Context) -> bool: + ''' + Returns True if any tags were changed + ''' + changed = False model = context["model"] session = context["session"] @@ -167,7 +211,7 @@ def package_tag_list_save(tag_dicts: Optional[list[dict[str, Any]]], for tag_dict in tag_dicts or []: name_vocab = (tag_dict.get('name'), tag_dict.get('vocabulary_id')) if name_vocab not in tag_name_vocab: - tag_obj = d.table_dict_save(tag_dict, model.Tag, context) + tag_obj, _change = d.table_dict_save(tag_dict, model.Tag, context) tags.add(tag_obj) tag_name_vocab.add((tag_obj.name, tag_obj.vocabulary_id)) @@ -176,6 +220,7 @@ for tag in set(tag_package_tag.keys()) - tags: package_tag = tag_package_tag[tag] package_tag.state = 'deleted' + changed = True # case 2: in new list but never used before for tag in tags - set(tag_package_tag.keys()): @@ -183,22 +228,30 @@ package_tag_obj = model.PackageTag(package, tag, state) session.add(package_tag_obj) tag_package_tag[tag] =
package_tag_obj + changed = True # case 3: in new list and already used but in deleted state for tag in tags.intersection(set(tag_package_tag_inactive.keys())): state = 'active' package_tag = tag_package_tag[tag] package_tag.state = state + changed = True + + if changed: + package.package_tags[:] = tag_package_tag.values() + return changed - package.package_tags[:] = tag_package_tag.values() def package_membership_list_save( group_dicts: Optional[list[dict[str, Any]]], - package: 'model.Package', context: Context) -> None: + package: 'model.Package', context: Context) -> bool: + ''' + Returns True if any member was changed. + ''' + changed = False - allow_partial_update = context.get("allow_partial_update", False) - if group_dicts is None and allow_partial_update: - return + if group_dicts is None: + return changed capacity = 'public' model = context["model"] @@ -234,19 +287,22 @@ def package_membership_list_save( member_obj = group_member[group] if member_obj and member_obj.state == 'deleted': continue - if authz.has_user_permission_for_group_or_org( - member_obj.group_id, user, 'read'): + if (context.get('ignore_auth') or + authz.has_user_permission_for_group_or_org( + member_obj.group_id, user, 'read')): member_obj.capacity = capacity member_obj.state = 'deleted' session.add(member_obj) + changed = True # Add any new groups for group in groups: member_obj = group_member.get(group) if member_obj and member_obj.state == 'active': continue - if authz.has_user_permission_for_group_or_org( - group.id, user, 'read'): + if (context.get('ignore_auth') or + authz.has_user_permission_for_group_or_org( + group.id, user, 'read')): member_obj = group_member.get(group) if member_obj: member_obj.capacity = capacity @@ -259,14 +315,16 @@ def package_membership_list_save( group_id=group.id, state = 'active') session.add(member_obj) + changed = True + + return changed def relationship_list_save( relationship_dicts: Optional[list[dict[str, Any]]], package: 'model.Package', attr: str, context: Context) -> None: - allow_partial_update = context.get("allow_partial_update", False) - if relationship_dicts is None and allow_partial_update: + if relationship_dicts is None: return model = context["model"] @@ -276,8 +334,8 @@ def relationship_list_save( relationships = [] for relationship_dict in relationship_dicts or []: - obj = d.table_dict_save(relationship_dict, - model.PackageRelationship, context) + obj, _change = d.table_dict_save( + relationship_dict, model.PackageRelationship, context) relationships.append(obj) relationship_list[:] = relationships @@ -287,32 +345,48 @@ def relationship_list_save( relationship_list.append(relationship) def package_dict_save( - pkg_dict: dict[str, Any], context: Context, - include_plugin_data: bool = False) -> 'model.Package': + pkg_dict: dict[str, Any], context: Context, + include_plugin_data: bool = False, + copy_resources: dict[int, int] | tuple[()] = ()) \ + -> tuple['model.Package', Literal['create', 'update', None]]: + ''' + Returns (package_object, change) where change is: + - 'create' if this is a new package object + - 'update' if any fields or resources were changed + - None if no change for an existing package object + ''' + model = context["model"] - package = context.get("package") - if package: - pkg_dict["id"] = package.id Package = model.Package - if 'metadata_created' in pkg_dict: - del pkg_dict['metadata_created'] - if 'metadata_modified' in pkg_dict: - del pkg_dict['metadata_modified'] + # (canada fork only): metadata_created for data migrations + # TODO: 
upstream contrib!! + # if 'metadata_created' in pkg_dict: + # del pkg_dict['metadata_created'] - plugin_data = pkg_dict.pop('plugin_data', None) + plugin_data = pkg_dict.pop('plugin_data', None) if include_plugin_data: pkg_dict['plugin_data'] = copy.deepcopy( plugin_data) if plugin_data else plugin_data - pkg = d.table_dict_save(pkg_dict, Package, context) + extras = { + e['key']: e['value'] for e in pkg_dict.get('extras', []) + } + + pkg, pkg_change = d.table_dict_save( + dict(pkg_dict, extras=extras), Package, context) if not pkg.id: pkg.id = str(uuid.uuid4()) - package_resource_list_save(pkg_dict.get("resources"), pkg, context) - package_tag_list_save(pkg_dict.get("tags"), pkg, context) - package_membership_list_save(pkg_dict.get("groups"), pkg, context) + res_change = package_resource_list_save( + pkg_dict.get("resources"), pkg, context, copy_resources) + tag_change = package_tag_list_save(pkg_dict.get("tags"), pkg, context) + group_change = package_membership_list_save( + pkg_dict.get("groups"), pkg, context) + # (canada fork only): from backport PR#8407, support old package_extras table + extras_change = package_extras_save( + pkg_dict.get("extras"), pkg, context) # relationships are not considered 'part' of the package, so only # process this if the key is provided @@ -323,9 +397,13 @@ def package_dict_save( objects = pkg_dict.get('relationships_as_object') relationship_list_save(objects, pkg, 'relationships_as_object', context) - package_extras_save(pkg_dict.get("extras"), pkg, context) - - return pkg + return ( + pkg, + 'create' if pkg_change == 'create' + # (canada fork only): from backport PR#8407, support old package_extras table + else 'update' if pkg_change or res_change or tag_change or group_change or extras_change + else None + ) def group_member_save(context: Context, group_dict: dict[str, Any], member_table_name: str) -> dict[str, Any]: @@ -336,10 +414,7 @@ def group_member_save(context: Context, group_dict: dict[str, Any], entity_list: list[dict[str, Any]] = group_dict.get(member_table_name, None) if entity_list is None: - if context.get('allow_partial_update', False): - return {'added': [], 'removed': []} - else: - entity_list = [] + return {'added': [], 'removed': []} entities: dict[tuple[str, str], Any] = {} Member = model.Member @@ -407,7 +482,11 @@ def group_dict_save(group_dict: dict[str, Any], context: Context, if group: group_dict["id"] = group.id - group = d.table_dict_save(group_dict, Group, context) + extras = { + e['key']: e['value'] for e in group_dict.get('extras', []) + } + + group, _change = d.table_dict_save(dict(group_dict, extras=extras), Group, context) if not group.id: group.id = str(uuid.uuid4()) @@ -426,10 +505,9 @@ def group_dict_save(group_dict: dict[str, Any], context: Context, } group_users_changed = group_member_save(context, group_dict, 'users') group_groups_changed = group_member_save(context, group_dict, 'groups') - group_tags_changed = group_member_save(context, group_dict, 'tags') log.debug('Group save membership changes - Packages: %r Users: %r ' - 'Groups: %r Tags: %r', pkgs_edited, group_users_changed, - group_groups_changed, group_tags_changed) + 'Groups: %r', pkgs_edited, group_users_changed, + group_groups_changed) extras = group_dict.get("extras", []) new_extras = {i['key'] for i in extras} @@ -467,7 +545,7 @@ def user_dict_save( if 'password' in user_dict and not len(user_dict['password']): del user_dict['password'] - user = d.table_dict_save( + user, _change = d.table_dict_save( user_dict, User, context, @@ -546,7 +624,7 @@ def 
task_status_dict_save(task_status_dict: dict[str, Any], if task_status: task_status_dict["id"] = task_status.id - task_status = d.table_dict_save( + task_status, _change = d.table_dict_save( task_status_dict, model.TaskStatus, context) return task_status @@ -606,7 +684,7 @@ def tag_dict_save(tag_dict: dict[str, Any], context: Context) -> 'model.Tag': tag = context.get('tag') if tag: tag_dict['id'] = tag.id - tag = d.table_dict_save(tag_dict, model.Tag, context) + tag, _change = d.table_dict_save(tag_dict, model.Tag, context) return tag @overload @@ -660,8 +738,8 @@ def resource_view_dict_save(data_dict: dict[str, Any], config[key] = value data_dict['config'] = config - - return d.table_dict_save(data_dict, model.ResourceView, context) + resview, _change = d.table_dict_save(data_dict, model.ResourceView, context) + return resview def api_token_save(data_dict: dict[str, Any], @@ -669,10 +747,11 @@ def api_token_save(data_dict: dict[str, Any], model = context[u"model"] user = model.User.get(data_dict['user']) assert user - return d.table_dict_save( + token, _change = d.table_dict_save( { u"user_id": user.id, u"name": data_dict[u"name"] }, model.ApiToken, context ) + return token diff --git a/ckan/logic/__init__.py b/ckan/logic/__init__.py index 65a5cb7f51c..7b6519ab5bb 100644 --- a/ckan/logic/__init__.py +++ b/ckan/logic/__init__.py @@ -898,6 +898,7 @@ def guard_against_duplicated_email(email: str): def fresh_context( context: Context, + **kwargs: Any, ) -> Context: """ Copy just the minimum fields into a new context for cases in which we reuse the context and @@ -908,5 +909,6 @@ def fresh_context( 'ignore_auth', 'defer_commit', ) if k in context } + new_context.update(kwargs) new_context = cast(Context, new_context) return new_context diff --git a/ckan/logic/action/create.py b/ckan/logic/action/create.py index 8984d218f52..001e08ac35b 100644 --- a/ckan/logic/action/create.py +++ b/ckan/logic/action/create.py @@ -210,7 +210,8 @@ def package_create( data['creator_user_id'] = user_obj.id include_plugin_data = user_obj.sysadmin and plugin_data - pkg = model_save.package_dict_save(data, context, include_plugin_data) + pkg, _change = model_save.package_dict_save( + data, context, include_plugin_data) # Needed to let extensions know the package and resources ids model.Session.flush() @@ -219,6 +220,9 @@ def package_create( for index, resource in enumerate(data['resources']): resource['id'] = pkg.resources[index].id + if not data.get('metadata_modified'): + pkg.metadata_modified = datetime.datetime.utcnow() + context_org_update = context.copy() context_org_update['ignore_auth'] = True context_org_update['defer_commit'] = True @@ -311,9 +315,8 @@ def resource_create(context: Context, for plugin in plugins.PluginImplementations(plugins.IResourceController): plugin.before_resource_create(context, data_dict) - if 'resources' not in pkg_dict: - pkg_dict['resources'] = [] + # (canada fork only): keep mimetype and file size attributes upload = uploader.get_resource_uploader(data_dict) if 'mimetype' not in data_dict: @@ -323,12 +326,20 @@ def resource_create(context: Context, if 'size' not in data_dict: if hasattr(upload, 'filesize'): data_dict['size'] = upload.filesize + # (canada fork only): keep mimetype and file size attributes :END + + original_package = dict(pkg_dict) + + # allow metadata_modified to be updated + pkg_dict.pop('metadata_modified', None) + pkg_dict['resources'] = list(pkg_dict.get('resources', [])) pkg_dict['resources'].append(data_dict) try: context['defer_commit'] = True 
context['use_cache'] = False + context['original_package'] = original_package _get_action('package_update')(context, pkg_dict) context.pop('defer_commit') except ValidationError as e: diff --git a/ckan/logic/action/patch.py b/ckan/logic/action/patch.py index 95cea19c69b..9911966f644 100644 --- a/ckan/logic/action/patch.py +++ b/ckan/logic/action/patch.py @@ -2,11 +2,13 @@ '''API functions for partial updates of existing data in CKAN''' +from ckan import model from ckan.logic import ( get_action as _get_action, check_access as _check_access, get_or_bust as _get_or_bust, - fresh_context as _fresh_context + fresh_context as _fresh_context, + NotFound ) from ckan.types import Context, DataDict from ckan.types.logic import ActionResult @@ -50,6 +52,9 @@ def package_patch( {'id': _get_or_bust(data_dict, 'id')}) patched = dict(package_dict) + # allow metadata_modified to be updated if data has changed + patched.pop('metadata_modified', None) + patched.update(data_dict) patched['id'] = package_dict['id'] return _get_action('package_update')(context, patched) @@ -69,15 +74,27 @@ def resource_patch(context: Context, ''' _check_access('resource_patch', context, data_dict) + resource = model.Resource.get(_get_or_bust(data_dict, 'id')) + if not resource: + raise NotFound('Resource was not found.') + show_context: Context = _fresh_context(context) show_context.update({'for_update': True}) - resource_dict = _get_action('resource_show')( + package_dict = _get_action('package_show')( show_context, - {'id': _get_or_bust(data_dict, 'id')}) + {'id': resource.package_id}) + + if package_dict['resources'][resource.position]['id'] != resource.id: + raise NotFound('Resource was not found.') + + patched = dict(package_dict['resources'][resource.position]) + # allow metadata_modified to be updated if data has changed + patched.pop('metadata_modified', None) - patched = dict(resource_dict) patched.update(data_dict) + update_context = Context(context) + update_context['original_package'] = package_dict return _get_action('resource_update')(context, patched) @@ -106,7 +123,6 @@ def group_patch(context: Context, patched.update(data_dict) patch_context = context.copy() - patch_context['allow_partial_update'] = True return _get_action('group_update')(patch_context, patched) @@ -136,7 +152,6 @@ def organization_patch( patched.update(data_dict) patch_context = context.copy() - patch_context['allow_partial_update'] = True return _get_action('organization_update')(patch_context, patched) diff --git a/ckan/logic/action/update.py b/ckan/logic/action/update.py index ebb6ed1f07f..52eb29edbb5 100644 --- a/ckan/logic/action/update.py +++ b/ckan/logic/action/update.py @@ -8,6 +8,7 @@ import datetime import time import json +from copy import deepcopy from typing import Any, Union, TYPE_CHECKING, cast import ckan.lib.helpers as h @@ -89,10 +90,18 @@ def resource_update(context: Context, data_dict: DataDict) -> ActionResult.Resou _check_access('resource_update', context, data_dict) del context["resource"] - package_id = resource.package.id - package_show_context: Union[Context, Any] = dict(context, for_update=True) - pkg_dict = _get_action('package_show')( - package_show_context, {'id': package_id}) + update_context = Context(context) + pkg_dict = context.get('original_package') + if not pkg_dict: + package_id = resource.package.id + package_show_context: Union[Context, Any] = dict(context, for_update=True) + pkg_dict = _get_action('package_show')( + package_show_context, {'id': package_id}) + + update_context['original_package'] = 
dict(pkg_dict) + # allow dataset metadata_modified to be updated + pkg_dict.pop('metadata_modified', None) + pkg_dict['resources'] = list(pkg_dict['resources']) resources = cast("list[dict[str, Any]]", pkg_dict['resources']) for n, p in enumerate(resources): @@ -242,13 +251,12 @@ def package_update( You must be authorized to edit the dataset and the groups that it belongs to. - .. note:: Update methods may delete parameters not explicitly provided in the - data_dict. If you want to edit only a specific attribute use `package_patch` - instead. + .. note:: Update methods may delete parameters not explicitly provided. + If you want to edit only a specific attribute use + :py:func:`ckan.logic.action.get.package_patch` instead. - It is recommended to call - :py:func:`ckan.logic.action.get.package_show`, make the desired changes to - the result, and then call ``package_update()`` with it. + If the ``resources`` list is omitted the current resources will be left + unchanged. Plugins may change the parameters of this function depending on the value of the dataset's ``type`` attribute, see the @@ -287,18 +295,55 @@ def package_update( package_plugin = lib_plugins.lookup_package_plugin(pkg.type) schema = context.get('schema') or package_plugin.update_package_schema() - if 'api_version' not in context: - # check_data_dict() is deprecated. If the package_plugin has a - # check_data_dict() we'll call it, if it doesn't have the method we'll - # do nothing. - check_data_dict = getattr(package_plugin, 'check_data_dict', None) - if check_data_dict: - try: - package_plugin.check_data_dict(data_dict, schema) - except TypeError: - # Old plugins do not support passing the schema so we need - # to ensure they still work. - package_plugin.check_data_dict(data_dict) + + # try to do less work + return_id_only = context.get('return_id_only', False) + original_package = context.get('original_package') + copy_resources = {} + changed_resources = data_dict.get('resources') + dataset_changed = True + + if not original_package or original_package.get('id') != pkg.id: + show_context = logic.fresh_context(context, ignore_auth=True) + original_package = _get_action('package_show')(show_context, { + 'id': data_dict['id'], + }) + + if data_dict.get('metadata_modified'): + dataset_changed = original_package != data_dict + else: + dataset_changed = dict( + original_package, metadata_modified=None, resources=None + ) != dict(data_dict, metadata_modified=None, resources=None) + if not dataset_changed and original_package.get('resources' + ) == data_dict.get('resources'): + return pkg.id if return_id_only else original_package + + res_deps = [] + if hasattr(package_plugin, 'resource_validation_dependencies'): + res_deps = package_plugin.resource_validation_dependencies( + pkg.type) + + _missing = object() + if 'resources' in data_dict and all( + original_package.get(rd, _missing) == data_dict.get(rd, _missing) + for rd in res_deps): + oids = {r['id']: r for r in original_package['resources']} + copy_resources = { + i: r['position'] + for (i, r) in enumerate(data_dict['resources']) + if r == oids.get(r.get('id'), ()) + } + changed_resources = [ + r for i, r in enumerate(data_dict['resources']) + if i not in copy_resources + ] + + if 'resources' not in data_dict and any( + original_package.get(rd, _missing) != data_dict.get(rd, _missing) + for rd in res_deps): + # re-validate resource fields even if not passed + changed_resources = original_package['resources'] resource_uploads = [] for resource in data_dict.get('resources', []): 
@@ -315,8 +360,17 @@ def package_update( resource_uploads.append(upload) + validate_data = dict(data_dict) + if changed_resources is not None: + validate_data['resources'] = changed_resources + data, errors = lib_plugins.plugin_validate( - package_plugin, context, data_dict, schema, 'package_update') + package_plugin, + context, + validate_data, + schema, + 'package_update' + ) log.debug('package_update validate_errs=%r user=%s package=%s data=%r', errors, user, context['package'].name, data) @@ -324,10 +378,23 @@ def package_update( model.Session.rollback() raise ValidationError(errors) - #avoid revisioning by updating directly - model.Session.query(model.Package).filter_by(id=pkg.id).update( - {"metadata_modified": datetime.datetime.utcnow()}) - model.Session.refresh(pkg) + # merge skipped resources back into validated data + if copy_resources: + assert original_package # make pyright happy + resources = [] + i = 0 + for r in data['resources']: + while i in copy_resources: + resources.append( + original_package['resources'][copy_resources[i]]) + i += 1 + resources.append(r) + i += 1 + while i in copy_resources: + resources.append( + original_package['resources'][copy_resources[i]]) + i += 1 + data['resources'] = resources include_plugin_data = False user_obj = context.get('auth_user_obj') @@ -335,48 +402,67 @@ def package_update( plugin_data = data.get('plugin_data', False) include_plugin_data = user_obj.sysadmin and plugin_data - pkg = model_save.package_dict_save(data, context, include_plugin_data) + nested = model.repo.session.begin_nested() + pkg, change = model_save.package_dict_save( + data, context, include_plugin_data, copy_resources) - context_org_update = context.copy() - context_org_update['ignore_auth'] = True - context_org_update['defer_commit'] = True - _get_action('package_owner_org_update')(context_org_update, - {'id': pkg.id, - 'organization_id': pkg.owner_org}) + if change: + if not data.get('metadata_modified'): + pkg.metadata_modified = datetime.datetime.utcnow() - # Needed to let extensions know the new resources ids - model.Session.flush() - for index, (resource, upload) in enumerate( - zip(data.get('resources', []), resource_uploads)): - resource['id'] = pkg.resources[index].id + context_org_update = logic.fresh_context( + context, ignore_auth=True, defer_commit=True) + _get_action('package_owner_org_update')(context_org_update, + {'id': pkg.id, + 'organization_id': pkg.owner_org}) - upload.upload(resource['id'], uploader.get_max_resource_size()) + # Needed to let extensions know the new resources ids + model.Session.flush() + if changed_resources: + resiter = iter(changed_resources) + uploaditer = iter(resource_uploads) + for i in range(len(pkg.resources)): + if i in copy_resources: + continue + resource = next(resiter) + upload = next(uploaditer) + resource['id'] = pkg.resources[i].id - for item in plugins.PluginImplementations(plugins.IPackageController): - item.edit(pkg) + upload.upload(resource['id'], uploader.get_max_resource_size()) - item.after_dataset_update(context, data) + for item in plugins.PluginImplementations(plugins.IPackageController): + item.edit(pkg) - if not context.get('defer_commit'): - model.repo.commit() + item.after_dataset_update(context, data) - log.debug('Updated object %s' % pkg.name) + nested.commit() + if not context.get('defer_commit'): + model.repo.commit() - return_id_only = context.get('return_id_only', False) + # return ids of changed entities via context because return value is + # the action's "result" data + 
context.setdefault( + 'changed_entities', {} + ).setdefault( + 'packages', set() + ).add(pkg.id) + log.debug('Updated object %s' % pkg.name) + else: + log.debug('No update for object %s' % pkg.name) + nested.rollback() + + if return_id_only: + return pkg.id - # Make sure that a user provided schema is not used on package_show - context.pop('schema', None) + if not change and original_package: + return original_package # we could update the dataset so we should still be able to read it. - context['ignore_auth'] = True - output = data_dict['id'] if return_id_only \ - else _get_action('package_show')( - context, - {'id': data_dict['id'], - "include_plugin_data": include_plugin_data - } - ) - return output + show_context = logic.fresh_context(context, ignore_auth=True) + return _get_action('package_show')(show_context, { + 'id': data_dict['id'], + 'include_plugin_data': include_plugin_data, + }) def package_revise(context: Context, data_dict: DataDict) -> ActionResult.PackageRevise: @@ -503,6 +589,10 @@ def package_revise(context: Context, data_dict: DataDict) -> ActionResult.Packag for unm in unmatched ]}) + revised = deepcopy(orig) + # allow metadata_modified to be updated if data has changed + revised.pop('metadata_modified', None) + if 'filter' in data: orig_id = orig['id'] dfunc.filter_glob_match(orig, data['filter']) @@ -563,7 +653,12 @@ def package_resource_reorder( package_show_context: Union[Context, Any] = dict(context, for_update=True) package_dict = _get_action('package_show')( package_show_context, {'id': id}) - existing_resources = package_dict.get('resources', []) + + # allow metadata_modified to be updated if data has changed + # removes from original_package for comparison in package_update + package_dict.pop('metadata_modified', None) + + existing_resources = list(package_dict.get('resources', [])) ordered_resources = [] for resource_id in order: @@ -924,16 +1019,15 @@ def group_update(context: Context, data_dict: DataDict) -> ActionResult.GroupUpd For further parameters see :py:func:`~ckan.logic.action.create.group_create`. + Callers can choose to not specify packages, users or groups and they will be + left at their existing values. + :param id: the name or id of the group to update :type id: string :returns: the updated group :rtype: dictionary - ''' - # Callers that set context['allow_partial_update'] = True can choose to not - # specify particular keys and they will be left at their existing - # values. This includes: packages, users, groups, tags, extras return _group_or_org_update(context, data_dict) def organization_update( @@ -949,6 +1043,9 @@ def organization_update( For further parameters see :py:func:`~ckan.logic.action.create.organization_create`. + Callers can choose to not specify packages, users or groups and they will be + left at their existing values. + :param id: the name or id of the organization to update :type id: string :param packages: ignored. use @@ -959,9 +1056,6 @@ def organization_update( :rtype: dictionary ''' - # Callers that set context['allow_partial_update'] = True can choose to not - # specify particular keys and they will be left at their existing - # values. 
This includes: users, groups, tags, extras return _group_or_org_update(context, data_dict, is_org=True) def user_update(context: Context, data_dict: DataDict) -> ActionResult.UserUpdate: @@ -1277,7 +1371,7 @@ def package_owner_org_update(context: Context, data_dict: DataDict) -> ActionRes need_update = False else: member_obj.state = 'deleted' - member_obj.save() + model.Session.add(member_obj) # add the organization to member table if org and need_update: diff --git a/ckan/logic/schema.py b/ckan/logic/schema.py index f770d2b6638..28accc2039c 100644 --- a/ckan/logic/schema.py +++ b/ckan/logic/schema.py @@ -36,7 +36,10 @@ def default_resource_schema( isodate: Validator, int_validator: Validator, extras_valid_json: Validator, keep_extras: Validator, resource_id_validator: Validator, - resource_id_does_not_exist: Validator): + resource_id_does_not_exist: Validator, + # (canada fork only): metadata_modified for data migrations + # TODO: upstream contrib!! + ignore_not_sysadmin: Validator): return cast(Schema, { 'id': [ignore_empty, resource_id_validator, resource_id_does_not_exist, unicode_safe], @@ -61,6 +64,9 @@ def default_resource_schema( 'tracking_summary': [ignore_missing], 'datastore_active': [ignore_missing], '__extras': [ignore_missing, extras_valid_json, keep_extras], + # (canada fork only): metadata_modified for data migrations + # TODO: upstream contrib!! + 'metadata_modified': [ignore_not_sysadmin, ignore_missing, isodate], }) @@ -123,8 +129,9 @@ def default_create_package_schema( datasets_with_no_organization_cannot_be_private: Validator, empty: Validator, tag_string_convert: Validator, owner_org_validator: Validator, json_object: Validator, - ignore_not_sysadmin: Validator, license_choices: Validator): - return cast(Schema, { + ignore_not_sysadmin: Validator, license_choices: Validator, + isodate: Validator) -> Schema: + return { '__before': [duplicate_extras_key, ignore], 'id': [empty_if_not_sysadmin, ignore_missing, unicode_safe, package_id_does_not_exist], @@ -162,8 +169,12 @@ def default_create_package_schema( 'name': [ignore_missing, unicode_safe], 'title': [ignore_missing, unicode_safe], '__extras': [ignore], - } - }) + }, + 'metadata_modified': [ignore_not_sysadmin, ignore_missing, isodate], + # (canada fork only): metadata_created for data migrations + # TODO: upstream contrib!! + 'metadata_created': [ignore_not_sysadmin, ignore_missing, isodate], + } @validator_args @@ -213,6 +224,9 @@ def default_show_package_schema(keep_extras: Validator, 'created': [ignore_missing], 'position': [not_empty], 'last_modified': [], + # (canada fork only): metadata_modified for data migrations + # TODO: upstream contrib!! 
+ 'metadata_modified': [], 'cache_last_updated': [], 'package_id': [], 'size': [], diff --git a/ckan/model/tag.py b/ckan/model/tag.py index c047c94f359..18931c195d8 100644 --- a/ckan/model/tag.py +++ b/ckan/model/tag.py @@ -264,8 +264,7 @@ def __init__( def __repr__(self): assert self.package assert self.tag - s = u'' % (self.package.name, self.tag.name) - return s.encode('utf8') + return u'' % (self.package.name, self.tag.name) @classmethod @maintain.deprecated(since="2.9.0") diff --git a/ckan/plugins/interfaces.py b/ckan/plugins/interfaces.py index cc3794569b1..b36660ec90e 100644 --- a/ckan/plugins/interfaces.py +++ b/ckan/plugins/interfaces.py @@ -6,8 +6,10 @@ ''' from __future__ import annotations -from typing import (Any, Callable, Iterable, Mapping, Optional, Sequence, - TYPE_CHECKING, Type, Union) +from typing import ( + Any, Callable, Iterable, Mapping, Optional, Sequence, + TYPE_CHECKING, Type, Union, List, +) from pyutilib.component.core import Interface as _pca_Interface @@ -1308,6 +1310,18 @@ def prepare_resource_blueprint(self, package_type: str, ''' return blueprint + def resource_validation_dependencies( + self, package_type: str) -> List[str]: + ''' + Return a list of dataset field names that affect validation of + resource fields. + + package_update and related actions skip re-validating unchanged + resources unless one of the resource validation dependencies + fields returned here has changed. + ''' + return [] + class IGroupForm(Interface): u''' diff --git a/ckan/tests/lib/dictization/test_dictization.py b/ckan/tests/lib/dictization/test_dictization.py index acbaeb6ba08..2b1f60e9cab 100644 --- a/ckan/tests/lib/dictization/test_dictization.py +++ b/ckan/tests/lib/dictization/test_dictization.py @@ -45,7 +45,7 @@ def test_package_tag_list_save(self): name = u"testpkg18" context = {"model": model, "session": model.Session} pkg_dict = {"name": name} - package = table_dict_save(pkg_dict, model.Package, context) + package, _change = table_dict_save(pkg_dict, model.Package, context) tag_dicts = [{"name": "tag1"}, {"name": "tag2"}] package_tag_list_save(tag_dicts, package, context) @@ -61,7 +61,7 @@ def test_package_tag_list_save_duplicates(self): context = {"model": model, "session": model.Session} pkg_dict = {"name": name} - package = table_dict_save(pkg_dict, model.Package, context) + package, _change = table_dict_save(pkg_dict, model.Package, context) tag_dicts = [{"name": "tag1"}, {"name": "tag1"}] # duplicate package_tag_list_save(tag_dicts, package, context) diff --git a/ckan/tests/lib/dictization/test_model_dictize.py b/ckan/tests/lib/dictization/test_model_dictize.py index 6df2f426391..f1e39b81ac3 100644 --- a/ckan/tests/lib/dictization/test_model_dictize.py +++ b/ckan/tests/lib/dictization/test_model_dictize.py @@ -490,7 +490,7 @@ def test_package_dictize_resource_upload_and_striped(self): context = {"model": model, "session": model.Session} - result = model_save.resource_dict_save(resource, context) + result, _change = model_save.resource_dict_save(resource, context) expected_dict = {u"url": u"some_filename.csv", u"url_type": u"upload"} assert expected_dict["url"] == result.url @@ -506,7 +506,7 @@ def test_package_dictize_resource_upload_with_url_and_striped(self): context = {"model": model, "session": model.Session} - result = model_save.resource_dict_save(resource, context) + result, _change = model_save.resource_dict_save(resource, context) expected_dict = {u"url": u"some_filename.csv", u"url_type": u"upload"} assert expected_dict["url"] == result.url diff --git 
a/ckan/tests/logic/action/test_update.py b/ckan/tests/logic/action/test_update.py index ee00190abab..18011c2df30 100644 --- a/ckan/tests/logic/action/test_update.py +++ b/ckan/tests/logic/action/test_update.py @@ -397,6 +397,9 @@ def test_update_dataset_cant_change_type(self): == "dataset" ) + +@pytest.mark.usefixtures("non_clean_db") +class TestOrganizationUpdate(object): def test_update_organization_cant_change_type(self): user = factories.User() context = {"user": user["name"]} @@ -411,6 +414,62 @@ def test_update_organization_cant_change_type(self): type="ragtagband", ) + def test_extras_removed_if_missing(self): + org = factories.Organization(extras=[{'key': 'e', 'value': 'v'}]) + assert org['extras'] == [{'key': 'e', 'value': 'v'}] + updated_org = helpers.call_action( + "organization_update", + id=org["name"], + ) + + assert updated_org['extras'] == [] + + def test_groups_unchanged_if_missing(self): + group = factories.Group() + org = factories.Organization(groups=[{'name': group['name']}]) + assert org['groups'][0]['name'] == group['name'] + updated_org = helpers.call_action( + "organization_update", + id=org["name"], + ) + + assert updated_org['groups'][0]['name'] == group['name'] + + def test_groups_removed_if_empty_list(self): + group = factories.Group() + org = factories.Organization(groups=[{'name': group['name']}]) + assert org['groups'][0]['name'] == group['name'] + updated_org = helpers.call_action( + "organization_update", + id=org["name"], + groups=[], + ) + + assert updated_org['groups'] == [] + + def test_users_unchanged_if_missing(self): + user = factories.User() + org = factories.Organization(users=[user]) + assert user['name'] in (g['name'] for g in org['users']) + updated_org = helpers.call_action( + "organization_update", + id=org["name"], + ) + + assert user['name'] in (g['name'] for g in updated_org['users']) + + def test_users_removed_if_empty_list(self): + user = factories.User() + org = factories.Organization(users=[user]) + assert user['name'] in (g['name'] for g in org['users']) + updated_org = helpers.call_action( + "organization_update", + id=org["name"], + users=[] + ) + + assert updated_org['users'] == [] + @pytest.mark.usefixtures("non_clean_db") class TestDatasetUpdate(object): @@ -426,148 +485,131 @@ def test_name(self): dataset = factories.Dataset(user=user) stub = factories.Dataset.stub() - dataset_ = helpers.call_action( - "package_update", id=dataset["id"], name=stub.name - ) - - assert dataset_["name"] == stub.name - assert ( - helpers.call_action("package_show", id=dataset["id"])["name"] - == stub.name - ) + with freeze_time("2020-02-25 12:00:00"): + updated = helpers.call_action( + "package_update", id=dataset["id"], name=stub.name + ) + assert updated["name"] == stub.name + assert updated["metadata_modified"] == "2020-02-25T12:00:00" + assert updated == helpers.call_action("package_show", id=dataset["id"]) def test_title(self): user = factories.User() dataset = factories.Dataset(user=user) - dataset_ = helpers.call_action( - "package_update", id=dataset["id"], title="New Title" - ) - - assert dataset_["title"] == "New Title" - assert ( - helpers.call_action("package_show", id=dataset["id"])["title"] - == "New Title" - ) + with freeze_time("2020-02-25 12:00:00"): + updated = helpers.call_action( + "package_update", id=dataset["id"], title="New Title" + ) + assert updated["title"] == "New Title" + assert updated["metadata_modified"] == "2020-02-25T12:00:00" + assert updated == helpers.call_action("package_show", id=dataset["id"]) def 
test_extras(self): user = factories.User() dataset = factories.Dataset(user=user) - dataset_ = helpers.call_action( - "package_update", - id=dataset["id"], - extras=[{"key": "original media", "value": '"book"'}], - ) - - assert dataset_["extras"][0]["key"] == "original media" - assert dataset_["extras"][0]["value"] == '"book"' - dataset_ = helpers.call_action("package_show", id=dataset["id"]) - assert dataset_["extras"][0]["key"] == "original media" - assert dataset_["extras"][0]["value"] == '"book"' + with freeze_time("2020-02-25 12:00:00"): + updated = helpers.call_action( + "package_update", + id=dataset["id"], + extras=[{"key": "original media", "value": '"book"'}], + ) + assert updated["extras"][0]["key"] == "original media" + assert updated["extras"][0]["value"] == '"book"' + assert updated["metadata_modified"] == "2020-02-25T12:00:00" + assert updated == helpers.call_action("package_show", id=dataset["id"]) def test_extra_can_be_restored_after_deletion(self): user = factories.User() dataset = factories.Dataset(user=user) - dataset_ = helpers.call_action( - "package_update", - id=dataset["id"], - extras=[ - {"key": "old attribute", "value": "value"}, - {"key": "original media", "value": '"book"'}, - ], - ) - - assert len(dataset_["extras"]) == 2 - - dataset_ = helpers.call_action( - "package_update", - id=dataset["id"], - extras=[], - ) - - assert dataset_["extras"] == [] + with freeze_time("2020-02-25 12:00:00"): + updated = helpers.call_action( + "package_update", + id=dataset["id"], + extras=[ + {"key": "old attribute", "value": "value"}, + {"key": "original media", "value": '"book"'}, + ], + ) + assert len(updated["extras"]) == 2 + assert updated["metadata_modified"] == "2020-02-25T12:00:00" - dataset_ = helpers.call_action( - "package_update", - id=dataset["id"], - extras=[ - {"key": "original media", "value": '"book"'}, - {"key": "new attribute", "value": "value"}, - ], - ) + with freeze_time("2023-03-23 13:00:00"): + updated2 = helpers.call_action( + "package_update", + id=dataset["id"], + extras=[], + ) + assert updated2["extras"] == [] + assert updated2["metadata_modified"] == "2023-03-23T13:00:00" - assert len(dataset_["extras"]) == 2 + with freeze_time("2024-04-24 14:00:00"): + updated3 = helpers.call_action( + "package_update", + id=dataset["id"], + extras=[ + {"key": "original media", "value": '"book"'}, + {"key": "new attribute", "value": "value"}, + ], + ) + assert len(updated3["extras"]) == 2 + assert updated3["metadata_modified"] == "2024-04-24T14:00:00" def test_license(self): user = factories.User() dataset = factories.Dataset(user=user) - dataset_ = helpers.call_action( - "package_update", id=dataset["id"], license_id="other-open" - ) - - assert dataset_["license_id"] == "other-open" - dataset_ = helpers.call_action("package_show", id=dataset["id"]) - assert dataset_["license_id"] == "other-open" + with freeze_time("2020-02-25 12:00:00"): + updated = helpers.call_action( + "package_update", id=dataset["id"], license_id="other-open" + ) + assert updated["license_id"] == "other-open" + assert updated["metadata_modified"] == "2020-02-25T12:00:00" + assert updated == helpers.call_action("package_show", id=dataset["id"]) def test_notes(self): user = factories.User() dataset = factories.Dataset(user=user) - dataset_ = helpers.call_action( - "package_update", id=dataset["id"], notes="some notes" - ) - - assert dataset_["notes"] == "some notes" - dataset_ = helpers.call_action("package_show", id=dataset["id"]) - assert dataset_["notes"] == "some notes" + with 
freeze_time("2020-02-25 12:00:00"): + updated = helpers.call_action( + "package_update", id=dataset["id"], notes="some notes" + ) + assert updated["notes"] == "some notes" + assert updated["metadata_modified"] == "2020-02-25T12:00:00" + assert updated == helpers.call_action("package_show", id=dataset["id"]) def test_resources(self): user = factories.User() dataset = factories.Dataset(user=user) - dataset_ = helpers.call_action( - "package_update", - id=dataset["id"], - resources=[ - { - "alt_url": "alt123", - "description": "Full text.", - "somekey": "somevalue", # this is how to do resource extras - "extras": {"someotherkey": "alt234"}, # this isn't - "format": "plain text", - "hash": "abc123", - "position": 0, - "url": "http://datahub.io/download/", - }, - { - "description": "Index of the novel", - "format": "JSON", - "position": 1, - "url": "http://datahub.io/index.json", - }, - ], - ) - - resources_ = dataset_["resources"] - assert resources_[0]["alt_url"] == "alt123" - assert resources_[0]["description"] == "Full text." - assert resources_[0]["somekey"] == "somevalue" - assert "extras" not in resources_[0] - assert "someotherkey" not in resources_[0] - assert resources_[0]["format"] == "plain text" - assert resources_[0]["hash"] == "abc123" - assert resources_[0]["position"] == 0 - assert resources_[0]["url"] == "http://datahub.io/download/" - assert resources_[1]["description"] == "Index of the novel" - assert resources_[1]["format"] == "JSON" - assert resources_[1]["url"] == "http://datahub.io/index.json" - assert resources_[1]["position"] == 1 - resources_ = helpers.call_action("package_show", id=dataset["id"])[ - "resources" - ] + with freeze_time("2020-02-25 12:00:00"): + updated = helpers.call_action( + "package_update", + id=dataset["id"], + resources=[ + { + "alt_url": "alt123", + "description": "Full text.", + "somekey": "somevalue", # correct resource extras + "extras": {"someotherkey": "alt234"}, # this isn't + "format": "plain text", + "hash": "abc123", + "position": 0, + "url": "http://datahub.io/download/", + }, + { + "description": "Index of the novel", + "format": "JSON", + "position": 1, + "url": "http://datahub.io/index.json", + }, + ], + ) + assert updated["metadata_modified"] == "2020-02-25T12:00:00" + resources_ = updated["resources"] assert resources_[0]["alt_url"] == "alt123" assert resources_[0]["description"] == "Full text." 
assert resources_[0]["somekey"] == "somevalue" @@ -577,10 +619,14 @@ def test_resources(self): assert resources_[0]["hash"] == "abc123" assert resources_[0]["position"] == 0 assert resources_[0]["url"] == "http://datahub.io/download/" + assert resources_[0]["metadata_modified"] == "2020-02-25T12:00:00" assert resources_[1]["description"] == "Index of the novel" assert resources_[1]["format"] == "JSON" assert resources_[1]["url"] == "http://datahub.io/index.json" assert resources_[1]["position"] == 1 + assert resources_[1]["metadata_modified"] == "2020-02-25T12:00:00" + assert helpers.call_action("package_show", id=dataset["id"])[ + "resources"] == resources_ def test_invalid_characters_in_resource_id(self): user = factories.User() @@ -604,17 +650,16 @@ def test_tags(self): dataset = factories.Dataset(user=user) tag1 = factories.Tag.stub().name tag2 = factories.Tag.stub().name - dataset_ = helpers.call_action( - "package_update", - id=dataset["id"], - tags=[{"name": tag1}, {"name": tag2}], - ) - - tag_names = sorted(tag_dict["name"] for tag_dict in dataset_["tags"]) - assert tag_names == sorted([tag1, tag2]) - dataset_ = helpers.call_action("package_show", id=dataset["id"]) - tag_names = sorted(tag_dict["name"] for tag_dict in dataset_["tags"]) + with freeze_time("2020-02-25 12:00:00"): + updated = helpers.call_action( + "package_update", + id=dataset["id"], + tags=[{"name": tag1}, {"name": tag2}], + ) + assert updated["metadata_modified"] == "2020-02-25T12:00:00" + tag_names = sorted(tag_dict["name"] for tag_dict in updated["tags"]) assert tag_names == sorted([tag1, tag2]) + assert updated == helpers.call_action("package_show", id=dataset["id"]) def test_return_id_only(self): user = factories.User() @@ -629,6 +674,104 @@ def test_return_id_only(self): assert updated_dataset == dataset["id"] + def test_resources_unchanged_if_missing(self): + res = factories.Resource() + + with freeze_time("2020-02-25 12:00:00"): + updated_dataset = helpers.call_action( + 'package_update', + id=res['package_id'], + notes='hi', + ) + assert updated_dataset["metadata_modified"] == "2020-02-25T12:00:00" + assert updated_dataset['resources'] == [res] + + def test_resources_removed_if_empty_list(self): + res = factories.Resource() + + with freeze_time("2020-02-25 12:00:00"): + updated_dataset = helpers.call_action( + 'package_update', + id=res['package_id'], + notes='hi', + resources=[], + ) + assert updated_dataset["metadata_modified"] == "2020-02-25T12:00:00" + assert updated_dataset['resources'] == [] + + def test_tags_removed_if_missing(self): + dataset = factories.Dataset(tag_string='tagone,tagtwo') + assert dataset['tags'] + + with freeze_time("2020-02-25 12:00:00"): + updated_dataset = helpers.call_action( + 'package_update', + id=dataset['id'], + ) + assert updated_dataset["metadata_modified"] == "2020-02-25T12:00:00" + assert updated_dataset['tags'] == [] + + def test_groups_unchanged_if_missing(self): + group = factories.Group() + del group['users'] # avoid validation error + dataset = factories.Dataset(groups=[group]) + assert dataset['groups'] + + with freeze_time("2020-02-25 12:00:00"): + updated_dataset = helpers.call_action( + 'package_update', + id=dataset['id'], + ) + assert updated_dataset["metadata_modified"] == "2020-02-25T12:00:00" + assert updated_dataset['groups'] == dataset['groups'] + + def test_groups_removed_if_empty_list(self): + group = factories.Group() + del group['users'] # avoid validation error + dataset = factories.Dataset(groups=[group]) + assert dataset['groups'] + + with 
freeze_time("2020-02-25 12:00:00"): + updated_dataset = helpers.call_action( + 'package_update', + id=dataset['id'], + groups=[], + ) + assert updated_dataset["metadata_modified"] == "2020-02-25T12:00:00" + assert updated_dataset['groups'] == [] + + def test_metadata_modified_unchanged_on_no_changes(self): + with freeze_time("2020-02-25 12:00:00"): + dataset = factories.Dataset() + + # allow metadata_modified update + dataset.pop('metadata_modified') + updated_dataset = helpers.call_action('package_update', **dataset) + assert updated_dataset["metadata_modified"] == "2020-02-25T12:00:00" + + # unchanged even if the json differs but results in the same + # values in the db + assert dataset.pop('owner_org') is None + assert dataset.pop('tags') == [] + assert dataset.pop('resources') == [] + updated_dataset = helpers.call_action('package_update', **dataset) + assert updated_dataset["metadata_modified"] == "2020-02-25T12:00:00" + + def test_metadata_modified_can_be_updated(self): + dataset = factories.Dataset() + updated_dataset = helpers.call_action('package_update', **dict( + dataset, metadata_modified="2020-02-25T12:00:00" + )) + assert updated_dataset["metadata_modified"] == "2020-02-25T12:00:00" + + # but not for normal users + user = factories.User() + context = {"user": user["name"], "ignore_auth": False} + unchanged = helpers.call_action('package_update', context, **dict( + dataset, metadata_modified="2021-01-01T11:01:00" + )) + assert unchanged["metadata_modified"] == "2020-02-25T12:00:00" + @pytest.mark.ckan_config("ckan.views.default_views", "") @pytest.mark.ckan_config("ckan.plugins", "image_view") @@ -1272,6 +1415,8 @@ def test_edit_metadata_updates_metadata_modified_field(self): description="New Description", ) assert resource["metadata_modified"] == "2020-02-25T12:00:00" + package = helpers.call_action('package_show', id=resource['package_id']) + assert package['metadata_modified'] == '2020-02-25T12:00:00' def test_same_values_dont_update_metadata_modified_field(self): dataset = factories.Dataset() @@ -1287,6 +1432,10 @@ def test_same_values_dont_update_metadata_modified_field(self): resource["metadata_modified"] == datetime.datetime.utcnow().isoformat() ) + package = helpers.call_action( + 'package_show', id=resource['package_id']) + assert package['metadata_modified'] == datetime.datetime.utcnow( + ).isoformat() with freeze_time("2020-02-25 12:00:00"): resource = helpers.call_action( @@ -1294,13 +1443,15 @@ def test_same_values_dont_update_metadata_modified_field(self): id=resource["id"], description="Test", some_custom_field="test", - url="http://link.to.some.data", # Default Value from Factory + url="http://link.to.some.data", ) assert ( resource["metadata_modified"] != datetime.datetime.utcnow().isoformat() ) assert resource["metadata_modified"] == "1987-03-04T23:30:00" + package = helpers.call_action('package_show', id=resource['package_id']) + assert package['metadata_modified'] == '1987-03-04T23:30:00' def test_new_keys_update_metadata_modified_field(self): dataset = factories.Dataset() @@ -1313,6 +1464,10 @@ def test_new_keys_update_metadata_modified_field(self): resource["metadata_modified"] == datetime.datetime.utcnow().isoformat() ) + package = helpers.call_action( + 'package_show', id=resource['package_id']) + assert package['metadata_modified'] == datetime.datetime.utcnow( + ).isoformat() with freeze_time("2020-02-25 12:00:00"): resource = helpers.call_action( @@ -1320,13 +1475,13 @@ def test_new_keys_update_metadata_modified_field(self): id=resource["id"], 
description="test", some_custom_field="test", - url="http://link.to.some.data", # default value from factory - ) - assert ( - resource["metadata_modified"] - == datetime.datetime.utcnow().isoformat() + url="http://link.to.some.data", ) - assert resource["metadata_modified"] == "2020-02-25T12:00:00" + now = datetime.datetime.utcnow().isoformat() + assert resource["metadata_modified"] == now + package = helpers.call_action( + 'package_show', id=resource['package_id']) + assert package['metadata_modified'] == now def test_remove_keys_update_metadata_modified_field(self): dataset = factories.Dataset() @@ -1341,6 +1496,10 @@ def test_remove_keys_update_metadata_modified_field(self): resource["metadata_modified"] == datetime.datetime.utcnow().isoformat() ) + package = helpers.call_action( + 'package_show', id=resource['package_id']) + assert package['metadata_modified'] == datetime.datetime.utcnow( + ).isoformat() with freeze_time("2020-02-25 12:00:00"): resource = helpers.call_action( @@ -1349,11 +1508,11 @@ def test_remove_keys_update_metadata_modified_field(self): description="test", url="http://link.to.some.data", # default value from factory ) - assert ( - resource["metadata_modified"] - == datetime.datetime.utcnow().isoformat() - ) - assert resource["metadata_modified"] == "2020-02-25T12:00:00" + now = datetime.datetime.utcnow().isoformat() + assert resource["metadata_modified"] == now + package = helpers.call_action( + 'package_show', id=resource['package_id']) + assert package['metadata_modified'] == now def test_update_keys_update_metadata_modified_field(self): dataset = factories.Dataset() @@ -1368,6 +1527,10 @@ def test_update_keys_update_metadata_modified_field(self): resource["metadata_modified"] == datetime.datetime.utcnow().isoformat() ) + package = helpers.call_action( + 'package_show', id=resource['package_id']) + assert package['metadata_modified'] == datetime.datetime.utcnow( + ).isoformat() with freeze_time("2020-02-25 12:00:00"): resource = helpers.call_action( @@ -1377,11 +1540,11 @@ def test_update_keys_update_metadata_modified_field(self): some_custom_field="test2", url="http://link.to.some.data", # default value from factory ) - assert ( - resource["metadata_modified"] - == datetime.datetime.utcnow().isoformat() - ) - assert resource["metadata_modified"] == "2020-02-25T12:00:00" + now = datetime.datetime.utcnow().isoformat() + assert resource["metadata_modified"] == now + package = helpers.call_action( + 'package_show', id=resource['package_id']) + assert package['metadata_modified'] == now def test_resource_update_for_update(self): @@ -1516,6 +1679,62 @@ def test_group_update_cant_change_type(self): type="favouritecolour", ) + def test_extras_removed_if_missing(self): + group = factories.Group(extras=[{'key': 'e', 'value': 'v'}]) + assert group['extras'] == [{'key': 'e', 'value': 'v'}] + updated_group = helpers.call_action( + "group_update", + id=group["name"], + ) + + assert updated_group['extras'] == [] + + def test_groups_unchanged_if_missing(self): + group1 = factories.Group() + group2 = factories.Group(groups=[{'name': group1['name']}]) + assert group2['groups'][0]['name'] == group1['name'] + updated_group = helpers.call_action( + "group_update", + id=group2["name"], + ) + + assert updated_group['groups'][0]['name'] == group1['name'] + + def test_groups_removed_if_empty_list(self): + group1 = factories.Group() + group2 = factories.Group(groups=[{'name': group1['name']}]) + assert group2['groups'][0]['name'] == group1['name'] + updated_group = helpers.call_action( + 
"group_update", + id=group2["name"], + groups=[], + ) + + assert updated_group['groups'] == [] + + def test_users_unchanged_if_missing(self): + user = factories.User() + group = factories.Group(users=[user]) + assert user['name'] in (g['name'] for g in group['users']) + updated_group = helpers.call_action( + "group_update", + id=group["name"], + ) + + assert user['name'] in (g['name'] for g in updated_group['users']) + + def test_users_removed_if_empty_list(self): + user = factories.User() + group = factories.Group(users=[user]) + assert user['name'] in (g['name'] for g in group['users']) + updated_group = helpers.call_action( + "group_update", + id=group["name"], + users=[] + ) + + assert updated_group['users'] == [] + @pytest.mark.usefixtures("non_clean_db") class TestPackageOwnerOrgUpdate(object): diff --git a/ckan/types/__init__.py b/ckan/types/__init__.py index 15201802e25..779acf85b13 100644 --- a/ckan/types/__init__.py +++ b/ckan/types/__init__.py @@ -93,7 +93,6 @@ class Context(TypedDict, total=False): reset_password: bool save: bool active: bool - allow_partial_update: bool for_update: bool for_edit: bool for_view: bool @@ -129,6 +128,8 @@ class Context(TypedDict, total=False): table_names: list[str] plugin_data: dict[Any, Any] + original_package: dict[str, Any] + changed_entities: dict[str, set[str]] class AuthResult(TypedDict, total=False): diff --git a/ckan/views/api.py b/ckan/views/api.py index cf0d8e97778..1c8f8e91b11 100644 --- a/ckan/views/api.py +++ b/ckan/views/api.py @@ -231,6 +231,8 @@ def action(logic_function: str, ver: int = API_DEFAULT_VERSION) -> Response: * ``success``: A boolean indicating if the request was successful or an exception was raised * ``result``: The output of the action, generally an Object or an Array + * ``changed_entities``: types and ids of entities created, updated + or deleted as a result of this action (for some actions) ''' # Check if action exists @@ -287,8 +289,13 @@ def action(logic_function: str, ver: int = API_DEFAULT_VERSION) -> Response: # Call the action function, catch any exception try: result = function(context, request_data) - return_dict[u'success'] = True - return_dict[u'result'] = result + return_dict['success'] = True + return_dict['result'] = result + if 'changed_entities' in context: + return_dict['changed_entities'] = { + typ: sorted(ids) + for typ, ids in context['changed_entities'].items() + } except DataError as e: log.info(u'Format incorrect (Action API): %s - %s', e.error, request_data) diff --git a/ckan/views/dataset.py b/ckan/views/dataset.py index d05e28f480b..df16c2bfdf7 100644 --- a/ckan/views/dataset.py +++ b/ckan/views/dataset.py @@ -554,8 +554,6 @@ def post(self, package_type: str) -> Union[Response, str]: return base.abort(400, _(u'Integrity Error')) try: if ckan_phase: - # prevent clearing of groups etc - context[u'allow_partial_update'] = True # sort the tags if u'tag_string' in data_dict: data_dict[u'tags'] = _tag_string_to_list( @@ -755,8 +753,6 @@ def post(self, package_type: str, id: str) -> Union[Response, str]: return base.abort(400, _(u'Integrity Error')) try: if u'_ckan_phase' in data_dict: - # we allow partial updates to not destroy existing resources - context[u'allow_partial_update'] = True if u'tag_string' in data_dict: data_dict[u'tags'] = _tag_string_to_list( data_dict[u'tag_string'] diff --git a/ckan/views/group.py b/ckan/views/group.py index f0df1183cb5..d1c0ea93647 100644 --- a/ckan/views/group.py +++ b/ckan/views/group.py @@ -1033,7 +1033,6 @@ def post(self, except dict_fns.DataError: 
base.abort(400, _(u'Integrity Error')) data_dict['id'] = context['id'] - context['allow_partial_update'] = True try: group = _action(u'group_update')(context, data_dict) if id != group['name']: diff --git a/ckanext/activity/subscriptions.py b/ckanext/activity/subscriptions.py index 5dd20000ecc..d2b1a8bff28 100644 --- a/ckanext/activity/subscriptions.py +++ b/ckanext/activity/subscriptions.py @@ -20,7 +20,7 @@ def get_subscriptions() -> types.SignalMapping: {"sender": "bulk_update_private", "receiver": bulk_changed}, {"sender": "bulk_update_delete", "receiver": bulk_changed}, {"sender": "package_create", "receiver": package_changed}, - {"sender": "package_update", "receiver": package_changed}, + {"sender": "package_update", "receiver": package_updated}, {"sender": "package_delete", "receiver": package_changed}, { "sender": "resource_view_create", @@ -119,6 +119,16 @@ def package_changed(sender: str, **kwargs: Any): context["session"].commit() +def package_updated(sender: str, **kwargs: Any): + # package_update includes a context value indicating whether + # a real update occurred, if not, skip creating the activity + context = kwargs["context"] + if not context.get('changed_entities', {}).get('packages', set()): + return + + package_changed(sender, **kwargs) + + # action, context, data_dict, result def resource_view_changed(sender: str, **kwargs: Any): for key in ("result", "context", "data_dict"): diff --git a/ckanext/activity/tests/logic/test_action.py b/ckanext/activity/tests/logic/test_action.py index 099ea15b5c4..aa9458cb35f 100644 --- a/ckanext/activity/tests/logic/test_action.py +++ b/ckanext/activity/tests/logic/test_action.py @@ -130,6 +130,21 @@ def test_change_dataset(self): assert activities[1]["activity_type"] == "new package" assert activities[1]["data"]["package"]["title"] == original_title + def test_no_change_dataset(self): + user = factories.User() + _clear_activities() + dataset = factories.Dataset(user=user) + helpers.call_action( + "package_update", context={"user": user["name"]}, **dataset + ) + + activities = helpers.call_action( + "package_activity_list", id=dataset["id"] + ) + assert [activity["activity_type"] for activity in activities] == [ + "new package", + ] + def test_change_dataset_add_extra(self): user = factories.User() dataset = factories.Dataset(user=user) @@ -322,7 +337,7 @@ def test_private_dataset_has_activity(self): dataset = factories.Dataset( private=True, owner_org=org["id"], user=user ) - dataset["tags"] = [] + dataset["title"] = 'changed title' helpers.call_action( "package_update", context={"user": user["name"]}, **dataset ) diff --git a/ckanext/example_idatasetform/plugin.py b/ckanext/example_idatasetform/plugin.py index 8d8c2e81ce4..d3269a22b70 100644 --- a/ckanext/example_idatasetform/plugin.py +++ b/ckanext/example_idatasetform/plugin.py @@ -2,8 +2,11 @@ from __future__ import annotations from ckan.common import CKANConfig -from typing import Any, cast -from ckan.types import Context, Schema, Validator, ValidatorFactory +from typing import Any, cast, List +from ckan.types import ( + Context, Schema, Validator, ValidatorFactory, FlattenKey, FlattenErrorDict, + FlattenDataDict, +) import logging import ckan.plugins as plugins @@ -46,8 +49,18 @@ def country_codes_helper(): return None -class ExampleIDatasetFormPlugin(plugins.SingletonPlugin, - tk.DefaultDatasetForm): +def draft_todo_validator( + key: FlattenKey, data: FlattenDataDict, + errors: FlattenErrorDict, context: Context) -> None: + '''Error if the dataset state is not set to draft''' + 
if not data.get(('state',), '').startswith('draft'): + errors[key].append('must be blank when published') + + +class ExampleIDatasetFormPlugin( + tk.DefaultDatasetForm, + plugins.SingletonPlugin, +): '''An example IDatasetForm CKAN plugin. Uses a tag vocabulary to add a custom metadata field to datasets. @@ -104,7 +117,9 @@ def _modify_package_schema(self, schema: Schema): }) # Add our custom_resource_text metadata field to the schema cast(Schema, schema['resources']).update({ - 'custom_resource_text' : [ tk.get_validator('ignore_missing') ] + 'custom_resource_text' : [ tk.get_validator('ignore_missing') ], + 'draft_only_todo': [ + tk.get_validator('ignore_empty'), draft_todo_validator], }) return schema @@ -150,6 +165,9 @@ def show_package_schema(self) -> Schema: }) return schema + def resource_validation_dependencies(self, package_type: str) -> List[str]: + return ['state'] + # These methods just record how many times they're called, for testing # purposes. # TODO: It might be better to test that custom templates returned by diff --git a/ckanext/example_idatasetform/tests/test_example_idatasetform.py b/ckanext/example_idatasetform/tests/test_example_idatasetform.py index 4b3f6d3fd57..f3950525508 100644 --- a/ckanext/example_idatasetform/tests/test_example_idatasetform.py +++ b/ckanext/example_idatasetform/tests/test_example_idatasetform.py @@ -458,6 +458,31 @@ def test_package_show(self, test_request_context): == result["resources"][0]["custom_resource_text"] ) + def test_resource_validation_dependencies(self): + helpers.call_action( + "package_create", + name='res-val-dep', + state='draft', + resources=[{ + 'url': 'http://example.com', + 'draft_only_todo': 'need to fix this', + }, { + 'url': 'http://example.com', + 'draft_only_todo': 'need to fix this one too', + }], + ) + with pytest.raises(plugins.toolkit.ValidationError) as err: + helpers.call_action( + "package_update", + name='res-val-dep', + state='active', + ) + assert err.value.error_dict == { + 'resources': [ + {'draft_only_todo': ['must be blank when published']}, + {'draft_only_todo': ['must be blank when published']}, + ]} + @pytest.mark.ckan_config("ckan.plugins", u"example_idatasetform") @pytest.mark.usefixtures("clean_db", "clean_index", "with_plugins") diff --git a/ckanext/example_iuploader/test/test_plugin.py b/ckanext/example_iuploader/test/test_plugin.py index 921239abe82..4d23d8ea503 100644 --- a/ckanext/example_iuploader/test/test_plugin.py +++ b/ckanext/example_iuploader/test/test_plugin.py @@ -59,7 +59,7 @@ def test_resource_download_iuploader_called( "save": "go-metadata", "upload": ("README.rst", CONTENT) }) - assert mock_get_path.call_count == 3 + assert mock_get_path.call_count == 1 assert isinstance(mock_get_path.call_args[0][0], plugin.ResourceUpload) pkg = model.Package.by_name(dataset_name) assert mock_get_path.call_args[0][1] == pkg.resources[0].id
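
The Action API change in ckan/views/api.py above adds a "changed_entities" mapping to the response envelope whenever the called action records one in its context. The following is a minimal client-side sketch of how that envelope could be consumed; the site URL, API token and dataset id are placeholders and not part of this patch, and "requests" is an assumed third-party dependency.

import requests

CKAN_URL = "http://localhost:5000"   # assumed local CKAN instance
API_TOKEN = "<api-token>"            # placeholder credential

response = requests.post(
    f"{CKAN_URL}/api/action/package_update",
    json={"id": "example-dataset", "notes": "updated description"},
    headers={"Authorization": API_TOKEN},
)
body = response.json()
assert body["success"]

# "changed_entities" is only present when the action reports it; an empty or
# missing "packages" entry means nothing really changed, so no new activity
# was recorded and metadata_modified was left untouched.
changed = body.get("changed_entities", {}).get("packages", [])
if changed:
    print("real update applied to:", changed)
else:
    print("no-op update: dataset was already in this state")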
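
Code running inside CKAN can read the same information from the context passed to the action, which is how the package_updated subscriber in ckanext/activity/subscriptions.py decides whether to create an activity. A small sketch under the same assumption, with auth handling simplified via ignore_auth; the wrapper function name is illustrative only.

import ckan.plugins.toolkit as tk

def update_and_report(dataset_dict):
    # auth handling simplified for the sketch; real callers usually
    # supply a user in the context instead of ignore_auth
    context = {"ignore_auth": True}
    result = tk.get_action("package_update")(context, dataset_dict)

    # the action adds the package id here only when a real change was saved
    changed = context.get("changed_entities", {}).get("packages", set())
    if result["id"] in changed:
        # a real change: activity created, metadata_modified updated
        return result, True
    # no-op: existing resources, groups etc. were left untouched
    return result, False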