From cc625cc6d3e9fda50fc50b83253bc2792deb5787 Mon Sep 17 00:00:00 2001 From: Andrew Davison Date: Fri, 18 Apr 2025 16:45:42 +0200 Subject: [PATCH] Internalise code for tag management Since the version of django-tagging currently available on PyPI doesn't work with Django 5, and PyPI doesn't allow dependencies that aren't in PyPI, I have moved parts of the django-tagging code to within the django_store module, as allowed by the BSD license of django-tagging. The license notice from django-tagging has been appended to the LICENSE file of Sumatra. --- LICENSE | 61 +++++- README.rst | 2 +- doc/authors.txt | 8 + pyproject.toml | 3 +- sumatra/recordstore/django_store/__init__.py | 11 +- .../django_store/migrations/0001_initial.py | 5 +- .../migrations/0002_tag_taggeditem.py | 41 ++++ sumatra/recordstore/django_store/models.py | 17 +- sumatra/recordstore/django_store/tagging.py | 206 ++++++++++++++++++ .../recordstore/django_store/tagging_utils.py | 133 +++++++++++ sumatra/web/views.py | 3 +- 11 files changed, 465 insertions(+), 25 deletions(-) create mode 100644 sumatra/recordstore/django_store/migrations/0002_tag_taggeditem.py create mode 100644 sumatra/recordstore/django_store/tagging.py create mode 100644 sumatra/recordstore/django_store/tagging_utils.py diff --git a/LICENSE b/LICENSE index f3d60970..89d2fbf0 100644 --- a/LICENSE +++ b/LICENSE @@ -7,4 +7,63 @@ Redistribution and use in source and binary forms, with or without modification, 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +Some of the Sumatra code is taken from/based on the django-tagging package: + +Django Tagging +-------------- + +Copyright (c) 2007-2015, Jonathan Buchanan, Julien Fache + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +Initially based on code from James Bennett's Cab: + +Cab +--- + +Copyright (c) 2007, James Bennett +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. 
+ * Neither the name of the author nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.rst b/README.rst index fe8a4cbf..7e5cf4be 100644 --- a/README.rst +++ b/README.rst @@ -50,7 +50,7 @@ Requirements ============ Sumatra requires Python version 3.9 or later The web interface requires -Django (>= 4.2) and the django-tagging package. +Django (>= 4.2). Sumatra requires that you keep your own code in a version control system (currently Subversion, Mercurial, Git and Bazaar are supported). If you are already using Bazaar there is nothing else to install. If you diff --git a/doc/authors.txt b/doc/authors.txt index 257a6434..503d25f1 100644 --- a/doc/authors.txt +++ b/doc/authors.txt @@ -62,3 +62,11 @@ Licence ======= Sumatra is freely available under the BSD 2-clause license. + + +django-tagging +============== + +Some of the code in Sumatra's `django_store` module is taken from or based on +the django-tagging package, by Jonathan Buchanan and Julien Fache, +which in turn was based on the Cab package by James Bennett. 
diff --git a/pyproject.toml b/pyproject.toml index 3899e8e7..f71eda63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,8 +55,7 @@ hg = ["mercurial", "hgapi"] web = [ "docutils", - "Django<6", - "django-tagging@git+https://github.com/jazzband/django-tagging" + "Django<6" ] remote = ["httplib2"] diff --git a/sumatra/recordstore/django_store/__init__.py b/sumatra/recordstore/django_store/__init__.py index 281f1f94..64fb12ad 100644 --- a/sumatra/recordstore/django_store/__init__.py +++ b/sumatra/recordstore/django_store/__init__.py @@ -26,10 +26,6 @@ from urllib.request import urlparse from io import StringIO -# Check that django-tagging is available. It would be better to try importing -# it, but that seems to mess with Django's internals. -importlib.util.find_spec("tagging") - def db_id(db): """Return a unique identifier for a database, for comparison purposes.""" @@ -49,8 +45,7 @@ def __init__(self): 'DEBUG': True, 'DATABASES': {}, 'INSTALLED_APPS': ['sumatra.recordstore.django_store', - 'django.contrib.contenttypes', # needed for tagging - 'tagging'], + 'django.contrib.contenttypes'], 'MIDDLEWARE_CLASSES': [], 'READ_ONLY': 0, 'SERVERSIDE': 0, @@ -329,8 +324,8 @@ def clear(self): cmds = ["BEGIN;"] + ['DROP TABLE "django_store_{0}";'.format(x) for x in ("record", "record_input_data", "record_dependencies", "record_platforms", "platforminformation", "datakey", "datastore", "launchmode", + "taggeditem", "tag", "parameterset", "repository", "dependency", "executable", "project")] + ["COMMIT;"] - # todo: also drop tagging_taggeditem, tagging_tag from django.db import connection cur = connection.cursor() for cmd in cmds: @@ -344,7 +339,7 @@ def _dump(self, indent=2): import sys data = StringIO() sys.stdout = data - management.call_command('dumpdata', 'django_store', 'tagging', indent=indent) + management.call_command('dumpdata', 'django_store', indent=indent) sys.stdout = sys.__stdout__ data.seek(0) return data.read() diff --git 
a/sumatra/recordstore/django_store/migrations/0001_initial.py b/sumatra/recordstore/django_store/migrations/0001_initial.py index 5029e79c..454cdca6 100644 --- a/sumatra/recordstore/django_store/migrations/0001_initial.py +++ b/sumatra/recordstore/django_store/migrations/0001_initial.py @@ -1,9 +1,10 @@ # Generated by Django 5.2 on 2025-04-16 08:53 import django.db.models.deletion -import tagging.fields from django.db import migrations, models +from ..models import TagField + class Migration(migrations.Migration): @@ -139,7 +140,7 @@ class Migration(migrations.Migration): ('version', models.CharField(max_length=50)), ('outcome', models.TextField(blank=True)), ('timestamp', models.DateTimeField()), - ('tags', tagging.fields.TagField(blank=True, max_length=255)), + ('tags', TagField(blank=True, max_length=255)), ('diff', models.TextField(blank=True)), ('user', models.CharField(max_length=100)), ('script_arguments', models.TextField(blank=True)), diff --git a/sumatra/recordstore/django_store/migrations/0002_tag_taggeditem.py b/sumatra/recordstore/django_store/migrations/0002_tag_taggeditem.py new file mode 100644 index 00000000..f09d4113 --- /dev/null +++ b/sumatra/recordstore/django_store/migrations/0002_tag_taggeditem.py @@ -0,0 +1,41 @@ +# Generated by Django 5.2 on 2025-04-18 14:15 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('contenttypes', '0002_remove_content_type_name'), + ('django_store', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Tag', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(db_index=True, max_length=50, unique=True, verbose_name='name')), + ], + options={ + 'verbose_name': 'tag', + 'verbose_name_plural': 'tags', + 'ordering': ('name',), + }, + ), + migrations.CreateModel( + name='TaggedItem', + fields=[ + ('id', 
models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('object_id', models.PositiveIntegerField(db_index=True, verbose_name='object id')), + ('content_type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='contenttypes.contenttype', verbose_name='content type')), + ('tag', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='items', to='django_store.tag', verbose_name='tag')), + ], + options={ + 'verbose_name': 'tagged item', + 'verbose_name_plural': 'tagged items', + 'unique_together': {('tag', 'content_type', 'object_id')}, + }, + ), + ] diff --git a/sumatra/recordstore/django_store/models.py b/sumatra/recordstore/django_store/models.py index b5978e46..f394d674 100644 --- a/sumatra/recordstore/django_store/models.py +++ b/sumatra/recordstore/django_store/models.py @@ -6,19 +6,18 @@ :license: BSD 2-clause, see LICENSE for details. """ +from datetime import datetime import json + +from packaging.version import parse as parse_version from django.db import models +import django + from sumatra import programs, launch, datastore, records, versioncontrol, parameters, dependency_finder from sumatra.datastore import get_data_store -from datetime import datetime -import django -from packaging.version import parse as parse_version from sumatra.core import get_registered_components -import warnings -with warnings.catch_warnings(): - warnings.simplefilter("ignore") - import tagging.fields - from tagging.models import Tag + +from .tagging import TagField, Tag, TaggedItem, TagManager class SumatraObjectsManager(models.Manager): @@ -257,7 +256,7 @@ class Record(BaseModel): input_datastore = models.ForeignKey(Datastore, related_name="input_to_records", on_delete=models.PROTECT) outcome = models.TextField(blank=True) timestamp = models.DateTimeField() - tags = tagging.fields.TagField() + tags = TagField() dependencies = models.ManyToManyField(Dependency) platforms = 
models.ManyToManyField(PlatformInformation) diff = models.TextField(blank=True) diff --git a/sumatra/recordstore/django_store/tagging.py b/sumatra/recordstore/django_store/tagging.py new file mode 100644 index 00000000..b346ecb5 --- /dev/null +++ b/sumatra/recordstore/django_store/tagging.py @@ -0,0 +1,206 @@ +# The following models and managers are taken from django-tagging +# Specifically from the following fork and commit: +# - https://github.com/jazzband/django-tagging/commit/79cb321b1e4c464d39ed96ddbef02617ab0c692e + + +from django.db import models +from django.contrib.contenttypes.fields import GenericForeignKey +from django.contrib.contenttypes.models import ContentType +from django.utils.encoding import smart_str +from django.db import connection +from django.db.models.query_utils import Q +from django.db.models import signals +from django.db.models.fields import CharField + +from .tagging_utils import (parse_tag_input, edit_string_for_tags) + +qn = connection.ops.quote_name + + +class TagField(CharField): + """ + A "special" character field that actually works as a relationship to tags + "under the hood". This exposes a space-separated string of tags, but does + the splitting/reordering/etc. under the hood. + """ + def __init__(self, *args, **kwargs): + kwargs['max_length'] = kwargs.get('max_length', 255) + kwargs['blank'] = kwargs.get('blank', True) + super(TagField, self).__init__(*args, **kwargs) + + def contribute_to_class(self, cls, name): + super(TagField, self).contribute_to_class(cls, name) + + # Make this object the descriptor for field access. + setattr(cls, self.name, self) + + # Save tags back to the database post-save + signals.post_save.connect(self._save, cls, True) + + def __get__(self, instance, owner=None): + """ + Tag getter. Returns an instance's tags if accessed on an instance, and + all of a model's tags if called on a class. That is, this model:: + + class Link(models.Model): + ... 
+ tags = TagField() + + Lets you do both of these:: + + >>> l = Link.objects.get(...) + >>> l.tags + 'tag1 tag2 tag3' + + >>> Link.tags + 'tag1 tag2 tag3 tag4' + + """ + # Handle access on the model (i.e. Link.tags) + if instance is None: + return edit_string_for_tags(Tag.objects.usage_for_model(owner)) + + tags = self._get_instance_tag_cache(instance) + if tags is None: + if instance.pk is None: + self._set_instance_tag_cache(instance, '') + else: + self._set_instance_tag_cache( + instance, edit_string_for_tags( + Tag.objects.get_for_object(instance))) + return self._get_instance_tag_cache(instance) + + def __set__(self, instance, value): + """ + Set an object's tags. + """ + if instance is None: + raise AttributeError( + '%s can only be set on instances.' % self.name) + self._set_instance_tag_cache(instance, value) + + def _save(self, **kwargs): # signal, sender, instance): + """ + Save tags back to the database + """ + tags = self._get_instance_tag_cache(kwargs['instance']) + if tags is not None: + Tag.objects.update_tags(kwargs['instance'], tags) + + def __delete__(self, instance): + """ + Clear all of an object's tags. + """ + self._set_instance_tag_cache(instance, '') + + def _get_instance_tag_cache(self, instance): + """ + Helper: get an instance's tag cache. + """ + return getattr(instance, '_%s_cache' % self.attname, None) + + def _set_instance_tag_cache(self, instance, tags): + """ + Helper: set an instance's tag cache. + """ + # The next instruction does nothing particular, + # but is needed to bypass the deferred fields system + # when saving an instance, which checks the keys present + # in instance.__dict__. + # The issue was introduced in Django 1.10 + instance.__dict__[self.attname] = tags + setattr(instance, '_%s_cache' % self.attname, tags) + + def get_internal_type(self): + return 'CharField' + + +class TagManager(models.Manager): + + def update_tags(self, obj, tag_names): + """ + Update tags associated with an object. 
+ """ + ctype = ContentType.objects.get_for_model(obj) + current_tags = list(self.filter(items__content_type__pk=ctype.pk, + items__object_id=obj.pk)) + updated_tag_names = parse_tag_input(tag_names) + + # Remove tags which no longer apply + tags_for_removal = [tag for tag in current_tags + if tag.name not in updated_tag_names] + if len(tags_for_removal): + TaggedItem._default_manager.filter( + content_type__pk=ctype.pk, + object_id=obj.pk, + tag__in=tags_for_removal).delete() + # Add new tags + current_tag_names = [tag.name for tag in current_tags] + for tag_name in updated_tag_names: + if tag_name not in current_tag_names: + tag, created = self.get_or_create(name=tag_name) + TaggedItem._default_manager.get_or_create( + content_type_id=ctype.pk, + object_id=obj.pk, + tag=tag, + ) + + def get_for_object(self, obj): + """ + Create a queryset matching all tags associated with the given + object. + """ + ctype = ContentType.objects.get_for_model(obj) + return self.filter(items__content_type__pk=ctype.pk, + items__object_id=obj.pk) + + +class Tag(models.Model): + """ + A tag. + """ + name = models.CharField( + 'name', max_length=50, + unique=True, db_index=True) + + objects = TagManager() + + class Meta: + ordering = ('name',) + verbose_name = 'tag' + verbose_name_plural = 'tags' + + def __str__(self): + return self.name + + +class TaggedItem(models.Model): + """ + Holds the relationship between a tag and the item being tagged. 
+ """ + tag = models.ForeignKey( + Tag, + verbose_name='tag', + related_name='items', + on_delete=models.CASCADE) + + content_type = models.ForeignKey( + ContentType, + verbose_name='content type', + on_delete=models.CASCADE) + + object_id = models.PositiveIntegerField( + 'object id', + db_index=True) + + object = GenericForeignKey( + 'content_type', 'object_id') + + class Meta: + # Enforce unique tag association per object + unique_together = (('tag', 'content_type', 'object_id'),) + verbose_name = 'tagged item' + verbose_name_plural = 'tagged items' + + def __str__(self): + return '%s [%s]' % (smart_str(self.object), smart_str(self.tag)) diff --git a/sumatra/recordstore/django_store/tagging_utils.py b/sumatra/recordstore/django_store/tagging_utils.py new file mode 100644 index 00000000..7bb1bd8a --- /dev/null +++ b/sumatra/recordstore/django_store/tagging_utils.py @@ -0,0 +1,133 @@ +""" +Tagging utilities - from user tag input parsing to tag cloud +calculation. + +The following functions are taken from django-tagging. +Specifically from the following fork and commit: + - https://github.com/jazzband/django-tagging/commit/79cb321b1e4c464d39ed96ddbef02617ab0c692e + +""" + +from django.utils.encoding import force_str + +# Font size distribution algorithms +LOGARITHMIC, LINEAR = 1, 2 + + +def parse_tag_input(input): + """ + Parses tag input, with multiple word input being activated and + delineated by commas and double quotes. Quotes take precedence, so + they may contain commas. + + Returns a sorted list of unique tag names. + """ + if not input: + return [] + + input = force_str(input) + + # Special case - if there are no commas or double quotes in the + # input, we don't *do* a recall... I mean, we know we only need to + # split on spaces. 
+ if ',' not in input and '"' not in input: + words = list(set(split_strip(input, ' '))) + words.sort() + return words + + words = [] + buffer = [] + # Defer splitting of non-quoted sections until we know if there are + # any unquoted commas. + to_be_split = [] + saw_loose_comma = False + open_quote = False + i = iter(input) + try: + while 1: + c = next(i) + if c == '"': + if buffer: + to_be_split.append(''.join(buffer)) + buffer = [] + # Find the matching quote + open_quote = True + c = next(i) + while c != '"': + buffer.append(c) + c = next(i) + if buffer: + word = ''.join(buffer).strip() + if word: + words.append(word) + buffer = [] + open_quote = False + else: + if not saw_loose_comma and c == ',': + saw_loose_comma = True + buffer.append(c) + except StopIteration: + # If we were parsing an open quote which was never closed treat + # the buffer as unquoted. + if buffer: + if open_quote and ',' in buffer: + saw_loose_comma = True + to_be_split.append(''.join(buffer)) + if to_be_split: + if saw_loose_comma: + delimiter = ',' + else: + delimiter = ' ' + for chunk in to_be_split: + words.extend(split_strip(chunk, delimiter)) + words = list(set(words)) + words.sort() + return words + + +def split_strip(input, delimiter=','): + """ + Splits ``input`` on ``delimiter``, stripping each resulting string + and returning a list of non-empty strings. + """ + words = [w.strip() for w in input.split(delimiter)] + return [w for w in words if w] + + +def edit_string_for_tags(tags): + """ + Given list of ``Tag`` instances, creates a string representation of + the list suitable for editing by the user, such that submitting the + given string representation back without changing it will give the + same list of tags. + + Tag names which contain commas will be double quoted. + + If any tag name which isn't being quoted contains whitespace, the + resulting string of tag names will be comma-delimited, otherwise + it will be space-delimited. 
+ """ + names = [] + use_commas = False + for tag in tags: + name = tag.name + if ',' in name: + names.append('"%s"' % name) + continue + elif ' ' in name: + if not use_commas: + use_commas = True + names.append(name) + if use_commas: + glue = ', ' + else: + glue = ' ' + result = glue.join(names) + + # If we only had one name, and it had spaces, + # we need to enclose it in quotes. + # Otherwise, it's interpreted as two tags. + if len(names) == 1 and use_commas: + result = '"' + result + '"' + + return result diff --git a/sumatra/web/views.py b/sumatra/web/views.py index 7ff15d0a..866722da 100644 --- a/sumatra/web/views.py +++ b/sumatra/web/views.py @@ -20,9 +20,8 @@ MonthArchiveView = object from django.views.generic import View, DetailView, TemplateView from django.db.models import Q -from tagging.models import Tag from sumatra.recordstore.serialization import datestring_to_datetime -from sumatra.recordstore.django_store.models import Project, Record, DataKey, Datastore +from sumatra.recordstore.django_store.models import Project, Record, DataKey, Datastore, Tag from sumatra.records import RecordDifference DEFAULT_MAX_DISPLAY_LENGTH = 10 * 1024