diff --git a/.annotation_safe_list.yml b/.annotation_safe_list.yml index 45c6a18f6..54d8341fd 100644 --- a/.annotation_safe_list.yml +++ b/.annotation_safe_list.yml @@ -13,9 +13,69 @@ auth.Group: ".. no_pii:": "This model has no PII" auth.Permission: ".. no_pii:": "This model has no PII" +auth.User: + ".. pii": "This model minimally contains a username, password, and email" + ".. pii_types": "username, email_address, password" + ".. pii_retirement": "consumer_api" contenttypes.ContentType: ".. no_pii:": "This model has no PII" -sessions.Session: +oel_collections.Collection: + ".. no_pii:": "This model has no PII" +oel_collections.CollectionPublishableEntity: + ".. no_pii:": "This model has no PII" +oel_components.Component: + ".. no_pii:": "This model has no PII" +oel_components.ComponentType: + ".. no_pii:": "This model has no PII" +oel_components.ComponentVersion: + ".. no_pii:": "This model has no PII" +oel_components.ComponentVersionContent: + ".. no_pii:": "This model has no PII" +oel_contents.Content: + ".. no_pii:": "This model has no PII" +oel_contents.MediaType: + ".. no_pii:": "This model has no PII" +oel_publishing.Container: + ".. no_pii:": "This model has no PII" +oel_publishing.ContainerVersion: + ".. no_pii:": "This model has no PII" +oel_publishing.Draft: + ".. no_pii:": "This model has no PII" +oel_publishing.EntityList: + ".. no_pii:": "This model has no PII" +oel_publishing.EntityListRow: + ".. no_pii:": "This model has no PII" +oel_publishing.LearningPackage: + ".. no_pii:": "This model has no PII" +oel_publishing.PublishLog: + ".. no_pii:": "This model has no PII" +oel_publishing.PublishLogRecord: + ".. no_pii:": "This model has no PII" +oel_publishing.PublishableEntity: + ".. no_pii:": "This model has no PII" +oel_publishing.PublishableEntityVersion: + ".. no_pii:": "This model has no PII" +oel_publishing.Published: + ".. no_pii:": "This model has no PII" +oel_tagging.ObjectTag: + ".. no_pii:": "This model has no PII" +oel_tagging.Tag: + ".. no_pii:": "This model has no PII" +oel_tagging.TagImportTask: + ".. no_pii:": "This model has no PII" +oel_tagging.Taxonomy: + ".. no_pii:": "This model has no PII" +oel_sections.Section: + ".. no_pii:": "This model has no PII" +oel_sections.SectionVersion: + ".. no_pii:": "This model has no PII" +oel_subsections.Subsection: + ".. no_pii:": "This model has no PII" +oel_subsections.SubsectionVersion: + ".. no_pii:": "This model has no PII" +oel_units.Unit: + ".. no_pii:": "This model has no PII" +oel_units.UnitVersion: ".. no_pii:": "This model has no PII" social_django.Association: ".. no_pii:": "This model has no PII" @@ -29,6 +89,8 @@ social_django.Partial: ".. no_pii:": "This model has no PII" social_django.UserSocialAuth: ".. no_pii:": "This model has no PII" +sessions.Session: + ".. no_pii:": "This model has no PII" waffle.Flag: ".. no_pii:": "This model has no PII" waffle.Sample: diff --git a/.coveragerc b/.coveragerc index 368d35820..fb2ddbfe5 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,10 +1,13 @@ [run] branch = True data_file = .coverage -source=openedx_learning +source = + openedx_learning + openedx_tagging omit = test_settings - *migrations* + **/migrations/* *admin.py *static* *templates* + **/tests/** diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..9186f7421 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 +updates: + # Adding new check for github-actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..737005651 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,59 @@ +name: Python CI + +on: + push: + branches: [ main ] + pull_request: + branches: + - '**' + + +jobs: + run_tests: + name: tests + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] # Add macos-latest later? + python-version: ['3.11', '3.12'] + toxenv: ["django42", "django52", "package", "quality"] + # We're only testing against MySQL 8 right now because 5.7 is + # incompatible with Djagno 4.2. We'd have to make the tox.ini file more + # complicated than it's worth given the short expected shelf-life of + # MySQL 5.7 in our stack. + mysql-version: ["8"] + services: + mysql: + image: mysql:${{ matrix.mysql-version }} + ports: + - 3306:3306 + env: + MYSQL_DATABASE: "test_oel_db" + MYSQL_USER: "test_oel_user" + MYSQL_PASSWORD: "test_oel_pass" + MYSQL_RANDOM_ROOT_PASSWORD: true + # these options are blatantly copied from edx-platform's values + options: >- + --health-cmd "mysqladmin ping" + --health-interval 10s + --health-timeout 5s + --health-retries 3 + steps: + - uses: actions/checkout@v5 + - name: setup python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install pip + run: pip install -r requirements/pip.txt + + - name: Install Dependencies + run: pip install -r requirements/ci.txt + + - name: Run Tests + env: + TOXENV: ${{ matrix.toxenv }} + run: tox -e ${{ matrix.toxenv }} + diff --git a/.github/workflows/lint-imports.yml b/.github/workflows/lint-imports.yml new file mode 100644 index 000000000..9e27d5438 --- /dev/null +++ b/.github/workflows/lint-imports.yml @@ -0,0 +1,29 @@ +name: Lint Imports + +on: + push: + branches: [ main ] + pull_request: + branches: + - '**' + + +jobs: + lint-imports: + name: Lint Python Imports + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - name: setup python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install pip + run: pip install -r requirements/pip.txt + + - name: Install Dependencies + run: pip install -r requirements/ci.txt + + - name: Analyze imports + run: lint-imports diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml new file mode 100644 index 000000000..11ccb0556 --- /dev/null +++ b/.github/workflows/pypi-publish.yml @@ -0,0 +1,30 @@ +name: Publish package to PyPI + +on: + push: + tags: + - '*' + +jobs: + push: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v5 + - name: setup python + uses: actions/setup-python@v5 + with: + python-version: 3.11 + + - name: Install pip + run: pip install -r requirements/pip.txt + + - name: Build package + run: python setup.py sdist bdist_wheel + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_UPLOAD_TOKEN }} diff --git a/.github/workflows/upgrade-python-requirements.yml b/.github/workflows/upgrade-python-requirements.yml new file mode 100644 index 000000000..14b5bc745 --- /dev/null +++ b/.github/workflows/upgrade-python-requirements.yml @@ -0,0 +1,31 @@ +name: Upgrade Python Requirements + +on: + schedule: + - cron: "0 0 * * 1" + workflow_dispatch: + inputs: + branch: + description: "Target branch against which to create requirements PR" + required: true + # If copying this template manually, you must provide your default branch name + # in quotes, such as 'master' + default: "main" + +jobs: + call-upgrade-python-requirements-workflow: + uses: openedx/.github/.github/workflows/upgrade-python-requirements.yml@master + with: + # If copying manually, also provide your default branch name in quotes here + branch: ${{ github.event.inputs.branch || 'main' }} + # optional parameters below; fill in if you'd like github or email notifications + # user_reviewers: "" + team_reviewers: "axim-aximprovements" + email_address: "aximimprovements@axim.org" + send_success_notification: false + # python_version: "" + secrets: + requirements_bot_github_token: ${{ secrets.REQUIREMENTS_BOT_GITHUB_TOKEN }} + requirements_bot_github_email: ${{ secrets.REQUIREMENTS_BOT_GITHUB_EMAIL }} + edx_smtp_username: ${{ secrets.EDX_SMTP_USERNAME }} + edx_smtp_password: ${{ secrets.EDX_SMTP_PASSWORD }} diff --git a/.gitignore b/.gitignore index f642f3b20..0f071cb7a 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ pip-log.txt # Unit test / coverage reports .cache/ .pytest_cache/ +.mypy_cache/ .coverage .coverage.* .tox @@ -61,7 +62,13 @@ requirements/private.in requirements/private.txt # database file -dev.db +dev.db* .vscode +venv/ +# Media files (for uploads) +media/ + +# Media files generated during test runs +test_media/ diff --git a/.importlinter b/.importlinter index 82d881926..6c278d0dd 100644 --- a/.importlinter +++ b/.importlinter @@ -23,22 +23,37 @@ name = Lib / Core / Contrib Layering type = layers layers= openedx_learning.contrib - openedx_learning.core + openedx_learning.apps openedx_learning.lib -# This is layering within our Core apps. -# -# The lowest layer is "publishing", which holds the basic primitives needed to -# create LearningPackages and versioning. -# -# One layer above that is "itemstore" which stores single Items (e.g. Problem, -# Video). -# -# Above "itemstore" are apps that can compose those Items into more interesting -# structures (like Units). +# This is layering within our Authoring apps. Every new app should be added to +# this list when it it created. [importlinter:contract:core_apps_layering] -name = Core App Dependency Layering +name = Authoring App Dependency Layering type = layers layers= - openedx_learning.core.components - openedx_learning.core.publishing + # The public authoring API is at the top–none of the apps should call to it. + openedx_learning.api.authoring + + # The "backup_restore" app handle the new export and import mechanism. + openedx_learning.apps.authoring.backup_restore + + # The "components" app is responsible for storing versioned Components, + # which is Open edX Studio terminology maps to things like individual + # Problems, Videos, and blocks of HTML text. This is also the type we would + # associate with a single "leaf" XBlock–one that is not a container type and + # has no child elements. + openedx_learning.apps.authoring.components + + # The "contents" app stores the simplest pieces of binary and text data, + # without versioning information. These belong to a single Learning Package. + openedx_learning.apps.authoring.contents + + # The "collections" app stores arbitrary groupings of PublishableEntities. + # Its only dependency should be the publishing app. + openedx_learning.apps.authoring.collections + + # The lowest layer is "publishing", which holds the basic primitives needed + # to create Learning Packages and manage the draft and publish states for + # various types of content. + openedx_learning.apps.authoring.publishing diff --git a/.readthedocs.yml b/.readthedocs.yaml similarity index 95% rename from .readthedocs.yml rename to .readthedocs.yaml index 0f029d2fd..8d28a715f 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yaml @@ -10,6 +10,6 @@ sphinx: configuration: docs/conf.py python: - version: 3.8 + version: 3.11 install: - requirements: requirements/doc.txt diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fcbcbdf2e..be5c0b25f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,7 +14,10 @@ Change Log Unreleased ~~~~~~~~~~ -* +* Removed usage of ``tox-battery`` and added support for ``tox 4.0`` +* Switch from ``edx-sphinx-theme`` to ``sphinx-book-theme`` since the former is + deprecated. See https://github.com/openedx/edx-sphinx-theme/issues/184 for + more details. [0.1.0] - 2021-08-08 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/MANIFEST.in b/MANIFEST.in index 6ce1f3b20..73b8b7ee4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,3 +3,4 @@ include LICENSE.txt include README.rst include requirements/base.in recursive-include openedx_learning *.html *.png *.gif *.js *.css *.jpg *.jpeg *.svg *.py +recursive-include openedx_tagging *.html *.png *.gif *.js *.css *.jpg *.jpeg *.svg *.py *.yaml *.json *.csv diff --git a/Makefile b/Makefile index 2ad767893..7ae9b982f 100644 --- a/Makefile +++ b/Makefile @@ -30,13 +30,16 @@ docs: ## generate Sphinx HTML documentation, including API docs $(BROWSER)docs/_build/html/index.html # Define PIP_COMPILE_OPTS=-v to get more information during make upgrade. -PIP_COMPILE = pip-compile --rebuild --upgrade $(PIP_COMPILE_OPTS) +PIP_COMPILE = pip-compile --rebuild $(PIP_COMPILE_OPTS) -upgrade: export CUSTOM_COMPILE_COMMAND=make upgrade -upgrade: ## update the requirements/*.txt files with the latest packages satisfying requirements/*.in +compile-requirements: export CUSTOM_COMPILE_COMMAND=make upgrade +compile-requirements: ## update the requirements/*.txt files with the latest packages satisfying requirements/*.in pip install -qr requirements/pip-tools.txt - # Make sure to compile files after any other files they include! + # Make sure to compile files after any other files they include! + $(PIP_COMPILE) --allow-unsafe -o requirements/pip.txt requirements/pip.in $(PIP_COMPILE) -o requirements/pip-tools.txt requirements/pip-tools.in + pip install -qr requirements/pip.txt + pip install -qr requirements/pip-tools.txt $(PIP_COMPILE) -o requirements/base.txt requirements/base.in $(PIP_COMPILE) -o requirements/test.txt requirements/test.in $(PIP_COMPILE) -o requirements/doc.txt requirements/doc.in @@ -47,8 +50,12 @@ upgrade: ## update the requirements/*.txt files with the latest packages satisfy sed '/^[dD]jango==/d' requirements/test.txt > requirements/test.tmp mv requirements/test.tmp requirements/test.txt +upgrade: ## update the pip requirements files to use the latest releases satisfying our constraints + make compile-requirements PIP_COMPILE_OPTS="--upgrade" + quality: ## check coding style with pycodestyle and pylint tox -e quality + lint-imports pii_check: ## check for PII annotations on all Django models tox -e pii_check @@ -58,7 +65,7 @@ requirements: ## install development environment requirements pip-sync requirements/dev.txt requirements/private.* test: clean ## run tests in the current virtualenv - pytest + pytest tests diff_cover: test ## find diff lines that need test coverage diff-cover coverage.xml @@ -77,12 +84,16 @@ extract_translations: ## extract strings to be translated, outputting .mo files rm -rf docs/_build cd openedx_learning && ../manage.py makemessages -l en -v1 -d django cd openedx_learning && ../manage.py makemessages -l en -v1 -d djangojs + cd openedx_tagging && ../manage.py makemessages -l en -v1 -d django + cd openedx_tagging && ../manage.py makemessages -l en -v1 -d djangojs compile_translations: ## compile translation files, outputting .po files for each supported language cd openedx_learning && ../manage.py compilemessages + cd openedx_tagging && ../manage.py compilemessages detect_changed_source_translations: cd openedx_learning && i18n_tool changed + cd openedx_tagging && i18n_tool changed pull_translations: ## pull translations from Transifex tx pull -a -f -t --mode reviewed @@ -92,6 +103,7 @@ push_translations: ## push source translation files (.po) from Transifex dummy_translations: ## generate dummy translation (.po) files cd openedx_learning && i18n_tool dummy + cd openedx_tagging && i18n_tool dummy build_dummy_translations: extract_translations dummy_translations compile_translations ## generate and compile dummy translation files diff --git a/README.rst b/README.rst index 566537c0b..9d82684e4 100644 --- a/README.rst +++ b/README.rst @@ -1,16 +1,13 @@ -openedx-learning -============================= +Open edX Learning Core (and Tagging) +==================================== |pypi-badge| |ci-badge| |codecov-badge| |doc-badge| |pyversions-badge| |license-badge| -This is experimentation/prototyping and not in any way production ready! ------------------------------------------------------------------------- - Overview -------- -The Open edX Learning repository holds Django apps that represent core learning concepts and data models that have been extracted from edx-platform. +The ``openedx_learning`` package holds Django apps that represent core learning concepts and data models that have been extracted from edx-platform. At the moment, this repo also contains the ``openedx_tagging`` package, but this will likely be moved out in the future. Motivation ---------- @@ -26,17 +23,16 @@ Parts ~~~~~ * ``openedx_learning.lib`` is for shared utilities, and may include things like custom field types, plugin registration code, etc. -* ``openedx_learning.core`` contains our Core Django apps, where foundational data structures and APIs will live. +* ``openedx_learning.apps`` contains our Learning Core Django apps, where foundational data structures and APIs will live. The first of these is ``authoring``, which holds apps related to the editing and publishing of learning content. +* ``openedx_tagging.core`` contains the core Tagging app, which provides data structures and apis for tagging Open edX objects. -App Dependencies -~~~~~~~~~~~~~~~~ +Learning Core Package Dependencies +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Anything can import from ``openedx_learning.lib``. +Learning Core code should never import from ``edx-platform``. -Core apps can import from each other, but cannot import from other apps outside of core. For those apps: +We want to be very strict about dependency management internally as well. Please read the `.importlinter config file <.importlinter>`_ file and the `Python API Conventions ADR `_ for more details. -* ``learning_publishing`` has no dependencies. All the other apps depend on it. -* ``learning_composition`` and ``learning_navigation`` both depend on ``learning_partitioning`` Model Conventions ~~~~~~~~~~~~~~~~~ @@ -45,8 +41,8 @@ We have a few different identifier types in the schema, and we try to avoid ``_i * ``id`` is the auto-generated, internal row ID and primary key. This never changes. Data models should make foreign keys to this field, as per Django convention. * ``uuid`` is a randomly generated UUID4. This is the stable way to refer to a row/resource from an external service. This never changes. This is separate from ``id`` mostly because there are performance penalties when using UUIDs as primary keys with MySQL. -* ``identifier`` is intended to be a case-sensitive, alphanumeric identifier, which holds some meaning to library clients. This is usually stable, but can be changed, depending on the business logic of the client. The apps in this repo should make no assumptions about it being stable. It can be used as a suffix. -* ``num`` is like ``identifier``, but for use when it's strictly numeric. It can also be used as a suffix. +* ``key`` is intended to be a case-sensitive, alphanumeric key, which holds some meaning to library clients. This is usually stable, but can be changed, depending on the business logic of the client. The apps in this repo should make no assumptions about it being stable. It can be used as a suffix. Since ``key`` is a reserved name on certain database systems, the database field is ``_key``. +* ``num`` is like ``key``, but for use when it's strictly numeric. It can also be used as a suffix. See Also @@ -60,7 +56,7 @@ The structure of this repo follows [OEP-0049](https://open-edx-proposals.readthe Code Overview ------------- -The ``openedx_learning.apps`` package contains all our Django applications. All apps are named with a ``learning_`` prefix to better avoid name conflicts, because Django's app namespace is flat. Apps will adhere to `OEP-0049: Django App Patterns `_. +The ``openedx_learning.apps`` package contains all our Django applications. Development Workflow -------------------- @@ -74,7 +70,7 @@ One Time Setup cd openedx-learning # Set up a virtualenv using virtualenvwrapper with the same name as the repo and activate it - mkvirtualenv -p python3.8 openedx-learning + mkvirtualenv -p python3.11 openedx-learning Every time you develop something in this repo @@ -127,10 +123,10 @@ This repo is in a very experimental state. Discussion using GitHub Issues is wel Reporting Security Issues ------------------------- -Please do not report security issues in public. Please email security@edx.org. +Please do not report security issues in public. Please email security@openedx.org. -Getting Help ------------- +Help +---- If you're having trouble, we have discussion forums at https://discuss.openedx.org where you can connect with others in the community. diff --git a/catalog-info.yaml b/catalog-info.yaml index a875b1a8b..d57c3aa29 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -5,4 +5,4 @@ metadata: spec: type: other lifecycle: unknown - owner: tcril-engineering + owner: axim-engineering diff --git a/docs/api_reference.rst b/docs/api_reference.rst new file mode 100644 index 000000000..b46ac15a2 --- /dev/null +++ b/docs/api_reference.rst @@ -0,0 +1,8 @@ +API Reference +============= + +.. toctree:: + :maxdepth: 1 + :glob: + + public_apis/* diff --git a/docs/conf.py b/docs/conf.py index 5c9e7aceb..8d5e0f62f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,9 +14,9 @@ import os import re import sys +from datetime import datetime from subprocess import check_call -import edx_theme from django import setup as django_setup @@ -59,7 +59,6 @@ def get_version(*file_paths): # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'edx_theme', 'sphinx.ext.autodoc', 'sphinxcontrib_django', 'sphinx.ext.doctest', @@ -91,8 +90,8 @@ def get_version(*file_paths): # General information about the project. project = 'openedx-learning' -copyright = edx_theme.COPYRIGHT # pylint: disable=redefined-builtin -author = edx_theme.AUTHOR +copyright = f'{datetime.now().year}, edX Inc.' # pylint: disable=redefined-builtin +author = 'edX Inc.' project_title = 'Open edX Learning Core' documentation_title = f"{project_title}" @@ -163,16 +162,46 @@ def get_version(*file_paths): # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'edx_theme' +html_theme = 'sphinx_book_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # -# html_theme_options = {} +html_theme_options = { + "repository_url": "https://github.com/openedx/openedx-learning", + "repository_branch": "main", + "path_to_docs": "docs/", + "home_page_in_toc": True, + "use_repository_button": True, + "use_issues_button": True, + "use_edit_page_button": True, + # Please don't change unless you know what you're doing. + "extra_footer": """ + + Creative Commons License + +
+ These works by + The Axim Collaborative + are licensed under a + Creative Commons Attribution-ShareAlike 4.0 International License. + """ +} # Add any paths that contain custom themes here, relative to this directory. -html_theme_path = [edx_theme.get_html_theme_path()] +# html_theme_path = [] # The name for this set of Sphinx documents. # " v documentation" by default. @@ -186,13 +215,13 @@ def get_version(*file_paths): # The name of an image file (relative to this directory) to place at the top # of the sidebar. # -# html_logo = None +html_logo = "https://logos.openedx.org/open-edx-logo-color.png" # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # -# html_favicon = None +html_favicon = "https://logos.openedx.org/open-edx-favicon.ico" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -470,7 +499,7 @@ def get_version(*file_paths): # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - 'python': ('https://docs.python.org/3.8', None), + 'python': ('https://docs.python.org/3.11', None), 'django': ('https://docs.djangoproject.com/en/2.2/', 'https://docs.djangoproject.com/en/2.2/_objects/'), 'model_utils': ('https://django-model-utils.readthedocs.io/en/latest/', None), } @@ -482,6 +511,9 @@ def on_init(app): # pylint: disable=unused-argument Read the Docs won't run tox or custom shell commands, so we need this to avoid checking in the generated reStructuredText files. + + Note: there is no auto-building of API docs for the entire repo. We + explicitly build reference docs for the public API in /docs/public_apis/ """ docs_path = os.path.abspath(os.path.dirname(__file__)) root_path = os.path.abspath(os.path.join(docs_path, '..')) @@ -490,11 +522,10 @@ def on_init(app): # pylint: disable=unused-argument # If we are, assemble the path manually bin_path = os.path.abspath(os.path.join(sys.prefix, 'bin')) apidoc_path = os.path.join(bin_path, apidoc_path) - check_call([apidoc_path, '-o', docs_path, os.path.join(root_path, 'openedx_learning'), - os.path.join(root_path, 'openedx_learning/migrations')]) - def setup(app): - """Sphinx extension: run sphinx-apidoc.""" + """ + Sphinx extension: run sphinx-apidoc. + """ event = 'builder-inited' app.connect(event, on_init) diff --git a/docs/decisions/0002-content-flexibility.rst b/docs/decisions/0002-content-flexibility.rst index 12c9a123a..cea33e9c5 100644 --- a/docs/decisions/0002-content-flexibility.rst +++ b/docs/decisions/0002-content-flexibility.rst @@ -25,7 +25,7 @@ Unit A Unit is an ordered list of one or more Components that is typically displayed together. A common use case might be to display some introductory Text, a Video, and some followup Problem (all separate Components). An individual Component in a Unit may or may not make sense when taken outside of that Unit–e.g. a Video may be reusable elsewhere, but the Problem referencing the video might not be. Sequence - A Sequence is a collection of Units that are presented one after the other, either to assess student understanding or to achieve some learning objective. + A Sequence is a collection of Units that are presented one after the other, either to assess student understanding or to achieve some learning objective. A Sequence is analogous to a Subsection in a traditional Open edX course. diff --git a/docs/decisions/0004-content-tagging.rst b/docs/decisions/0004-content-tagging.rst new file mode 100644 index 000000000..0f43fd4eb --- /dev/null +++ b/docs/decisions/0004-content-tagging.rst @@ -0,0 +1,45 @@ +4. Content Tagging +================== + +Context +------- + +Tagging content is central to enable content re-use, facilitate the implementation of flexible content structures different from the current implementation and allow adaptive learning in the Open edX platform. + +Content tagging should be classified as "kernel" component following `OEP-57's `_ guidelines, as a baked-in feature of the platform. + +Decision +-------- + +Implement the new tagging service as a pluggable django app, and installed alongside learning-core as a dependency in the edx-platform. + +Tagging data models will follow the guidelines for this repository, and focus on extensibility and flexibility. + +Since some use cases for content tagging are not considered "kernel" (like providing data for a marketing site), a generic mechanism to differentiate those uses cases will be built, and proper Python and REST APIs will be provided, to different taxonomies/tags for the same content. + + +Rejected Alternatives +--------------------- + +Implementing tagging as Discovery service plugin +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Discovery is already used as the source of truth place for course/program/block metadata in the previous implementations, so keeping it here means keeping the platform metadata management "consistent" (specially since it needs to work with all kinds of contents - from atomic learning units all the way up to courses). + +Concerns: + +#. The course discovery repository is not well documented and has code that is only used by edX/2U. +#. Some implementations are tied to specific closed-source services (example: `openedx/taxonomy-connector `_ uses a 3rd party closed-source service to tag video blocks automatically). +#. The data flow from the discovery service is confusing, and evolved as the needs for that repo changed. `Reference `_. +#. Many people prefer/need to run the platform without the discovery service. + +Implement tagging as a new IDA +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A new IDA dedicated for tagging would represent a break from the current codebases and force implementation using REST APIs. +This option adds extra complexity without a good compelling reason for it. + +Concerns: + +#. One more app to host: maintenance cost increases over time. +#. Extra response time between services when handling synchronous operations related to tagging. diff --git a/docs/decisions/0005-identifier-conventions.rst b/docs/decisions/0005-identifier-conventions.rst new file mode 100644 index 000000000..6aae93c4f --- /dev/null +++ b/docs/decisions/0005-identifier-conventions.rst @@ -0,0 +1,39 @@ +5. Identifier Conventions +========================= + +Context +------- + +We want a common set of conventions for how to reference models in various situations, such as: + +* From a model in a different app, but in the same Python process. +* From a different server. + +Decision +-------- + +The content-related data models in our system will use the following convention for identifier fields: + +Primary Key + The primary key will be a BigAutoField. This will usually be ``id``, but it can be different if the model builds off another one via a ``OneToOneField`` primary key. Other apps that are running in the same process should directly make foreign key field references to this. This value will not change. + +UUID + The ``uuid`` field is a randomly generated UUID4 that is globally unique and should be treated as immutable. If you are making a reference to this record in another system (e.g. an external service), this is the identifier to store. + +Key + The ``key`` field is chosen by apps or users, and is the most likely piece to be human-readable (though it doesn't have to be). These values are only locally unique within a given scope, such as a particular LearningPackage or ComponentVersion. + + The correlates most closely to OpaqueKeys, though they are not precisely the same. In particular, we don't want to directly store BlockUsageKeys that have the LearningContextKey baked into them, because that makes it much harder to change the LearningContextKey later, e.g. if we ever want to change a CourseKey for a course. Different models can choose whether the ``key`` value is mutable or not, but outside users should assume that it can change, even if it rarely does in practice. + +Implementation Notes +-------------------- + +Helpers to generate these field types are in the ``openedx_learning.lib.fields`` module. + +Rejected Alternatives +--------------------- + +A number of names were considered for ``key``, and were rejected for different reasons: + +* ``identifier`` is used in some standards like QTI, but it's too easily confused with ``id`` or the general concept of the three types of identity-related fields we have. +* ``slug`` was considered, but ultimately rejected because that implied these fields would be human-readable, and that's not guaranteed. Most XBlock content that comes from MongoDB will be using GUIDs, for instance. diff --git a/docs/decisions/0006-app-label-prefix.rst b/docs/decisions/0006-app-label-prefix.rst new file mode 100644 index 000000000..376d079e5 --- /dev/null +++ b/docs/decisions/0006-app-label-prefix.rst @@ -0,0 +1,13 @@ +6. App Label Prefix +=================== + +Context +------- + +We want this repo to be useful in different Django projects outside of just edx-platform, and we want to avoid downstream collisions with our app names. + + +Decision +-------- + +All apps in this repo will create a default AppConfig that sets the ``label`` to have a prefix of ``oel_`` before the app name. So if the app name is ``publishing``, the ``label`` will be ``oel_publishing``. This means that all generated database tables will similarly be prefixed with ``oel_``. diff --git a/docs/decisions/0007-tagging-app.rst b/docs/decisions/0007-tagging-app.rst new file mode 100644 index 000000000..9993a49bb --- /dev/null +++ b/docs/decisions/0007-tagging-app.rst @@ -0,0 +1,40 @@ +7. Tagging App structure +======================== + +Context +------- + +We want the openedx_tagging app to be useful in different Django projects outside of just openedx-learning and edx-platform. + + +Decisions +--------- + +The openedx_tagging data structures and code will stand alone with no dependencies on other Open edX projects. + +Classes which require dependencies on other Open edX projects should be defined within a ``tagging`` module inside those projects. + +Taxonomy +~~~~~~~~ + +The ``openedx_tagging`` module defines ``openedx_tagging.core.models.Taxonomy``, whose data and functionality are self-contained to the ``openedx_tagging`` app. However in Studio, we need to be able to limit access to some Taxonomy by organization, using the same "course creator" access which limits course creation for an organization to a defined set of users. + +So in edx-platform, we will create the ``openedx.features.content_tagging`` app, to contain the models and logic for linking Organization owners to Taxonomies. Here, we can subclass ``Taxonomy`` as needed, preferably using proxy models. The APIs are responsible for ensuring that any ``Taxonomy`` instances are cast to the appropriate subclass. + +ObjectTag +~~~~~~~~~ + +Similarly, the ``openedx_tagging`` module defined ``openedx_tagging.core.models.ObjectTag``, also self-contained to the +``openedx_tagging`` app. + +But to tag content in the LMS/Studio, we need to enforce ``object_id`` as a CourseKey or UsageKey type. So to do this, we subclass ``ObjectTag``, and use this class when creating content object tags. Once the ``object_id`` is set, it is not editable, and so this key validation need not happen again. + +Rejected Alternatives +--------------------- + +Embed in edx-platform +~~~~~~~~~~~~~~~~~~~~~ + +Embedding the logic in edx-platform would provide the content tagging logic specifically required for the MVP. + +However, we plan to extend tagging to other object types (e.g. People) and contexts (e.g. Marketing), and so a generic, standalone library is preferable in the log run. diff --git a/docs/decisions/0008-tagging-tree-data-arch.rst b/docs/decisions/0008-tagging-tree-data-arch.rst new file mode 100644 index 000000000..eaf2d0633 --- /dev/null +++ b/docs/decisions/0008-tagging-tree-data-arch.rst @@ -0,0 +1,53 @@ +8. Tag tree data structure +========================== + +Context +------- + +A taxonomy groups a set of related tags under a namespace and a set of usage rules. Some taxonomies require tags to be selected from a list or nested tree of valid options. + +Do we need a formal tree data structure to represent hierarchical tags? + +Decision +-------- + +No, a simplistic tree structure is sufficient for the MVP and forseeable feature requests. + +Existing tree data structures are designed to support very dynamic and deeply nested trees (e.g. forum threads) which are traversed frequently, and this feature set is overkill for taxonomy trees. + +Taxonomy trees have a maximum depth of 3 levels, which limits the depth of node traversal, and simplifies the UI/UX required to tag or search filter content with nested tags. + +Taxonomy trees only require simple operations, and infrequent traversals. Frequent operations (like viewing content tags) will only display the leaf tag value, not its full lineage, to minimize tree traversal. Full trees can be fetched quickly enough during content tag editing. Taxonomy tree changes themselves will also be infrequent. + +Rejected Alternatives +--------------------- + +All taxonomies are trees +~~~~~~~~~~~~~~~~~~~~~~~~ + +We could use a tree structure for all taxonomies: flat taxonomies would have only 1 level of tags under the root, while nested taxonomies can be deeper. + +To implement this, we'd link each taxonomy to a root tag, with the user-visible tags underneath. + +It was simpler instead to link the tag to the taxonomy, which removes the need for the unseen root tag. + +Closure tables +~~~~~~~~~~~~~~ + +https://coderwall.com/p/lixing/closure-tables-for-browsing-trees-in-sql + +Implementing the taxonomy tree using closure tables allows for tree traversals in O(N) time or less, where N is the total number of tags in the taxonomy. So the tree depth isn't as much of a performance concern as the total number of tags. + +Options include: + +* `django-tree-queries `_ + + Simple, performant, and well-maintained tree data structure. However it uses RECURSIVE CTE queries, which aren't supported until MySQL 8.0. + +* `django-mptt `_ + + Already an edx-platform dependency, but no longer maintained. It can be added retroactively to an existing tree-like model. + +* `django-closuretree `_ + + Another a good reference implementation for closure tables which can be added retroactively to an existing tree-like model. It is not actively maintained. diff --git a/docs/decisions/0009-tagging-administrators.rst b/docs/decisions/0009-tagging-administrators.rst new file mode 100644 index 000000000..0c984ff37 --- /dev/null +++ b/docs/decisions/0009-tagging-administrators.rst @@ -0,0 +1,42 @@ +9. Taxonomy administrators +========================== + +Context +------- + +Taxonomy Administrators have the right to create, edit, populate, and delete taxonomies available globably for a given instance, or for a specific organization. + +How should these users be identified and their access granted? + +Decision +-------- + +In the Studio context, a modified version of "course creator" access will be used to identify Taxonomy Administrators (ref `get_organizations`_): + +#. Global staff and superusers can create/edit/populate/delete Taxonomies for the instance or for any org key. + +#. Users who can create courses for "any organization" access can create/edit/populate/delete Taxonomies for the instance or for any org key. + +#. Users who can create courses only for specific organizations can create/edit/populate/delete Taxonomies with only these org keys. + + +Permission #1 requires no external access, so can be enforced by the ``openedx_tagging`` app. + +But because permissions #2 + #3 require access to the edx-platform CMS model `CourseCreator`_, this access can only be enforced in Studio, and so will live under ``cms.djangoapps.content_tagging`` along with the ``ContentTag`` class. Tagging MVP must work for libraries v1, v2 and courses created in Studio, and so tying these permissions to Studio is reasonable for the MVP. + +Per `OEP-9`_, ``openedx_tagging`` will allow applications to use the standard Django API to query permissions, for example: ``user.has_perm('openedx_tagging.edit_taxonomy', taxonomy)``, and the appropriate permissions will be applied in that application's context. + +These rules will be enforced in the tagging `views`_, not the API or models, so that external code using this library need not have a logged-in user in order to call the API. So please use with care. + +Rejected Alternatives +--------------------- + +Django users & groups +~~~~~~~~~~~~~~~~~~~~~ + +This is a standard way to grant access in Django apps, but it is not used in Open edX. + +.. _get_organizations: https://github.com/openedx/edx-platform/blob/4dc35c73ffa6d6a1dcb6e9ea1baa5bed40721125/cms/djangoapps/contentstore/views/course.py#L1958 +.. _CourseCreator: https://github.com/openedx/edx-platform/blob/4dc35c73ffa6d6a1dcb6e9ea1baa5bed40721125/cms/djangoapps/course_creators/models.py#L27 +.. _OEP-9: https://open-edx-proposals.readthedocs.io/en/latest/best-practices/oep-0009-bp-permissions.html +.. _views: https://github.com/dfunckt/django-rules#permissions-in-views diff --git a/docs/decisions/0010-taxonomy-enable-context.rst b/docs/decisions/0010-taxonomy-enable-context.rst new file mode 100644 index 000000000..4c12badbc --- /dev/null +++ b/docs/decisions/0010-taxonomy-enable-context.rst @@ -0,0 +1,93 @@ +10. Taxonomy enabled for context +================================ + +Context +------- + +The MVP specification says that taxonomies need to be able to be enabled/disabled for the following contexts: instance, organization, and course. + +Taxonomy Administrators must be able to enable a taxonomy globally for all organizations in an instance, or to set a list of organizations who can use the taxonomy. + +Content Authors must be able to turn taxonomies (instance and org-levels) on/off at the course level. + +Decision +-------- + +When is a taxonomy field shown to course authors in a given course? + ++-------------+-----------------------------+--------------------------+-------------------------------+ +| tax.enabled | tax.enabled_for(course.org) | course enables all tax's | Is taxonomy shown for course? | ++=============+=============================+==========================+===============================+ +| True | True | True | True | ++-------------+-----------------------------+--------------------------+-------------------------------+ +| False | True | True | False | ++-------------+-----------------------------+--------------------------+-------------------------------+ +| True | False | True | False | ++-------------+-----------------------------+--------------------------+-------------------------------+ +| True | True | False | False | ++-------------+-----------------------------+--------------------------+-------------------------------+ +| False | True | False | False | ++-------------+-----------------------------+--------------------------+-------------------------------+ +| False | False | True | False | ++-------------+-----------------------------+--------------------------+-------------------------------+ + +.. _Course: + +Course +~~~~~~ + +We will add a Course `Advanced Settings`_ that allows course authors to enable/disable *all available taxonomies* for a given course. + +In order for a given taxonomy to be "available to a course", it must be enabled in the :ref:`Instance` context and the course's :ref:`Organization` context. + +Disabling taxonomies for a course will remove/hide the taxonomy fields from the course edit page and unit edit page(s), and tags will not be shown in Studio for that course. LMS use of tags is outside of this MVP. + +Future versions may add more granularity to these settings, to be determined by user needs. + +.. _Instance: + +Instance +~~~~~~~~ + +Taxonomy contains a boolean ``enabled`` field. + +A Taxonomy can be disabled for all contexts by setting ``enabled = False``. +If ``enabled = True``, then the :ref:`Organization` and :ref:`Course` contexts determine whether a taxonomy will be shown to course authors. + +.. _Organization: + +Organization +~~~~~~~~~~~~ + +OrgTaxonomy has a many-to-many relationship with the Organization model, accessed via the ``org_owners`` field. OrgTaxonomy lives under `cms.djangoapps.tagging` and so has access to the Organization model and logic in Studio. + +An OrgTaxonomy is enabled for all organizations if ``org_owners == []``. +If there are any ``org_owners`` set, then the OrgTaxonomy is only enabled for those orgas, i.e. only courses in these orgs will see the taxonomy field in Studio. + +Allowing multiple orgs to access a taxonomy reduces redundancy in data and maintenance. + +Rejected Alternatives +--------------------- + +Single org per taxonomy +~~~~~~~~~~~~~~~~~~~~~~~ + +Having a single org on a taxonomy is simpler from an implementation perspective, but the UI/UX frames demonstrated that it is simpler for the user to maintain a single taxonomy for multiple orgs. + +Course Waffle Flags +~~~~~~~~~~~~~~~~~~~ + +Use `Course Waffle Flags`_ to enable/disable all taxonomies for a given course. + +Waffle flags can only be changed by instance superusers, but the MVP specifically requires that content authors have control over this switch. + + +Link courses to taxonomies +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Link individual courses as enabled/disabled to specific taxonomies. +This was deemed too granular for the MVP, and the data structures and UI can be simplified by using a broader on/off flag. + + +.. _Advanced Settings: https://github.com/openedx/edx-platform/blob/4dc35c73ffa6d6a1dcb6e9ea1baa5bed40721125/cms/djangoapps/models/settings/course_metadata.py#L28 +.. _Course Waffle Flags: https://github.com/openedx/edx-platform/blob/4dc35c73ffa6d6a1dcb6e9ea1baa5bed40721125/openedx/core/djangoapps/waffle_utils/models.py#L14 diff --git a/docs/decisions/0011-tag-changes.rst b/docs/decisions/0011-tag-changes.rst new file mode 100644 index 000000000..6e1ea7d6d --- /dev/null +++ b/docs/decisions/0011-tag-changes.rst @@ -0,0 +1,35 @@ +11. Taxonomy and tag changes +============================ + +Context +------- + +Tagging content may be a labor-intensive, and the data produced is precious, both for human and automated users. Content tags should be structured and namespaced according to the needs of the instance's taxonomy administrators. But taxonomies themselves need to allow for changes: their tags can be overridden with a single import, they can be deleted, reworked, and their rules changed on the fly. + +What happens to the existing content tags if a Taxonomy or Tag is renamed, moved, or deleted? + +Decision +-------- + +Preserve content tag name:value pairs even if the associated taxonomy or tag is removed. +Reflect name:value changes from the linked taxonomy:tag immediately to the user. + +Content tags (through their base ObjectTag class) store a foreign key to their Taxonomy and Tag (if relevant), but they also store a copy of the Taxonomy.name and Tag.value, which can be used if there is no Taxonomy or Tag available. + +We consult the authoritative Taxonomy.name and Tag.value whenever displaying a content tag, so any changes are immediately reflected to the user. + +If a Taxonomy or Tag is deleted, the linked content tags will remain, and cached copies of the name:value pair will be displayed. + +This cached "value" field enables content tags (through their base ObjectTag class) to store free-text tag values, so that the free-text Taxonomy itself need not be modified when new free-text tags are added. + +This extra storage also allows tagged content to be imported independently of a taxonomy. The taxonomy (and appropriate tags) can be constructed later, and content tags validated and re-synced by editing the content tag or by running a maintenance command. + +Rejected Alternatives +--------------------- + +Require foreign keys +~~~~~~~~~~~~~~~~~~~~ + +Require a foreign key from a content tag to Taxonomy for the name, and Tag for the value. + +Only using foreign keys puts the labor-intensive content tag data at risk during taxonomy changes, and requires free-text tags to be made part of a taxonomy. diff --git a/docs/decisions/0012-system-taxonomy-creation.rst b/docs/decisions/0012-system-taxonomy-creation.rst new file mode 100644 index 000000000..36c27dcde --- /dev/null +++ b/docs/decisions/0012-system-taxonomy-creation.rst @@ -0,0 +1,65 @@ +12. System-defined Taxonomy & Tags creation +============================================ + +Context +-------- + +System-defined taxonomies are taxonomies created by the system. Some of these +depend on Django settings (e.g. Languages) and others depends on a core data +model (e.g. Organizations or Users). It is necessary to define how to create and +validate the System-defined taxonomies and their tags. + + +Decision +--------- + +System Tag lists and validation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Each Taxonomy has two methods for validating tags: +#. ``validate_value`` +#. ``validate_external_id`` + +These functions will return ``True`` if a given tag is valid, based on its +external ID or value. Subclasses should override these as needed, to implement +different types of taxonomy behavior (e.g. based on a model or baed on Django +settings). + +For example ``validate_value("English")`` will return ``True`` for the language +taxonomy if the English language is enabled in the Django settings. Likewise, +``validate_external_id("en")`` would return true, but +``validate_external_id("zz")`` would be ``False`` because there is no such +language. Or, for a User taxonomy, ``validate_value("username")`` would return +``True`` if a user with that username exists, or ``validate_external_id(...)`` +could validate if a user with that ID exists (note that the ID must be converted +to a string). + +In all of these cases, a ``Tag`` instance may or may not exist in the database. +Before saving an ``ObjectTag`` which references a tag in these taxonomies, the +tagging API will use either ``Taxonomy.tag_for_value`` or +``Taxonomy.tag_for_external_id``. These methods are responsible for both +validating the tag (like ``validate_...``) but also auto-creating the ``Tag`` +instance in case it doesn't already exist. Subclasses should override these as +needed. + +In this way, the system-defined taxonomies are fully dynamic and can represent +tags based on Languages, Users, or Organizations that may exist in large numbers +or be constantly created. + +At present, there isn't a good way to *list* all of the potential tags that +exist in a system-defined Taxonomy. We may add an API for that in the future, +for example to list all of the available languages. However for other cases like +users it doesn't make sense to even try to list all of the available tags. So +for now, the assumption is that the UI will not even try to display a list of +available tags for system-defined taxonomies. After all, system-defined tags are +usually applied automatically, rather than a user manually selecting from a +list. If there is a need to show a list of tags to the user, use the API that +lists the actually applied tags - i.e. the values of the ``ObjectTags`` +currently applied to objects using the taxonomy. + +Tags hard-coded by fixtures/migrations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the future there may be system-defined taxonomies that are not dynamics at +all, where the list of tags are defined by ``Tag`` instances created by a +fixture or migration. However, as of now we don't have a use case for that. diff --git a/docs/decisions/0013-system-taxonomy-auto-tagging.rst b/docs/decisions/0013-system-taxonomy-auto-tagging.rst new file mode 100644 index 000000000..ef506aaf2 --- /dev/null +++ b/docs/decisions/0013-system-taxonomy-auto-tagging.rst @@ -0,0 +1,51 @@ +13. System-defined automatic tagging +===================================== + +Context +-------- + +One main characteristic of system-defined taxonomies is automatic content tagging. +It is necessary to implement this functionality when the associated content is created or edited. + +Decision +--------- + +Events +~~~~~~~~ + +It is necessary to create `events`_ for each content for creation/edition: ``CourseCreation``, ``LibraryCreation``, etc. +This events will live in `openedx-events`_. + +Receivers +~~~~~~~~~~ + +Auto-tagging receivers will live under ``openedx.features.tagging``, +registered as `a receiver`_ with the respective code. + +Rejected Options +----------------- + +Django Signals +~~~~~~~~~~~~~~ + +Implement a function to add the tag from the content metadata and register that function +as a Django signal. This works for Django database models, but some of the content lives in Mongo, +outside of the Django models. Also, using openedx-filters is better in the edx context, but if there is +other no-edX project that need to use ``openedx-tagging``, can use the Django Signals approach. + + +openedx-filters +~~~~~~~~~~~~~~~ +Use `openedx-filters`_ to create a `filter`_ which calls `the auto tagging pipeline`_ after content +creation/editing. Although this approach works, there are more suitable options. Filters are +used to act on the input data and provide means to block the flow. This is not necessary in the +auto tagging context. The `hooks documentation`_ suggests the use of `events`_ hooks to expand functionality. + + +.. _openedx-events: https://github.com/openedx/openedx-events +.. _openedx-filters: https://github.com/openedx/openedx-filters +.. _filter: https://github.com/openedx/openedx-filters/blob/a4a192e1cac0b70bed31e0db8e4c4b058848c5c4/openedx_filters/learning/filters.py +.. _the auto tagging pipeline: https://github.com/openedx/edx-platform/blob/40613ae3f47eb470aff87359a952ed7e79ad8555/docs/guides/hooks/filters.rst#implement-pipeline-steps +.. _hooks documentation: https://github.com/openedx/edx-platform/blob/master/docs/guides/hooks/index.rst +.. _events: https://github.com/openedx/edx-platform/blob/master/docs/guides/hooks/events.rst +.. _a receiver: https://github.com/openedx/edx-platform/blob/master/docs/guides/hooks/events.rst#receiving-events diff --git a/docs/decisions/0014-single-taxonomy-view-api.rst b/docs/decisions/0014-single-taxonomy-view-api.rst new file mode 100644 index 000000000..cfdf24c98 --- /dev/null +++ b/docs/decisions/0014-single-taxonomy-view-api.rst @@ -0,0 +1,159 @@ +14. Single taxonomy view API +===================================== + +Context +-------- + +This view returns tags of a taxonomy (works with closed, open, and system-defined). It is necessary to make a decision about what structure the tags are going to have, how the pagination is going to work and how will the search for tags be implemented. It was taken into account that taxonomies commonly have the following characteristics: + +- It has few root tags. +- It may a very large number of children for each tag. +- It is mostly represented as trees on frontend, with a depth of up to 3 levels. + +For the decisions, the following use cases were taken into account: + +- As a taxonomy administrator, I want to see all the tags available for use with a closed taxonomy, + so that I can see the taxonomy's structure in the management interface. + + - As a taxonomy administrator, I want to see the available tags as a list of root tags + that can be expanded to show children tags. + - As a taxonomy administrator, I want to sort the list of root tags alphabetically: A-Z (default) and Z-A. + - As a taxonomy administrator, I want to expand all root tags to see all children tags. + - As a taxonomy administrator, I want to search for tags, so I can find root and children tags more easily. +- As a course author, when I am editing the tags of a component, I want to see all the tags available + from a particular taxonomy that I can use. + + - As a course author, I want to see the available tags as a list of root tags + that can be expanded to show children tags. + - As a course author, I want to search for tags, so I can find root and children tags more easily. + +Excluded use cases: + +- As a content author, when searching/filtering a course/library, I want to see which tags are applied to the content + and use them to refine my search. - This is excluded from this API's use case because this is automatically handled + by elasticsearch/opensearch. + + +Decision +--------- + +Views & Pagination +~~~~~~~~~~~~~~~~~~~ + +We will have one REST API endpoint that can handle these use cases: + +**/tagging/rest_api/v1/taxonomies/:id/tags/?parent_tag=...** + +that can handle this cases: + +- Get the root tags of the taxonomy. If ``parent_tag`` is omitted. +- Get the children of a tag. Called each time the user expands a parent tag to see its children. + In this case, ``parent_tag`` is set to the value of the parent tag. + +In both cases the results are paginated. In addition to the common pagination metadata, it is necessary to return: + +- Total number of pages. +- Total number of tags in the result. +- Range index of current page, Ex. Page 1: 1-12, Page 2: 13-24. +- Total number of children of each tag. + +The pagination of root tags and child tags are independent. + +**Optional full-depth response** + +In order to be able to fulfill the functionality of "Expand-all" in a scalable way, and allow users to quickly browse taxonomies that have lots of small branches, the API will accept an optional parameter ``?full_depth_threshold``. If specified (e.g. ``?full_depth_threshold=1000``) and there are fewer results than this threshold, the full tree of tags will be returned a a single giant page, including all tags up to three levels deep. + +**Pros** + +- This approach is simple and flexible. +- Paging both root tags and children mitigates the huge number of tags that can be found in large taxonomies. + +Search tags +~~~~~~~~~~~~ + +The same API endpoint will support an optional ``?search_term=...`` parameter to support searching/filtering tags by keyword. + +The API endpoint will work exactly as described above (returning a single level of tags by default, paginated, optionally listing only the tags below a specific parent tag, optionally returning a deep tree if the results are small enough) - BUT only tags that match the keyword OR their ancestor tags will be returned. We return their ancestor tags (even if the ancestors themselves don't match the keyword) so that the tree of tags that do match can be displayed correctly. This also allows the UI to load the matching tags one layer at a time, paginated, if desired. + +Tag representation +~~~~~~~~~~~~~~~~~~~ + +Return a list of root tags and within a link to obtain the children tags or the complete list of children tags depending on the requested ``?full_depth_threshold`` and the number of results. +The list of tags will be ordered in tree order (and alphabetically). If it has child tags, they must also be ordered alphabetically. + +**Example**:: + + { + "count": 100, + "tags": [ + { + "value": "Tag 1", + "depth": 0, + "external_id": None, + "child_count": 15, + "parent_value": None, + "sub_tags_url": "http//api-call-to-get-children.com" + }, + (....) + ] + } + + +**Pros:** + +- The edX's interfaces show the tags in the form of a tree. +- The frontend needs no further processing as it is in a displayable format. +- It is kept as a simple implementation. + + +Backend Python API +~~~~~~~~~~~~~~~~~~ + +On the backend, a very flexible API is available as ``Taxonomy.get_filtered_tags()`` which can cover all of the same use cases as the REST API endpoint (listing tags, shallow or deep, filtering on search term). However, the Python API returns a ``QuerySet`` of tag data dictionaries, rather than a JSON response. + + +Rejected Options +----------------- + + +Render as a simple list of tags +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Return a simple list of tags, regardless of whether it is root or leaf. + +**Pros:** + +- It is simple and does not need further implementation and processing in the API. + +**Cons:** + +- It is more work to re-process all that list in the frontend to know who it is whose father. +- In no edX's interface is it used this way and it would be a very specific use case. +- Pagination would be more complicated to perform. + + +Add the children to the root pagination +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Ex. If the ``page_size`` is 100, when fetching the first root tag, which has 10 children tags, +11 tags are counted for the total and there would be reamin 89 tags to be obtained. + +**Cons:** + +- If there is a branch with a number of tags that exceeds ``page_size``, + it would only return that branch. +- All branches are variable in size, therefore a variable number of root tags + would be returned. This would cause interfaces between taxonomies to be inconsistent + in the number of root tags shown. + + +Search on frontend +~~~~~~~~~~~~~~~~~~ + +We constrain the number of tags allowed in a taxonomy for MVP, so that the API +can return all the tags in one page. So we can perform the tag search on the frontend. + +**Cons:** + +- It is not scalable. +- Sets limits of tags that can be created in the taxonomy. diff --git a/docs/decisions/0015-serving-static-assets.rst b/docs/decisions/0015-serving-static-assets.rst new file mode 100644 index 000000000..969b40725 --- /dev/null +++ b/docs/decisions/0015-serving-static-assets.rst @@ -0,0 +1,154 @@ +15. Serving Course Team Authored Static Assets +============================================== + +Context +-------- + +Both Studio and the LMS need to serve course team authored static assets as part of the authoring and learning experiences. "Static assets" in the edx-platform context presently refers to: image files, audio files, text document files like PDFs, older video transcript files, and even JavaScript and Python files. It does NOT typically include video files, which are treated separately because of their large file size and complex workflows (processing for multiple resolutions, using third-party dictation services, etc.) + +This ADR is the synthesis of various ideas that were discussed across a handful of pull requests and issues. These links are provided for extra context, but they are not required to understand this ADR: + +* `File uploads + Experimental Media Server #31 `_ +* `File Uploads + media_server app #33 `_ +* `Modeling Files and File Dependencies #70 `_ +* `Serving static assets (disorganized thoughts) #108 `_ + +Data Storage Implementation +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The underlying data models live in the openedx-learning repo. The most relevant models are: + +* `Content in contents/models.py `_ +* `Component and ComponentVersion in components/models.py `_ + +Key takeaways about how this data is stored: + +* Assets are associated and versioned with Components, where a Component is typically an XBlock. So you don't ask for "version 5 of /static/fig1.webp", you ask for "the /static/fig1.webp associated with version 5 of this block". +* This initial MVP would be to serve assets for v2 content libraries, where all static assets are associated with a particular component XBlock. Later on, we'll want to allow courses to port their existing files and uploads into this system in a backwards compatible way. We will probably do this by creating a non-XBlock, filesystem Component type that can treat the entire course's uploads as a Component. The specifics for how that is modeled on the backend are out of scope for this ADR, but this general approach is meant to work for both use cases. +* The actual raw asset data is stored in django-storages using its hash value as the file name. This makes it cheap to make many references to the same asset data under different names and versions, but it means that we cannot simply give direct links to the raw file data to the browser (see the next section for details). + +The Difficulty with Direct Links to Raw Data Files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Since the raw data is stored as objects in an S3-like store, and the mapping of file names and versions to that raw data is stored in Django models, why not simply have a Django endpoint that redirects a request to the named asset to the hash-named raw data it corresponds to? + +**It will break relative links between assets.** + The raw data files exist in a flat space with hashes for names, meaning that any relative links between assets (e.g. a JavaScript file referencing an image) would break once a browser follows the redirect. + +**Setting Metadata: Content types, filenames, and caching.** + The assets won't generally "work" unless they're served with the correct Content-Type header. For users who want to download a file, it's quite inconvenient if the filename doesn't include the correct file extension (not to mention a friendly name instead of the hash). So we need to set the Content-Type and/or Content-Disposition: ; filename=... headers. + + Setting these values for each request has proved problematic because some (but not all) S3-compatible storage services (including S3 itself) only support setting those headers for each request if you issue a signed GET request, which then gets in the way of caching and introduces the probability of browsers caching expired links, leading to all kinds of annoying cache invalidation issues. + + Setting the filename value at upload time also doesn't work because the same data may be referenced under different filenames by different Components or even different versions of the same Component. + +Application Requirements +~~~~~~~~~~~~~~~~~~~~~~~~ + +**Relative links between assets must be preserved.** + Assets may reference each other in relative links, e.g. a JavaScript file that references images or other JavaScript files. That means that our solution cannot require querystring-based authorization tokens in the style of S3 signed URLs, since asset files would have no way to encode those into their relative links. + +**Multiple versions of the asset should be available at the same time.** + Our system should be able to serve at minimum the current draft and published versions of an asset. Ideally, it should be able to serve any version of an asset. This is a departure from the way Studio and the LMS currently handle files and uploads, since there is currently no versioning at all–assets exist in a flat namespace at the course level and are immediately published. + +Security Requirements +~~~~~~~~~~~~~~~~~~~~~ + +**Assets must enforce user+file read permissions at the Learning Context level.** + The MongoDB GridFS backed ContentStore currently supports course-level access checks that can be toggled on and off for individual assets. Uploaded assets are public by default, and can be downloaded by anyone who knows the URL, regardless of whether or not they are enrolled in the course. They can optionally be "locked", which will restrict downloads to students who are enrolled in the course. + +**Assets should enforce more granular permissions at the individual Component level.** + An important distinction between ContentStore and v2 Content Library assets is that the latter can be directly associated with a Component. As a long term goal, we should be able to make permissions check on per-Component basis. So if a student does not have permission to view a Component for whatever reason (wrong content group, exam hasn't started, etc.), then they should also not have permission to see static assets associated with that component. + + The further implication of this requirement is that *permissions checking must be extensible*. The openedx-learning repo will implement the details of how to serve an asset, but it will not have the necessary models and logic to determine whether it is allowed to. + +**Assets must be served from an entirely different domain than the LMS and Studio instances.** + To reduce our chance of maliciously uploaded JavaScript compromising LMS and Studio users, user-uploaded assets must live on an entirely different domain from LMS and Studio (i.e. not just another subdomain). So if our LMS is located at ``sandbox.openedx.org``, the files should be accessed at a URL like ``assets.sandbox.openedx.io``. + +Operational Requirements +~~~~~~~~~~~~~~~~~~~~~~~~ + +**The asset server must be capable of handling high levels of traffic.** + Django views are poor choice for streaming files at scale, especially when deploying using WSGI (as Open edX does), since it will tie down a worker process for the entire duration of the response. While a Django-based streaming response may sufficient for small-to-medium traffic sites, we should allow for a more scalable solution that fully takes advantage of modern CDN capabilities. + +**Serving assets should not *require* ASGI deployment.** + Deploying the LMS and Studio using ASGI would likely substantially improve the scalability of a Django-based streaming solution, but migrating and testing this new deployment type for the entire stack is a large task and is considered out of scope for this project. + +Decision +-------- + +URLs +~~~~ + +The format will be: ``https://{asset_server}/assets/apps/{app}/{learning_package_key}/{component_key}/{version}/{filepath}`` + +The assets will be served from a completely different domain from the LMS and Studio, and will not be a subdomain. + +A more concrete example: ``https://studio.assets.sandbox.openedx.io/apps/content_libraries/lib:Axim:200/xblock.v1:problem@826eb471-0db2-4943-b343-afa65a6fdeb5/v2/static/images/fig1.png`` + +The ``version`` can be: + +* ``draft`` indicating the latest draft version (viewed by authors in Studio). +* ``published`` indicating the latest published version (viewed by students in the LMS) +* ``v{num}`` meaning a specific version–e.g. ``v20`` for version 20. + +Asset Server Implementation +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There will be two asset server URLs–one corresponding to the LMS and one corresponding to Studio, each with their own subdomain. An example set of domains might be: + +* LMS: ``sandbox.openedx.org`` +* Studio: ``studio.sandbox.openedx.org`` +* LMS Assets: ``lms.assets.sandbox.openedx.io`` (note the ``.io`` top level domain) +* Studio Assets: ``studio.assets.sandbox.openedx.io`` + +The asset serving domains will be serviced by a Caddy instance that is configured as a reverse proxy to the LMS or Studio. Caddy will be configured to only proxy a specific set of paths that correspond to valid asset URLs. + +Django View Implemenation +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The LMS and Studio will each have one or two apps that implement view endpoints by extending a view that will be provided by the Learning Core. These views will only respond to requests that come via the asset domains (i.e. they will not work if you request the same paths using the LMS or Studio domains). + +Django is poorly suited to serving large static assets, particularly when deployed using WSGI. Instead of streaming the actual file data, the Django views serving assets will make use of the ``X-Accel-Redirect`` header. This header is supported by both Caddy and Nginx, and will cause them to fetch the data from the specified URI to send to the user. This redirect happens internally in the proxy and does *not* change the browser address. For sites using an object store like S3, the Django view will generate and send a signed URL to the asset. For sites using file-based Django media storage, the view will send a URL that Caddy or Nginx knows how to load from the file system. + +The Django view will also be responsible for setting other important header information, such as size, content type, and caching information. + +Permissions +~~~~~~~~~~~ + +The Learning Core provided view will contain the logic for looking up and serving assets, but it will be the responsibility of an app in Studio or the LMS to extend it with permissions checking logic. This logic may vary from app to app. For instance, Studio would likely implement a simple permissions checking model that only examines the learning context and restricts access to course staff. LMS might eventually use a much more sophisticated model that looks at the individual Component that an asset belongs to. + +Cookie Authentication +~~~~~~~~~~~~~~~~~~~~~ + +Authentication will use a session cookie for each asset server domain. + +Assets that are publicly readable will not require authentication. + +Asset requests may return a 403 error if the user is logged in but not authorized to download the asset. They will return a 401 error for users that are not authenticated. + +There will be a new endpoint exposed in LMS/Studio that will force a redirect and login to the asset server. Pages that make use of assets will be expected to load that endpoint in their ```` before any page assets are loaded. The flow would go like this: + +#. There is a ``