From 2cc7cbffcd542011d6544547e63cb7faa5a1524c Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Tue, 6 May 2025 13:03:14 -0700 Subject: [PATCH 1/7] Refactor restore-channel utilities --- .../management/commands/restore_channel.py | 61 +- .../tests/test_restore_channel.py | 190 --- .../contentcuration/utils/files.py | 35 +- .../contentcuration/utils/import_tools.py | 1127 +++++++++-------- 4 files changed, 674 insertions(+), 739 deletions(-) delete mode 100644 contentcuration/contentcuration/tests/test_restore_channel.py diff --git a/contentcuration/contentcuration/management/commands/restore_channel.py b/contentcuration/contentcuration/management/commands/restore_channel.py index 6133ec3806..49b785f725 100644 --- a/contentcuration/contentcuration/management/commands/restore_channel.py +++ b/contentcuration/contentcuration/management/commands/restore_channel.py @@ -2,27 +2,66 @@ from django.core.management.base import BaseCommand -from contentcuration.utils.import_tools import import_channel +from contentcuration.utils.import_tools import ImportManager logger = logging.getLogger("command") class Command(BaseCommand): + """ + This command is used to restore a channel from another Studio instance. This is for + development purposes only and should not be used in production. + """ + def add_arguments(self, parser): # ID of channel to read data from parser.add_argument("source_id", type=str) # ID of channel to write data to (can be same as source channel) - parser.add_argument("--target", help="restore channel db to TARGET CHANNEL ID") - parser.add_argument("--download-url", help="where to download db from") - parser.add_argument("--editor", help="add user as editor to channel") + parser.add_argument( + "--target", + help="A different channel ID for which to restore the channel. 
If not provided, the source channel ID will be used.", + ) + parser.add_argument( + "--source-url", + default="http://localhost:8080", + help="Studio instance from which to download the channel DB or content files", + ) + parser.add_argument("--token", help="API token for the Studio instance") + parser.add_argument( + "--editor", + default="a@a.com", + help="Add user as editor to channel with provided email address", + ) + parser.add_argument( + "--download-content", + action="store_true", + default=False, + help="Whether to download content files", + ) + parser.add_argument( + "--public", + action="store_true", + default=False, + help="Whether to make the channel public", + ) + parser.add_argument( + "--publish", + action="store_true", + default=False, + help="Whether to publish the channel after restoration", + ) def handle(self, *args, **options): - # Set up variables for restoration process logger.info("\n\n********** STARTING CHANNEL RESTORATION **********") - source_id = options["source_id"] - target_id = options.get("target") or source_id - download_url = options.get("download_url") - editor = options.get("editor") - - import_channel(source_id, target_id, download_url, editor, logger=logger) + manager = ImportManager( + options["source_url"], + options["source_id"], + target_id=options.get("target"), + editor=options.get("editor"), + public=options.get("public"), + publish=options.get("publish"), + token=options.get("token"), + download_content=options.get("download_content"), + ) + manager.run() diff --git a/contentcuration/contentcuration/tests/test_restore_channel.py b/contentcuration/contentcuration/tests/test_restore_channel.py deleted file mode 100644 index 6c5e1500ff..0000000000 --- a/contentcuration/contentcuration/tests/test_restore_channel.py +++ /dev/null @@ -1,190 +0,0 @@ -# -*- coding: utf-8 -*- -import datetime -import json -import uuid -from io import BytesIO - -from django.core.files.storage import default_storage -from django.template.loader import render_to_string -from django.utils.translation import activate -from django.utils.translation import deactivate -from le_utils.constants import exercises -from mixer.backend.django import mixer -from mock import MagicMock -from mock import patch - -from .base import StudioTestCase -from contentcuration.models import AssessmentItem -from contentcuration.models import generate_object_storage_name -from contentcuration.utils.import_tools import create_channel -from contentcuration.utils.import_tools import generate_assessment_item -from contentcuration.utils.import_tools import process_content - - -thumbnail_path = "/content/thumbnail.png" -ASSESSMENT_DATA = { - "input-question-test": { - "template": "perseus/input_question.json", - "type": exercises.INPUT_QUESTION, - "question": "Input question", - "question_images": [{"name": "test.jpg", "width": 12.71, "height": 12.12}], - "hints": [{"hint": "Hint 1"}], - "answers": [ - {"answer": "1", "correct": True, "images": []}, - {"answer": "2", "correct": True, "images": []}, - ], - "order": 0, - }, - "multiple-selection-test": { - "template": "perseus/multiple_selection.json", - "type": exercises.MULTIPLE_SELECTION, - "question": "Multiple selection question", - "question_images": [], - "hints": [], - "answers": [ - {"answer": "A", "correct": True, "images": []}, - {"answer": "B", "correct": True, "images": []}, - {"answer": "C", "correct": False, "images": []}, - ], - "multiple_select": True, - "order": 1, - "randomize": False, - }, - "single-selection-test": { - "template": 
"perseus/multiple_selection.json", - "type": exercises.SINGLE_SELECTION, - "question": "Single select question", - "question_images": [], - "hints": [{"hint": "Hint test"}], - "answers": [ - {"answer": "Correct answer", "correct": True, "images": []}, - {"answer": "Incorrect answer", "correct": False, "images": []}, - ], - "multiple_select": False, - "order": 2, - "randomize": True, - }, - "perseus-question-test": { - "template": "perseus/perseus_question.json", - "type": exercises.PERSEUS_QUESTION, - "order": 3, - "raw_data": "{}", - }, -} - - -class ChannelRestoreUtilityFunctionTestCase(StudioTestCase): - @patch( - "contentcuration.utils.import_tools.write_to_thumbnail_file", - return_value=thumbnail_path, - ) - def setUp(self, thumb_mock): - self.id = uuid.uuid4().hex - self.name = "test name" - self.description = "test description" - self.thumbnail_encoding = "base64 string" - self.root_pk = uuid.uuid4() - self.version = 7 - self.last_updated = datetime.datetime.now() - self.cursor_mock = MagicMock() - self.cursor_mock.execute.return_value.fetchone.return_value = ( - self.id, - self.name, - self.description, - self.thumbnail_encoding, - self.root_pk, - self.version, - self.last_updated, - ) - self.channel, _ = create_channel(self.cursor_mock, self.id, self.admin_user) - - def test_restore_channel_id(self): - self.assertEqual(self.channel.id, self.id) - - def test_restore_channel_name(self): - self.assertEqual(self.channel.name, self.name) - - def test_restore_channel_description(self): - self.assertEqual(self.channel.description, self.description) - - def test_restore_channel_thumbnail(self): - self.assertEqual(self.channel.thumbnail, thumbnail_path) - - def test_restore_channel_thumbnail_encoding(self): - self.assertEqual( - self.channel.thumbnail_encoding["base64"], self.thumbnail_encoding - ) - - def test_restore_channel_version(self): - self.assertEqual(self.channel.version, self.version) - - -class PerseusRestoreTestCase(StudioTestCase): - def setUp(self): - super(PerseusRestoreTestCase, self).setUp() - image_path = generate_object_storage_name("test", "test.png") - default_storage.save(image_path, BytesIO(b"test")) - - def test_process_content(self): - tests = [ - {"content": "test 1", "output": "test 1", "images": {}}, - { - "content": "test 2 ![test](${☣ LOCALPATH}/images/test.png)", - "output": "test 2 ![test](${☣ CONTENTSTORAGE}/test.png)", - "images": {}, - }, - { - "content": "test 3 ![](${☣ LOCALPATH}/images/test.png)", - "output": "test 3 ![](${☣ CONTENTSTORAGE}/test.png =50x50)", - "images": { - "${☣ LOCALPATH}/images/test.png": {"width": 50, "height": 50} - }, - }, - { - "content": "test 4 ![](${☣ LOCALPATH}/images/test.png) ![](${☣ LOCALPATH}/images/test.png)", - "output": "test 4 ![](${☣ CONTENTSTORAGE}/test.png) ![](${☣ CONTENTSTORAGE}/test.png)", - "images": {}, - }, - { - "content": "test 5  $\\sqrt{36}+\\frac{1}{2}$ ", - "output": "test 5 $$\\sqrt{36}+\\frac{1}{2}$$", - "images": {}, - }, - { - "content": "test 6 $\\frac{1}{2}$ $\\frac{3}{2}$", - "output": "test 6 $$\\frac{1}{2}$$ $$\\frac{3}{2}$$", - "images": {}, - }, - ] - for test in tests: - result = process_content(test, mixer.blend(AssessmentItem)) - self.assertEqual(result, test["output"]) - - def test_generate_assessment_item(self): - # Run in Spanish to ensure we are properly creating JSON with non-localized numbers - activate("es-es") - for assessment_id, data in list(ASSESSMENT_DATA.items()): - assessment_data = json.loads( - render_to_string(data["template"], data).encode("utf-8", "ignore") - ) - 
assessment_item = generate_assessment_item( - assessment_id, data["order"], data["type"], assessment_data - ) - self.assertEqual(assessment_item.type, data["type"]) - self.assertEqual(assessment_item.question, data.get("question", "")) - self.assertEqual(assessment_item.randomize, bool(data.get("randomize"))) - self.assertEqual(assessment_item.raw_data, data.get("raw_data", "")) - for hint in json.loads(assessment_item.hints): - self.assertTrue( - any(h for h in data["hints"] if h["hint"] == hint["hint"]) - ) - for answer in json.loads(assessment_item.answers): - self.assertTrue( - any( - a - for a in data["answers"] - if a["answer"] == str(answer["answer"]) - and a["correct"] == answer["correct"] - ) - ) - deactivate() diff --git a/contentcuration/contentcuration/utils/files.py b/contentcuration/contentcuration/utils/files.py index 0cb447a601..18f21dd702 100644 --- a/contentcuration/contentcuration/utils/files.py +++ b/contentcuration/contentcuration/utils/files.py @@ -85,12 +85,13 @@ def duplicate_file( return file_copy -def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH): +def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH, input_buffer=None): """ Generates a base64 encoding for a thumbnail Args: filename (str): thumbnail to generate encoding from (must be in storage already) dimension (int, optional): desired width of thumbnail. Defaults to 400. + input_buffer (BytesIO, optional): buffer to read from. Defaults to None. Returns base64 encoding of resized thumbnail """ @@ -103,23 +104,23 @@ def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH): # make sure the aspect ratio between width and height is 16:9 thumbnail_size = [dimension, round(dimension / 1.77)] try: - if not filename.startswith(settings.STATIC_ROOT): - filename = generate_object_storage_name(checksum, filename) - inbuffer = default_storage.open(filename, "rb") - - else: - # Normalize the path and ensure it is indeed within STATIC_ROOT - normalized_path = os.path.normpath(filename) - static_root = os.path.abspath(settings.STATIC_ROOT) - abs_path = os.path.abspath(normalized_path) - if not abs_path.startswith(static_root + os.sep): - raise ValueError("Attempted access to file outside of STATIC_ROOT") - inbuffer = open(abs_path, "rb") - - if not inbuffer: + if not input_buffer: + if not filename.startswith(settings.STATIC_ROOT): + filename = generate_object_storage_name(checksum, filename) + input_buffer = default_storage.open(filename, "rb") + else: + # Normalize the path and ensure it is indeed within STATIC_ROOT + normalized_path = os.path.normpath(filename) + static_root = os.path.abspath(settings.STATIC_ROOT) + abs_path = os.path.abspath(normalized_path) + if not abs_path.startswith(static_root + os.sep): + raise ValueError("Attempted access to file outside of STATIC_ROOT") + input_buffer = open(filename, "rb") + + if not input_buffer: raise AssertionError - with Image.open(inbuffer) as image: + with Image.open(input_buffer) as image: image_format = image.format # Note: Image.thumbnail ensures that the image will fit in the @@ -136,7 +137,7 @@ def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH): finally: # Try to close the inbuffer if it has been created try: - inbuffer.close() + input_buffer.close() except UnboundLocalError: pass outbuffer.close() diff --git a/contentcuration/contentcuration/utils/import_tools.py b/contentcuration/contentcuration/utils/import_tools.py index 0a187ce4c9..8f094ed7eb 100644 --- a/contentcuration/contentcuration/utils/import_tools.py +++ 
b/contentcuration/contentcuration/utils/import_tools.py @@ -1,30 +1,37 @@ # -*- coding: utf-8 -*- import datetime +import hashlib import json import logging import os import re -import shutil import sqlite3 import sys import tempfile -import zipfile +from functools import cached_property from io import BytesIO import requests -from django.conf import settings from django.core.files.storage import default_storage +from django.core.management import call_command from django.db import transaction +from kolibri_content.router import get_active_content_database +from kolibri_content.router import using_content_database +from le_utils.constants import completion_criteria from le_utils.constants import content_kinds from le_utils.constants import exercises from le_utils.constants import format_presets +from le_utils.constants import mastery_criteria from le_utils.constants import roles +from le_utils.constants.labels import learning_activities from contentcuration import models -from contentcuration.api import write_raw_content_to_storage from contentcuration.utils.files import create_file_from_contents +from contentcuration.utils.files import get_thumbnail_encoding from contentcuration.utils.files import write_base64_to_file from contentcuration.utils.garbage_collect import get_deleted_chefs_root +from contentcuration.utils.publish import publish_channel +from contentcuration.viewsets.assessmentitem import exercise_image_filename_regex CHANNEL_TABLE = "content_channelmetadata" @@ -47,126 +54,33 @@ log = logging.getLogger(__name__) -def import_channel( - source_id, target_id=None, download_url=None, editor=None, logger=None -): - """ - Import a channel from another Studio instance. This can be used to - copy online Studio channels into local machines for development, - testing, faster editing, or other purposes. - - :param source_id: The UUID of the channel to import from the source Studio instance. - :param target_id: The UUID of the channel on the local instance. Defaults to source_id. - :param download_url: The URL of the Studio instance to import from. - :param editor: The email address of the user you wish to add as an editor, if any. 
+class ImportClient(requests.Session): + def __init__(self, base_url, api_token=None): + super(ImportClient, self).__init__() + self.base_url = base_url + self.api_token = api_token - """ + def __getattr__(self, name): + if name.endswith("_with_token"): + if not self.api_token: + raise ValueError("API token is required for this method.") - global log - if logger: - log = logger - else: - log = logging.getLogger(__name__) - - # Set up variables for the import process - log.info("\n\n********** STARTING CHANNEL IMPORT **********") - start = datetime.datetime.now() - target_id = target_id or source_id - - # Test connection to database - log.info("Connecting to database for channel {}...".format(source_id)) - - tempf = tempfile.NamedTemporaryFile(suffix=".sqlite3", delete=False) - conn = None - try: - if download_url: - response = requests.get( - "{}/content/databases/{}.sqlite3".format(download_url, source_id) + target_method = getattr( + super(ImportClient, self), name.replace("_with_token", "") ) - for chunk in response: - tempf.write(chunk) - else: - filepath = "/".join([settings.DB_ROOT, "{}.sqlite3".format(source_id)]) - # Check if database exists - if not default_storage.exists(filepath): - raise IOError("The object requested does not exist.") - with default_storage.open(filepath) as fobj: - shutil.copyfileobj(fobj, tempf) - - tempf.close() - conn = sqlite3.connect(tempf.name) - cursor = conn.cursor() - - # Start by creating channel - log.info("Creating channel...") - editor = models.User.objects.get(email=editor) - channel, root_pk = create_channel(conn, target_id, editor) - channel.editors.add(editor) - channel.save() - - # Create root node - root = models.ContentNode.objects.create( - node_id=root_pk, - title=channel.name, - kind_id=content_kinds.TOPIC, - original_channel_id=target_id, - source_channel_id=target_id, - ) - - # Create nodes mapping to channel - log.info(" Creating nodes...") - with transaction.atomic(): - create_nodes(cursor, target_id, root, download_url=download_url) - # TODO: Handle prerequisites - - # Delete the previous tree if it exists - old_previous = channel.previous_tree - if old_previous: - old_previous.parent = get_deleted_chefs_root() - old_previous.title = "Old previous tree for channel {}".format(channel.pk) - old_previous.save() - - # Save tree to target tree - channel.previous_tree = channel.main_tree - channel.main_tree = root - channel.save() - finally: - conn and conn.close() - tempf.close() - os.unlink(tempf.name) - - # Print stats - log.info( - "\n\nChannel has been imported (time: {ms})\n".format( - ms=datetime.datetime.now() - start + token_headers = { + "Authorization": f"Token {self.api_token}", + } + return lambda url, *args, **kwargs: target_method( + url, *args, headers=token_headers, **kwargs + ) + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute '{name}'" ) - ) - log.info("\n\n********** IMPORT COMPLETE **********\n\n") - -def create_channel(cursor, target_id, editor): - """create_channel: Create channel at target id - Args: - cursor (sqlite3.Connection): connection to export database - target_id (str): channel_id to write to - Returns: channel model created and id of root node - """ - id, name, description, thumbnail, root_pk, version, last_updated = cursor.execute( - "SELECT id, name, description, thumbnail, root_pk, version, last_updated FROM {table}".format( - table=CHANNEL_TABLE - ) - ).fetchone() - channel, is_new = models.Channel.objects.get_or_create( - pk=target_id, actor_id=editor.id - ) - 
channel.name = name - channel.description = description - channel.thumbnail = write_to_thumbnail_file(thumbnail) - channel.thumbnail_encoding = {"base64": thumbnail, "points": [], "zoom": 0} - channel.version = version - channel.save() - log.info("\tCreated channel {} with name {}".format(target_id, name)) - return channel, root_pk + def request(self, method, url, *args, **kwargs): + url = f"{self.base_url}{url}" + return super(ImportClient, self).request(method, url, *args, **kwargs) def write_to_thumbnail_file(raw_thumbnail): @@ -195,446 +109,617 @@ def write_to_thumbnail_file(raw_thumbnail): os.unlink(tempf.name) -def create_nodes(cursor, target_id, parent, indent=1, download_url=None): - """create_channel: Create channel at target id - Args: - cursor (sqlite3.Connection): connection to export database - target_id (str): channel_id to write to - parent (models.ContentNode): node's parent - indent (int): How far to indent print statements - Returns: newly created node +def convert_metadata_to_dict(metadata): """ - # Read database rows that match parent - parent_query = "parent_id='{}'".format(parent.node_id) - - sql_command = ( - "SELECT id, title, content_id, description, sort_order, " - "license_owner, author, license_id, kind, coach_content, lang_id FROM {table} WHERE {query} ORDER BY sort_order;".format( - table=NODE_TABLE, query=parent_query - ) - ) - query = cursor.execute(sql_command).fetchall() - - # Parse through rows and create models - for ( - id, - title, - content_id, - description, - sort_order, - license_owner, - author, - license_id, - kind, - coach_content, - lang_id, - ) in query: - log.info( - "{indent} {id} ({title} - {kind})...".format( - indent=" |" * indent, id=id, title=title, kind=kind - ) - ) - - # Determine role - role = roles.LEARNER - if coach_content: - role = roles.COACH + Convert metadata from a string to a dictionary. - # Determine extra_fields - assessment_query = "SELECT mastery_model, randomize FROM {table} WHERE contentnode_id='{node}'".format( - table=ASSESSMENTMETADATA_TABLE, node=id - ) - result = cursor.execute(assessment_query).fetchone() - extra_fields = result[0] if result else {} - if isinstance(extra_fields, str): - extra_fields = json.loads(extra_fields) - if result: - extra_fields.update({"randomize": result[1]}) - - # Determine license - license = retrieve_license(cursor, license_id) - license_description = license[1] if license else "" - license = license[0] if license else None - - # TODO: Determine thumbnail encoding - - # Create new node model - node = models.ContentNode.objects.create( - node_id=id, - original_source_node_id=id, - source_node_id=id, - title=title, - content_id=content_id, - description=description, - sort_order=sort_order, - copyright_holder=license_owner, - author=author, - license=license, - license_description=license_description, - language_id=lang_id, - role_visibility=role, - extra_fields=extra_fields, - kind_id=kind, - parent=parent, - original_channel_id=target_id, - source_channel_id=target_id, - ) - - # Handle foreign key references (children, files, tags) - if kind == content_kinds.TOPIC: - create_nodes( - cursor, target_id, node, indent=indent + 1, download_url=download_url - ) - elif kind == content_kinds.EXERCISE: - create_assessment_items( - cursor, node, indent=indent + 1, download_url=download_url - ) - create_files(cursor, node, indent=indent + 1, download_url=download_url) - create_tags(cursor, node, target_id, indent=indent + 1) + :param metadata: The metadata string to convert. 
+ :return: A dictionary representation of the metadata. + """ + if isinstance(metadata, str): + metadata_split = metadata.split(",") + return {metadata_key: True for metadata_key in metadata_split} + return metadata - return node +def convert_learning_activities_to_dict(content_kind, metadata): + """ + Convert learning activities from a string to a dictionary. -def retrieve_license(cursor, license_id): - """retrieve_license_name: Get license based on id from exported db - Args: - cursor (sqlite3.Connection): connection to export database - license_id (str): id of license on exported db - Returns: license model matching the name and the associated license description + :param content_kind: The content kind of the learning activities. + :param metadata: The learning activities string to convert. + :return: A dictionary representation of the learning activities. """ - # Handle no license being assigned - if license_id is None or license_id == "": + metadata = convert_metadata_to_dict(metadata) + if isinstance(metadata, dict): + return metadata + + if content_kind == content_kinds.EXERCISE: + return {learning_activities.PRACTICE: True} + elif content_kind in [content_kinds.HTML5, content_kinds.H5P]: + return {learning_activities.EXPLORE: True} + elif content_kind == content_kinds.AUDIO: + return {learning_activities.LISTEN: True} + elif content_kind == content_kinds.VIDEO: + return {learning_activities.WATCH: True} + elif content_kind == content_kinds.DOCUMENT: + return {learning_activities.READ: True} + elif content_kind == content_kinds.SLIDESHOW: + return {learning_activities.READ: True} + elif content_kind == content_kinds.TOPIC: return None + return {learning_activities.EXPLORE: True} - # Return license that matches name - name, description = cursor.execute( - "SELECT license_name, license_description FROM {table} WHERE id={id}".format( - table=LICENSE_TABLE, id=license_id - ) - ).fetchone() - return models.License.objects.get(license_name=name), description - - -def download_file( - filename, - download_url=None, - contentnode=None, - assessment_item=None, - preset=None, - file_size=None, - lang_id=None, -): - checksum, extension = os.path.splitext(filename) - extension = extension.lstrip(".") - filepath = models.generate_object_storage_name(checksum, filename) - - # Download file if it hasn't already been downloaded - if download_url and not default_storage.exists(filepath): - buffer = BytesIO() - response = requests.get( - "{}/content/storage/{}/{}/{}".format( - download_url, filename[0], filename[1], filename - ) - ) - for chunk in response: - buffer.write(chunk) - checksum, _, filepath = write_raw_content_to_storage( - buffer.getvalue(), ext=extension - ) - buffer.close() - - # Save values to new file object - file_obj = models.File( - file_format_id=extension, - file_size=file_size or default_storage.size(filepath), - contentnode=contentnode, - assessment_item=assessment_item, - language_id=lang_id, - preset_id=preset or "", - ) - file_obj.file_on_disk.name = filepath - file_obj.save() - - -def create_files(cursor, contentnode, indent=0, download_url=None): - """create_files: Get license - Args: - cursor (sqlite3.Connection): connection to export database - contentnode (models.ContentNode): node file references - indent (int): How far to indent print statements - Returns: None +class ImportManager(object): + """ + Import a channel from another Studio instance. 
This can be used to copy online Studio channels + into local machines for development, testing, faster editing, or other purposes. """ - # Parse database for files referencing content node and make file models - sql_command = ( - "SELECT checksum, extension, file_size, contentnode_id, " - "lang_id, preset FROM {table} WHERE contentnode_id='{id}';".format( - table=FILE_TABLE, id=contentnode.node_id - ) - ) - - query = cursor.execute(sql_command).fetchall() - for checksum, extension, file_size, contentnode_id, lang_id, preset in query: - filename = "{}.{}".format(checksum, extension) - log.info( - "{indent} * FILE {filename}...".format( - indent=" |" * indent, filename=filename - ) - ) - try: - download_file( - filename, - download_url=download_url, - contentnode=contentnode, - preset=preset, - file_size=file_size, - lang_id=lang_id, + def __init__( + self, + source_url, + source_id, + target_id=None, + editor=None, + public=False, + publish=False, + token=None, + download_content=True, + logger=None, + ): + self.source_id = source_id + self.target_id = target_id or source_id + self.source_url = source_url + self.editor = editor + self.public = public + self.publish = publish + self.token = token + self.download_content = download_content + self.logger = logger or logging.getLogger(__name__) + self.client = ImportClient(source_url, api_token=token) + self.conn = None + self.cursor = None + self.schema_version = None + + @cached_property + def editor_user(self): + """ + Get the User object for the editor email address. + + :return: The User object for the editor. + """ + return models.User.objects.get(email=self.editor) if self.editor else None + + def run(self): + """ + Run the import process. + """ + # Set up variables for the import process + self.logger.info("\n\n********** STARTING CHANNEL IMPORT **********") + start = datetime.datetime.now() + + if not self.token: + self.logger.warning( + "No API token provided. This may result in limited functionality." 
) - except IOError as e: - log.warning("\b FAILED (check logs for more details)") - sys.stderr.write( - "Restoration Process Error: Failed to save file object {}: {}".format( - filename, os.strerror(e.errno) - ) - ) - continue + # Test connection to the database + self.logger.info(f"Connecting to database for channel {self.source_id}...") + tempf = tempfile.NamedTemporaryFile(suffix=".sqlite3", delete=False) + try: + response = self.client.get(f"/content/databases/{self.source_id}.sqlite3") + for chunk in response: + tempf.write(chunk) -def create_tags(cursor, contentnode, target_id, indent=0): - """create_tags: Create tags associated with node - Args: - cursor (sqlite3.Connection): connection to export database - contentnode (models.ContentNode): node file references - target_id (str): channel_id to write to - indent (int): How far to indent print statements - Returns: None - """ - # Parse database for files referencing content node and make file models - sql_command = ( - "SELECT ct.id, ct.tag_name FROM {cnttable} cnt " - "JOIN {cttable} ct ON cnt.contenttag_id = ct.id " - "WHERE cnt.contentnode_id='{id}';".format( - cnttable=NODE_TAG_TABLE, - cttable=TAG_TABLE, - id=contentnode.node_id, - ) - ) - query = cursor.execute(sql_command).fetchall() - - # Build up list of tags - tag_list = [] - for id, tag_name in query: - log.info( - "{indent} ** TAG {tag}...".format(indent=" |" * indent, tag=tag_name) - ) - # Save values to new or existing tag object - tag_obj, is_new = models.ContentTag.objects.get_or_create( - pk=id, - tag_name=tag_name, - channel_id=target_id, - ) - tag_list.append(tag_obj) + tempf.close() - # Save tags to node - contentnode.tags.set(tag_list) - contentnode.save() + with using_content_database(tempf.name): + call_command( + "migrate", + "content", + database=get_active_content_database(), + no_input=True, + ) + self.conn = sqlite3.connect(tempf.name) + self.cursor = self.conn.cursor() + + # Start by creating the channel + self.logger.info("Creating channel...") + channel, root_pk = self._create_channel() + channel.editors.add(self.editor_user) + channel.save() + + # Create the root node + root = models.ContentNode.objects.create( + node_id=root_pk, + title=channel.name, + kind_id=content_kinds.TOPIC, + original_channel_id=self.target_id, + source_channel_id=self.target_id, + complete=True, + ) -def create_assessment_items(cursor, contentnode, indent=0, download_url=None): - """create_assessment_items: Generate assessment items based on perseus zip - Args: - cursor (sqlite3.Connection): connection to export database - contentnode (models.ContentNode): node assessment items reference - indent (int): How far to indent print statements - download_url (str): Domain to download files from - Returns: None - """ + # Create nodes mapping to channel + self.logger.info(" Creating nodes...") + with transaction.atomic(): + self._create_nodes(root) + # TODO: Handle prerequisites + + # Delete the previous tree if it exists + old_previous = channel.previous_tree + if old_previous: + old_previous.parent = get_deleted_chefs_root() + old_previous.title = f"Old previous tree for channel {channel.pk}" + old_previous.save() + + # Save the new tree to the target tree, and preserve the old one + channel.previous_tree = channel.main_tree + channel.main_tree = root + channel.save() + finally: + self.conn and self.conn.close() + tempf.close() + os.unlink(tempf.name) + + # Publish the channel if requested + if self.publish: + self.logger.info("Publishing channel...") + 
publish_channel(self.editor_user.id, channel.id) - # Parse database for files referencing content node and make file models - sql_command = ( - "SELECT checksum, extension " - "preset FROM {table} WHERE contentnode_id='{id}' AND preset='exercise';".format( - table=FILE_TABLE, id=contentnode.node_id + # Print stats + self.logger.info( + f"\n\nChannel has been imported (time: {datetime.datetime.now() - start})\n" ) - ) - - query = cursor.execute(sql_command).fetchall() - for checksum, extension in query: - filename = "{}.{}".format(checksum, extension) - log.info( - "{indent} * EXERCISE {filename}...".format( - indent=" |" * indent, filename=filename - ) + self.logger.info("\n\n********** IMPORT COMPLETE **********\n\n") + + def _create_channel(self): + """ + Create the channel at target id + """ + ( + id, + name, + description, + thumbnail, + root_pk, + version, + last_updated, + schema_version, + ) = self.cursor.execute( + f""" + SELECT + id, name, description, thumbnail, root_pk, version, last_updated, + min_schema_version + FROM {CHANNEL_TABLE} + """ + ).fetchone() + lang_id, _ = self.cursor.execute( + f""" + SELECT lang_id, COUNT(id) AS node_by_lang_count + FROM {NODE_TABLE} + ORDER BY node_by_lang_count DESC + """ + ).fetchone() + channel, is_new = models.Channel.objects.get_or_create( + pk=self.target_id, actor_id=self.editor_user.id ) - - try: - # Store the downloaded zip into temporary storage - tempf = tempfile.NamedTemporaryFile( - suffix=".{}".format(extension), delete=False - ) - response = requests.get( - "{}/content/storage/{}/{}/{}".format( - download_url, filename[0], filename[1], filename + channel.name = name + channel.description = description + channel.language_id = lang_id + channel.thumbnail = write_to_thumbnail_file(thumbnail) + channel.thumbnail_encoding = {"base64": thumbnail, "points": [], "zoom": 0} + channel.version = version + channel.public = self.public + channel.save() + self.logger.info(f"\tCreated channel {self.target_id} with name {name}") + return channel, root_pk + + def _create_nodes(self, parent, indent=1): + """ + Create node(s) for a channel with target id + + :param parent: node's parent + :param indent: How far to indent print statements + """ + sql_command = f""" + SELECT + id, title, content_id, description, sort_order, license_owner, author, license_id, + kind, coach_content, lang_id, grade_levels, resource_types, learning_activities, + accessibility_labels, categories, learner_needs, duration, options + FROM {NODE_TABLE} + WHERE parent_id = ? 
+ ORDER BY sort_order; + """ + query = self.cursor.execute( + sql_command, (getattr(parent, "node_id", parent),) + ).fetchall() + + # Parse through rows and create models + for ( + id, + title, + content_id, + description, + sort_order, + license_owner, + author, + license_id, + kind, + coach_content, + lang_id, + grade_levels, + resource_types, + learning_activities_, + accessibility_labels, + categories, + learner_needs, + duration, + options, + ) in query: + self.logger.info( + "{indent} {id} ({title} - {kind})...".format( + indent=" |" * indent, id=id, title=title, kind=kind ) ) - for chunk in response: - tempf.write(chunk) - tempf.close() - extract_assessment_items(tempf.name, contentnode, download_url=download_url) - except IOError as e: - log.warning("\b FAILED (check logs for more details)") - sys.stderr.write( - "Restoration Process Error: Failed to save file object {}: {}".format( - filename, os.strerror(e.errno) + + # Determine role + role = roles.LEARNER + if coach_content: + role = roles.COACH + + # Determine extra_fields + extra_fields = {} + if kind == content_kinds.EXERCISE: + randomize_sql = f""" + SELECT randomize + FROM {ASSESSMENTMETADATA_TABLE} + WHERE contentnode_id = ? + """ + randomize = self.cursor.execute(randomize_sql, (id,)).fetchone() + extra_fields["options"] = json.loads(options) if options else {} + extra_fields["randomize"] = bool(randomize[0]) if randomize else False + completion_criteria_ = extra_fields["options"].get( + "completion_criteria" ) + if ( + completion_criteria_ + and completion_criteria_.get("model") == completion_criteria.MASTERY + ): + mastery_model = completion_criteria_.get("threshold", {}).get( + "mastery_model" + ) + if mastery_model == mastery_criteria.DO_ALL: + completion_criteria_["threshold"] = { + "mastery_model": mastery_model, + } + if ( + completion_criteria_ + and "learner_managed" not in completion_criteria_ + ): + completion_criteria_["learner_managed"] = False + + # Determine license + license_result = self._retrieve_license(license_id) + license_description = license_result[1] if license_result else "" + license_result = license_result[0] if license_result else None + + # TODO: Determine thumbnail encoding + + # Create the new node model + node = models.ContentNode.objects.create( + node_id=id, + original_source_node_id=id, + source_node_id=id, + title=title, + content_id=content_id, + description=description, + sort_order=sort_order, + copyright_holder=license_owner, + author=author, + license=license_result, + license_description=license_description, + language_id=lang_id, + role_visibility=role, + extra_fields=extra_fields, + kind_id=kind, + parent=parent, + original_channel_id=self.target_id, + source_channel_id=self.target_id, + grade_levels=convert_metadata_to_dict(grade_levels), + resource_types=convert_metadata_to_dict(resource_types), + learning_activities=convert_learning_activities_to_dict( + kind, learning_activities_ + ), + accessibility_labels=convert_metadata_to_dict(accessibility_labels), + categories=convert_metadata_to_dict(categories), + learner_needs=convert_metadata_to_dict(learner_needs), ) - continue - finally: - os.unlink(tempf.name) - -def extract_assessment_items(filepath, contentnode, download_url=None): - """extract_assessment_items: Create and save assessment items to content node - Args: - filepath (str): Where perseus zip is stored - contentnode (models.ContentNode): node assessment items reference - download_url (str): Domain to download files from - Returns: None - """ + # Handle foreign key 
references (children, files, tags) + if kind == content_kinds.TOPIC: + self._create_nodes(node, indent=indent + 1) + elif kind == content_kinds.EXERCISE: + self._create_assessment_items(node, indent=indent + 1) + self._create_files(node, indent=indent + 1) + self._create_tags(node, indent=indent + 1) + + errors = node.mark_complete() + if errors: + self.logger.warning(f"Node {node.node_id} has errors: {errors}") + node.save() + + def _retrieve_license(self, license_id): + """ + Get license based on id from exported db + + :param license_id: id of license on exported db + :return: license model matching the id and the associated license description + :rtype: tuple + """ + # Handle no license being assigned + if license_id is None or license_id == "": + return None + + # Return license that matches name + name, description = self.cursor.execute( + f""" + SELECT license_name, license_description + FROM {LICENSE_TABLE} + WHERE id = ? + """, + (license_id,), + ).fetchone() + return models.License.objects.get(license_name=name), description + + def _create_files(self, contentnode, indent=0): + """ + Create and possibly download node files + + :param contentnode: node file references + :param indent: How far to indent print statements + """ + # Parse database for files referencing content node and make file models + sql_command = f""" + SELECT checksum, extension, file_size, contentnode_id, lang_id, preset, thumbnail + FROM {FILE_TABLE} + WHERE contentnode_id = ?; + """ + query = self.cursor.execute(sql_command, (contentnode.node_id,)).fetchall() + + for ( + checksum, + extension, + file_size, + contentnode_id, + lang_id, + preset, + is_thumbnail, + ) in query: + filename = "{}.{}".format(checksum, extension) + self.logger.info( + "{indent} * FILE {filename}...".format( + indent=" |" * indent, filename=filename + ) + ) - try: - tempdir = tempfile.mkdtemp() - with zipfile.ZipFile(filepath, "r") as zipf: - zipf.extractall(tempdir) - os.chdir(tempdir) - - with open("exercise.json", "rb") as fobj: - data = json.load(fobj) - - for index, assessment_id in enumerate(data["all_assessment_items"]): - with open("{}.json".format(assessment_id), "rb") as fobj: - assessment_item = generate_assessment_item( - assessment_id, - index, - data["assessment_mapping"][assessment_id], - json.load(fobj), - download_url=download_url, + try: + self._download_file( + filename, + contentnode=contentnode, + preset=preset, + file_size=file_size, + lang_id=lang_id, + is_thumbnail=is_thumbnail, ) - contentnode.assessment_items.add(assessment_item) - finally: - shutil.rmtree(tempdir) + except IOError as e: + self.logger.warning("\b FAILED (check logs for more details)") + if e.errno: + sys.stderr.write( + f"Restoration Process Error: Failed to save file object {filename}: {os.strerror(e.errno)}" + ) + continue + + def _download_file( + self, + filename, + contentnode=None, + assessment_item=None, + preset=None, + file_size=None, + lang_id=None, + is_thumbnail=False, + ): + """ + Create and possibly download a file from source instance and save to local storage + + :param filename: the name of the file to download + :param contentnode: the associated content node + :param assessment_item: the associated assessment item + :param preset: the format preset for the file + :param file_size: the known size of the file + :param lang_id: the language ID of the file + :param is_thumbnail: whether the file is a thumbnail + """ + checksum, extension = os.path.splitext(filename) + extension = extension.lstrip(".") + filepath = 
models.generate_object_storage_name(checksum, filename) + + file_url = f"/content/storage/{filename[0]}/{filename[1]}/{filename}" + file_exists = False + + # If the file already exists, get the size from the storage + if default_storage.exists(filepath): + file_size = file_size or default_storage.size(filepath) + file_exists = True + # if it needs downloading and if we were instructed to do so + elif self.download_content or (is_thumbnail and contentnode): + buffer = BytesIO() + response = self.client.get(file_url) + for chunk in response: + buffer.write(chunk) + if is_thumbnail and contentnode: + # If the file is a thumbnail, save it to the content node + contentnode.thumbnail_encoding = json.dumps( + { + "base64": get_thumbnail_encoding(filename, input_buffer=buffer), + "points": [], + "zoom": 0, + } + ) + else: + checksum = hashlib.md5() + checksum.update(buffer.getvalue()) + hashed_filename = checksum.hexdigest() + full_filename = "{}.{}".format(hashed_filename, extension.lower()) + filepath = models.generate_object_storage_name( + hashed_filename, full_filename + ) -def generate_assessment_item( - assessment_id, order, assessment_type, assessment_data, download_url=None -): - """generate_assessment_item: Generates a new assessment item - Args: - assessment_id (str): AssessmentItem.assessment_id value - order (Number): AssessmentItem.order value - assessment_type (str): AssessmentItem.type value - assessment_data (dict): Extracted data from perseus file - download_url (str): Domain to download files from - Returns: models.AssessmentItem - """ - assessment_item = models.AssessmentItem.objects.create( - assessment_id=assessment_id, type=assessment_type, order=order - ) - if assessment_type == exercises.PERSEUS_QUESTION: - assessment_item.raw_data = json.dumps(assessment_data) - else: - # Parse questions - assessment_data["question"]["content"] = "\n\n".join( - assessment_data["question"]["content"].split("\n\n")[:-1] - ) - assessment_item.question = process_content( - assessment_data["question"], assessment_item, download_url=download_url + self.storage.save(filepath, buffer) + buffer.close() + file_exists = True + # otherwise, if file size is not known, get it from the response headers + elif not file_size: + response = self.client.head(file_url) + file_size = int(response.headers.get("Content-Length", 0)) + + # Save values to a new file object + file_obj = models.File( + file_format_id=extension, + file_size=file_size, + contentnode=contentnode, + assessment_item=assessment_item, + language_id=lang_id, + preset_id=preset or "", + checksum=checksum, ) - - # Parse answers - answer_data = assessment_data["question"]["widgets"][ - ANSWER_FIELD_MAP[assessment_type] - ]["options"] - if assessment_type == exercises.INPUT_QUESTION: - assessment_item.answers = json.dumps( - [ - {"answer": answer["value"], "correct": True} - for answer in answer_data["answers"] - ] + file_obj.file_on_disk.name = filepath + # set_by_file_on_disk: skip unless the file has been downloaded + file_obj.save(set_by_file_on_disk=file_exists) + + def _create_tags(self, contentnode, indent=0): + """ + Create tags associated with node + + :param contentnode: node tags reference + :param indent: How far to indent print statements + """ + # Parse database for files referencing content node and make file models + sql_command = f""" + SELECT ct.id, ct.tag_name + FROM {NODE_TAG_TABLE} cnt + JOIN {TAG_TABLE} ct ON cnt.contenttag_id = ct.id + WHERE cnt.contentnode_id = ?; + """ + query = self.cursor.execute(sql_command, 
(contentnode.node_id,)).fetchall() + + # Build up list of tags + tag_list = [] + for id, tag_name in query: + self.logger.info( + "{indent} ** TAG {tag}...".format(indent=" |" * indent, tag=tag_name) ) - else: - assessment_item.answers = json.dumps( - [ - { - "answer": process_content( - answer, assessment_item, download_url=download_url - ), - "correct": answer["correct"], - } - for answer in answer_data["choices"] - ] + # Save values to new or existing tag object + tag_obj, is_new = models.ContentTag.objects.get_or_create( + pk=id, + tag_name=tag_name, + channel_id=self.target_id, ) - assessment_item.randomize = answer_data["randomize"] - - # Parse hints - assessment_item.hints = json.dumps( - [ - { - "hint": process_content( - hint, assessment_item, download_url=download_url - ) - } - for hint in assessment_data["hints"] - ] - ) - - assessment_item.save() - return assessment_item - + tag_list.append(tag_obj) + + # Save tags to node + contentnode.tags.set(tag_list) + contentnode.save() + + def _create_assessment_items(self, contentnode, indent=0): + """ + Generate assessment items based on perseus zip + + :param contentnode: node assessment items reference + :param indent: How far to indent print statements + """ + if not self.token: + self.logger.warning( + f"Skipping assessment items for node {contentnode.node_id}" + ) + return -def process_content(data, assessment_item, download_url=None): - """process_content: Parses perseus text for special formatting (e.g. formulas, images) - Args: - data (dict): Perseus data to parse (e.g. parsing 'question' field) - download_url (str): Domain to download files from - assessment_item (models.AssessmentItem): assessment item to save images to - Returns: models.AssessmentItem - """ - data["content"] = data["content"].replace( - " ", "" - ) # Remove unrecognized non unicode characters - # Process formulas - for match in re.finditer(r"(\$[^\$☣]+\$)", data["content"]): - data["content"] = data["content"].replace( - match.group(0), "${}$".format(match.group(0)) + # first obtain the content node's Studio ID with the node ID + node_response = self.client.get_with_token( + f"/api/contentnode?_node_id_channel_id___in={contentnode.node_id},{self.source_id}" ) + if node_response.status_code != 200: + self.logger.warning( + f"Failed to obtain assessment items for node {contentnode.node_id}" + ) + return - # Process images + node_data = node_response.json() + contentnode_id = node_data[0]["id"] if node_data else None + if not contentnode_id: + self.logger.warning(f"No content node found for node {contentnode.node_id}") + return - for match in re.finditer( - r"!\[[^\]]*\]\((\$(\{☣ LOCALPATH\}\/images)\/([^\.]+\.[^\)]+))\)", - data["content"], - ): - data["content"] = data["content"].replace( - match.group(2), exercises.CONTENT_STORAGE_PLACEHOLDER + # Get the content node's assessment items + assessment_response = self.client.get_with_token( + f"/api/assessmentitem?contentnode__in={contentnode_id}" ) - image_data = data["images"].get(match.group(1)) - if image_data and image_data.get("width"): - data["content"] = data["content"].replace( - match.group(3), - "{} ={}x{}".format( - match.group(3), image_data["width"], image_data["height"] - ), + if assessment_response.status_code != 200: + self.logger.warning( + f"Failed to obtain assessment items for node {contentnode.node_id}" ) + return - # Save files to db - download_file( - match.group(3), - assessment_item=assessment_item, - preset=format_presets.EXERCISE, - download_url=download_url, - ) + assessment_items = 
assessment_response.json() + if not assessment_items: + self.logger.warning( + f"No assessment items found for node {contentnode.node_id}" + ) + return - return data["content"] + # Create the assessment items + for item in assessment_items: + self.logger.info( + "{indent} ** ASSESSMENT ITEM {assessment_id}...".format( + indent=" |" * indent, assessment_id=item["assessment_id"] + ) + ) + assessment_item = models.AssessmentItem.objects.create( + assessment_id=item["assessment_id"], + type=item["type"], + order=item["order"], + question=item["question"], + answers=item["answers"], + hints=item["hints"], + randomize=item.get("randomize", False), + ) + contentnode.assessment_items.add(assessment_item) + contentnode.save() + + def _process_assessment_images(self, assessment_item): + """ + Process images in assessment items and save them to the database. + + :param assessment_item: The assessment item to process. + """ + if not self.download_content: + # Skip if not downloading content + return + + for content in [ + assessment_item.question, + assessment_item.answers, + assessment_item.hints, + ]: + for match in re.finditer(exercise_image_filename_regex, content): + # Save files to db + self._download_file( + match.group(3), + assessment_item=assessment_item, + preset=format_presets.EXERCISE, + ) From ecc9c34be63b8be944facd36d89f4c961aa91905 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Tue, 6 May 2025 15:11:49 -0700 Subject: [PATCH 2/7] Improve logging and progress output, perform bulk handling of assessments --- .../management/commands/restore_channel.py | 1 - .../contentcuration/utils/import_tools.py | 207 +++++++++++------- requirements-dev.in | 1 + requirements-dev.txt | 6 + 4 files changed, 131 insertions(+), 84 deletions(-) diff --git a/contentcuration/contentcuration/management/commands/restore_channel.py b/contentcuration/contentcuration/management/commands/restore_channel.py index 49b785f725..16b3976228 100644 --- a/contentcuration/contentcuration/management/commands/restore_channel.py +++ b/contentcuration/contentcuration/management/commands/restore_channel.py @@ -53,7 +53,6 @@ def add_arguments(self, parser): ) def handle(self, *args, **options): - logger.info("\n\n********** STARTING CHANNEL RESTORATION **********") manager = ImportManager( options["source_url"], options["source_id"], diff --git a/contentcuration/contentcuration/utils/import_tools.py b/contentcuration/contentcuration/utils/import_tools.py index 8f094ed7eb..c0d75207c5 100644 --- a/contentcuration/contentcuration/utils/import_tools.py +++ b/contentcuration/contentcuration/utils/import_tools.py @@ -12,6 +12,7 @@ from io import BytesIO import requests +import tqdm from django.core.files.storage import default_storage from django.core.management import call_command from django.db import transaction @@ -181,7 +182,7 @@ def __init__( self.client = ImportClient(source_url, api_token=token) self.conn = None self.cursor = None - self.schema_version = None + self.progress = None @cached_property def editor_user(self): @@ -194,10 +195,10 @@ def editor_user(self): def run(self): """ - Run the import process. + Run the import restoration process. 
""" + self.logger.info("********** STARTING CHANNEL RESTORATION **********") # Set up variables for the import process - self.logger.info("\n\n********** STARTING CHANNEL IMPORT **********") start = datetime.datetime.now() if not self.token: @@ -243,10 +244,38 @@ def run(self): complete=True, ) + self.logger.info("Creating nodes...") + total_nodes = self.cursor.execute( + f"SELECT COUNT(*) FROM {NODE_TABLE}" + ).fetchone()[0] + node_progress = tqdm.tqdm( + total=total_nodes, desc="Restoring nodes", unit="node" + ) + # Create nodes mapping to channel - self.logger.info(" Creating nodes...") with transaction.atomic(): - self._create_nodes(root) + self._create_nodes(root, node_progress) + node_progress.close() + self.logger.info("Creating assessment items...") + exercise_nodes = models.ContentNode.objects.filter( + kind_id=content_kinds.EXERCISE, tree_id=root.tree_id + ) + exercise_progress = tqdm.tqdm( + total=exercise_nodes.count(), + desc="Restoring assessments", + unit="node", + ) + chunk = [] + for node in exercise_nodes.iterator(chunk_size=10): + chunk.append(node) + if len(chunk) >= 10: + self._create_assessment_items(chunk) + exercise_progress.update(len(chunk)) + chunk = [] + if chunk: + self._create_assessment_items(chunk) + exercise_progress.update(len(chunk)) + exercise_progress.close() # TODO: Handle prerequisites # Delete the previous tree if it exists @@ -272,9 +301,9 @@ def run(self): # Print stats self.logger.info( - f"\n\nChannel has been imported (time: {datetime.datetime.now() - start})\n" + f"Channel has been imported (time: {datetime.datetime.now() - start})" ) - self.logger.info("\n\n********** IMPORT COMPLETE **********\n\n") + self.logger.info("********** IMPORT COMPLETE **********") def _create_channel(self): """ @@ -315,15 +344,15 @@ def _create_channel(self): channel.version = version channel.public = self.public channel.save() - self.logger.info(f"\tCreated channel {self.target_id} with name {name}") + self.logger.info(f"Created channel {self.target_id} with name {name}") return channel, root_pk - def _create_nodes(self, parent, indent=1): + def _create_nodes(self, parent, progress): """ Create node(s) for a channel with target id :param parent: node's parent - :param indent: How far to indent print statements + :param progress: progress bar for node creation """ sql_command = f""" SELECT @@ -360,12 +389,6 @@ def _create_nodes(self, parent, indent=1): duration, options, ) in query: - self.logger.info( - "{indent} {id} ({title} - {kind})...".format( - indent=" |" * indent, id=id, title=title, kind=kind - ) - ) - # Determine role role = roles.LEARNER if coach_content: @@ -441,16 +464,16 @@ def _create_nodes(self, parent, indent=1): # Handle foreign key references (children, files, tags) if kind == content_kinds.TOPIC: - self._create_nodes(node, indent=indent + 1) - elif kind == content_kinds.EXERCISE: - self._create_assessment_items(node, indent=indent + 1) - self._create_files(node, indent=indent + 1) - self._create_tags(node, indent=indent + 1) - - errors = node.mark_complete() - if errors: - self.logger.warning(f"Node {node.node_id} has errors: {errors}") + self._create_nodes(node, progress) + self._create_files(node) + self._create_tags(node) + + if kind != content_kinds.EXERCISE: + errors = node.mark_complete() + if errors: + self.logger.warning(f"Node {node.node_id} has errors: {errors}") node.save() + progress.update(1) def _retrieve_license(self, license_id): """ @@ -475,12 +498,11 @@ def _retrieve_license(self, license_id): ).fetchone() return 
models.License.objects.get(license_name=name), description - def _create_files(self, contentnode, indent=0): + def _create_files(self, contentnode): """ Create and possibly download node files :param contentnode: node file references - :param indent: How far to indent print statements """ # Parse database for files referencing content node and make file models sql_command = f""" @@ -500,11 +522,6 @@ def _create_files(self, contentnode, indent=0): is_thumbnail, ) in query: filename = "{}.{}".format(checksum, extension) - self.logger.info( - "{indent} * FILE {filename}...".format( - indent=" |" * indent, filename=filename - ) - ) try: self._download_file( @@ -602,104 +619,128 @@ def _download_file( # set_by_file_on_disk: skip unless the file has been downloaded file_obj.save(set_by_file_on_disk=file_exists) - def _create_tags(self, contentnode, indent=0): + def _create_tags(self, contentnode): """ Create tags associated with node :param contentnode: node tags reference - :param indent: How far to indent print statements """ # Parse database for files referencing content node and make file models sql_command = f""" - SELECT ct.id, ct.tag_name + SELECT ct.tag_name FROM {NODE_TAG_TABLE} cnt JOIN {TAG_TABLE} ct ON cnt.contenttag_id = ct.id WHERE cnt.contentnode_id = ?; """ query = self.cursor.execute(sql_command, (contentnode.node_id,)).fetchall() - # Build up list of tags - tag_list = [] - for id, tag_name in query: - self.logger.info( - "{indent} ** TAG {tag}...".format(indent=" |" * indent, tag=tag_name) - ) - # Save values to new or existing tag object - tag_obj, is_new = models.ContentTag.objects.get_or_create( - pk=id, - tag_name=tag_name, - channel_id=self.target_id, - ) - tag_list.append(tag_obj) + models.ContentTag.objects.bulk_create( + [ + models.ContentTag( + tag_name=tag_name, + channel_id=self.target_id, + ) + for tag_name in query + ], + ignore_conflicts=True, + ) # Save tags to node - contentnode.tags.set(tag_list) + contentnode.tags.set( + models.ContentTag.objects.filter( + tag_name__in=query, channel_id=self.target_id + ) + ) contentnode.save() - def _create_assessment_items(self, contentnode, indent=0): + def _create_assessment_items(self, nodes): """ - Generate assessment items based on perseus zip + Generate assessment items based on API data - :param contentnode: node assessment items reference - :param indent: How far to indent print statements + :param nodes: nodes to lookup assessment items """ + # Note: there are several different IDs being used within this method + node_ids = [node.node_id for node in nodes] + if not self.token: self.logger.warning( - f"Skipping assessment items for node {contentnode.node_id}" + f"Skipping assessment items for node(s) {','. join(node_ids)}" ) return - # first obtain the content node's Studio ID with the node ID - node_response = self.client.get_with_token( - f"/api/contentnode?_node_id_channel_id___in={contentnode.node_id},{self.source_id}" + # first obtain the remote nodes' IDs with the node ID and channel ID + node_channel_ids = f",{self.source_id},".join(node_ids) + nodes_response = self.client.get_with_token( + f"/api/contentnode?_node_id_channel_id___in={node_channel_ids},{self.source_id}" ) - if node_response.status_code != 200: + if nodes_response.status_code != 200: self.logger.warning( - f"Failed to obtain assessment items for node {contentnode.node_id}" + f"Failed to obtain assessment items for node(s) {','. 
join(node_ids)}" ) return - node_data = node_response.json() - contentnode_id = node_data[0]["id"] if node_data else None - if not contentnode_id: - self.logger.warning(f"No content node found for node {contentnode.node_id}") + nodes_data = nodes_response.json() + remote_node_pks = [n["id"] for n in nodes_data] if nodes_data else None + + if not remote_node_pks: + self.logger.warning( + f"No content node found for node(s) {','. join(node_ids)}" + ) return # Get the content node's assessment items assessment_response = self.client.get_with_token( - f"/api/assessmentitem?contentnode__in={contentnode_id}" + f"/api/assessmentitem?contentnode__in={','.join(remote_node_pks)}" ) if assessment_response.status_code != 200: self.logger.warning( - f"Failed to obtain assessment items for node {contentnode.node_id}" + f"Failed to obtain assessment items for node(s) {','. join(node_ids)}" ) return assessment_items = assessment_response.json() if not assessment_items: self.logger.warning( - f"No assessment items found for node {contentnode.node_id}" + f"No assessment items found for node(s) {','. join(node_ids)}" ) return - # Create the assessment items - for item in assessment_items: - self.logger.info( - "{indent} ** ASSESSMENT ITEM {assessment_id}...".format( - indent=" |" * indent, assessment_id=item["assessment_id"] + remote_node_pk_map = ( + {n["node_id"]: n["id"] for n in nodes_data} if nodes_data else {} + ) + + for local_node in nodes: + remote_contentnode_id = remote_node_pk_map.get(local_node.node_id) + reduced_assessment_items = [ + item + for item in assessment_items + if item["contentnode"] == remote_contentnode_id + ] + + if not reduced_assessment_items: + self.logger.warning( + f"No assessment items found for node {local_node.node_id}" ) - ) - assessment_item = models.AssessmentItem.objects.create( - assessment_id=item["assessment_id"], - type=item["type"], - order=item["order"], - question=item["question"], - answers=item["answers"], - hints=item["hints"], - randomize=item.get("randomize", False), - ) - contentnode.assessment_items.add(assessment_item) - contentnode.save() + continue + + for item in reduced_assessment_items: + assessment_item = models.AssessmentItem.objects.create( + assessment_id=item["assessment_id"], + type=item["type"], + order=item["order"], + question=item["question"], + answers=item["answers"], + hints=item["hints"], + raw_data=item["raw_data"], + source_url=item["source_url"], + randomize=item.get("randomize", False), + ) + self._process_assessment_images(assessment_item) + local_node.assessment_items.add(assessment_item) + errors = local_node.mark_complete() + if errors: + self.logger.warning(f"Node {local_node.node_id} has errors: {errors}") + local_node.save() def _process_assessment_images(self, assessment_item): """ diff --git a/requirements-dev.in b/requirements-dev.in index bd1d8385e8..02c2458af5 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -9,3 +9,4 @@ pre-commit==4.5.0 nodeenv pip-tools==7.5.2 drf-yasg==1.21.10 +tqdm diff --git a/requirements-dev.txt b/requirements-dev.txt index b1442ddbc2..50528b8eb2 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -113,6 +113,8 @@ tomli==1.2.3 # build # pip-tools # pytest +tqdm==4.67.1 + # via -r requirements-dev.in typing-extensions==4.15.0 # via # -c requirements.txt @@ -123,3 +125,7 @@ virtualenv==20.26.6 # via pre-commit wheel==0.38.1 # via pip-tools + +# The following packages are considered to be unsafe in a requirements file: +# pip +# setuptools From 
09bb546eb9eea5cd0841b6d43f3f7bd70b0a78c7 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Tue, 6 May 2025 15:43:31 -0700 Subject: [PATCH 3/7] Local composite storage handling allowing read-only access to cloud bucket --- .../commands/set_content_mimetypes.py | 2 +- .../contentcuration/production_settings.py | 2 +- .../contentcuration/sandbox_settings.py | 2 +- contentcuration/contentcuration/settings.py | 2 +- .../tests/utils/test_cloud_storage.py | 10 --- .../tests/{ => utils}/test_gcs_storage.py | 14 ++-- .../test_storage.py} | 12 ++- .../contentcuration/utils/cloud_storage.py | 39 ---------- .../contentcuration/utils/storage/__init__.py | 0 .../contentcuration/utils/storage/base.py | 76 +++++++++++++++++++ .../{storage_common.py => storage/common.py} | 17 +++-- .../contentcuration/utils/storage/dev.py | 23 ++++++ .../utils/{gcs_storage.py => storage/gcs.py} | 66 ++-------------- .../contentcuration/viewsets/file.py | 2 +- 14 files changed, 132 insertions(+), 135 deletions(-) delete mode 100644 contentcuration/contentcuration/tests/utils/test_cloud_storage.py rename contentcuration/contentcuration/tests/{ => utils}/test_gcs_storage.py (95%) rename contentcuration/contentcuration/tests/{test_storage_common.py => utils/test_storage.py} (95%) delete mode 100644 contentcuration/contentcuration/utils/cloud_storage.py create mode 100644 contentcuration/contentcuration/utils/storage/__init__.py create mode 100644 contentcuration/contentcuration/utils/storage/base.py rename contentcuration/contentcuration/utils/{storage_common.py => storage/common.py} (94%) create mode 100644 contentcuration/contentcuration/utils/storage/dev.py rename contentcuration/contentcuration/utils/{gcs_storage.py => storage/gcs.py} (80%) diff --git a/contentcuration/contentcuration/management/commands/set_content_mimetypes.py b/contentcuration/contentcuration/management/commands/set_content_mimetypes.py index 27af4732fc..8a79fd02f5 100755 --- a/contentcuration/contentcuration/management/commands/set_content_mimetypes.py +++ b/contentcuration/contentcuration/management/commands/set_content_mimetypes.py @@ -14,7 +14,7 @@ from django.core.files.storage import default_storage from django.core.management.base import BaseCommand -from contentcuration.utils.storage_common import determine_content_type +from contentcuration.utils.storage.common import determine_content_type class Command(BaseCommand): diff --git a/contentcuration/contentcuration/production_settings.py b/contentcuration/contentcuration/production_settings.py index a00bf43a41..82319bd85e 100644 --- a/contentcuration/contentcuration/production_settings.py +++ b/contentcuration/contentcuration/production_settings.py @@ -10,7 +10,7 @@ MEDIA_ROOT = base_settings.STORAGE_ROOT -DEFAULT_FILE_STORAGE = "contentcuration.utils.gcs_storage.CompositeGCS" +DEFAULT_FILE_STORAGE = "contentcuration.utils.storage.gcs.CompositeGCS" SESSION_ENGINE = "django.contrib.sessions.backends.db" # email settings diff --git a/contentcuration/contentcuration/sandbox_settings.py b/contentcuration/contentcuration/sandbox_settings.py index 61e00a465f..912fed7244 100644 --- a/contentcuration/contentcuration/sandbox_settings.py +++ b/contentcuration/contentcuration/sandbox_settings.py @@ -3,7 +3,7 @@ DEBUG = True -DEFAULT_FILE_STORAGE = "contentcuration.utils.gcs_storage.CompositeGCS" +DEFAULT_FILE_STORAGE = "contentcuration.utils.storage.gcs.CompositeGCS" LANGUAGES += (("ar", gettext("Arabic")),) # noqa diff --git a/contentcuration/contentcuration/settings.py 
b/contentcuration/contentcuration/settings.py index 0f18ed0131..e57064601d 100644 --- a/contentcuration/contentcuration/settings.py +++ b/contentcuration/contentcuration/settings.py @@ -357,7 +357,7 @@ def gettext(s): ORPHAN_DATE_CLEAN_UP_THRESHOLD = TWO_WEEKS_AGO # CLOUD STORAGE SETTINGS -DEFAULT_FILE_STORAGE = "django_s3_storage.storage.S3Storage" +DEFAULT_FILE_STORAGE = "contentcuration.utils.storage.dev.CompositeStorage" AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") or "development" AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") or "development" AWS_S3_BUCKET_NAME = os.getenv("AWS_BUCKET_NAME") or "content" diff --git a/contentcuration/contentcuration/tests/utils/test_cloud_storage.py b/contentcuration/contentcuration/tests/utils/test_cloud_storage.py deleted file mode 100644 index 5d84fd9f10..0000000000 --- a/contentcuration/contentcuration/tests/utils/test_cloud_storage.py +++ /dev/null @@ -1,10 +0,0 @@ -from django.test import TestCase - -from contentcuration.utils.cloud_storage import CloudStorage - - -class CloudStorageTestCase(TestCase): - def test_backend_initialization(self): - cloud_storage_instance = CloudStorage() - self.assertIsNotNone(cloud_storage_instance) - self.assertIsInstance(cloud_storage_instance, CloudStorage) diff --git a/contentcuration/contentcuration/tests/test_gcs_storage.py b/contentcuration/contentcuration/tests/utils/test_gcs_storage.py similarity index 95% rename from contentcuration/contentcuration/tests/test_gcs_storage.py rename to contentcuration/contentcuration/tests/utils/test_gcs_storage.py index a58420873e..9036641774 100755 --- a/contentcuration/contentcuration/tests/test_gcs_storage.py +++ b/contentcuration/contentcuration/tests/utils/test_gcs_storage.py @@ -8,8 +8,8 @@ from google.cloud.storage.blob import Blob from mixer.main import mixer -from contentcuration.utils.gcs_storage import CompositeGCS -from contentcuration.utils.gcs_storage import GoogleCloudStorage +from contentcuration.utils.storage.gcs import CompositeGCS +from contentcuration.utils.storage.gcs import GoogleCloudStorage class GoogleCloudStorageSaveTestCase(TestCase): @@ -74,9 +74,9 @@ def test_uploads_cache_control_private_if_content_database(self): self.storage.save(filename, self.content, blob_object=self.blob_obj) assert "private" in self.blob_obj.cache_control - @mock.patch("contentcuration.utils.gcs_storage.BytesIO") + @mock.patch("contentcuration.utils.storage.gcs.BytesIO") @mock.patch( - "contentcuration.utils.gcs_storage.GoogleCloudStorage._is_file_empty", + "contentcuration.utils.storage.gcs.GoogleCloudStorage._is_file_empty", return_value=False, ) def test_gzip_if_content_database(self, bytesio_mock, file_empty_mock): @@ -158,10 +158,10 @@ def setUp(self): self.mock_anon_client.get_bucket.return_value = self.mock_anon_bucket with mock.patch( - "contentcuration.utils.gcs_storage._create_default_client", + "contentcuration.utils.storage.gcs._create_default_client", return_value=self.mock_default_client, ), mock.patch( - "contentcuration.utils.gcs_storage.Client.create_anonymous_client", + "contentcuration.utils.storage.gcs.Client.create_anonymous_client", return_value=self.mock_anon_client, ): self.storage = CompositeGCS() @@ -192,7 +192,7 @@ def test_open(self): self.assertIsInstance(f, File) self.mock_default_bucket.get_blob.assert_called_with("blob") - @mock.patch("contentcuration.utils.gcs_storage.Blob") + @mock.patch("contentcuration.utils.storage.gcs.Blob") def test_save(self, mock_blob): self.storage.save("blob", BytesIO(b"content")) blob = 
mock_blob.return_value diff --git a/contentcuration/contentcuration/tests/test_storage_common.py b/contentcuration/contentcuration/tests/utils/test_storage.py similarity index 95% rename from contentcuration/contentcuration/tests/test_storage_common.py rename to contentcuration/contentcuration/tests/utils/test_storage.py index f89534c194..84cb774646 100644 --- a/contentcuration/contentcuration/tests/test_storage_common.py +++ b/contentcuration/contentcuration/tests/utils/test_storage.py @@ -10,14 +10,12 @@ from django_s3_storage.storage import S3Storage from mock import MagicMock -from .base import StudioTestCase +from ..base import StudioTestCase from contentcuration.models import generate_object_storage_name -from contentcuration.utils.storage_common import _get_gcs_presigned_put_url -from contentcuration.utils.storage_common import determine_content_type -from contentcuration.utils.storage_common import get_presigned_upload_url -from contentcuration.utils.storage_common import UnknownStorageBackendError - -# The modules we'll test +from contentcuration.utils.storage.common import _get_gcs_presigned_put_url +from contentcuration.utils.storage.common import determine_content_type +from contentcuration.utils.storage.common import get_presigned_upload_url +from contentcuration.utils.storage.common import UnknownStorageBackendError class MimeTypesTestCase(TestCase): diff --git a/contentcuration/contentcuration/utils/cloud_storage.py b/contentcuration/contentcuration/utils/cloud_storage.py deleted file mode 100644 index a331226905..0000000000 --- a/contentcuration/contentcuration/utils/cloud_storage.py +++ /dev/null @@ -1,39 +0,0 @@ -from automation.utils.appnexus.base import Backend -from automation.utils.appnexus.base import BackendFactory -from automation.utils.appnexus.base import BackendRequest -from automation.utils.appnexus.base import BackendResponse - - -class CloudStorageBackendRequest(BackendRequest): - pass - - -class CloudStorageRequest(CloudStorageBackendRequest): - def __init__(self) -> None: - super().__init__() - - -class CloudStorageBackendResponse(BackendResponse): - pass - - -class CloudStorageResponse(CloudStorageBackendResponse): - def __init__(self) -> None: - pass - - -class CloudStorageBackendFactory(BackendFactory): - def create_backend(self) -> Backend: - return super().create_backend() - - -class CloudStorage(Backend): - def connect(self) -> None: - return super().connect() - - def make_request(self, request) -> CloudStorageResponse: - return super().make_request(request) - - @classmethod - def _create_instance(cls) -> "CloudStorage": - return cls() diff --git a/contentcuration/contentcuration/utils/storage/__init__.py b/contentcuration/contentcuration/utils/storage/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contentcuration/contentcuration/utils/storage/base.py b/contentcuration/contentcuration/utils/storage/base.py new file mode 100644 index 0000000000..4f3d59d635 --- /dev/null +++ b/contentcuration/contentcuration/utils/storage/base.py @@ -0,0 +1,76 @@ +from django.core.files.storage import Storage as BaseStorage + + +class Storage(BaseStorage): + def writeable(self): + """ + :rtype: bool + """ + return True + + def get_client(self): + """ + :rtype: object + """ + return None + + +class CompositeStorage(Storage): + def __init__(self): + self.backends = [] + + def _get_writeable_backend(self): + """ + :rtype: Storage + """ + for backend in self.backends: + if backend.writeable: + return backend + raise AssertionError("No writeable 
backend found") + + def _get_readable_backend(self, name): + """ + :rtype: Storage + """ + for backend in self.backends: + if backend.exists(name): + return backend + raise FileNotFoundError("{} not found".format(name)) + + def get_client(self): + return self._get_writeable_backend().get_client() + + def open(self, name, mode="rb"): + return self._get_readable_backend(name).open(name, mode) + + def save(self, name, content, max_length=None): + return self._get_writeable_backend().save(name, content, max_length=max_length) + + def delete(self, name): + self._get_writeable_backend().delete(name) + + def exists(self, name): + try: + self._get_readable_backend(name) + return True + except FileNotFoundError: + return False + + def listdir(self, path): + # This method was not implemented on GoogleCloudStorage to begin with + raise NotImplementedError("listdir is not implemented for CompositeStorage") + + def size(self, name): + return self._get_readable_backend(name).size(name) + + def url(self, name): + return self._get_readable_backend(name).url(name) + + def get_accessed_time(self, name): + return self._get_readable_backend(name).get_accessed_time(name) + + def get_created_time(self, name): + return self._get_readable_backend(name).get_created_time(name) + + def get_modified_time(self, name): + return self._get_readable_backend(name).get_modified_time(name) diff --git a/contentcuration/contentcuration/utils/storage_common.py b/contentcuration/contentcuration/utils/storage/common.py similarity index 94% rename from contentcuration/contentcuration/utils/storage_common.py rename to contentcuration/contentcuration/utils/storage/common.py index 10d79bd5c5..16cfa5ef8a 100644 --- a/contentcuration/contentcuration/utils/storage_common.py +++ b/contentcuration/contentcuration/utils/storage/common.py @@ -6,8 +6,10 @@ from django.core.files.storage import default_storage from django_s3_storage.storage import S3Storage -from .gcs_storage import CompositeGCS -from .gcs_storage import GoogleCloudStorage +from .base import CompositeStorage +from .base import Storage +from .gcs import CompositeGCS +from .gcs import GoogleCloudStorage # Do this to ensure that we infer mimetypes for files properly, specifically @@ -67,15 +69,16 @@ def get_presigned_upload_url( # both storage types are having difficulties enforcing it. 
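A minimal usage sketch (not part of the patch), assuming DEFAULT_FILE_STORAGE points at one of the CompositeStorage subclasses introduced in this series; the object path is invented for illustration. Callers keep using Django's default_storage as before: writes go to the first backend that reports itself writeable, while reads and existence checks fall back across the backend list.

    from django.core.files.base import ContentFile
    from django.core.files.storage import default_storage

    path = "storage/ab/cd/abcd1234.mp4"  # hypothetical object name

    # save() delegates to _get_writeable_backend(), i.e. the first backend
    # reporting itself writeable.
    default_storage.save(path, ContentFile(b"example bytes"))

    # exists()/open() delegate to _get_readable_backend(), which walks the
    # backend list and raises FileNotFoundError if no backend has the object.
    if default_storage.exists(path):
        with default_storage.open(path) as fobj:
            data = fobj.read()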
mimetype = determine_content_type(filepath) - if isinstance(storage, (GoogleCloudStorage, CompositeGCS)): + bucket = settings.AWS_S3_BUCKET_NAME + + if isinstance(storage, Storage): client = client or storage.get_client() - bucket = settings.AWS_S3_BUCKET_NAME + + if isinstance(storage, (GoogleCloudStorage, CompositeGCS)): upload_url = _get_gcs_presigned_put_url( client, bucket, filepath, md5sum_b64, lifetime_sec, mimetype=mimetype ) - elif isinstance(storage, S3Storage): - bucket = settings.AWS_S3_BUCKET_NAME - client = client or storage.s3_connection + elif isinstance(storage, (S3Storage, CompositeStorage)): upload_url = _get_s3_presigned_put_url( client, bucket, filepath, md5sum_b64, lifetime_sec ) diff --git a/contentcuration/contentcuration/utils/storage/dev.py b/contentcuration/contentcuration/utils/storage/dev.py new file mode 100644 index 0000000000..defb7dfaed --- /dev/null +++ b/contentcuration/contentcuration/utils/storage/dev.py @@ -0,0 +1,23 @@ +from django_s3_storage.storage import S3Storage +from google.cloud.storage import Client + +from contentcuration.utils.storage.base import CompositeStorage as BaseCompositeStorage +from contentcuration.utils.storage.base import Storage as BaseStorage +from contentcuration.utils.storage.gcs import GoogleCloudStorage + + +class Storage(S3Storage, BaseStorage): + def get_client(self): + """ + :rtype: object + """ + return self.s3_connection + + +class CompositeStorage(BaseCompositeStorage): + def __init__(self): + super(CompositeStorage, self).__init__() + self.backends.append(Storage()) + self.backends.append( + GoogleCloudStorage(Client.create_anonymous_client(), "studio-content") + ) diff --git a/contentcuration/contentcuration/utils/gcs_storage.py b/contentcuration/contentcuration/utils/storage/gcs.py similarity index 80% rename from contentcuration/contentcuration/utils/gcs_storage.py rename to contentcuration/contentcuration/utils/storage/gcs.py index 5c4a425aec..95d6b7fec5 100644 --- a/contentcuration/contentcuration/utils/gcs_storage.py +++ b/contentcuration/contentcuration/utils/storage/gcs.py @@ -6,11 +6,13 @@ import backoff from django.conf import settings from django.core.files import File -from django.core.files.storage import Storage from google.cloud.exceptions import InternalServerError from google.cloud.storage import Client from google.cloud.storage.blob import Blob +from contentcuration.utils.storage.base import CompositeStorage +from contentcuration.utils.storage.base import Storage + OLD_STUDIO_STORAGE_PREFIX = "/contentworkshop_content/" CONTENT_DATABASES_MAX_AGE = 5 # seconds @@ -122,7 +124,7 @@ def save(self, name, fobj, max_length=None, blob_object=None): # determine the current file's mimetype based on the name # import determine_content_type lazily in here, so we don't get into an infinite loop with circular dependencies - from contentcuration.utils.storage_common import determine_content_type + from contentcuration.utils.storage.common import determine_content_type content_type = determine_content_type(name) @@ -216,9 +218,9 @@ def _is_file_empty(fobj): return len(byt) == 0 -class CompositeGCS(Storage): +class CompositeGCS(CompositeStorage): def __init__(self): - self.backends = [] + super(CompositeGCS, self).__init__() self.backends.append( GoogleCloudStorage(_create_default_client(), settings.AWS_S3_BUCKET_NAME) ) @@ -227,59 +229,3 @@ def __init__(self): self.backends.append( GoogleCloudStorage(Client.create_anonymous_client(), "studio-content") ) - - def _get_writeable_backend(self): - """ - :rtype: 
GoogleCloudStorage - """ - for backend in self.backends: - if backend.writeable: - return backend - raise AssertionError("No writeable backend found") - - def _get_readable_backend(self, name): - """ - :rtype: GoogleCloudStorage - """ - for backend in self.backends: - if backend.exists(name): - return backend - raise FileNotFoundError("{} not found".format(name)) - - def get_client(self): - return self._get_writeable_backend().get_client() - - def open(self, name, mode="rb"): - return self._get_readable_backend(name).open(name, mode) - - def save(self, name, content, max_length=None): - return self._get_writeable_backend().save(name, content, max_length=max_length) - - def delete(self, name): - self._get_writeable_backend().delete(name) - - def exists(self, name): - try: - self._get_readable_backend(name) - return True - except FileNotFoundError: - return False - - def listdir(self, path): - # This method was not implemented on GoogleCloudStorage to begin with - raise NotImplementedError("listdir is not implemented for CompositeGCS") - - def size(self, name): - return self._get_readable_backend(name).size(name) - - def url(self, name): - return self._get_readable_backend(name).url(name) - - def get_accessed_time(self, name): - return self._get_readable_backend(name).get_accessed_time(name) - - def get_created_time(self, name): - return self._get_readable_backend(name).get_created_time(name) - - def get_modified_time(self, name): - return self._get_readable_backend(name).get_modified_time(name) diff --git a/contentcuration/contentcuration/viewsets/file.py b/contentcuration/contentcuration/viewsets/file.py index afadbff0cb..4c2477ad52 100644 --- a/contentcuration/contentcuration/viewsets/file.py +++ b/contentcuration/contentcuration/viewsets/file.py @@ -18,7 +18,7 @@ from contentcuration.models import generate_storage_url from contentcuration.utils.cache import ResourceSizeCache from contentcuration.utils.sentry import report_exception -from contentcuration.utils.storage_common import get_presigned_upload_url +from contentcuration.utils.storage.common import get_presigned_upload_url from contentcuration.utils.user import calculate_user_storage from contentcuration.viewsets.base import BulkDeleteMixin from contentcuration.viewsets.base import BulkListSerializer From 2450dee6d9324bba9f41b0be9c45cb1dbfc69e47 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Wed, 7 May 2025 07:37:38 -0700 Subject: [PATCH 4/7] Consolidate presigned URL handling into storage arch --- .../tests/utils/test_storage.py | 37 +++----- .../contentcuration/utils/storage/base.py | 21 +++++ .../contentcuration/utils/storage/common.py | 94 +------------------ .../contentcuration/utils/storage/dev.py | 32 ++++++- .../contentcuration/utils/storage/gcs.py | 32 +++++++ .../contentcuration/viewsets/file.py | 4 +- 6 files changed, 103 insertions(+), 117 deletions(-) diff --git a/contentcuration/contentcuration/tests/utils/test_storage.py b/contentcuration/contentcuration/tests/utils/test_storage.py index 84cb774646..b4c0e0db20 100644 --- a/contentcuration/contentcuration/tests/utils/test_storage.py +++ b/contentcuration/contentcuration/tests/utils/test_storage.py @@ -7,15 +7,15 @@ import requests from django.core.files.storage import FileSystemStorage from django.test import TestCase -from django_s3_storage.storage import S3Storage from mock import MagicMock from ..base import StudioTestCase from contentcuration.models import generate_object_storage_name -from contentcuration.utils.storage.common import _get_gcs_presigned_put_url 
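PATCH 4 consolidates presigned-PUT generation onto the storage backends, so the single entry point no longer takes content_length or an explicit client. A minimal sketch of the resulting call site, mirroring the viewsets/file.py change later in this patch; the path and checksum below are invented for illustration:

    from contentcuration.utils.storage.common import get_presigned_upload_url

    # storage defaults to django.core.files.storage.default_storage
    result = get_presigned_upload_url(
        "storage/ab/cd/abcd1234.jpg",  # destination path inside the bucket
        "aBcDeF==",                    # base64-encoded MD5 of the upload body
        600,                           # URL lifetime in seconds
    )
    upload_url = result["uploadURL"]
    mimetype = result["mimetype"]      # inferred from the file extension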
from contentcuration.utils.storage.common import determine_content_type from contentcuration.utils.storage.common import get_presigned_upload_url from contentcuration.utils.storage.common import UnknownStorageBackendError +from contentcuration.utils.storage.dev import Storage as DevStorage +from contentcuration.utils.storage.gcs import GoogleCloudStorage class MimeTypesTestCase(TestCase): @@ -79,7 +79,6 @@ def test_raises_error(self): "nice", "err", 5, - 0, storage=self.STORAGE, ) @@ -93,7 +92,9 @@ class GoogleCloudStoragePresignedURLUnitTestCase(TestCase): """ def setUp(self): + super().setUp() self.client = MagicMock() + self.storage = GoogleCloudStorage(self.client, "fake") self.generate_signed_url_method = ( self.client.get_bucket.return_value.blob.return_value.generate_signed_url ) @@ -105,19 +106,15 @@ def test_that_generate_signed_url_is_called(self): """ Check that we even call blob.generate_signed_url in the first place. """ - bucket = "fake" - _get_gcs_presigned_put_url(self.client, bucket, "/object.jpg", "aBc", 0, 0) + get_presigned_upload_url("/object.jpg", "aBc", 0, storage=self.storage) self.generate_signed_url_method.assert_called_once() def test_that_we_return_a_string(self): """ Check that _get_gcs_presigned_put_url returns a string. """ - bucket = "fake" - ret = _get_gcs_presigned_put_url( - self.client, bucket, "/object.jpg", "aBc", 0, 0 - ) - assert isinstance(ret, str) + ret = get_presigned_upload_url("/object.jpg", "aBc", 0, storage=self.storage) + assert isinstance(ret["uploadURL"], str) def test_generate_signed_url_called_with_required_arguments(self): """ @@ -135,11 +132,9 @@ def test_generate_signed_url_called_with_required_arguments(self): bucket_name = "fake" filepath = "object.jpg" lifetime = 20 # seconds - mimetype = "doesntmatter" + mimetype = "image/jpeg" - _get_gcs_presigned_put_url( - self.client, bucket_name, filepath, content_md5, lifetime, mimetype - ) + get_presigned_upload_url(filepath, content_md5, lifetime, storage=self.storage) # assert that we're creating the right object self.client.get_bucket.assert_called_once_with(bucket_name) @@ -151,8 +146,8 @@ def test_generate_signed_url_called_with_required_arguments(self): self.generate_signed_url_method.assert_called_once_with( method=method, content_md5=content_md5, - expiration=lifetime_timedelta, content_type=mimetype, + expiration=lifetime_timedelta, ) @@ -161,11 +156,9 @@ class S3StoragePresignedURLUnitTestCase(StudioTestCase): Test cases for generating presigned URLs for S3 storage, i.e. Minio. 
""" - STORAGE = S3Storage() - def setUp(self): - self.client = MagicMock() super().setUp() + self.storage = DevStorage() def test_returns_string_if_inputs_are_valid(self): """ @@ -174,9 +167,7 @@ def test_returns_string_if_inputs_are_valid(self): """ # use a real connection here as a sanity check - ret = get_presigned_upload_url( - "a/b/abc.jpg", "aBc", 10, 1, storage=self.STORAGE, client=None - ) + ret = get_presigned_upload_url("a/b/abc.jpg", "aBc", 10, storage=self.storage) url = ret["uploadURL"] assert isinstance(url, str) @@ -197,9 +188,7 @@ def test_can_upload_file_to_presigned_url(self): filename = "blahfile.jpg" filepath = generate_object_storage_name(md5_checksum, filename) - ret = get_presigned_upload_url( - filepath, md5_checksum_base64, 1000, len(file_contents) - ) + ret = get_presigned_upload_url(filepath, md5_checksum_base64, 1000) url = ret["uploadURL"] content_type = ret["mimetype"] diff --git a/contentcuration/contentcuration/utils/storage/base.py b/contentcuration/contentcuration/utils/storage/base.py index 4f3d59d635..a78e54153f 100644 --- a/contentcuration/contentcuration/utils/storage/base.py +++ b/contentcuration/contentcuration/utils/storage/base.py @@ -14,6 +14,20 @@ def get_client(self): """ return None + def get_presigned_put_url( + self, filepath, md5sum, lifetime_sec, mimetype="application/octet-stream" + ): + """ + Creates a pre-signed URL for uploading files. + + :param filepath: A string representing the destination file path inside the bucket + :param md5sum: A MD5 checksum of the file to be uploaded + :param lifetime_sec: The lifetime of the URL in seconds + :param mimetype: The content type of the file to be uploaded + :return: A pre-signed URL for uploading the file + """ + raise NotImplementedError("Subclasses must implement this method") + class CompositeStorage(Storage): def __init__(self): @@ -74,3 +88,10 @@ def get_created_time(self, name): def get_modified_time(self, name): return self._get_readable_backend(name).get_modified_time(name) + + def get_presigned_put_url( + self, filepath, md5sum, lifetime_sec, mimetype="application/octet-stream" + ): + return self._get_writeable_backend().get_presigned_put_url( + filepath, md5sum, lifetime_sec, mimetype=mimetype + ) diff --git a/contentcuration/contentcuration/utils/storage/common.py b/contentcuration/contentcuration/utils/storage/common.py index 16cfa5ef8a..6a40768720 100644 --- a/contentcuration/contentcuration/utils/storage/common.py +++ b/contentcuration/contentcuration/utils/storage/common.py @@ -1,15 +1,10 @@ import mimetypes import os -from datetime import timedelta -from django.conf import settings from django.core.files.storage import default_storage -from django_s3_storage.storage import S3Storage from .base import CompositeStorage from .base import Storage -from .gcs import CompositeGCS -from .gcs import GoogleCloudStorage # Do this to ensure that we infer mimetypes for files properly, specifically @@ -41,11 +36,10 @@ def get_presigned_upload_url( filepath, md5sum_b64, lifetime_sec, - content_length, storage=default_storage, - client=None, ): - """Return a presigned URL that can modify the given filepath through a PUT + """ + Return a presigned URL that can modify the given filepath through a PUT request. Performing a PUT request on the returned URL changes the object's contents with the contents of your PUT request. @@ -54,9 +48,6 @@ def get_presigned_upload_url( have to set a Content-MD5 HTTP header matching this md5sum once it initiates the download. 
:param: lifetime_sec: the lifetime of the generated upload url, in seconds. - :param: content_length: the size of the content, in bytes. - :param: client: the storage client that will be used to gennerate the presigned URL. - This must have an API that's similar to either the GCS client or the boto3 client. :returns: a dictionary containing 2 keys: mimetype: the mimetype that will be required to send as part of the file upload's mimetype header @@ -64,23 +55,11 @@ def get_presigned_upload_url( :raises: :class:`UnknownStorageBackendError`: If the storage backend is not S3 or GCS. """ - - # Aron: note that content_length is not used right now because - # both storage types are having difficulties enforcing it. - mimetype = determine_content_type(filepath) - bucket = settings.AWS_S3_BUCKET_NAME - if isinstance(storage, Storage): - client = client or storage.get_client() - - if isinstance(storage, (GoogleCloudStorage, CompositeGCS)): - upload_url = _get_gcs_presigned_put_url( - client, bucket, filepath, md5sum_b64, lifetime_sec, mimetype=mimetype - ) - elif isinstance(storage, (S3Storage, CompositeStorage)): - upload_url = _get_s3_presigned_put_url( - client, bucket, filepath, md5sum_b64, lifetime_sec + if isinstance(storage, (Storage, CompositeStorage)): + upload_url = storage.get_presigned_put_url( + filepath, md5sum_b64, lifetime_sec, mimetype=mimetype ) else: raise UnknownStorageBackendError( @@ -88,66 +67,3 @@ def get_presigned_upload_url( ) return {"mimetype": mimetype, "uploadURL": upload_url} - - -def _get_gcs_presigned_put_url( - gcs_client, - bucket, - filepath, - md5sum, - lifetime_sec, - mimetype="application/octet-stream", -): - bucket_obj = gcs_client.get_bucket(bucket) - blob_obj = bucket_obj.blob(filepath) - - # ensure the md5sum doesn't have any whitespace, including newlines. - # We should do the same whitespace stripping as well on any client that actually - # uses the returned presigned url. - md5sum_stripped = md5sum.strip() - - # convert the lifetime to a timedelta, so gcloud library will interpret the lifetime - # as the seconds from right now. If we use an absolute integer, it's the number of seconds - # from unix time - lifetime_timedelta = timedelta(seconds=lifetime_sec) - - url = blob_obj.generate_signed_url( - method="PUT", - content_md5=md5sum_stripped, - content_type=mimetype, - expiration=lifetime_timedelta, - ) - - return url - - -def _get_s3_presigned_put_url(s3_client, bucket, filepath, md5sum, lifetime_sec): - """ - Creates a pre-signed URL for S3-like backends, e.g. Minio. - - Note that since our production object storage backend is GCS, we do not enforce or require - any Content-MD5 value. - - :param: s3_client: an initialized S3 client. We will use this to create the presigned PUT url. - :param: bucket: the bucket where the user can PUT their object. - :param: filepath: the file path inside the bucket that the user can PUT their object. - :param: md5sum: the base64-encoded MD5sum of the object the user is planning to PUT. - This is ignored for this function and added solely to maintain API compatibility with other - private presigned URL functions. - :param: lifetime_sec: how long before the presigned URL expires, in seconds. 
- """ - # S3's PUT Object parameters: - # https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html - method = "put_object" - fields = { - "Bucket": bucket, - "Key": filepath, - } - - response = s3_client.generate_presigned_url( - ClientMethod=method, - Params=fields, - ExpiresIn=lifetime_sec, - ) - - return response diff --git a/contentcuration/contentcuration/utils/storage/dev.py b/contentcuration/contentcuration/utils/storage/dev.py index defb7dfaed..7e77a6e305 100644 --- a/contentcuration/contentcuration/utils/storage/dev.py +++ b/contentcuration/contentcuration/utils/storage/dev.py @@ -1,3 +1,4 @@ +from django.conf import settings from django_s3_storage.storage import S3Storage from google.cloud.storage import Client @@ -9,10 +10,39 @@ class Storage(S3Storage, BaseStorage): def get_client(self): """ - :rtype: object + :rtype: botocore.client.BaseClient """ return self.s3_connection + def get_presigned_put_url(self, filepath, md5sum, lifetime_sec, mimetype=None): + """ + Creates a pre-signed URL for development storage backends + + Note that since our production object storage backend is GCS, we do not enforce or require + any Content-MD5 value. + + :param: filepath: the file path inside the bucket that the user can PUT their object. + :param: md5sum: the base64-encoded MD5sum of the object the user is planning to PUT. + This is ignored for this function and added solely to maintain API compatibility with other + private presigned URL functions. + :param: lifetime_sec: how long before the presigned URL expires, in seconds. + :param: mimetype: the content type of the file to be uploaded + :return: A pre-signed URL for uploading the file + """ + # S3's PUT Object parameters: + # https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html + method = "put_object" + fields = { + "Bucket": settings.AWS_S3_BUCKET_NAME, + "Key": filepath, + } + + return self.get_client().generate_presigned_url( + ClientMethod=method, + Params=fields, + ExpiresIn=lifetime_sec, + ) + class CompositeStorage(BaseCompositeStorage): def __init__(self): diff --git a/contentcuration/contentcuration/utils/storage/gcs.py b/contentcuration/contentcuration/utils/storage/gcs.py index 95d6b7fec5..38e0347d2d 100644 --- a/contentcuration/contentcuration/utils/storage/gcs.py +++ b/contentcuration/contentcuration/utils/storage/gcs.py @@ -1,5 +1,6 @@ import logging import tempfile +from datetime import timedelta from gzip import GzipFile from io import BytesIO @@ -217,6 +218,37 @@ def _is_file_empty(fobj): fobj.seek(current_location) return len(byt) == 0 + def get_presigned_put_url( + self, filepath, md5sum, lifetime_sec, mimetype="application/octet-stream" + ): + """ + Creates a pre-signed URL for GCS. + + :param filepath: A string representing the destination file path inside the bucket + :param md5sum: A MD5 checksum of the file to be uploaded + :param lifetime_sec: The lifetime of the URL in seconds + :param mimetype: The content type of the file to be uploaded + :return: A pre-signed URL for uploading the file + """ + blob_obj = self.bucket.blob(filepath) + + # ensure the md5sum doesn't have any whitespace, including newlines. + # We should do the same whitespace stripping as well on any client that actually + # uses the returned presigned url. + md5sum_stripped = md5sum.strip() + + # convert the lifetime to a timedelta, so gcloud library will interpret the lifetime + # as the seconds from right now. 
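A brief aside on the expiration argument discussed in the comment above, assuming the behaviour that comment describes for google-cloud-storage:

    from datetime import timedelta

    # Interpreted relative to now: the signed URL expires ten minutes from signing.
    expiration_relative = timedelta(seconds=600)

    # Per the comment above, a bare int is read as seconds since the Unix epoch
    # (an absolute moment), so this value would point at 1970 and be long expired.
    expiration_absolute = 600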
If we use an absolute integer, it's the number of seconds + # from unix time + lifetime_timedelta = timedelta(seconds=lifetime_sec) + + return blob_obj.generate_signed_url( + method="PUT", + content_md5=md5sum_stripped, + content_type=mimetype, + expiration=lifetime_timedelta, + ) + class CompositeGCS(CompositeStorage): def __init__(self): diff --git a/contentcuration/contentcuration/viewsets/file.py b/contentcuration/contentcuration/viewsets/file.py index 4c2477ad52..f2e3444686 100644 --- a/contentcuration/contentcuration/viewsets/file.py +++ b/contentcuration/contentcuration/viewsets/file.py @@ -252,9 +252,7 @@ def upload_url(self, request): checksum_base64 = codecs.encode( codecs.decode(checksum, "hex"), "base64" ).decode() - retval = get_presigned_upload_url( - filepath, checksum_base64, 600, content_length=size - ) + retval = get_presigned_upload_url(filepath, checksum_base64, 600) file = File( file_size=size, From a75233bc9f503e853a750ca54ccbceb43b6bd184 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Wed, 7 May 2025 08:50:42 -0700 Subject: [PATCH 5/7] Speed up restoration by avoiding read-only GCS storage --- .../contentcuration/utils/import_tools.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/contentcuration/contentcuration/utils/import_tools.py b/contentcuration/contentcuration/utils/import_tools.py index c0d75207c5..58461ee750 100644 --- a/contentcuration/contentcuration/utils/import_tools.py +++ b/contentcuration/contentcuration/utils/import_tools.py @@ -32,6 +32,7 @@ from contentcuration.utils.files import write_base64_to_file from contentcuration.utils.garbage_collect import get_deleted_chefs_root from contentcuration.utils.publish import publish_channel +from contentcuration.utils.storage.base import CompositeStorage from contentcuration.viewsets.assessmentitem import exercise_image_filename_regex @@ -60,6 +61,11 @@ def __init__(self, base_url, api_token=None): super(ImportClient, self).__init__() self.base_url = base_url self.api_token = api_token + self.headers.update( + { + "User-Agent": f"restore_channel/kolibri-studio/dev python-requests/{requests.__version__}", + } + ) def __getattr__(self, name): if name.endswith("_with_token"): @@ -180,6 +186,11 @@ def __init__( self.download_content = download_content self.logger = logger or logging.getLogger(__name__) self.client = ImportClient(source_url, api_token=token) + self.storage = ( + default_storage._get_writeable_backend() + if isinstance(default_storage, CompositeStorage) + else default_storage + ) self.conn = None self.cursor = None self.progress = None @@ -569,8 +580,8 @@ def _download_file( file_exists = False # If the file already exists, get the size from the storage - if default_storage.exists(filepath): - file_size = file_size or default_storage.size(filepath) + if self.storage.exists(filepath): + file_size = file_size or self.storage.size(filepath) file_exists = True # if it needs downloading and if we were instructed to do so elif self.download_content or (is_thumbnail and contentnode): From 71a85bc1efad36d5a61f886114d3e0ba1a8730a5 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Wed, 7 May 2025 11:45:21 -0700 Subject: [PATCH 6/7] More defensive and thorough handling of completion and mastery criteria --- .../contentcuration/utils/import_tools.py | 101 +++++++++++------- 1 file changed, 64 insertions(+), 37 deletions(-) diff --git a/contentcuration/contentcuration/utils/import_tools.py b/contentcuration/contentcuration/utils/import_tools.py index 58461ee750..bc6e58baa1 
100644 --- a/contentcuration/contentcuration/utils/import_tools.py +++ b/contentcuration/contentcuration/utils/import_tools.py @@ -277,9 +277,9 @@ def run(self): unit="node", ) chunk = [] - for node in exercise_nodes.iterator(chunk_size=10): + for node in exercise_nodes.iterator(chunk_size=20): chunk.append(node) - if len(chunk) >= 10: + if len(chunk) >= 20: self._create_assessment_items(chunk) exercise_progress.update(len(chunk)) chunk = [] @@ -405,44 +405,11 @@ def _create_nodes(self, parent, progress): if coach_content: role = roles.COACH - # Determine extra_fields - extra_fields = {} - if kind == content_kinds.EXERCISE: - randomize_sql = f""" - SELECT randomize - FROM {ASSESSMENTMETADATA_TABLE} - WHERE contentnode_id = ? - """ - randomize = self.cursor.execute(randomize_sql, (id,)).fetchone() - extra_fields["options"] = json.loads(options) if options else {} - extra_fields["randomize"] = bool(randomize[0]) if randomize else False - completion_criteria_ = extra_fields["options"].get( - "completion_criteria" - ) - if ( - completion_criteria_ - and completion_criteria_.get("model") == completion_criteria.MASTERY - ): - mastery_model = completion_criteria_.get("threshold", {}).get( - "mastery_model" - ) - if mastery_model == mastery_criteria.DO_ALL: - completion_criteria_["threshold"] = { - "mastery_model": mastery_model, - } - if ( - completion_criteria_ - and "learner_managed" not in completion_criteria_ - ): - completion_criteria_["learner_managed"] = False - # Determine license license_result = self._retrieve_license(license_id) license_description = license_result[1] if license_result else "" license_result = license_result[0] if license_result else None - # TODO: Determine thumbnail encoding - # Create the new node model node = models.ContentNode.objects.create( node_id=id, @@ -458,7 +425,7 @@ def _create_nodes(self, parent, progress): license_description=license_description, language_id=lang_id, role_visibility=role, - extra_fields=extra_fields, + extra_fields=self._prepare_node_extra_fields(id, kind, options), kind_id=kind, parent=parent, original_channel_id=self.target_id, @@ -479,6 +446,8 @@ def _create_nodes(self, parent, progress): self._create_files(node) self._create_tags(node) + # assessments are handled after all nodes are created, which also ensures nodes + # are marked complete if kind != content_kinds.EXERCISE: errors = node.mark_complete() if errors: @@ -486,6 +455,64 @@ def _create_nodes(self, parent, progress): node.save() progress.update(1) + def _prepare_node_extra_fields(self, node_id, kind, options): + """ + Prepare extra fields for the node based on the kind and options. For exercises, it + retrieves the additional info from the assessment metadata. + + :param node_id: the node ID + :param kind: the content kind + :param options: the options JSON string + :return: a dictionary of extra fields + """ + extra_fields = { + "options": json.loads(options) if options else {}, + } + completion_criteria_ = extra_fields["options"].get("completion_criteria", {}) + + # don't fill anything in if there is no completion_criteria, otherwise validation will fail + if completion_criteria_ and "learner_managed" not in completion_criteria_: + completion_criteria_.update(learner_managed=False) + + if kind == content_kinds.EXERCISE: + randomize_sql = f""" + SELECT randomize, mastery_model + FROM {ASSESSMENTMETADATA_TABLE} + WHERE contentnode_id = ? 
+ """ + randomize, mastery_criteria_ = self.cursor.execute( + randomize_sql, (node_id,) + ).fetchone() + extra_fields["randomize"] = bool(randomize) if randomize else False + if mastery_criteria_: + mastery_criteria_ = json.loads(mastery_criteria_) + mastery_criteria_.update(mastery_model=mastery_criteria_.pop("type")) + completion_criteria_.update( + { + "model": completion_criteria.MASTERY, + "threshold": mastery_criteria_, + } + ) + + if completion_criteria_.get("model") == completion_criteria.MASTERY: + mastery_model = completion_criteria_.get("threshold", {}).get( + "mastery_model" + ) + if mastery_model in [ + mastery_criteria.DO_ALL, + mastery_criteria.NUM_CORRECT_IN_A_ROW_2, + mastery_criteria.NUM_CORRECT_IN_A_ROW_3, + mastery_criteria.NUM_CORRECT_IN_A_ROW_5, + mastery_criteria.NUM_CORRECT_IN_A_ROW_10, + ]: + # remove m,n values + completion_criteria_["threshold"] = { + "mastery_model": mastery_model, + } + + extra_fields["options"].update(completion_criteria=completion_criteria_) + return extra_fields + def _retrieve_license(self, license_id): """ Get license based on id from exported db @@ -544,7 +571,7 @@ def _create_files(self, contentnode): is_thumbnail=is_thumbnail, ) except IOError as e: - self.logger.warning("\b FAILED (check logs for more details)") + self.logger.warning(f"FAILED to download '{filename}': {str(e)}") if e.errno: sys.stderr.write( f"Restoration Process Error: Failed to save file object {filename}: {os.strerror(e.errno)}" From 6b0a37fcfddbfde69d96e8464b04cdc0edafa136 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Wed, 17 Dec 2025 13:19:41 -0800 Subject: [PATCH 7/7] Change python server port and add nginx to services for proxying --- .run/devserver.run.xml | 2 +- Makefile | 2 +- contentcuration/contentcuration/models.py | 43 +++-------------------- docker-compose.yml | 10 +++--- package.json | 2 +- webpack.config.js | 40 +++++++++++++-------- 6 files changed, 36 insertions(+), 63 deletions(-) diff --git a/.run/devserver.run.xml b/.run/devserver.run.xml index 1c94ee6402..55b6546404 100644 --- a/.run/devserver.run.xml +++ b/.run/devserver.run.xml @@ -13,7 +13,7 @@
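Looking back at PATCH 6's _prepare_node_extra_fields, a hedged illustration of the extra_fields shape it aims to produce for an exercise whose exported mastery model is do_all (the m/n values are stripped for the listed models); the literal strings stand in for the le_utils constants used in the patch:

    # Assumed shape only; the patch builds this with completion_criteria.MASTERY
    # and mastery_criteria.DO_ALL rather than string literals.
    extra_fields = {
        "randomize": True,
        "options": {
            "completion_criteria": {
                "model": "mastery",
                "threshold": {"mastery_model": "do_all"},
                "learner_managed": False,
            },
        },
    }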