From d9e638e0eb2070d5135ef5622e113fa4ad719783 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Tue, 6 May 2025 13:03:14 -0700 Subject: [PATCH 1/8] Refactor restore-channel utilities --- .../management/commands/restore_channel.py | 64 +- .../tests/test_restore_channel.py | 179 --- .../contentcuration/utils/files.py | 23 +- .../contentcuration/utils/import_tools.py | 1075 ++++++++++------- 4 files changed, 709 insertions(+), 632 deletions(-) delete mode 100644 contentcuration/contentcuration/tests/test_restore_channel.py diff --git a/contentcuration/contentcuration/management/commands/restore_channel.py b/contentcuration/contentcuration/management/commands/restore_channel.py index efaeb3ee7c..49b785f725 100644 --- a/contentcuration/contentcuration/management/commands/restore_channel.py +++ b/contentcuration/contentcuration/management/commands/restore_channel.py @@ -2,28 +2,66 @@ from django.core.management.base import BaseCommand -from contentcuration.utils.import_tools import import_channel +from contentcuration.utils.import_tools import ImportManager -logger = logging.getLogger('command') +logger = logging.getLogger("command") class Command(BaseCommand): + """ + This command is used to restore a channel from another Studio instance. This is for + development purposes only and should not be used in production. + """ def add_arguments(self, parser): # ID of channel to read data from - parser.add_argument('source_id', type=str) + parser.add_argument("source_id", type=str) # ID of channel to write data to (can be same as source channel) - parser.add_argument('--target', help='restore channel db to TARGET CHANNEL ID') - parser.add_argument('--download-url', help='where to download db from') - parser.add_argument('--editor', help='add user as editor to channel') + parser.add_argument( + "--target", + help="A different channel ID for which to restore the channel. 
If not provided, the source channel ID will be used.", + ) + parser.add_argument( + "--source-url", + default="http://localhost:8080", + help="Studio instance from which to download the channel DB or content files", + ) + parser.add_argument("--token", help="API token for the Studio instance") + parser.add_argument( + "--editor", + default="a@a.com", + help="Add user as editor to channel with provided email address", + ) + parser.add_argument( + "--download-content", + action="store_true", + default=False, + help="Whether to download content files", + ) + parser.add_argument( + "--public", + action="store_true", + default=False, + help="Whether to make the channel public", + ) + parser.add_argument( + "--publish", + action="store_true", + default=False, + help="Whether to publish the channel after restoration", + ) def handle(self, *args, **options): - # Set up variables for restoration process logger.info("\n\n********** STARTING CHANNEL RESTORATION **********") - source_id = options['source_id'] - target_id = options.get('target') or source_id - download_url = options.get('download_url') - editor = options.get('editor') - - import_channel(source_id, target_id, download_url, editor, logger=logger) + manager = ImportManager( + options["source_url"], + options["source_id"], + target_id=options.get("target"), + editor=options.get("editor"), + public=options.get("public"), + publish=options.get("publish"), + token=options.get("token"), + download_content=options.get("download_content"), + ) + manager.run() diff --git a/contentcuration/contentcuration/tests/test_restore_channel.py b/contentcuration/contentcuration/tests/test_restore_channel.py deleted file mode 100644 index a4d1e13a39..0000000000 --- a/contentcuration/contentcuration/tests/test_restore_channel.py +++ /dev/null @@ -1,179 +0,0 @@ -# -*- coding: utf-8 -*- -import datetime -import json -import uuid -from io import BytesIO - -from django.core.files.storage import default_storage -from django.template.loader import render_to_string -from django.utils.translation import activate -from django.utils.translation import deactivate -from le_utils.constants import exercises -from mixer.backend.django import mixer -from mock import MagicMock -from mock import patch - -from .base import StudioTestCase -from contentcuration.models import AssessmentItem -from contentcuration.models import generate_object_storage_name -from contentcuration.utils.import_tools import create_channel -from contentcuration.utils.import_tools import generate_assessment_item -from contentcuration.utils.import_tools import process_content - - -thumbnail_path = "/content/thumbnail.png" -ASSESSMENT_DATA = { - 'input-question-test': { - 'template': 'perseus/input_question.json', - 'type': exercises.INPUT_QUESTION, - 'question': "Input question", - 'question_images': [{"name": "test.jpg", "width": 12.71, "height": 12.12}], - 'hints': [{'hint': 'Hint 1'}], - 'answers': [ - {'answer': '1', 'correct': True, 'images': []}, - {'answer': '2', 'correct': True, 'images': []} - ], - 'order': 0 - }, - 'multiple-selection-test': { - 'template': 'perseus/multiple_selection.json', - 'type': exercises.MULTIPLE_SELECTION, - 'question': "Multiple selection question", - 'question_images': [], - 'hints': [], - 'answers': [ - {'answer': 'A', 'correct': True, 'images': []}, - {'answer': 'B', 'correct': True, 'images': []}, - {'answer': 'C', 'correct': False, 'images': []}, - ], - 'multiple_select': True, - 'order': 1, - 'randomize': False - }, - 'single-selection-test': { - 'template': 
'perseus/multiple_selection.json', - 'type': exercises.SINGLE_SELECTION, - 'question': "Single select question", - 'question_images': [], - 'hints': [{'hint': 'Hint test'}], - 'answers': [ - {'answer': 'Correct answer', 'correct': True, 'images': []}, - {'answer': 'Incorrect answer', 'correct': False, 'images': []}, - ], - 'multiple_select': False, - 'order': 2, - 'randomize': True - }, - 'perseus-question-test': { - 'template': 'perseus/perseus_question.json', - 'type': exercises.PERSEUS_QUESTION, - 'order': 3, - 'raw_data': '{}' - } -} - - -class ChannelRestoreUtilityFunctionTestCase(StudioTestCase): - @patch("contentcuration.utils.import_tools.write_to_thumbnail_file", return_value=thumbnail_path) - def setUp(self, thumb_mock): - self.id = uuid.uuid4().hex - self.name = "test name" - self.description = "test description" - self.thumbnail_encoding = "base64 string" - self.root_pk = uuid.uuid4() - self.version = 7 - self.last_updated = datetime.datetime.now() - self.cursor_mock = MagicMock() - self.cursor_mock.execute.return_value.fetchone.return_value = ( - self.id, - self.name, - self.description, - self.thumbnail_encoding, - self.root_pk, - self.version, - self.last_updated, - ) - self.channel, _ = create_channel(self.cursor_mock, self.id, self.admin_user) - - def test_restore_channel_id(self): - self.assertEqual(self.channel.id, self.id) - - def test_restore_channel_name(self): - self.assertEqual(self.channel.name, self.name) - - def test_restore_channel_description(self): - self.assertEqual(self.channel.description, self.description) - - def test_restore_channel_thumbnail(self): - self.assertEqual(self.channel.thumbnail, thumbnail_path) - - def test_restore_channel_thumbnail_encoding(self): - self.assertEqual(self.channel.thumbnail_encoding["base64"], self.thumbnail_encoding) - - def test_restore_channel_version(self): - self.assertEqual(self.channel.version, self.version) - - -class PerseusRestoreTestCase(StudioTestCase): - def setUp(self): - super(PerseusRestoreTestCase, self).setUp() - image_path = generate_object_storage_name('test', 'test.png') - default_storage.save(image_path, BytesIO(b'test')) - - def test_process_content(self): - tests = [ - { - "content": 'test 1', - "output": 'test 1', - 'images': {} - }, - { - "content": 'test 2 ![test](${☣ LOCALPATH}/images/test.png)', - "output": 'test 2 ![test](${☣ CONTENTSTORAGE}/test.png)', - 'images': {} - }, - { - "content": 'test 3 ![](${☣ LOCALPATH}/images/test.png)', - "output": 'test 3 ![](${☣ CONTENTSTORAGE}/test.png =50x50)', - 'images': { - '${☣ LOCALPATH}/images/test.png': { - 'width': 50, - 'height': 50 - } - } - }, - { - "content": 'test 4 ![](${☣ LOCALPATH}/images/test.png) ![](${☣ LOCALPATH}/images/test.png)', - "output": 'test 4 ![](${☣ CONTENTSTORAGE}/test.png) ![](${☣ CONTENTSTORAGE}/test.png)', - 'images': {} - }, - { - "content": 'test 5  $\\sqrt{36}+\\frac{1}{2}$ ', - "output": 'test 5 $$\\sqrt{36}+\\frac{1}{2}$$', - 'images': {} - }, - { - "content": 'test 6 $\\frac{1}{2}$ $\\frac{3}{2}$', - "output": 'test 6 $$\\frac{1}{2}$$ $$\\frac{3}{2}$$', - 'images': {} - } - ] - for test in tests: - result = process_content(test, mixer.blend(AssessmentItem)) - self.assertEqual(result, test['output']) - - def test_generate_assessment_item(self): - # Run in Spanish to ensure we are properly creating JSON with non-localized numbers - activate("es-es") - for assessment_id, data in list(ASSESSMENT_DATA.items()): - assessment_data = json.loads(render_to_string(data['template'], data).encode('utf-8', "ignore")) - assessment_item 
= generate_assessment_item(assessment_id, data['order'], data['type'], assessment_data) - self.assertEqual(assessment_item.type, data['type']) - self.assertEqual(assessment_item.question, data.get('question', '')) - self.assertEqual(assessment_item.randomize, bool(data.get('randomize'))) - self.assertEqual(assessment_item.raw_data, data.get('raw_data', '')) - for hint in json.loads(assessment_item.hints): - self.assertTrue(any(h for h in data['hints'] if h['hint'] == hint['hint'])) - for answer in json.loads(assessment_item.answers): - self.assertTrue(any(a for a in data['answers'] if a['answer'] == str(answer['answer']) and a['correct'] == answer['correct'])) - deactivate() diff --git a/contentcuration/contentcuration/utils/files.py b/contentcuration/contentcuration/utils/files.py index a5d8361e8c..74c53f8ba9 100644 --- a/contentcuration/contentcuration/utils/files.py +++ b/contentcuration/contentcuration/utils/files.py @@ -79,12 +79,13 @@ def duplicate_file(file_object, node=None, assessment_item=None, preset_id=None, return file_copy -def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH): +def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH, input_buffer=None): """ Generates a base64 encoding for a thumbnail Args: filename (str): thumbnail to generate encoding from (must be in storage already) dimension (int, optional): desired width of thumbnail. Defaults to 400. + input_buffer (BytesIO, optional): buffer to read from. Defaults to None. Returns base64 encoding of resized thumbnail """ @@ -97,17 +98,17 @@ def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH): # make sure the aspect ratio between width and height is 16:9 thumbnail_size = [dimension, round(dimension / 1.77)] try: - if not filename.startswith(settings.STATIC_ROOT): - filename = generate_object_storage_name(checksum, filename) - inbuffer = default_storage.open(filename, 'rb') - - else: - inbuffer = open(filename, 'rb') - - if not inbuffer: + if not input_buffer: + if not filename.startswith(settings.STATIC_ROOT): + filename = generate_object_storage_name(checksum, filename) + input_buffer = default_storage.open(filename, 'rb') + else: + input_buffer = open(filename, 'rb') + + if not input_buffer: raise AssertionError - with Image.open(inbuffer) as image: + with Image.open(input_buffer) as image: image_format = image.format # Note: Image.thumbnail ensures that the image will fit in the @@ -122,7 +123,7 @@ def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH): finally: # Try to close the inbuffer if it has been created try: - inbuffer.close() + input_buffer.close() except UnboundLocalError: pass outbuffer.close() diff --git a/contentcuration/contentcuration/utils/import_tools.py b/contentcuration/contentcuration/utils/import_tools.py index e662b75fc4..875291d84b 100644 --- a/contentcuration/contentcuration/utils/import_tools.py +++ b/contentcuration/contentcuration/utils/import_tools.py @@ -4,498 +4,715 @@ import logging import os import re -import shutil import sqlite3 import sys import tempfile -import zipfile +from functools import cached_property from io import BytesIO import requests -from django.conf import settings from django.core.files.storage import default_storage +from django.core.management import call_command from django.db import transaction +from kolibri_content.router import get_active_content_database +from kolibri_content.router import using_content_database +from le_utils.constants import completion_criteria from le_utils.constants import content_kinds from 
le_utils.constants import exercises from le_utils.constants import format_presets +from le_utils.constants import mastery_criteria from le_utils.constants import roles +from le_utils.constants.labels import learning_activities from contentcuration import models from contentcuration.api import write_raw_content_to_storage from contentcuration.utils.files import create_file_from_contents +from contentcuration.utils.files import get_thumbnail_encoding from contentcuration.utils.files import write_base64_to_file from contentcuration.utils.garbage_collect import get_deleted_chefs_root - - -CHANNEL_TABLE = 'content_channelmetadata' -NODE_TABLE = 'content_contentnode' -ASSESSMENTMETADATA_TABLE = 'content_assessmentmetadata' -FILE_TABLE = 'content_file' -TAG_TABLE = 'content_contenttag' -NODE_TAG_TABLE = 'content_contentnode_tags' -LICENSE_TABLE = 'content_license' +from contentcuration.utils.publish import publish_channel +from contentcuration.viewsets.assessmentitem import exercise_image_filename_regex + +CHANNEL_TABLE = "content_channelmetadata" +NODE_TABLE = "content_contentnode" +ASSESSMENTMETADATA_TABLE = "content_assessmentmetadata" +FILE_TABLE = "content_file" +TAG_TABLE = "content_contenttag" +NODE_TAG_TABLE = "content_contentnode_tags" +LICENSE_TABLE = "content_license" NODE_COUNT = 0 FILE_COUNT = 0 TAG_COUNT = 0 ANSWER_FIELD_MAP = { - exercises.SINGLE_SELECTION: 'radio 1', - exercises.MULTIPLE_SELECTION: 'radio 1', - exercises.INPUT_QUESTION: 'numeric-input 1', + exercises.SINGLE_SELECTION: "radio 1", + exercises.MULTIPLE_SELECTION: "radio 1", + exercises.INPUT_QUESTION: "numeric-input 1", } log = logging.getLogger(__name__) -def import_channel(source_id, target_id=None, download_url=None, editor=None, logger=None): - """ - Import a channel from another Studio instance. This can be used to - copy online Studio channels into local machines for development, - testing, faster editing, or other purposes. - - :param source_id: The UUID of the channel to import from the source Studio instance. - :param target_id: The UUID of the channel on the local instance. Defaults to source_id. - :param download_url: The URL of the Studio instance to import from. - :param editor: The email address of the user you wish to add as an editor, if any. 
- - """ - - global log - if logger: - log = logger - else: - log = logging.getLogger(__name__) - - # Set up variables for the import process - log.info("\n\n********** STARTING CHANNEL IMPORT **********") - start = datetime.datetime.now() - target_id = target_id or source_id - - # Test connection to database - log.info("Connecting to database for channel {}...".format(source_id)) - - tempf = tempfile.NamedTemporaryFile(suffix=".sqlite3", delete=False) - conn = None - try: - if download_url: - response = requests.get('{}/content/databases/{}.sqlite3'.format(download_url, source_id)) - for chunk in response: - tempf.write(chunk) - else: - filepath = "/".join([settings.DB_ROOT, "{}.sqlite3".format(source_id)]) - # Check if database exists - if not default_storage.exists(filepath): - raise IOError("The object requested does not exist.") - with default_storage.open(filepath) as fobj: - shutil.copyfileobj(fobj, tempf) - - tempf.close() - conn = sqlite3.connect(tempf.name) - cursor = conn.cursor() - - # Start by creating channel - log.info("Creating channel...") - editor = models.User.objects.get(email=editor) - channel, root_pk = create_channel(conn, target_id, editor) - channel.editors.add(editor) - channel.save() - - # Create root node - root = models.ContentNode.objects.create( - node_id=root_pk, - title=channel.name, - kind_id=content_kinds.TOPIC, - original_channel_id=target_id, - source_channel_id=target_id, +class ImportClient(requests.Session): + def __init__(self, base_url, api_token=None): + super(ImportClient, self).__init__() + self.base_url = base_url + self.api_token = api_token + + def __getattr__(self, name): + if name.endswith("_with_token"): + if not self.api_token: + raise ValueError("API token is required for this method.") + + target_method = getattr( + super(ImportClient, self), name.replace("_with_token", "") + ) + token_headers = { + "Authorization": f"Token {self.api_token}", + } + return lambda url, *args, **kwargs: target_method( + url, *args, headers=token_headers, **kwargs + ) + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute '{name}'" ) - # Create nodes mapping to channel - log.info(" Creating nodes...") - with transaction.atomic(): - create_nodes(cursor, target_id, root, download_url=download_url) - # TODO: Handle prerequisites - - # Delete the previous tree if it exists - old_previous = channel.previous_tree - if old_previous: - old_previous.parent = get_deleted_chefs_root() - old_previous.title = "Old previous tree for channel {}".format(channel.pk) - old_previous.save() - - # Save tree to target tree - channel.previous_tree = channel.main_tree - channel.main_tree = root - channel.save() - finally: - conn and conn.close() - tempf.close() - os.unlink(tempf.name) - - # Print stats - log.info("\n\nChannel has been imported (time: {ms})\n".format(ms=datetime.datetime.now() - start)) - log.info("\n\n********** IMPORT COMPLETE **********\n\n") - - -def create_channel(cursor, target_id, editor): - """ create_channel: Create channel at target id - Args: - cursor (sqlite3.Connection): connection to export database - target_id (str): channel_id to write to - Returns: channel model created and id of root node - """ - id, name, description, thumbnail, root_pk, version, last_updated = cursor.execute( - 'SELECT id, name, description, thumbnail, root_pk, version, last_updated FROM {table}' - .format(table=CHANNEL_TABLE)).fetchone() - channel, is_new = models.Channel.objects.get_or_create(pk=target_id, actor_id=editor.id) - channel.name = name - 
channel.description = description - channel.thumbnail = write_to_thumbnail_file(thumbnail) - channel.thumbnail_encoding = {'base64': thumbnail, 'points': [], 'zoom': 0} - channel.version = version - channel.save() - log.info("\tCreated channel {} with name {}".format(target_id, name)) - return channel, root_pk + def request(self, method, url, *args, **kwargs): + url = f"{self.base_url}{url}" + return super(ImportClient, self).request(method, url, *args, **kwargs) def write_to_thumbnail_file(raw_thumbnail): - """ write_to_thumbnail_file: Convert base64 thumbnail to file - Args: - raw_thumbnail (str): base64 encoded thumbnail - Returns: thumbnail filename + """write_to_thumbnail_file: Convert base64 thumbnail to file + Args: + raw_thumbnail (str): base64 encoded thumbnail + Returns: thumbnail filename """ - if raw_thumbnail and isinstance(raw_thumbnail, str) and raw_thumbnail != "" and 'static' not in raw_thumbnail: + if ( + raw_thumbnail + and isinstance(raw_thumbnail, str) + and raw_thumbnail != "" + and "static" not in raw_thumbnail + ): with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tempf: try: tempf.close() write_base64_to_file(raw_thumbnail, tempf.name) - with open(tempf.name, 'rb') as tf: - fobj = create_file_from_contents(tf.read(), ext="png", preset_id=format_presets.CHANNEL_THUMBNAIL) + with open(tempf.name, "rb") as tf: + fobj = create_file_from_contents( + tf.read(), ext="png", preset_id=format_presets.CHANNEL_THUMBNAIL + ) return str(fobj) finally: tempf.close() os.unlink(tempf.name) -def create_nodes(cursor, target_id, parent, indent=1, download_url=None): - """ create_channel: Create channel at target id - Args: - cursor (sqlite3.Connection): connection to export database - target_id (str): channel_id to write to - parent (models.ContentNode): node's parent - indent (int): How far to indent print statements - Returns: newly created node +def convert_metadata_to_dict(metadata): """ - # Read database rows that match parent - parent_query = "parent_id=\'{}\'".format(parent.node_id) - - sql_command = 'SELECT id, title, content_id, description, sort_order, '\ - 'license_owner, author, license_id, kind, coach_content, lang_id FROM {table} WHERE {query} ORDER BY sort_order;'\ - .format(table=NODE_TABLE, query=parent_query) - query = cursor.execute(sql_command).fetchall() - - # Parse through rows and create models - for id, title, content_id, description, sort_order, license_owner, author, license_id, kind, coach_content, lang_id in query: - log.info("{indent} {id} ({title} - {kind})...".format(indent=" |" * indent, id=id, title=title, kind=kind)) - - # Determine role - role = roles.LEARNER - if coach_content: - role = roles.COACH - - # Determine extra_fields - assessment_query = "SELECT mastery_model, randomize FROM {table} WHERE contentnode_id='{node}'".format(table=ASSESSMENTMETADATA_TABLE, node=id) - result = cursor.execute(assessment_query).fetchone() - extra_fields = result[0] if result else {} - if isinstance(extra_fields, str): - extra_fields = json.loads(extra_fields) - if result: - extra_fields.update({"randomize": result[1]}) - - # Determine license - license = retrieve_license(cursor, license_id) - license_description = license[1] if license else "" - license = license[0] if license else None - - # TODO: Determine thumbnail encoding - - # Create new node model - node = models.ContentNode.objects.create( - node_id=id, - original_source_node_id=id, - source_node_id=id, - title=title, - content_id=content_id, - description=description, - 
sort_order=sort_order, - copyright_holder=license_owner, - author=author, - license=license, - license_description=license_description, - language_id=lang_id, - role_visibility=role, - extra_fields=extra_fields, - kind_id=kind, - parent=parent, - original_channel_id=target_id, - source_channel_id=target_id, - ) - - # Handle foreign key references (children, files, tags) - if kind == content_kinds.TOPIC: - create_nodes(cursor, target_id, node, indent=indent + 1, download_url=download_url) - elif kind == content_kinds.EXERCISE: - create_assessment_items(cursor, node, indent=indent + 1, download_url=download_url) - create_files(cursor, node, indent=indent + 1, download_url=download_url) - create_tags(cursor, node, target_id, indent=indent + 1) + Convert metadata from a string to a dictionary. - return node + :param metadata: The metadata string to convert. + :return: A dictionary representation of the metadata. + """ + if isinstance(metadata, str): + metadata_split = metadata.split(",") + return {metadata_key: True for metadata_key in metadata_split} + return metadata -def retrieve_license(cursor, license_id): - """ retrieve_license_name: Get license based on id from exported db - Args: - cursor (sqlite3.Connection): connection to export database - license_id (str): id of license on exported db - Returns: license model matching the name and the associated license description +def convert_learning_activities_to_dict(content_kind, metadata): """ - # Handle no license being assigned - if license_id is None or license_id == "": - return None + Convert learning activities from a string to a dictionary. - # Return license that matches name - name, description = cursor.execute( - 'SELECT license_name, license_description FROM {table} WHERE id={id}'.format(table=LICENSE_TABLE, id=license_id) - ).fetchone() - return models.License.objects.get(license_name=name), description - - -def download_file(filename, download_url=None, contentnode=None, assessment_item=None, preset=None, file_size=None, lang_id=None): - checksum, extension = os.path.splitext(filename) - extension = extension.lstrip('.') - filepath = models.generate_object_storage_name(checksum, filename) - - # Download file if it hasn't already been downloaded - if download_url and not default_storage.exists(filepath): - buffer = BytesIO() - response = requests.get('{}/content/storage/{}/{}/{}'.format(download_url, filename[0], filename[1], filename)) - for chunk in response: - buffer.write(chunk) - - checksum, _, filepath = write_raw_content_to_storage(buffer.getvalue(), ext=extension) - buffer.close() - - # Save values to new file object - file_obj = models.File( - file_format_id=extension, - file_size=file_size or default_storage.size(filepath), - contentnode=contentnode, - assessment_item=assessment_item, - language_id=lang_id, - preset_id=preset or "", - ) - file_obj.file_on_disk.name = filepath - file_obj.save() - - -def create_files(cursor, contentnode, indent=0, download_url=None): - """ create_files: Get license - Args: - cursor (sqlite3.Connection): connection to export database - contentnode (models.ContentNode): node file references - indent (int): How far to indent print statements - Returns: None + :param content_kind: The content kind of the learning activities. + :param metadata: The learning activities string to convert. + :return: A dictionary representation of the learning activities. 
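+
+    Illustrative example (a sketch using the le_utils constants referenced below,
+    not an exhaustive mapping)::
+
+        convert_learning_activities_to_dict(content_kinds.AUDIO, None)
+        # -> {learning_activities.LISTEN: True}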
""" - # Parse database for files referencing content node and make file models - sql_command = 'SELECT checksum, extension, file_size, contentnode_id, '\ - 'lang_id, preset FROM {table} WHERE contentnode_id=\'{id}\';'\ - .format(table=FILE_TABLE, id=contentnode.node_id) + metadata = convert_metadata_to_dict(metadata) + if isinstance(metadata, dict): + return metadata + + if content_kind == content_kinds.EXERCISE: + return {learning_activities.PRACTICE: True} + elif content_kind in [content_kinds.HTML5, content_kinds.H5P]: + return {learning_activities.EXPLORE: True} + elif content_kind == content_kinds.AUDIO: + return {learning_activities.LISTEN: True} + elif content_kind == content_kinds.VIDEO: + return {learning_activities.WATCH: True} + elif content_kind == content_kinds.DOCUMENT: + return {learning_activities.READ: True} + elif content_kind == content_kinds.SLIDESHOW: + return {learning_activities.READ: True} + elif content_kind == content_kinds.TOPIC: + return None + return {learning_activities.EXPLORE: True} - query = cursor.execute(sql_command).fetchall() - for checksum, extension, file_size, contentnode_id, lang_id, preset in query: - filename = "{}.{}".format(checksum, extension) - log.info("{indent} * FILE {filename}...".format(indent=" |" * indent, filename=filename)) - try: - download_file(filename, download_url=download_url, contentnode=contentnode, preset=preset, file_size=file_size, lang_id=lang_id) - - except IOError as e: - log.warning("\b FAILED (check logs for more details)") - sys.stderr.write("Restoration Process Error: Failed to save file object {}: {}".format(filename, os.strerror(e.errno))) - continue - - -def create_tags(cursor, contentnode, target_id, indent=0): - """ create_tags: Create tags associated with node - Args: - cursor (sqlite3.Connection): connection to export database - contentnode (models.ContentNode): node file references - target_id (str): channel_id to write to - indent (int): How far to indent print statements - Returns: None +class ImportManager(object): """ - # Parse database for files referencing content node and make file models - sql_command = 'SELECT ct.id, ct.tag_name FROM {cnttable} cnt '\ - 'JOIN {cttable} ct ON cnt.contenttag_id = ct.id ' \ - 'WHERE cnt.contentnode_id=\'{id}\';'\ - .format( - cnttable=NODE_TAG_TABLE, - cttable=TAG_TABLE, - id=contentnode.node_id, - ) - query = cursor.execute(sql_command).fetchall() - - # Build up list of tags - tag_list = [] - for id, tag_name in query: - log.info("{indent} ** TAG {tag}...".format(indent=" |" * indent, tag=tag_name)) - # Save values to new or existing tag object - tag_obj, is_new = models.ContentTag.objects.get_or_create( - pk=id, - tag_name=tag_name, - channel_id=target_id, - ) - tag_list.append(tag_obj) - - # Save tags to node - contentnode.tags.set(tag_list) - contentnode.save() - - -def create_assessment_items(cursor, contentnode, indent=0, download_url=None): - """ create_assessment_items: Generate assessment items based on perseus zip - Args: - cursor (sqlite3.Connection): connection to export database - contentnode (models.ContentNode): node assessment items reference - indent (int): How far to indent print statements - download_url (str): Domain to download files from - Returns: None + Import a channel from another Studio instance. This can be used to copy online Studio channels + into local machines for development, testing, faster editing, or other purposes. 
""" - # Parse database for files referencing content node and make file models - sql_command = 'SELECT checksum, extension '\ - 'preset FROM {table} WHERE contentnode_id=\'{id}\' AND preset=\'exercise\';'\ - .format(table=FILE_TABLE, id=contentnode.node_id) - - query = cursor.execute(sql_command).fetchall() - for checksum, extension in query: - filename = "{}.{}".format(checksum, extension) - log.info("{indent} * EXERCISE {filename}...".format(indent=" |" * indent, filename=filename)) - + def __init__( + self, + source_url, + source_id, + target_id=None, + editor=None, + public=False, + publish=False, + token=None, + download_content=True, + logger=None, + ): + self.source_id = source_id + self.target_id = target_id or source_id + self.source_url = source_url + self.editor = editor + self.public = public + self.publish = publish + self.token = token + self.download_content = download_content + self.logger = logger or logging.getLogger(__name__) + self.client = ImportClient(source_url, api_token=token) + self.conn = None + self.cursor = None + self.schema_version = None + + @cached_property + def editor_user(self): + """ + Get the User object for the editor email address. + + :return: The User object for the editor. + """ + return models.User.objects.get(email=self.editor) if self.editor else None + + def run(self): + """ + Run the import process. + """ + # Set up variables for the import process + self.logger.info("\n\n********** STARTING CHANNEL IMPORT **********") + start = datetime.datetime.now() + + if not self.token: + self.logger.warning( + "No API token provided. This may result in limited functionality." + ) + + # Test connection to the database + self.logger.info(f"Connecting to database for channel {self.source_id}...") + + tempf = tempfile.NamedTemporaryFile(suffix=".sqlite3", delete=False) try: - # Store the downloaded zip into temporary storage - tempf = tempfile.NamedTemporaryFile(suffix='.{}'.format(extension), delete=False) - response = requests.get('{}/content/storage/{}/{}/{}'.format(download_url, filename[0], filename[1], filename)) + response = self.client.get(f"/content/databases/{self.source_id}.sqlite3") for chunk in response: tempf.write(chunk) - tempf.close() - extract_assessment_items(tempf.name, contentnode, download_url=download_url) - except IOError as e: - log.warning("\b FAILED (check logs for more details)") - sys.stderr.write("Restoration Process Error: Failed to save file object {}: {}".format(filename, os.strerror(e.errno))) - continue - finally: - os.unlink(tempf.name) + tempf.close() -def extract_assessment_items(filepath, contentnode, download_url=None): - """ extract_assessment_items: Create and save assessment items to content node - Args: - filepath (str): Where perseus zip is stored - contentnode (models.ContentNode): node assessment items reference - download_url (str): Domain to download files from - Returns: None - """ - - try: - tempdir = tempfile.mkdtemp() - with zipfile.ZipFile(filepath, 'r') as zipf: - zipf.extractall(tempdir) - os.chdir(tempdir) - - with open('exercise.json', 'rb') as fobj: - data = json.load(fobj) - - for index, assessment_id in enumerate(data['all_assessment_items']): - with open('{}.json'.format(assessment_id), 'rb') as fobj: - assessment_item = generate_assessment_item( - assessment_id, - index, - data['assessment_mapping'][assessment_id], - json.load(fobj), - download_url=download_url + with using_content_database(tempf.name): + call_command( + "migrate", + "content", + database=get_active_content_database(), + 
interactive=False,  # migrate's --no-input flag maps to call_command's "interactive" option
+                )

-                contentnode.assessment_items.add(assessment_item)
-    finally:
-        shutil.rmtree(tempdir)
-
-
-def generate_assessment_item(assessment_id, order, assessment_type, assessment_data, download_url=None):
-    """ generate_assessment_item: Generates a new assessment item
-    Args:
-        assessment_id (str): AssessmentItem.assessment_id value
-        order (Number): AssessmentItem.order value
-        assessment_type (str): AssessmentItem.type value
-        assessment_data (dict): Extracted data from perseus file
-        download_url (str): Domain to download files from
-    Returns: models.AssessmentItem
-    """
-    assessment_item = models.AssessmentItem.objects.create(
-        assessment_id=assessment_id,
-        type=assessment_type,
-        order=order
-    )
-    if assessment_type == exercises.PERSEUS_QUESTION:
-        assessment_item.raw_data = json.dumps(assessment_data)
-    else:
-        # Parse questions
-        assessment_data['question']['content'] = '\n\n'.join(assessment_data['question']['content'].split('\n\n')[:-1])
-        assessment_item.question = process_content(assessment_data['question'], assessment_item, download_url=download_url)
-
-        # Parse answers
-        answer_data = assessment_data['question']['widgets'][ANSWER_FIELD_MAP[assessment_type]]['options']
-        if assessment_type == exercises.INPUT_QUESTION:
-            assessment_item.answers = json.dumps([
-                {'answer': answer['value'], 'correct': True} for answer in answer_data['answers']
-            ])
-        else:
-            assessment_item.answers = json.dumps([
-                {'answer': process_content(answer, assessment_item, download_url=download_url), 'correct': answer['correct']}
-                for answer in answer_data['choices']
-            ])
-        assessment_item.randomize = answer_data['randomize']
-
-        # Parse hints
-        assessment_item.hints = json.dumps([
-            {'hint': process_content(hint, assessment_item, download_url=download_url)}
-            for hint in assessment_data['hints']
-        ])
-
-    assessment_item.save()
-    return assessment_item
-
-
-def process_content(data, assessment_item, download_url=None):
-    """ process_content: Parses perseus text for special formatting (e.g. formulas, images)
-    Args:
-        data (dict): Perseus data to parse (e.g. 
parsing 'question' field)
-        download_url (str): Domain to download files from
-        assessment_item (models.AssessmentItem): assessment item to save images to
-    Returns: models.AssessmentItem
-    """
-    data['content'] = data['content'].replace(' ', '')  # Remove unrecognized non unicode characters
-    # Process formulas
-    for match in re.finditer(r'(\$[^\$☣]+\$)', data['content']):
-        data['content'] = data['content'].replace(match.group(0), '${}$'.format(match.group(0)))
-    # Process images
-
-    for match in re.finditer(r'!\[[^\]]*\]\((\$(\{☣ LOCALPATH\}\/images)\/([^\.]+\.[^\)]+))\)', data['content']):
-        data['content'] = data['content'].replace(match.group(2), exercises.CONTENT_STORAGE_PLACEHOLDER)
-        image_data = data['images'].get(match.group(1))
-        if image_data and image_data.get('width'):
-            data['content'] = data['content'].replace(match.group(3), '{} ={}x{}'.format(match.group(3), image_data['width'], image_data['height']))
-
-        # Save files to db
-        download_file(match.group(3), assessment_item=assessment_item, preset=format_presets.EXERCISE, download_url=download_url)
-
-    return data['content']

+            self.conn = sqlite3.connect(tempf.name)
+            self.cursor = self.conn.cursor()
+
+            # Start by creating the channel
+            self.logger.info("Creating channel...")
+            channel, root_pk = self._create_channel()
+            channel.editors.add(self.editor_user)
+            channel.save()
+
+            # Create the root node
+            root = models.ContentNode.objects.create(
+                node_id=root_pk,
+                title=channel.name,
+                kind_id=content_kinds.TOPIC,
+                original_channel_id=self.target_id,
+                source_channel_id=self.target_id,
+                complete=True,
+            )
+
+            # Create nodes mapping to channel
+            self.logger.info("   Creating nodes...")
+            with transaction.atomic():
+                self._create_nodes(root)
+                # TODO: Handle prerequisites
+
+            # Delete the previous tree if it exists
+            old_previous = channel.previous_tree
+            if old_previous:
+                old_previous.parent = get_deleted_chefs_root()
+                old_previous.title = f"Old previous tree for channel {channel.pk}"
+                old_previous.save()
+
+            # Save the new tree to the target tree, and preserve the old one
+            channel.previous_tree = channel.main_tree
+            channel.main_tree = root
+            channel.save()
+        finally:
+            self.conn and self.conn.close()
+            tempf.close()
+            os.unlink(tempf.name)
+
+        # Publish the channel if requested
+        if self.publish:
+            self.logger.info("Publishing channel...")
+            publish_channel(self.editor_user.id, channel.id)
+
+        # Print stats
+        self.logger.info(
+            f"\n\nChannel has been imported (time: {datetime.datetime.now() - start})\n"
+        )
+        self.logger.info("\n\n********** IMPORT COMPLETE **********\n\n")
+
+    def _create_channel(self):
+        """
+        Create the channel at the target ID
+        """
+        (
+            id,
+            name,
+            description,
+            thumbnail,
+            root_pk,
+            version,
+            last_updated,
+            schema_version,
+        ) = self.cursor.execute(
+            f"""
+            SELECT
+                id, name, description, thumbnail, root_pk, version, last_updated,
+                min_schema_version
+            FROM {CHANNEL_TABLE}
+            """
+        ).fetchone()
+        # Use the most common node language as the channel language
+        lang_id, _ = self.cursor.execute(
+            f"""
+            SELECT lang_id, COUNT(id) AS node_by_lang_count
+            FROM {NODE_TABLE}
+            GROUP BY lang_id
+            ORDER BY node_by_lang_count DESC
+            """
+        ).fetchone()
+        channel, is_new = models.Channel.objects.get_or_create(
+            pk=self.target_id, actor_id=self.editor_user.id
+        )
+        channel.name = name
+        channel.description = description
+        channel.language_id = lang_id
+        channel.thumbnail = write_to_thumbnail_file(thumbnail)
+        channel.thumbnail_encoding = {"base64": thumbnail, "points": [], "zoom": 0}
+        channel.version = version
+        channel.public = self.public
+        channel.save()
+        self.logger.info(f"\tCreated channel {self.target_id} 
with name {name}") + return channel, root_pk + + def _create_nodes(self, parent, indent=1): + """ + Create node(s) for a channel with target id + + :param parent: node's parent + :param indent: How far to indent print statements + """ + sql_command = f""" + SELECT + id, title, content_id, description, sort_order, license_owner, author, license_id, + kind, coach_content, lang_id, grade_levels, resource_types, learning_activities, + accessibility_labels, categories, learner_needs, duration, options + FROM {NODE_TABLE} + WHERE parent_id = ? + ORDER BY sort_order; + """ + query = self.cursor.execute( + sql_command, (getattr(parent, "node_id", parent),) + ).fetchall() + + # Parse through rows and create models + for ( + id, + title, + content_id, + description, + sort_order, + license_owner, + author, + license_id, + kind, + coach_content, + lang_id, + grade_levels, + resource_types, + learning_activities_, + accessibility_labels, + categories, + learner_needs, + duration, + options, + ) in query: + self.logger.info( + "{indent} {id} ({title} - {kind})...".format( + indent=" |" * indent, id=id, title=title, kind=kind + ) + ) + + # Determine role + role = roles.LEARNER + if coach_content: + role = roles.COACH + + # Determine extra_fields + extra_fields = {} + if kind == content_kinds.EXERCISE: + randomize_sql = f""" + SELECT randomize + FROM {ASSESSMENTMETADATA_TABLE} + WHERE contentnode_id = ? + """ + randomize = self.cursor.execute(randomize_sql, (id,)).fetchone() + extra_fields["options"] = json.loads(options) if options else {} + extra_fields["randomize"] = bool(randomize[0]) if randomize else False + completion_criteria_ = extra_fields["options"].get( + "completion_criteria" + ) + if ( + completion_criteria_ + and completion_criteria_.get("model") == completion_criteria.MASTERY + ): + mastery_model = completion_criteria_.get("threshold", {}).get( + "mastery_model" + ) + if mastery_model == mastery_criteria.DO_ALL: + completion_criteria_["threshold"] = { + "mastery_model": mastery_model, + } + if ( + completion_criteria_ + and "learner_managed" not in completion_criteria_ + ): + completion_criteria_["learner_managed"] = False + + # Determine license + license_result = self._retrieve_license(license_id) + license_description = license_result[1] if license_result else "" + license_result = license_result[0] if license_result else None + + # TODO: Determine thumbnail encoding + + # Create the new node model + node = models.ContentNode.objects.create( + node_id=id, + original_source_node_id=id, + source_node_id=id, + title=title, + content_id=content_id, + description=description, + sort_order=sort_order, + copyright_holder=license_owner, + author=author, + license=license_result, + license_description=license_description, + language_id=lang_id, + role_visibility=role, + extra_fields=extra_fields, + kind_id=kind, + parent=parent, + original_channel_id=self.target_id, + source_channel_id=self.target_id, + grade_levels=convert_metadata_to_dict(grade_levels), + resource_types=convert_metadata_to_dict(resource_types), + learning_activities=convert_learning_activities_to_dict( + kind, learning_activities_ + ), + accessibility_labels=convert_metadata_to_dict(accessibility_labels), + categories=convert_metadata_to_dict(categories), + learner_needs=convert_metadata_to_dict(learner_needs), + ) + + # Handle foreign key references (children, files, tags) + if kind == content_kinds.TOPIC: + self._create_nodes(node, indent=indent + 1) + elif kind == content_kinds.EXERCISE: + 
self._create_assessment_items(node, indent=indent + 1) + self._create_files(node, indent=indent + 1) + self._create_tags(node, indent=indent + 1) + + errors = node.mark_complete() + if errors: + self.logger.warning(f"Node {node.node_id} has errors: {errors}") + node.save() + + def _retrieve_license(self, license_id): + """ + Get license based on id from exported db + + :param license_id: id of license on exported db + :return: license model matching the id and the associated license description + :rtype: tuple + """ + # Handle no license being assigned + if license_id is None or license_id == "": + return None + + # Return license that matches name + name, description = self.cursor.execute( + f""" + SELECT license_name, license_description + FROM {LICENSE_TABLE} + WHERE id = ? + """, + (license_id,), + ).fetchone() + return models.License.objects.get(license_name=name), description + + def _create_files(self, contentnode, indent=0): + """ + Create and possibly download node files + + :param contentnode: node file references + :param indent: How far to indent print statements + """ + # Parse database for files referencing content node and make file models + sql_command = f""" + SELECT checksum, extension, file_size, contentnode_id, lang_id, preset, thumbnail + FROM {FILE_TABLE} + WHERE contentnode_id = ?; + """ + query = self.cursor.execute(sql_command, (contentnode.node_id,)).fetchall() + + for ( + checksum, + extension, + file_size, + contentnode_id, + lang_id, + preset, + is_thumbnail, + ) in query: + filename = "{}.{}".format(checksum, extension) + self.logger.info( + "{indent} * FILE {filename}...".format( + indent=" |" * indent, filename=filename + ) + ) - return data['content'] + try: + self._download_file( + filename, + contentnode=contentnode, + preset=preset, + file_size=file_size, + lang_id=lang_id, + is_thumbnail=is_thumbnail, + ) + except IOError as e: + self.logger.warning("\b FAILED (check logs for more details)") + if e.errno: + sys.stderr.write( + f"Restoration Process Error: Failed to save file object {filename}: {os.strerror(e.errno)}" + ) + continue + + def _download_file( + self, + filename, + contentnode=None, + assessment_item=None, + preset=None, + file_size=None, + lang_id=None, + is_thumbnail=False, + ): + """ + Create and possibly download a file from source instance and save to local storage + + :param filename: the name of the file to download + :param contentnode: the associated content node + :param assessment_item: the associated assessment item + :param preset: the format preset for the file + :param file_size: the known size of the file + :param lang_id: the language ID of the file + :param is_thumbnail: whether the file is a thumbnail + """ + checksum, extension = os.path.splitext(filename) + extension = extension.lstrip(".") + filepath = models.generate_object_storage_name(checksum, filename) + + file_url = f"/content/storage/{filename[0]}/{filename[1]}/{filename}" + file_exists = False + + # If the file already exists, get the size from the storage + if default_storage.exists(filepath): + file_size = file_size or default_storage.size(filepath) + file_exists = True + # if it needs downloading and if we were instructed to do so + elif self.download_content or (is_thumbnail and contentnode): + buffer = BytesIO() + response = self.client.get(file_url) + for chunk in response: + buffer.write(chunk) + + if is_thumbnail and contentnode: + # If the file is a thumbnail, save it to the content node + contentnode.thumbnail_encoding = json.dumps( + { + "base64": 
get_thumbnail_encoding(filename, input_buffer=buffer), + "points": [], + "zoom": 0, + } + ) + else: + checksum, _, filepath = write_raw_content_to_storage( + buffer.getvalue(), ext=extension + ) + buffer.close() + file_exists = True + # otherwise, if file size is not known, get it from the response headers + elif not file_size: + response = self.client.head(file_url) + file_size = int(response.headers.get("Content-Length", 0)) + + # Save values to a new file object + file_obj = models.File( + file_format_id=extension, + file_size=file_size, + contentnode=contentnode, + assessment_item=assessment_item, + language_id=lang_id, + preset_id=preset or "", + checksum=checksum, + ) + file_obj.file_on_disk.name = filepath + # set_by_file_on_disk: skip unless the file has been downloaded + file_obj.save(set_by_file_on_disk=file_exists) + + def _create_tags(self, contentnode, indent=0): + """ + Create tags associated with node + + :param contentnode: node tags reference + :param indent: How far to indent print statements + """ + # Parse database for files referencing content node and make file models + sql_command = f""" + SELECT ct.id, ct.tag_name + FROM {NODE_TAG_TABLE} cnt + JOIN {TAG_TABLE} ct ON cnt.contenttag_id = ct.id + WHERE cnt.contentnode_id = ?; + """ + query = self.cursor.execute(sql_command, (contentnode.node_id,)).fetchall() + + # Build up list of tags + tag_list = [] + for id, tag_name in query: + self.logger.info( + "{indent} ** TAG {tag}...".format(indent=" |" * indent, tag=tag_name) + ) + # Save values to new or existing tag object + tag_obj, is_new = models.ContentTag.objects.get_or_create( + pk=id, + tag_name=tag_name, + channel_id=self.target_id, + ) + tag_list.append(tag_obj) + + # Save tags to node + contentnode.tags.set(tag_list) + contentnode.save() + + def _create_assessment_items(self, contentnode, indent=0): + """ + Generate assessment items based on perseus zip + + :param contentnode: node assessment items reference + :param indent: How far to indent print statements + """ + if not self.token: + self.logger.warning( + f"Skipping assessment items for node {contentnode.node_id}" + ) + return + + # first obtain the content node's Studio ID with the node ID + node_response = self.client.get_with_token( + f"/api/contentnode?_node_id_channel_id___in={contentnode.node_id},{self.source_id}" + ) + if node_response.status_code != 200: + self.logger.warning( + f"Failed to obtain assessment items for node {contentnode.node_id}" + ) + return + + node_data = node_response.json() + contentnode_id = node_data[0]["id"] if node_data else None + if not contentnode_id: + self.logger.warning(f"No content node found for node {contentnode.node_id}") + return + + # Get the content node's assessment items + assessment_response = self.client.get_with_token( + f"/api/assessmentitem?contentnode__in={contentnode_id}" + ) + if assessment_response.status_code != 200: + self.logger.warning( + f"Failed to obtain assessment items for node {contentnode.node_id}" + ) + return + + assessment_items = assessment_response.json() + if not assessment_items: + self.logger.warning( + f"No assessment items found for node {contentnode.node_id}" + ) + return + + # Create the assessment items + for item in assessment_items: + self.logger.info( + "{indent} ** ASSESSMENT ITEM {assessment_id}...".format( + indent=" |" * indent, assessment_id=item["assessment_id"] + ) + ) + assessment_item = models.AssessmentItem.objects.create( + assessment_id=item["assessment_id"], + type=item["type"], + order=item["order"], + 
question=item["question"], + answers=item["answers"], + hints=item["hints"], + randomize=item.get("randomize", False), + ) + contentnode.assessment_items.add(assessment_item) + contentnode.save() + + def _process_assessment_images(self, assessment_item): + """ + Process images in assessment items and save them to the database. + + :param assessment_item: The assessment item to process. + """ + if not self.download_content: + # Skip if not downloading content + return + + for content in [ + assessment_item.question, + assessment_item.answers, + assessment_item.hints, + ]: + for match in re.finditer(exercise_image_filename_regex, content): + # Save files to db + self._download_file( + match.group(3), + assessment_item=assessment_item, + preset=format_presets.EXERCISE, + ) From a8e64dbda06d24af0cae76a9cc8e9fb3207d0371 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Tue, 6 May 2025 13:04:08 -0700 Subject: [PATCH 2/8] Update documentation and utilities for always running nginx in front of devserver --- .run/devserver.run.xml | 2 +- Makefile | 45 +++++-- contentcuration/contentcuration/models.py | 42 +----- docker-compose.yml | 16 +-- docs/_index.md | 5 +- docs/host_services_setup.md | 42 +----- docs/local_dev.md | 157 ++++++++++++++++++++++ docs/local_dev_docker.md | 93 ------------- docs/local_dev_host.md | 146 -------------------- docs/local_dev_wsl.md | 117 ++-------------- package.json | 12 -- webpack.config.js | 34 +++-- 12 files changed, 240 insertions(+), 471 deletions(-) create mode 100644 docs/local_dev.md delete mode 100644 docs/local_dev_docker.md delete mode 100644 docs/local_dev_host.md diff --git a/.run/devserver.run.xml b/.run/devserver.run.xml index 1c94ee6402..55b6546404 100644 --- a/.run/devserver.run.xml +++ b/.run/devserver.run.xml @@ -13,7 +13,7 @@