diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py
index 5bf29b11..7fde125a 100644
--- a/openedx_learning/__init__.py
+++ b/openedx_learning/__init__.py
@@ -1,4 +1,4 @@
"""
Open edX Learning ("Learning Core").
"""
-__version__ = "0.13.1"
+__version__ = "0.14.0"
diff --git a/openedx_learning/apps/authoring/components/admin.py b/openedx_learning/apps/authoring/components/admin.py
index f09483e9..edb83921 100644
--- a/openedx_learning/apps/authoring/components/admin.py
+++ b/openedx_learning/apps/authoring/components/admin.py
@@ -1,6 +1,8 @@
"""
Django admin for components models
"""
+import base64
+
from django.contrib import admin
from django.template.defaultfilters import filesizeformat
from django.urls import reverse
@@ -67,19 +69,22 @@ def get_queryset(self, request):
)
fields = [
- "format_key",
+ "key",
"format_size",
"learner_downloadable",
"rendered_data",
]
readonly_fields = [
"content",
- "format_key",
+ "key",
"format_size",
"rendered_data",
]
extra = 0
+ def has_file(self, cvc_obj):
+ return cvc_obj.content.has_file
+
def rendered_data(self, cvc_obj):
return content_preview(cvc_obj)
@@ -87,15 +92,6 @@ def rendered_data(self, cvc_obj):
def format_size(self, cvc_obj):
return filesizeformat(cvc_obj.content.size)
- @admin.display(description="Key")
- def format_key(self, cvc_obj):
- return format_html(
- '{}',
- link_for_cvc(cvc_obj),
- # reverse("admin:components_content_change", args=(cvc_obj.content_id,)),
- cvc_obj.key,
- )
-
@admin.register(ComponentVersion)
class ComponentVersionAdmin(ReadOnlyModelAdmin):
@@ -129,18 +125,6 @@ def get_queryset(self, request):
)
-def link_for_cvc(cvc_obj: ComponentVersionContent) -> str:
- """
- Get the download URL for the given ComponentVersionContent instance
- """
- return "/media_server/component_asset/{}/{}/{}/{}".format(
- cvc_obj.content.learning_package.key,
- cvc_obj.component_version.component.key,
- cvc_obj.component_version.version_num,
- cvc_obj.key,
- )
-
-
def format_text_for_admin_display(text: str) -> SafeText:
"""
Get the HTML to display the given plain text (preserving formatting)
@@ -158,9 +142,17 @@ def content_preview(cvc_obj: ComponentVersionContent) -> SafeText:
content_obj = cvc_obj.content
if content_obj.media_type.type == "image":
+ # This base64 encoding looks really goofy and is bad for performance,
+ # but image previews in the admin are extremely useful, and this lets us
+ # have them without creating a separate view in Learning Core. (Keep in
+ # mind that these assets are private, so they cannot be accessed via the
+ # MEDIA_URL like most Django uploaded assets.)
+ data = content_obj.read_file().read()
return format_html(
- '
',
- content_obj.file_url(),
+ '
{}',
+ content_obj.mime_type,
+ base64.encodebytes(data).decode('utf8'),
+ content_obj.os_path(),
)
return format_text_for_admin_display(
diff --git a/openedx_learning/apps/authoring/components/api.py b/openedx_learning/apps/authoring/components/api.py
index a65eee80..3be562a8 100644
--- a/openedx_learning/apps/authoring/components/api.py
+++ b/openedx_learning/apps/authoring/components/api.py
@@ -269,7 +269,15 @@ def get_component_by_uuid(uuid: UUID) -> Component:
def get_component_version_by_uuid(uuid: UUID) -> ComponentVersion:
- return ComponentVersion.objects.get(publishable_entity_version__uuid=uuid)
+ return (
+ ComponentVersion
+ .objects
+ .select_related(
+ "component",
+ "component__learning_package",
+ )
+ .get(publishable_entity_version__uuid=uuid)
+ )
def component_exists_by_key(
@@ -395,7 +403,21 @@ def create_component_version_content(
) -> ComponentVersionContent:
"""
Add a Content to the given ComponentVersion
+
+ We don't allow keys that would be absolute paths, e.g. ones that start with
+ '/'. Storing these causes headaches with building relative paths and because
+ of mismatches with things that expect a leading slash and those that don't.
+ So for safety and consistency, we strip off leading slashes and emit a
+ warning when we do.
"""
+ if key.startswith('/'):
+ logger.warning(
+ "Absolute paths are not supported: "
+ f"removed leading '/' from ComponentVersion {component_version_id} "
+ f"content key: {repr(key)} (content_id: {content_id})"
+ )
+ key = key.lstrip('/')
+
cvrc, _created = ComponentVersionContent.objects.get_or_create(
component_version_id=component_version_id,
content_id=content_id,
@@ -510,7 +532,12 @@ def _error_header(error: AssetError) -> dict[str, str]:
# Check: Does the ComponentVersion exist?
try:
- component_version = get_component_version_by_uuid(component_version_uuid)
+ component_version = (
+ ComponentVersion
+ .objects
+ .select_related("component", "component__learning_package")
+ .get(publishable_entity_version__uuid=component_version_uuid)
+ )
except ComponentVersion.DoesNotExist:
# No need to add headers here, because no ComponentVersion was found.
logger.error(f"Asset Not Found: No ComponentVersion with UUID {component_version_uuid}")
@@ -567,10 +594,9 @@ def _error_header(error: AssetError) -> dict[str, str]:
# At this point, we know that there is valid Content that we want to send.
# This adds Content-level headers, like the hash/etag and content type.
info_headers.update(contents_api.get_content_info_headers(content))
- stored_file_path = content.file_path()
# Recompute redirect headers (reminder: this should never be cached).
- redirect_headers = contents_api.get_redirect_headers(stored_file_path, public)
+ redirect_headers = contents_api.get_redirect_headers(content.path, public)
logger.info(
"Asset redirect (uncached metadata): "
f"{component_version_uuid}/{asset_path} -> {redirect_headers}"
diff --git a/openedx_learning/apps/authoring/contents/admin.py b/openedx_learning/apps/authoring/contents/admin.py
index 036d171b..029ebfd8 100644
--- a/openedx_learning/apps/authoring/contents/admin.py
+++ b/openedx_learning/apps/authoring/contents/admin.py
@@ -1,6 +1,8 @@
"""
Django admin for contents models
"""
+import base64
+
from django.contrib import admin
from django.utils.html import format_html
@@ -16,7 +18,6 @@ class ContentAdmin(ReadOnlyModelAdmin):
"""
list_display = [
"hash_digest",
- "file_link",
"learning_package",
"media_type",
"size",
@@ -29,24 +30,42 @@ class ContentAdmin(ReadOnlyModelAdmin):
"media_type",
"size",
"created",
- "file_link",
- "text_preview",
"has_file",
+ "path",
+ "os_path",
+ "text_preview",
+ "image_preview",
]
list_filter = ("media_type", "learning_package")
search_fields = ("hash_digest",)
- def file_link(self, content: Content):
- if not content.has_file:
- return ""
+ @admin.display(description="OS Path")
+ def os_path(self, content: Content):
+ return content.os_path() or ""
- return format_html(
- 'Download',
- content.file_url(),
- )
+ def path(self, content: Content):
+ return content.path if content.has_file else ""
def text_preview(self, content: Content):
+ if not content.text:
+ return ""
return format_html(
'\n{}\n',
content.text,
)
+
+ def image_preview(self, content: Content):
+ """
+ Return HTML for an image, if that is the underlying Content.
+
+ Otherwise, just return a blank string.
+ """
+ if content.media_type.type != "image":
+ return ""
+
+ data = content.read_file().read()
+ return format_html(
+ '
',
+ content.mime_type,
+ base64.encodebytes(data).decode('utf8'),
+ )
diff --git a/openedx_learning/apps/authoring/contents/models.py b/openedx_learning/apps/authoring/contents/models.py
index 7e4ff8cc..214c2d27 100644
--- a/openedx_learning/apps/authoring/contents/models.py
+++ b/openedx_learning/apps/authoring/contents/models.py
@@ -7,11 +7,13 @@
from functools import cache, cached_property
-from django.core.exceptions import ValidationError
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured, ValidationError
from django.core.files.base import File
-from django.core.files.storage import Storage, default_storage
+from django.core.files.storage import Storage
from django.core.validators import MaxValueValidator
from django.db import models
+from django.utils.module_loading import import_string
from ....lib.fields import MultiCollationTextField, case_insensitive_char_field, hash_field, manual_date_time_field
from ....lib.managers import WithRelationsManager
@@ -28,13 +30,25 @@ def get_storage() -> Storage:
"""
Return the Storage instance for our Content file persistence.
- For right now, we're still only storing inline text and not static assets in
- production, so just return the default_storage. We're also going through a
- transition between Django 3.2 -> 4.2, where storage configuration has moved.
+ This will first search for an OPENEDX_LEARNING config dictionary and return
+ a Storage subclass based on that configuration.
- Make this work properly as part of adding support for static assets.
+ If there is no value for the OPENEDX_LEARNING setting, we return the default
+ MEDIA storage class. TODO: Should we make it just error instead?
"""
- return default_storage
+ config_dict = getattr(settings, 'OPENEDX_LEARNING', {})
+
+ if 'MEDIA' in config_dict:
+ storage_cls = import_string(config_dict['MEDIA']['BACKEND'])
+ options = config_dict['MEDIA'].get('OPTIONS', {})
+ return storage_cls(**options)
+
+ raise ImproperlyConfigured(
+ "Cannot access file storage: Missing the OPENEDX_LEARNING['MEDIA'] "
+ "setting, which should have a storage BACKEND and OPTIONS values for "
+ "a Storage subclass. These files should be stored in a location that "
+ "is NOT publicly accessible to browsers (so not in the MEDIA_ROOT)."
+ )
class MediaType(models.Model):
@@ -282,22 +296,53 @@ def mime_type(self) -> str:
"""
return str(self.media_type)
- def file_path(self):
+ @cached_property
+ def path(self):
+ """
+ Logical path at which this content is stored (or would be stored).
+
+ This path is relative to OPENEDX_LEARNING['MEDIA'] configured storage
+ root. This file may not exist because has_file=False, or because we
+ haven't written the file yet (this is the method we call when trying to
+ figure out where the file *should* go).
+ """
+ return f"content/{self.learning_package.uuid}/{self.hash_digest}"
+
+ def os_path(self):
+ """
+ The full OS path for the underlying file for this Content.
+
+ This will not be supported by all Storage class types.
+
+ This will return ``None`` if there is no backing file (has_file=False).
"""
- Path at which this content is stored (or would be stored).
+ if self.has_file:
+ return get_storage().path(self.path)
+ return None
- This path is relative to configured storage root.
+ def read_file(self) -> File:
"""
- return f"{self.learning_package.uuid}/{self.hash_digest}"
+ Get a File object that has been open for reading.
+
+ We intentionally don't expose an `open()` call where callers can open
+ this file in write mode. Writing a Content file should happen at most
+ once, and the logic is not obvious (see ``write_file``).
+
+ At the end of the day, the caller can close the returned File and reopen
+ it in whatever mode they want, but we're trying to gently discourage
+ that kind of usage.
+ """
+ return get_storage().open(self.path, 'rb')
def write_file(self, file: File) -> None:
"""
Write file contents to the file storage backend.
- This function does nothing if the file already exists.
+ This function does nothing if the file already exists. Note that Content
+ is supposed to be immutable, so this should normally only be called once
+ for a given Content row.
"""
storage = get_storage()
- file_path = self.file_path()
# There are two reasons why a file might already exist even if the the
# Content row is new:
@@ -314,15 +359,15 @@ def write_file(self, file: File) -> None:
# 3. Similar to (2), but only part of the file was written before an
# error occurred. This seems unlikely, but possible if the underlying
# storage engine writes in chunks.
- if storage.exists(file_path) and storage.size(file_path) == file.size:
+ if storage.exists(self.path) and storage.size(self.path) == file.size:
return
- storage.save(file_path, file)
+ storage.save(self.path, file)
def file_url(self) -> str:
"""
This will sometimes be a time-limited signed URL.
"""
- return content_file_url(self.file_path())
+ return get_storage().url(self.path)
def clean(self):
"""
@@ -361,7 +406,3 @@ class Meta:
]
verbose_name = "Content"
verbose_name_plural = "Contents"
-
-
-def content_file_url(file_path):
- return get_storage().url(file_path)
diff --git a/test_settings.py b/test_settings.py
index db27f354..f5b154f5 100644
--- a/test_settings.py
+++ b/test_settings.py
@@ -69,3 +69,14 @@ def root(*args):
'DEFAULT_PAGINATION_CLASS': 'edx_rest_framework_extensions.paginators.DefaultPagination',
'PAGE_SIZE': 10,
}
+
+######################## LEARNING CORE SETTINGS ########################
+
+OPENEDX_LEARNING = {
+ 'MEDIA': {
+ 'BACKEND': 'django.core.files.storage.InMemoryStorage',
+ 'OPTIONS': {
+ 'location': MEDIA_ROOT + "_private"
+ }
+ }
+}
diff --git a/tests/openedx_learning/apps/authoring/components/test_api.py b/tests/openedx_learning/apps/authoring/components/test_api.py
index 18e5d04d..1fd8f494 100644
--- a/tests/openedx_learning/apps/authoring/components/test_api.py
+++ b/tests/openedx_learning/apps/authoring/components/test_api.py
@@ -381,7 +381,7 @@ def test_add(self):
components_api.create_component_version_content(
new_version.pk,
new_content.pk,
- key="hello.txt",
+ key="my/path/to/hello.txt",
learner_downloadable=False,
)
# re-fetch from the database to check to see if we wrote it correctly
@@ -390,7 +390,23 @@ def test_add(self):
.get(publishable_entity_version__version_num=1)
assert (
new_content ==
- new_version.contents.get(componentversioncontent__key="hello.txt")
+ new_version.contents.get(componentversioncontent__key="my/path/to/hello.txt")
+ )
+
+ # Write the same content again, but to an absolute path (should auto-
+ # strip) the leading '/'s.
+ components_api.create_component_version_content(
+ new_version.pk,
+ new_content.pk,
+ key="//nested/path/hello.txt",
+ learner_downloadable=False,
+ )
+ new_version = components_api.get_component(self.problem.pk) \
+ .versions \
+ .get(publishable_entity_version__version_num=1)
+ assert (
+ new_content ==
+ new_version.contents.get(componentversioncontent__key="nested/path/hello.txt")
)
def test_multiple_versions(self):
diff --git a/tests/openedx_learning/apps/authoring/components/test_assets.py b/tests/openedx_learning/apps/authoring/components/test_assets.py
index 6d631738..56bfda41 100644
--- a/tests/openedx_learning/apps/authoring/components/test_assets.py
+++ b/tests/openedx_learning/apps/authoring/components/test_assets.py
@@ -167,7 +167,7 @@ def _assert_html_content_headers(self, response):
assert response.status_code == 200
assert response.headers["Etag"] == self.html_asset_content.hash_digest
assert response.headers["Content-Type"] == "text/html"
- assert response.headers["X-Accel-Redirect"] == self.html_asset_content.file_path()
+ assert response.headers["X-Accel-Redirect"] == self.html_asset_content.path
assert "X-Open-edX-Error" not in response.headers
def test_public_asset_response(self):
diff --git a/tests/openedx_learning/apps/authoring/contents/test_file_storage.py b/tests/openedx_learning/apps/authoring/contents/test_file_storage.py
new file mode 100644
index 00000000..a94b3157
--- /dev/null
+++ b/tests/openedx_learning/apps/authoring/contents/test_file_storage.py
@@ -0,0 +1,79 @@
+"""
+Tests for file-backed Content
+"""
+from datetime import datetime, timezone
+
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured
+from django.test import override_settings
+
+from openedx_learning.apps.authoring.contents import api as contents_api
+from openedx_learning.apps.authoring.contents.models import get_storage
+from openedx_learning.apps.authoring.publishing import api as publishing_api
+from openedx_learning.lib.test_utils import TestCase
+
+
+class ContentFileStorageTestCase(TestCase):
+ """
+ Test the storage of files backing Content.
+ """
+ def setUp(self) -> None:
+ """
+ It's actually important that we use setUp and not setUpTestData here,
+ because at least one of our tests will clear the get_storage cache,
+ meaning that subsequent tests will get a new instance of the
+ InMemoryStorage backend–and consequently wouldn't see any data loaded
+ by setUpTestData.
+
+ Recreating the test data for every test lets individual tests change the
+ storage configuration without creating side-effects for other tests.
+ """
+ super().setUp()
+ learning_package = publishing_api.create_learning_package(
+ key="ContentFileStorageTestCase-test-key",
+ title="Content File Storage Test Case Learning Package",
+ )
+ self.html_media_type = contents_api.get_or_create_media_type("text/html")
+ self.html_content = contents_api.get_or_create_file_content(
+ learning_package.id,
+ self.html_media_type.id,
+ data=b"hello world!",
+ created=datetime.now(tz=timezone.utc),
+ )
+
+ def test_file_path(self):
+ """
+ Test that the file path doesn't change.
+
+ If this test breaks, it means that we've changed the convention for
+ where we're storing the backing files for Content, which means we'll be
+ breaking backwards compatibility for everyone. Please be very careful if
+ you're updating this test.
+ """
+ content = self.html_content
+ assert content.path == f"content/{content.learning_package.uuid}/{content.hash_digest}"
+
+ storage_root = settings.OPENEDX_LEARNING['MEDIA']['OPTIONS']['location']
+ assert content.os_path() == f"{storage_root}/{content.path}"
+
+ def test_read(self):
+ """Make sure we can read the file data back."""
+ assert b"hello world!" == self.html_content.read_file().read()
+
+ @override_settings()
+ def test_misconfiguration(self):
+ """
+ Require the OPENEDX_LEARNING setting for file operations.
+
+ The main goal of this is that we don't want to store our files in the
+ default MEDIA_ROOT, because that would make them publicly accessible.
+
+ We set our OPENEDX_LEARNING value in test_settings.py. We're going to
+ delete this setting in our test and make sure we raise the correct
+ exception (The @override_settings decorator will set everything back to
+ normal after the test completes.)
+ """
+ get_storage.cache_clear()
+ del settings.OPENEDX_LEARNING
+ with self.assertRaises(ImproperlyConfigured):
+ self.html_content.read_file()