From a9155f7fbc25609e9ebe5dc47e8d2d53db3f1a1c Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Thu, 20 Jul 2023 17:43:43 +0200 Subject: [PATCH 01/19] Development: updated 'date' tag matching and removed 'date' tag redundancy --- cds_dojson/marc21/fields/videos/video.py | 24 ++++++++++++++++++++---- cds_dojson/marc21/models/videos/video.py | 2 +- cds_dojson/marc21/utils.py | 18 ++++++++++++++++-- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index 714e7fdf..e2289555 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -181,11 +181,27 @@ def accelerator_experiment(self, key, value): 'project': value.get('p'), } - -@model.over('date', '^269__') +@model.over('date', '(^269__)|(^260__)') def date(self, key, value): """Date.""" - return arrow.get(value.get('c')).strftime('%Y-%m-%d') + if key == '269__': + try: + return arrow.get(value.get('c')).strftime('%Y-%m-%d') + + except: + match = re.search(r'^(19|20)\d\d-(0[0-9]|1[012])-00', value.get('c')) + if match is not None: + return match.string.replace('-00', '') + + else: + return 'No Date' + + else: + try: + return arrow.get(value.get('c')).strftime('%Y') + + except: + return 'No Date' @model.over('copyright', '^542__') @@ -263,7 +279,7 @@ def get_tags_to_guess_preset(context_type, value): def get_tags_to_transform(context_type, value): if context_type in ['frame', 'poster']: - return {'timestamp': int(value.get('y').split(' ')[3])} + return {'timestamp': int(float(value.get('y').split(' ')[3]))} def get_frame_name(result): _, ext = os.path.splitext(result['key']) diff --git a/cds_dojson/marc21/models/videos/video.py b/cds_dojson/marc21/models/videos/video.py index 0b505c28..b1620c62 100644 --- a/cds_dojson/marc21/models/videos/video.py +++ b/cds_dojson/marc21/models/videos/video.py @@ -38,7 +38,7 @@ class CDSVideo(OverdoJSONSchema): '035__9', '035__a', '100__9', - '260__c', + #'260__c', '269__b', '300__b', '300__c', diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py index 8a5a6c79..a0ff9f85 100644 --- a/cds_dojson/marc21/utils.py +++ b/cds_dojson/marc21/utils.py @@ -53,15 +53,18 @@ def create_record(marcxml, correct=False, keep_singletons=True): text = leader.text or '' record.append(('leader', text)) + index_offset = 0 controlfield_iterator = tree.iter(tag='{*}controlfield') - for controlfield in controlfield_iterator: + for index, controlfield in enumerate(controlfield_iterator): tag = controlfield.attrib.get('tag', '!') text = controlfield.text or '' if text or keep_singletons: record.append((tag, text)) + index_offset += 1 + tags_indexes = {} datafield_iterator = tree.iter(tag='{*}datafield') - for datafield in datafield_iterator: + for index, datafield in enumerate(datafield_iterator): tag = datafield.attrib.get('tag', '!') ind1 = datafield.attrib.get('ind1', '!') ind2 = datafield.attrib.get('ind2', '!') @@ -83,6 +86,17 @@ def create_record(marcxml, correct=False, keep_singletons=True): if fields or keep_singletons: key = '{0}{1}{2}'.format(tag, ind1, ind2) record.append((key, MementoDict(fields))) + tags_indexes[key] = index + index_offset + + # Removing redundant tags. + # Always use as (tag_to_be_removed, tag_to_be_mantained) + redundant_tags = [ + ('260__', '269__') + ] + + for redundant in redundant_tags: + if tags_indexes.get(redundant[0]) is not None and tags_indexes.get(redundant[1]) is not None: + record.pop(tags_indexes[redundant[0]]) return MementoDict(record) From 523a29ada06fbb7b64f561266ae3faab74db915a Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Fri, 21 Jul 2023 17:01:29 +0200 Subject: [PATCH 02/19] Development: CI updates and test corrections for isort --- .github/workflows/pypi-publish.yml | 8 ++++---- .github/workflows/tests.yml | 14 +++++++------- tests/test_videos_project.py | 2 +- tests/test_videos_video.py | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 8ba42b45..8e2cf1f9 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -10,16 +10,16 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: - python-version: 2.7 + python-version: 3.6 - name: Install dependencies run: | - python -m pip install --upgrade "pip>=20,<21" "setuptools>=40,<46" wheel + python -m pip install --upgrade pip setuptools wheel - name: Build package # Remove `compile_catalog` if the package has no translations. run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ed8af9ef..738ba592 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,27 +24,27 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - #python-version: [2.7, 3.6] - python-version: [2.7] - requirements-level: [min, pypi] + python-version: [3.6] + #python-version: [2.7] + requirements-level: [pypi] steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Generate dependencies run: | - python -m pip install --upgrade "pip>=20,<21" "setuptools>=40,<46" py + python -m pip install --upgrade pip setuptools py python -m pip install wheel coveralls requirements-builder configparser requirements-builder --level=${{ matrix.requirements-level }} setup.py > .${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt - name: Cache pip - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('.${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt') }} diff --git a/tests/test_videos_project.py b/tests/test_videos_project.py index f2f3f4e9..b10ccdfe 100644 --- a/tests/test_videos_project.py +++ b/tests/test_videos_project.py @@ -18,11 +18,11 @@ # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Video rules tests.""" import mock +from helpers import load_fixture_file, mock_contributor_fetch, validate from cds_dojson.marc21.fields.videos.utils import language_to_isocode from cds_dojson.marc21.models.videos.project import model from cds_dojson.marc21.utils import create_record -from helpers import load_fixture_file, mock_contributor_fetch, validate def test_required_fields(app): diff --git a/tests/test_videos_video.py b/tests/test_videos_video.py index d1bdfd45..8e3a44a5 100644 --- a/tests/test_videos_video.py +++ b/tests/test_videos_video.py @@ -18,11 +18,11 @@ # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Video rules tests.""" import mock +from helpers import load_fixture_file, mock_contributor_fetch, validate from cds_dojson.marc21.fields.videos.utils import language_to_isocode from cds_dojson.marc21.models.videos.video import model from cds_dojson.marc21.utils import create_record -from helpers import load_fixture_file, mock_contributor_fetch, validate def test_required_fields(app): From 69a52db7e25f436ba7a41a277b1e70a0f9db2111 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Fri, 21 Jul 2023 17:07:21 +0200 Subject: [PATCH 03/19] Development: CI updates --- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/tests.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 8e2cf1f9..1533bf95 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -19,7 +19,7 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade pip setuptools wheel + python -m pip install --upgrade "pip==21" "setuptools==40" wheel - name: Build package # Remove `compile_catalog` if the package has no translations. run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 738ba592..d0067a05 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,7 +39,7 @@ jobs: - name: Generate dependencies run: | - python -m pip install --upgrade pip setuptools py + python -m pip install --upgrade "pip==21" "setuptools==40" py python -m pip install wheel coveralls requirements-builder configparser requirements-builder --level=${{ matrix.requirements-level }} setup.py > .${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt From 1aa9d31f0b26b5d41f5d76ee7ca15950bc589441 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Mon, 24 Jul 2023 17:58:55 +0200 Subject: [PATCH 04/19] Development: fixing bugs on data extraction for existing tags. --- cds_dojson/marc21/fields/utils.py | 11 +++++++++-- cds_dojson/marc21/fields/videos/video.py | 24 ++++++++++++++++++++---- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/cds_dojson/marc21/fields/utils.py b/cds_dojson/marc21/fields/utils.py index 5cacc252..47143e2f 100644 --- a/cds_dojson/marc21/fields/utils.py +++ b/cds_dojson/marc21/fields/utils.py @@ -79,6 +79,7 @@ def _get_correct_video_contributor_role(role): 'autor': 'Creator', 'camera': 'Camera Operator', 'camera & sound': 'Camera Operator', + 'chairperson': 'Chairperson', 'co-produced by': 'Co-Producer', 'co-production': 'Co-Producer', 'commentaire': 'Comments by', @@ -119,6 +120,7 @@ def _get_correct_video_contributor_role(role): 'made by': 'Creator', 'montage': 'Editor', 'narrator': 'Narrator', + 'organiser': 'Organiser', 'presentator': 'Reporter', 'presented by': 'Reporter', 'presenter': 'Reporter', @@ -146,6 +148,7 @@ def _get_correct_video_contributor_role(role): 'shooting and editing': ('Camera Operator', 'Editor'), 'son': 'Music by', 'speaker': 'Speaker', + 'sponsor': 'Sponsor', 'writen by': 'Screenwriter', 'writer and director': ('Screenwriter', 'Director'), 'written & directed by': ('Screenwriter', 'Director'), @@ -227,8 +230,12 @@ def build_contributor(value): # Avoids a few calls value = get_author_info_from_people_collection(value) - role = _get_correct_video_contributor_role( - value.get('e', 'producer')) # always unicode + try: + role = _get_correct_video_contributor_role( + value.get('e', 'producer')) # always unicode + except: + role = 'Not default: ' + value.get('e', 'producer') + contributors = [] contributor = { 'ids': _extract_json_ids(value) or None, diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index e2289555..d538842c 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -153,7 +153,10 @@ def internal_note(self, key, value): if v.get('a') in CATEGS: _internal_categories[v.get('a')].append(v.get('s')) else: - _internal_notes.append(v.get('a')) + if v.get('a') is not None: + _internal_notes.append(v.get('a')) + else: + _internal_notes.append('No Category') if _internal_categories: self['internal_categories'] = dict(_internal_categories) @@ -184,12 +187,22 @@ def accelerator_experiment(self, key, value): @model.over('date', '(^269__)|(^260__)') def date(self, key, value): """Date.""" + if value.get('c') is None: + return 'No Date' + if key == '269__': try: - return arrow.get(value.get('c')).strftime('%Y-%m-%d') + if type(value.get('c')) is tuple: + return arrow.get(value.get('c')[0]).strftime('%Y-%m-%d') + else: + return arrow.get(value.get('c')).strftime('%Y-%m-%d') except: - match = re.search(r'^(19|20)\d\d-(0[0-9]|1[012])-00', value.get('c')) + if type(value.get('c')) is tuple: + match = re.search(r'^(19|20)\d\d-(0[0-9]|1[012])-00', value.get('c')[0]) + else: + match = re.search(r'^(19|20)\d\d-(0[0-9]|1[012])-00', value.get('c')) + if match is not None: return match.string.replace('-00', '') @@ -198,7 +211,10 @@ def date(self, key, value): else: try: - return arrow.get(value.get('c')).strftime('%Y') + if type(value.get('c')) is tuple: + return arrow.get(value.get('c')[0]).strftime('%Y') + else: + return arrow.get(value.get('c')).strftime('%Y') except: return 'No Date' From daf410ac226fc692fd4e499d7a7d854619b3c79d Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Fri, 28 Jul 2023 17:04:59 +0200 Subject: [PATCH 05/19] Development: multiple JSONs for multiple videos inside the same record --- cds_dojson/marc21/fields/videos/video.py | 1 - cds_dojson/marc21/utils.py | 67 ++++++++++++++++++++---- 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index d538842c..8e7f1237 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -205,7 +205,6 @@ def date(self, key, value): if match is not None: return match.string.replace('-00', '') - else: return 'No Date' diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py index a0ff9f85..69d4ca9e 100644 --- a/cds_dojson/marc21/utils.py +++ b/cds_dojson/marc21/utils.py @@ -21,6 +21,7 @@ from dojson.contrib.marc21.utils import MARC21_DTD, split_stream from lxml import etree from six import StringIO, binary_type, text_type +import copy from ..utils import MementoDict @@ -53,18 +54,16 @@ def create_record(marcxml, correct=False, keep_singletons=True): text = leader.text or '' record.append(('leader', text)) - index_offset = 0 controlfield_iterator = tree.iter(tag='{*}controlfield') for index, controlfield in enumerate(controlfield_iterator): tag = controlfield.attrib.get('tag', '!') text = controlfield.text or '' if text or keep_singletons: record.append((tag, text)) - index_offset += 1 - tags_indexes = {} + multi_video = set() datafield_iterator = tree.iter(tag='{*}datafield') - for index, datafield in enumerate(datafield_iterator): + for datafield in datafield_iterator: tag = datafield.attrib.get('tag', '!') ind1 = datafield.attrib.get('ind1', '!') ind2 = datafield.attrib.get('ind2', '!') @@ -75,6 +74,7 @@ def create_record(marcxml, correct=False, keep_singletons=True): ind1 = ind1.replace(' ', '_') ind2 = ind2.replace(' ', '_') + multi_video_with_index = False fields = [] subfield_iterator = datafield.iter(tag='{*}subfield') for subfield in subfield_iterator: @@ -83,11 +83,56 @@ def create_record(marcxml, correct=False, keep_singletons=True): if text or keep_singletons: fields.append((code, text)) + # Getting video indexes to create multiple records + if tag == '856' and code == '8': + multi_video_with_index = True + multi_video = multi_video.union({text}) + + # Handle the not indexed video + if not multi_video_with_index: + multi_video = multi_video.union({'not_indexed'}) + if fields or keep_singletons: key = '{0}{1}{2}'.format(tag, ind1, ind2) record.append((key, MementoDict(fields))) - tags_indexes[key] = index + index_offset + # Creating multiple records + tags_indexes = {video: {} for video in multi_video} + tags_counter = {video: 0 for video in multi_video} + multi_video_dict = {video: [] for video in multi_video} + for tag in record: + # Tags with no code or with codes, but no '8' code + if type(tag[1]) is not MementoDict or '8' not in tag[1].keys(): + for video in multi_video: + multi_video_dict[video].append(copy.deepcopy(tag)) + + if not(tag[0] in tags_indexes[video]): + tags_indexes[video][tag[0]] = tags_counter[video] + + tags_counter[video] += 1 + + + # Tags with code '8' + else: + # Code 8 within the indexes of videos + try: + multi_video_dict[tag[1]['8']].append(copy.deepcopy(tag)) + + if not(tag[0] in tags_indexes[tag[1]['8']]): + tags_indexes[tag[1]['8']][tag[0]] = tags_counter[tag[1]['8']] + + tags_counter[tag[1]['8']] += 1 + + # Wrong code 8 + except: + for video in multi_video: + multi_video_dict[video].append(copy.deepcopy(tag)) + + if not(tag[0] in tags_indexes[video]): + tags_indexes[video][tag[0]] = tags_counter[video] + + tags_counter[video] += 1 + # Removing redundant tags. # Always use as (tag_to_be_removed, tag_to_be_mantained) redundant_tags = [ @@ -95,10 +140,14 @@ def create_record(marcxml, correct=False, keep_singletons=True): ] for redundant in redundant_tags: - if tags_indexes.get(redundant[0]) is not None and tags_indexes.get(redundant[1]) is not None: - record.pop(tags_indexes[redundant[0]]) - - return MementoDict(record) + for video in multi_video: + if tags_indexes[video].get(redundant[0]) is not None and tags_indexes[video].get(redundant[1]) is not None: + + index_to_remove = tags_indexes[video][redundant[0]] + while multi_video_dict[video][index_to_remove][0] == redundant[0]: + multi_video_dict[video].pop(tags_indexes[video][redundant[0]]) + + return [MementoDict(video_record) for video_record in multi_video_dict.values()] def load(source): From 39c97d7e3c50d5cecd3e7cb99c67345ace3f7c43 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Fri, 28 Jul 2023 17:19:44 +0200 Subject: [PATCH 06/19] Git: recommiting because of wrong origin --- cds_dojson/marc21/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py index 69d4ca9e..b0fd1fea 100644 --- a/cds_dojson/marc21/utils.py +++ b/cds_dojson/marc21/utils.py @@ -120,7 +120,6 @@ def create_record(marcxml, correct=False, keep_singletons=True): if not(tag[0] in tags_indexes[tag[1]['8']]): tags_indexes[tag[1]['8']][tag[0]] = tags_counter[tag[1]['8']] - tags_counter[tag[1]['8']] += 1 # Wrong code 8 From 5d5eeb3ebd9e4e06a5e9bdd1213c93596155de68 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Fri, 28 Jul 2023 17:29:35 +0200 Subject: [PATCH 07/19] Development: fixing return type bug --- cds_dojson/marc21/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py index b0fd1fea..ff6399aa 100644 --- a/cds_dojson/marc21/utils.py +++ b/cds_dojson/marc21/utils.py @@ -146,6 +146,9 @@ def create_record(marcxml, correct=False, keep_singletons=True): while multi_video_dict[video][index_to_remove][0] == redundant[0]: multi_video_dict[video].pop(tags_indexes[video][redundant[0]]) + if len(multi_video_dict.keys()) == 1: + return MementoDict(multi_video_dict['not_indexed']) + return [MementoDict(video_record) for video_record in multi_video_dict.values()] From a59c37e1552d8d233c532895e5ab7daf78468a5d Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Mon, 31 Jul 2023 10:23:20 +0200 Subject: [PATCH 08/19] Development: bug fixed to handle controlfield only marcxml --- cds_dojson/marc21/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py index ff6399aa..8a35c7f9 100644 --- a/cds_dojson/marc21/utils.py +++ b/cds_dojson/marc21/utils.py @@ -146,9 +146,15 @@ def create_record(marcxml, correct=False, keep_singletons=True): while multi_video_dict[video][index_to_remove][0] == redundant[0]: multi_video_dict[video].pop(tags_indexes[video][redundant[0]]) + # MARCXML with no datafield - only controlfield + if len(multi_video) == 0: + return MementoDict(record) + + # Single not indexed video if len(multi_video_dict.keys()) == 1: return MementoDict(multi_video_dict['not_indexed']) + # Multiple indexed videos return [MementoDict(video_record) for video_record in multi_video_dict.values()] From 573133021a4d7e964c1bb23344aa661114b17ec3 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Tue, 1 Aug 2023 17:02:45 +0200 Subject: [PATCH 09/19] Development: tag 8564 added to model --- cds_dojson/marc21/fields/videos/video.py | 50 +++++++++++++++++------- cds_dojson/marc21/models/videos/video.py | 17 ++++---- 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index 8e7f1237..a2fe4265 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -230,7 +230,7 @@ def copyright(self, key, value): } -@model.over('_files', '^8567_') +@model.over('_files', '^(8567|8564)_') @for_each_value @filter_values def _files(self, key, value): @@ -277,9 +277,14 @@ def get_tags(context_type, value): def get_filepath(value): if value.get('d'): - return value.get('d')[ - len('\\\\cern.ch\\dfs\\Services\\'): - ].replace('\\', '/') + if 'cern.ch\\dfs\\Services' in value.get('d'): + return value.get('d')[ + len('\\\\cern.ch\\dfs\\Services\\'): + ].replace('\\', '/') + + else: + return 'http://cern.ch' + value.get('d').split('www')[-1] + else: return re.sub( 'https?://mediaarchive.cern.ch/', '', value.get('u', '') @@ -327,18 +332,33 @@ def compute(value, context_type, media_type): return result - result = compute(deepcopy(value), *get_context_type(value)) + if key == '8567_': + result = compute(deepcopy(value), *get_context_type(value)) - # if it's the poster frame, make a copy for a frame! - if result['tags']['context_type'] == 'poster' and \ - result['tags_to_transform']['timestamp'] == 5: - frame_5 = compute(value, 'frame', 'image') - if '_files' not in self: - self['_files'] = [] - self['_files'].append(frame_5) - # update posterframe key name - _, ext = os.path.splitext(result['key']) - result['key'] = 'posterframe{0}'.format(ext) + # if it's the poster frame, make a copy for a frame! + if result['tags']['context_type'] == 'poster' and \ + result['tags_to_transform']['timestamp'] == 5: + frame_5 = compute(value, 'frame', 'image') + if '_files' not in self: + self['_files'] = [] + self['_files'].append(frame_5) + # update posterframe key name + _, ext = os.path.splitext(result['key']) + result['key'] = 'posterframe{0}'.format(ext) + + else: + result = {} + result['key'] = get_key(value) + + result['tags'] = {} + if value.get('u'): + result['tags']['preview'] = True + result['tags']['context_type'] = 'master' + result['tags']['media_type'] = value.get('y').split('-')[0].lower() + result['tags']['content_type'] = value.get('q').lower() + + result['filepath'] = value.get('u') + result['tags_to_transform'] = get_tags_to_transform(result['tags']['context_type'], value) return result diff --git a/cds_dojson/marc21/models/videos/video.py b/cds_dojson/marc21/models/videos/video.py index b1620c62..7dd59ae5 100644 --- a/cds_dojson/marc21/models/videos/video.py +++ b/cds_dojson/marc21/models/videos/video.py @@ -61,14 +61,15 @@ class CDSVideo(OverdoJSONSchema): '852__j', '852__x', # FIXME need to double check (see #85) - '8564_8', - '8564_d', - '8564_q', - '8564_s', - '8564_u', - '8564_x', - '8564_y', - '8564_z', + #'8564_8', + #'8564_d', + #'8564_q', + #'8564_s', + #'8564_u', + #'8564_x', + #'8564_y', + #'8564_z', + '8564_2', '8567_2', '916__s', '916__w', From 393daf13ce5434d3456e723d6d17480a7cdc5dda Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Fri, 4 Aug 2023 17:48:28 +0200 Subject: [PATCH 10/19] Development: bug on tag 8564 fixed and made 'not_indexed' video the master with all information --- cds_dojson/marc21/fields/videos/video.py | 15 ++++++++++++--- cds_dojson/marc21/utils.py | 5 +++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index a2fe4265..05a74e8b 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -351,11 +351,20 @@ def compute(value, context_type, media_type): result['key'] = get_key(value) result['tags'] = {} - if value.get('u'): + if value.get('u') and value.get('q') is not None: result['tags']['preview'] = True result['tags']['context_type'] = 'master' - result['tags']['media_type'] = value.get('y').split('-')[0].lower() - result['tags']['content_type'] = value.get('q').lower() + result['tags']['content_type'] = value.get('q').lower() + + else: + result['tags']['preview'] = False + result['tags']['context_type'] = value.get('q') + + if value.get('y') is None: + result['tags']['media_type'] = value.get('y') + + else: + result['tags']['media_type'] = value.get('y').split('-')[0].lower() result['filepath'] = value.get('u') result['tags_to_transform'] = get_tags_to_transform(result['tags']['context_type'], value) diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py index 8a35c7f9..c5d0da44 100644 --- a/cds_dojson/marc21/utils.py +++ b/cds_dojson/marc21/utils.py @@ -117,11 +117,16 @@ def create_record(marcxml, correct=False, keep_singletons=True): # Code 8 within the indexes of videos try: multi_video_dict[tag[1]['8']].append(copy.deepcopy(tag)) + multi_video_dict['not_indexed'].append(copy.deepcopy(tag)) if not(tag[0] in tags_indexes[tag[1]['8']]): tags_indexes[tag[1]['8']][tag[0]] = tags_counter[tag[1]['8']] tags_counter[tag[1]['8']] += 1 + if not(tag[0] in tags_indexes['not_indexed']): + tags_indexes['not_indexed'][tag[0]] = tags_counter['not_indexed'] + tags_counter['not_indexed'] += 1 + # Wrong code 8 except: for video in multi_video: From 84df89103f6df84ae4a8674179cf85b0fc11bea3 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Wed, 16 Aug 2023 17:20:12 +0200 Subject: [PATCH 11/19] Development: changed not_indexed tag set and fixed a bug when reading '8564' tag --- cds_dojson/marc21/fields/videos/video.py | 5 +++- cds_dojson/marc21/utils.py | 35 ++++++++++++++---------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index 05a74e8b..3398d5fd 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -364,7 +364,10 @@ def compute(value, context_type, media_type): result['tags']['media_type'] = value.get('y') else: - result['tags']['media_type'] = value.get('y').split('-')[0].lower() + try: + result['tags']['media_type'] = value.get('y').split('-')[0].lower() + except: + result['tags']['media_type'] = None result['filepath'] = value.get('u') result['tags_to_transform'] = get_tags_to_transform(result['tags']['context_type'], value) diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py index c5d0da44..32b29405 100644 --- a/cds_dojson/marc21/utils.py +++ b/cds_dojson/marc21/utils.py @@ -89,7 +89,7 @@ def create_record(marcxml, correct=False, keep_singletons=True): multi_video = multi_video.union({text}) # Handle the not indexed video - if not multi_video_with_index: + if tag == '856' and not multi_video_with_index: multi_video = multi_video.union({'not_indexed'}) if fields or keep_singletons: @@ -103,13 +103,25 @@ def create_record(marcxml, correct=False, keep_singletons=True): for tag in record: # Tags with no code or with codes, but no '8' code if type(tag[1]) is not MementoDict or '8' not in tag[1].keys(): - for video in multi_video: - multi_video_dict[video].append(copy.deepcopy(tag)) - - if not(tag[0] in tags_indexes[video]): - tags_indexes[video][tag[0]] = tags_counter[video] + + # Propagating non-ndexed information to all videos + if tag[0][:3] != '856': + for video in multi_video: + multi_video_dict[video].append(copy.deepcopy(tag)) + + if not(tag[0] in tags_indexes[video]): + tags_indexes[video][tag[0]] = tags_counter[video] + + tags_counter[video] += 1 + + # Video file special case + else: + multi_video_dict['not_indexed'].append(copy.deepcopy(tag)) + + if not(tag[0] in tags_indexes['not_indexed']): + tags_indexes['not_indexed'][tag[0]] = tags_counter['not_indexed'] - tags_counter[video] += 1 + tags_counter['not_indexed'] += 1 # Tags with code '8' @@ -117,16 +129,11 @@ def create_record(marcxml, correct=False, keep_singletons=True): # Code 8 within the indexes of videos try: multi_video_dict[tag[1]['8']].append(copy.deepcopy(tag)) - multi_video_dict['not_indexed'].append(copy.deepcopy(tag)) - + if not(tag[0] in tags_indexes[tag[1]['8']]): tags_indexes[tag[1]['8']][tag[0]] = tags_counter[tag[1]['8']] tags_counter[tag[1]['8']] += 1 - if not(tag[0] in tags_indexes['not_indexed']): - tags_indexes['not_indexed'][tag[0]] = tags_counter['not_indexed'] - tags_counter['not_indexed'] += 1 - # Wrong code 8 except: for video in multi_video: @@ -136,7 +143,7 @@ def create_record(marcxml, correct=False, keep_singletons=True): tags_indexes[video][tag[0]] = tags_counter[video] tags_counter[video] += 1 - + # Removing redundant tags. # Always use as (tag_to_be_removed, tag_to_be_mantained) redundant_tags = [ From b7c9718a643caff630ec073605c03aff6c993787 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Thu, 17 Aug 2023 17:38:14 +0200 Subject: [PATCH 12/19] Development: fixing name bug for non-indexed videos and tag 246 added --- cds_dojson/marc21/fields/base.py | 4 ++++ cds_dojson/marc21/fields/utils.py | 2 +- cds_dojson/marc21/utils.py | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/cds_dojson/marc21/fields/base.py b/cds_dojson/marc21/fields/base.py index a1a9e481..eeb9802b 100644 --- a/cds_dojson/marc21/fields/base.py +++ b/cds_dojson/marc21/fields/base.py @@ -87,6 +87,10 @@ def translations(self, key, value): translation = self.get('translations', [{}])[0] if key.startswith('246'): translation['title'] = {'title': value.get('a')} + if value.get('n'): + translation['description'] = value.get('n') + if value.get('p'): + translation['title']['subtitle'] = value.get('p') if key.startswith('590'): translation['description'] = value.get('a') translation['language'] = 'fr' diff --git a/cds_dojson/marc21/fields/utils.py b/cds_dojson/marc21/fields/utils.py index 47143e2f..8413326d 100644 --- a/cds_dojson/marc21/fields/utils.py +++ b/cds_dojson/marc21/fields/utils.py @@ -234,7 +234,7 @@ def build_contributor(value): role = _get_correct_video_contributor_role( value.get('e', 'producer')) # always unicode except: - role = 'Not default: ' + value.get('e', 'producer') + role = 'Producer' contributors = [] contributor = { diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py index 32b29405..317b5d33 100644 --- a/cds_dojson/marc21/utils.py +++ b/cds_dojson/marc21/utils.py @@ -164,7 +164,8 @@ def create_record(marcxml, correct=False, keep_singletons=True): # Single not indexed video if len(multi_video_dict.keys()) == 1: - return MementoDict(multi_video_dict['not_indexed']) + key = [i for i in multi_video_dict.keys()][0] + return MementoDict(multi_video_dict[key]) # Multiple indexed videos return [MementoDict(video_record) for video_record in multi_video_dict.values()] From f23b6f2b54f8ea18c25d4e1154ac265dd0cc27a9 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Fri, 18 Aug 2023 15:34:36 +0200 Subject: [PATCH 13/19] Development: new tags added to existing properties --- cds_dojson/marc21/fields/base.py | 43 +++++++++++++++-- cds_dojson/marc21/fields/utils.py | 17 +++++-- cds_dojson/marc21/fields/videos/video.py | 60 +++++++++++++++++++++--- 3 files changed, 105 insertions(+), 15 deletions(-) diff --git a/cds_dojson/marc21/fields/base.py b/cds_dojson/marc21/fields/base.py index eeb9802b..469e3734 100644 --- a/cds_dojson/marc21/fields/base.py +++ b/cds_dojson/marc21/fields/base.py @@ -24,7 +24,7 @@ ignore_value) from ..models.base import model -from .utils import build_contributor, build_contributor_from_508 +from .utils import build_contributor, build_contributor_from_508, build_contributor_from_906 @model.over('recid', '^001') @@ -54,14 +54,25 @@ def report_number(self, key, value): return rn -@model.over('contributors', '^(100|700|508)__') +@model.over('contributors', '^(100|700|508|906)__') def contributors(self, key, value): """Contributors.""" authors = self.get('contributors', []) if key in ['100__', '700__']: items = build_contributor(value) - else: + elif key == '508__': items = build_contributor_from_508(value) + else: + items = build_contributor_from_906(value) + if 'contributors' in self.keys(): + import ipdb + ipdb.set_trace() + + names = [dic['name'] for dic in self['contributors']] + roles = [dic['role'] for dic in self['contributors']] + if items[0]['name'] in names: + if items[0]['role'] == roles[names.index(items[0]['name'])]: + items = None # add only contributors that are not part of the authors if items: authors.extend( @@ -98,10 +109,32 @@ def translations(self, key, value): raise IgnoreKey('translations') -@model.over('description', '^520__') +@model.over('description', '(^511__)|(^5111_)|(^518__)|(^520__)') def description(self, key, value): """Description.""" - return value.get('a') + if key == '511__' or key == '5111_': + if value.get('a'): + return 'Filmed people: ' + value.get('a') + elif value.get('1'): + return 'Filmed people: ' + value.get('1') + return '' + + if key == '518__': + if value.get('a'): + if 'description' in self.keys(): + return self['description'] + '\nPlace and/or date of event: ' + value.get('a') + return value.get('a') + if 'description' in self.keys(): + return self['description'] + return '' + + if value.get('a'): + if 'description' in self.keys(): + return self['description'] + '\nPlace and/or date of event: ' + value.get('a') + return value.get('a') + if 'description' in self.keys(): + return self['description'] + return '' @model.over('keywords', '^6531_') diff --git a/cds_dojson/marc21/fields/utils.py b/cds_dojson/marc21/fields/utils.py index 8413326d..1eddf84a 100644 --- a/cds_dojson/marc21/fields/utils.py +++ b/cds_dojson/marc21/fields/utils.py @@ -230,11 +230,15 @@ def build_contributor(value): # Avoids a few calls value = get_author_info_from_people_collection(value) - try: + if value.get('e'): role = _get_correct_video_contributor_role( value.get('e', 'producer')) # always unicode - except: - role = 'Producer' + else: + try: + role = _get_correct_video_contributor_role( + value.get('g', 'producer')) # always unicode + except: + role = 'Producer' contributors = [] contributor = { @@ -277,3 +281,10 @@ def build_contributor_from_508(value): return contributors else: return build_contributor({'a': item.strip(), 'e': 'credits'}) + +def build_contributor_from_906(value): + contributor = {'name': value.get('p'), 'role': 'Speaker'} + if value.get('u'): + contributor['affiliations'] = (value.get('u')) + + return [contributor] \ No newline at end of file diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index 3398d5fd..b75d63ed 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -112,6 +112,19 @@ def find_match(seq, copy): return [dict((k, v) for k, v in iteritems(i) if v is not None) for i in _physical_medium] +@model.over('related_links', '^775__') +def related_links(self, key, value): + + related_link = {} + if value.get('b') and value.get('w'): + if value.get('c'): + related_link['name'] = value.get('b') + ' ' + value.get('c') + else: + related_link['name'] = value.get('b') + + related_link['url'] = 'https://cds.cern.ch/record/' + value.get('w') + return related_link + @model.over('_project_id', '^773__') @ignore_value @@ -135,8 +148,11 @@ def project_id(self, key, value): return project_id -@model.over('location', '^110__') +@model.over('location', '(^110__)|(^901__)') def location(self, key, value): + if key == '901__' and 'location' not in self.keys(): + return value.get('u') + """Location.""" return value.get('a') @@ -219,15 +235,45 @@ def date(self, key, value): return 'No Date' -@model.over('copyright', '^542__') +@model.over('copyright', '(^269__)|(^542__)|(^5421_)') @filter_values def copyright(self, key, value): """Copyright.""" - return { - 'holder': value.get('d'), - 'year': value.get('g'), - 'message': value.get('f'), - } + if key == '269__': + if value.get('b'): + return { + 'holder': value.get('b') + } + return {'holder': ''} + + if key == '5421_': + if 'copyright' not in self.keys(): + try: + if value.get('a'): + return { + 'holder': value.get('a'), + 'year': value.get('g') + } + else: + return { + 'holder': value.get('d'), + 'year': value.get('g') + } + except: + return {'holder': ''} + + if value.get('a'): + return { + 'holder': value.get('a'), + 'year': value.get('g'), + 'message': value.get('f'), + } + else: + return { + 'holder': value.get('d'), + 'year': value.get('g'), + 'message': value.get('f'), + } @model.over('_files', '^(8567|8564)_') From 3fd87e6167186ea97c3061d7ae31fa70343aee3b Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Fri, 18 Aug 2023 15:39:59 +0200 Subject: [PATCH 14/19] Documentation: fixing missing docstring error --- cds_dojson/marc21/fields/utils.py | 1 + cds_dojson/marc21/fields/videos/video.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cds_dojson/marc21/fields/utils.py b/cds_dojson/marc21/fields/utils.py index 1eddf84a..7a4529f6 100644 --- a/cds_dojson/marc21/fields/utils.py +++ b/cds_dojson/marc21/fields/utils.py @@ -283,6 +283,7 @@ def build_contributor_from_508(value): return build_contributor({'a': item.strip(), 'e': 'credits'}) def build_contributor_from_906(value): + """Build contributors from field 508.""" contributor = {'name': value.get('p'), 'role': 'Speaker'} if value.get('u'): contributor['affiliations'] = (value.get('u')) diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index b75d63ed..337dacac 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -114,7 +114,7 @@ def find_match(seq, copy): @model.over('related_links', '^775__') def related_links(self, key, value): - + """Related links""" related_link = {} if value.get('b') and value.get('w'): if value.get('c'): @@ -150,10 +150,10 @@ def project_id(self, key, value): @model.over('location', '(^110__)|(^901__)') def location(self, key, value): + """Location.""" if key == '901__' and 'location' not in self.keys(): return value.get('u') - """Location.""" return value.get('a') From 4e6bb8fcffd5dd2307610cda2768873f77788cc0 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Fri, 18 Aug 2023 15:43:15 +0200 Subject: [PATCH 15/19] Documentation: fixing missing docstring error --- cds_dojson/marc21/fields/videos/video.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index 337dacac..abbe77b2 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -114,7 +114,7 @@ def find_match(seq, copy): @model.over('related_links', '^775__') def related_links(self, key, value): - """Related links""" + """Related links.""" related_link = {} if value.get('b') and value.get('w'): if value.get('c'): From fa92d486ddb52b01944fb930d8a406890d3cbfca Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Mon, 21 Aug 2023 17:21:28 +0200 Subject: [PATCH 16/19] Development: '_digitization' field added to the sceham and tags added to processing step --- cds_dojson/marc21/fields/base.py | 3 - cds_dojson/marc21/fields/videos/video.py | 111 ++++++++++++++ .../records/videos/video/video-v1.0.0.json | 135 ++++++++++++++++++ 3 files changed, 246 insertions(+), 3 deletions(-) diff --git a/cds_dojson/marc21/fields/base.py b/cds_dojson/marc21/fields/base.py index 469e3734..52f14288 100644 --- a/cds_dojson/marc21/fields/base.py +++ b/cds_dojson/marc21/fields/base.py @@ -65,9 +65,6 @@ def contributors(self, key, value): else: items = build_contributor_from_906(value) if 'contributors' in self.keys(): - import ipdb - ipdb.set_trace() - names = [dic['name'] for dic in self['contributors']] roles = [dic['role'] for dic in self['contributors']] if items[0]['name'] in names: diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index abbe77b2..25c43d29 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -126,6 +126,117 @@ def related_links(self, key, value): return related_link +@model.over('_digitization', '(^300__)|(^336__)|(^337__)|(^5081_)|(^514__)|(^5421_)|(^5831_)|(^583__)|(^594__)|(^595__)|(^597__)|(^65027)|(690C_)|(^773__)|(^7870_)|(^787__)|(^852__)|(^8564_)|(^856__)|(^961__)|(^962__)|(^981__)') +@for_each_value +def digitization(self, key, value): + """Digitization field.""" + #import ipdb + #ipdb.set_trace() + + data = {} + try: + if key == '300__': + data['CERN_ID'] = value.get('2', '') + data['res_ar_fps'] = value.get('b', '') + data['FPS'] = value.get('c', '') + data['resolution'] = value.get('d', '') + data['aspect_ratio'] = value.get('e', '') + + elif key == '336__': + data['curator_split_comment'] = value.get('a', '') + data['curator_split_time'] = value.get('b', '') + + elif key == '337__': + data['media_type'] = value.get('a', '') + + elif key == '5081_': + data['director_info'] = value.get('a', '') + + elif key == '514__': + data['picturae_media_quality'] = value.get('a', '') + + elif key =='5421_': + data['copyright'] = value.get('a', '') + + elif key == '5831_': + data['quality_control_info'] = [value.get(code) for code in ['3', '5', '6', 'a', 'b', 'c', 'f', 'i', 'k', 'l', 'n', 'o', 'u', 'x', 'z'] if value.get(code)] + + elif key =='583__': + data['curated'] = value.get('a', '') + data['curation_date'] = value.get('c', '') + data['curation_quality_control'] = value.get('z', '') + + elif key =='594__': + data['curator_category'] = value.get('a', '') + + elif key == '595__': + data['internal_note_datetime'] = value.get('d', '') + + elif key == '597__': + data['internal_note'] = value.get('a', '') + + elif key == '65027': + data['epfl_category'] = value.get('a', '') + + elif key == '690C_': + data['collection'] = value.get('a', '') + + elif key == '773__': + data['host_item_entry'] = value.get('o', '') + data['library_report_number'] = value.get('r', '') + + elif key == '7870_': + data['related_links_info'] = [value.get(code) for code in ['i', 'r', 'w'] if value.get(code)] + + elif key == '787__': + data['related_links_info'] = data['related_links_info'] = [value.get(code) for code in ['1', 'a', 'i', 'w'] if value.get(code)] + + elif key == '852__': + data['physical_media_note'] = value.get('h', '') + data['has_copy'] = value.get('j', '') + data['physical_media_type'] = value.get('x', '') + + elif key == '8564_': + if value.get('1'): + data['has_subtitles'] = value.get('1', '') + else: + data['has_subtitles'] = value.get('i', '') + + data['storage_service'] = value.get('2', '') + data['file_size'] = value.get('s', '') + data['record_control_number'] = value.get('w', '') + data['record_id'] = value.get('y', '') + data['format_resolution'] = value.get('z', '') + + elif key == '856_2': + data['subtitle_extension'] = value.get('q', '') + data['subtitle_path'] = value.get('u', '') + + if value.get('x'): + data['subtitle_language'] = value.get('x', '') + else: + data['subtitle_language'] = value.get('y', '') + + data['subtitle_note'] = value.get('z', '') + + elif key == '961__': + data['curator_name'] = value.get('a', '') + data['curator_title'] = value.get('b', '') + data['curation_time'] = value.get('h', '') + + elif key == '962__': + data['conference_cds_recid'] = value.get('b', '') + data['conference_cds_id'] = value.get('n', '') + + elif key == '962__': + data['deleted_cds_records'] = value.get('a', '') + + except Exception as exception: + #print(exception) + pass + + return data + @model.over('_project_id', '^773__') @ignore_value def project_id(self, key, value): diff --git a/cds_dojson/schemas/records/videos/video/video-v1.0.0.json b/cds_dojson/schemas/records/videos/video/video-v1.0.0.json index 647db46a..d9476528 100644 --- a/cds_dojson/schemas/records/videos/video/video-v1.0.0.json +++ b/cds_dojson/schemas/records/videos/video/video-v1.0.0.json @@ -151,6 +151,141 @@ } } }, + "_digitization": { + "type": "object", + "description": "Field with digitization information for old videos.", + "properties": { + "CERN_ID": { + "type": "string" + }, + "res_ar_fps": { + "type": "string" + }, + "FPS": { + "type": "string" + }, + "resolution": { + "type": "string" + }, + "aspect_ratio": { + "type": "string" + }, + "curated": { + "type": "string" + }, + "curator_name": { + "type": "string" + }, + "curator_title": { + "type": "string" + }, + "curation_date": { + "type": "string" + }, + "curation_time": { + "type": "string" + }, + "curation_quality_control": { + "type": "string" + }, + "curator_category": { + "type": "string" + }, + "curator_split_comment": { + "type": "string" + }, + "curator_split_time": { + "type": "string" + }, + "media_type": { + "type": "string" + }, + "director_info": { + "type": "string" + }, + "picturae_media_quality": { + "type": "string" + }, + "copyright": { + "type": "string" + }, + "quality_control_info": { + "items": { + "type": "object" + }, + "type": "array" + }, + "internal_note": { + "type": "string" + }, + "internal_note_datetime": { + "type": "string" + }, + "epfl_category": { + "type": "string" + }, + "collection": { + "type": "string" + }, + "host_item_entry": { + "type": "string" + }, + "library_report_number": { + "type": "string" + }, + "related_links_info": { + "items": { + "type": "object" + }, + "type": "array" + }, + "physical_media_type": { + "type": "string" + }, + "has_copy": { + "type": "string" + }, + "has_subtitles": { + "type": "string" + }, + "storage_service": { + "type": "string" + }, + "file_size": { + "type": "string" + }, + "record_control_number": { + "type": "string" + }, + "record_id": { + "type": "string" + }, + "format_resolution": { + "type": "string" + }, + "subtitle_extension": { + "type": "string" + }, + "subtitle_path": { + "type": "string" + }, + "subtitle_language": { + "type": "string" + }, + "subtitle_note": { + "type": "string" + }, + "conference_cds_recid": { + "type": "string" + }, + "conference_cds_id": { + "type": "string" + }, + "deleted_cds_records": { + "type": "string" + } + } + }, "keywords": { "items": { "type": "object" From 67131feb61ca418023ae8e2a8f654deffff7b916 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Tue, 22 Aug 2023 15:28:02 +0200 Subject: [PATCH 17/19] Development: video test revision, bug fixing on new read tags --- cds_dojson/marc21/fields/videos/video.py | 6 +++- cds_dojson/marc21/models/videos/video.py | 42 ++++++++++++------------ tests/test_videos_video.py | 14 +++++++- 3 files changed, 39 insertions(+), 23 deletions(-) diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index 25c43d29..266720bf 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -228,13 +228,17 @@ def digitization(self, key, value): data['conference_cds_recid'] = value.get('b', '') data['conference_cds_id'] = value.get('n', '') - elif key == '962__': + elif key == '981__': data['deleted_cds_records'] = value.get('a', '') except Exception as exception: #print(exception) pass + empty_keys = [key for key in data.keys() if data[key] == ''] + for key in empty_keys: + data.pop(key) + return data @model.over('_project_id', '^773__') diff --git a/cds_dojson/marc21/models/videos/video.py b/cds_dojson/marc21/models/videos/video.py index 7dd59ae5..d42daeba 100644 --- a/cds_dojson/marc21/models/videos/video.py +++ b/cds_dojson/marc21/models/videos/video.py @@ -39,54 +39,54 @@ class CDSVideo(OverdoJSONSchema): '035__a', '100__9', #'260__c', - '269__b', - '300__b', - '300__c', - '300__d', - '300__e', - '337__a', + #'269__b', + #'300__b', + #'300__c', + #'300__d', + #'300__e', + #'337__a', '5061_2', '5061_5', '5061_a', '5061_f', '5061_z', '542__e', - '690C_a', + #'690C_a', '700__0', '700__9', - '773__o', - '773__r', - '787__i', - '787__w', - '852__j', - '852__x', + #'773__o', + #'773__r', + #'787__i', + #'787__w', + #'852__j', + #'852__x', # FIXME need to double check (see #85) #'8564_8', - #'8564_d', + '8564_d', #'8564_q', #'8564_s', - #'8564_u', - #'8564_x', + '8564_u', + '8564_x', #'8564_y', #'8564_z', - '8564_2', + #'8564_2', '8567_2', '916__s', '916__w', '937__c', '960__a', '961__c', - '961__h', + #'961__h', '961__l', '961__x', - '962__b', + #'962__b', '962__l', - '962__n', + #'962__n', '962__t', '963__a', '980__a', '980__b', - '981__a', + #'981__a', } diff --git a/tests/test_videos_video.py b/tests/test_videos_video.py index 8e3a44a5..ba4ca416 100644 --- a/tests/test_videos_video.py +++ b/tests/test_videos_video.py @@ -47,7 +47,7 @@ def test_required_fields(app): 'test-email@cern.ch', 'example@test.com'], 'update': ['another.user@cern.ch', - 'tuser@cern.ch']}, + 'tuser@cern.ch']}, '_files': [ { 'filepath': 'MediaArchive/Video/Masters/Movies/CERN/2017/CERN-MOVIE-2017-023/Final_Output/CERN-MOVIE-2017-023-001.mov', @@ -202,6 +202,18 @@ def test_required_fields(app): 'tags_to_transform': {'timestamp': 95} } ], + '_digitization': [ + { + 'res_ar_fps': '1920x1080 16/9, 25.00', + 'FPS': '25', + 'resolution': '1920x1080', + 'aspect_ratio': '16:9' + }, + { + 'host_item_entry': 'AVW.project.2963', + 'library_report_number': 'CERN-MOVIE-2017-023' + } + ], '_project_id': 'https://cds.cern.ch/record/1', 'category': 'CERN', 'contributors': [ From 09eaa24b579b952be3a49642dca838cc6edfcba0 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Tue, 22 Aug 2023 17:45:10 +0200 Subject: [PATCH 18/19] Development: bug fixed on tag capturing and new version of tests for videos added --- cds_dojson/marc21/fields/videos/video.py | 131 ++++++--- .../records/videos/video/video-v1.0.0.json | 261 +++++++++--------- tests/test_videos_video.py | 22 +- 3 files changed, 242 insertions(+), 172 deletions(-) diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py index 266720bf..9180044c 100644 --- a/cds_dojson/marc21/fields/videos/video.py +++ b/cds_dojson/marc21/fields/videos/video.py @@ -45,6 +45,23 @@ def duration(self, key, value): i.e. '2 min.', we will extract it programatically later to avoid the hassle off dealing with more regex. """ + data = {} + data['CERN_ID'] = value.get('2', '') + data['res_ar_fps'] = value.get('b', '') + data['FPS'] = value.get('c', '') + data['resolution'] = value.get('d', '') + data['aspect_ratio'] = value.get('e', '') + + empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == ''] + for aux_key in empty_keys: + data.pop(aux_key) + + if len(data.keys()) > 0: + if '_digitization' not in self.keys(): + self['_digitization'] = [data] + else: + self['_digitization'].append(data) + try: return re.match(r'(\d{2}:\d{2}:\d{2})(\.\d+)?', value.get('a')) \ .group(1) @@ -66,6 +83,21 @@ def language(self, key, value): @model.over('physical_medium', '(^340__)|(^852__)') def physical_medium(self, key, value): """Physical medium.""" + data = {} + data['physical_media_note'] = value.get('h', '') + data['has_copy'] = value.get('j', '') + data['physical_media_type'] = value.get('x', '') + + empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == ''] + for aux_key in empty_keys: + data.pop(aux_key) + + if len(data.keys()) > 0: + if '_digitization' not in self.keys(): + self['_digitization'] = [data] + else: + self['_digitization'].append(data) + def find_match(seq, copy): if not seq and not copy \ and key == '852__' and len(_physical_medium) == 1: @@ -126,8 +158,9 @@ def related_links(self, key, value): return related_link -@model.over('_digitization', '(^300__)|(^336__)|(^337__)|(^5081_)|(^514__)|(^5421_)|(^5831_)|(^583__)|(^594__)|(^595__)|(^597__)|(^65027)|(690C_)|(^773__)|(^7870_)|(^787__)|(^852__)|(^8564_)|(^856__)|(^961__)|(^962__)|(^981__)') +@model.over('_digitization', '(^336__)|(^337__)|(^5081_)|(^514__)|(^5831_)|(^583__)|(^594__)|(^597__)|(^65027)|(690C_)|(^7870_)|(^787__)|(^856_2)|(^961__)|(^962__)|(^981__)') @for_each_value +@ignore_value def digitization(self, key, value): """Digitization field.""" #import ipdb @@ -135,14 +168,7 @@ def digitization(self, key, value): data = {} try: - if key == '300__': - data['CERN_ID'] = value.get('2', '') - data['res_ar_fps'] = value.get('b', '') - data['FPS'] = value.get('c', '') - data['resolution'] = value.get('d', '') - data['aspect_ratio'] = value.get('e', '') - - elif key == '336__': + if key == '336__': data['curator_split_comment'] = value.get('a', '') data['curator_split_time'] = value.get('b', '') @@ -154,9 +180,6 @@ def digitization(self, key, value): elif key == '514__': data['picturae_media_quality'] = value.get('a', '') - - elif key =='5421_': - data['copyright'] = value.get('a', '') elif key == '5831_': data['quality_control_info'] = [value.get(code) for code in ['3', '5', '6', 'a', 'b', 'c', 'f', 'i', 'k', 'l', 'n', 'o', 'u', 'x', 'z'] if value.get(code)] @@ -168,9 +191,6 @@ def digitization(self, key, value): elif key =='594__': data['curator_category'] = value.get('a', '') - - elif key == '595__': - data['internal_note_datetime'] = value.get('d', '') elif key == '597__': data['internal_note'] = value.get('a', '') @@ -181,32 +201,11 @@ def digitization(self, key, value): elif key == '690C_': data['collection'] = value.get('a', '') - elif key == '773__': - data['host_item_entry'] = value.get('o', '') - data['library_report_number'] = value.get('r', '') - elif key == '7870_': data['related_links_info'] = [value.get(code) for code in ['i', 'r', 'w'] if value.get(code)] elif key == '787__': data['related_links_info'] = data['related_links_info'] = [value.get(code) for code in ['1', 'a', 'i', 'w'] if value.get(code)] - - elif key == '852__': - data['physical_media_note'] = value.get('h', '') - data['has_copy'] = value.get('j', '') - data['physical_media_type'] = value.get('x', '') - - elif key == '8564_': - if value.get('1'): - data['has_subtitles'] = value.get('1', '') - else: - data['has_subtitles'] = value.get('i', '') - - data['storage_service'] = value.get('2', '') - data['file_size'] = value.get('s', '') - data['record_control_number'] = value.get('w', '') - data['record_id'] = value.get('y', '') - data['format_resolution'] = value.get('z', '') elif key == '856_2': data['subtitle_extension'] = value.get('q', '') @@ -235,16 +234,33 @@ def digitization(self, key, value): #print(exception) pass - empty_keys = [key for key in data.keys() if data[key] == ''] - for key in empty_keys: - data.pop(key) + empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == ''] + for aux_key in empty_keys: + data.pop(aux_key) - return data + if len(data.keys()) > 0: + return data + + return None @model.over('_project_id', '^773__') @ignore_value def project_id(self, key, value): """Report number.""" + data = {} + data['host_item_entry'] = value.get('o', '') + data['library_report_number'] = value.get('r', '') + + empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == ''] + for aux_key in empty_keys: + data.pop(aux_key) + + if len(data.keys()) > 0: + if '_digitization' not in self.keys(): + self['_digitization'] = [data] + else: + self['_digitization'].append(data) + values = force_list(value) project_id = None related_links = self.get('related_links', []) @@ -291,6 +307,13 @@ def internal_note(self, key, value): if _internal_categories: self['internal_categories'] = dict(_internal_categories) + + if value.get('d'): + if '_digitization' not in self.keys(): + self['_digitization'] = [{'internal_note_datetime': value.get('d')}] + else: + self['_digitization'].append({'internal_note_datetime': value.get('d')}) + return '\n'.join(_internal_notes) or None @@ -362,6 +385,13 @@ def copyright(self, key, value): return {'holder': ''} if key == '5421_': + + if value.get('a'): + if '_digitization' not in self.keys(): + self['_digitization'] = [{'copyright': value.get('a')}] + else: + self['_digitization'].append({'copyright': value.get('a')}) + if 'copyright' not in self.keys(): try: if value.get('a'): @@ -508,6 +538,27 @@ def compute(value, context_type, media_type): result['key'] = 'posterframe{0}'.format(ext) else: + data = {} + if value.get('1'): + data['has_subtitles'] = value.get('1', '') + else: + data['has_subtitles'] = value.get('i', '') + data['storage_service'] = value.get('2', '') + data['file_size'] = value.get('s', '') + data['record_control_number'] = value.get('w', '') + data['record_id'] = value.get('y', '') + data['format_resolution'] = value.get('z', '') + + empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == ''] + for aux_key in empty_keys: + data.pop(aux_key) + + if len(data.keys()) > 0: + if '_digitization' not in self.keys(): + self['_digitization'] = [data] + else: + self['_digitization'].append(data) + result = {} result['key'] = get_key(value) diff --git a/cds_dojson/schemas/records/videos/video/video-v1.0.0.json b/cds_dojson/schemas/records/videos/video/video-v1.0.0.json index d9476528..9eb170b5 100644 --- a/cds_dojson/schemas/records/videos/video/video-v1.0.0.json +++ b/cds_dojson/schemas/records/videos/video/video-v1.0.0.json @@ -152,137 +152,140 @@ } }, "_digitization": { - "type": "object", - "description": "Field with digitization information for old videos.", - "properties": { - "CERN_ID": { - "type": "string" - }, - "res_ar_fps": { - "type": "string" - }, - "FPS": { - "type": "string" - }, - "resolution": { - "type": "string" - }, - "aspect_ratio": { - "type": "string" - }, - "curated": { - "type": "string" - }, - "curator_name": { - "type": "string" - }, - "curator_title": { - "type": "string" - }, - "curation_date": { - "type": "string" - }, - "curation_time": { - "type": "string" - }, - "curation_quality_control": { - "type": "string" - }, - "curator_category": { - "type": "string" - }, - "curator_split_comment": { - "type": "string" - }, - "curator_split_time": { - "type": "string" - }, - "media_type": { - "type": "string" - }, - "director_info": { - "type": "string" - }, - "picturae_media_quality": { - "type": "string" - }, - "copyright": { - "type": "string" - }, - "quality_control_info": { - "items": { - "type": "object" + "type": "array", + "items": { + "type": "object", + "description": "Field with digitization information for old videos.", + "properties": { + "CERN_ID": { + "type": "string" }, - "type": "array" - }, - "internal_note": { - "type": "string" - }, - "internal_note_datetime": { - "type": "string" - }, - "epfl_category": { - "type": "string" - }, - "collection": { - "type": "string" - }, - "host_item_entry": { - "type": "string" - }, - "library_report_number": { - "type": "string" - }, - "related_links_info": { - "items": { - "type": "object" + "res_ar_fps": { + "type": "string" }, - "type": "array" - }, - "physical_media_type": { - "type": "string" - }, - "has_copy": { - "type": "string" - }, - "has_subtitles": { - "type": "string" - }, - "storage_service": { - "type": "string" - }, - "file_size": { - "type": "string" - }, - "record_control_number": { - "type": "string" - }, - "record_id": { - "type": "string" - }, - "format_resolution": { - "type": "string" - }, - "subtitle_extension": { - "type": "string" - }, - "subtitle_path": { - "type": "string" - }, - "subtitle_language": { - "type": "string" - }, - "subtitle_note": { - "type": "string" - }, - "conference_cds_recid": { - "type": "string" - }, - "conference_cds_id": { - "type": "string" - }, - "deleted_cds_records": { - "type": "string" + "FPS": { + "type": "string" + }, + "resolution": { + "type": "string" + }, + "aspect_ratio": { + "type": "string" + }, + "curated": { + "type": "string" + }, + "curator_name": { + "type": "string" + }, + "curator_title": { + "type": "string" + }, + "curation_date": { + "type": "string" + }, + "curation_time": { + "type": "string" + }, + "curation_quality_control": { + "type": "string" + }, + "curator_category": { + "type": "string" + }, + "curator_split_comment": { + "type": "string" + }, + "curator_split_time": { + "type": "string" + }, + "media_type": { + "type": "string" + }, + "director_info": { + "type": "string" + }, + "picturae_media_quality": { + "type": "string" + }, + "copyright": { + "type": "string" + }, + "quality_control_info": { + "items": { + "type": "object" + }, + "type": "array" + }, + "internal_note": { + "type": "string" + }, + "internal_note_datetime": { + "type": "string" + }, + "epfl_category": { + "type": "string" + }, + "collection": { + "type": "string" + }, + "host_item_entry": { + "type": "string" + }, + "library_report_number": { + "type": "string" + }, + "related_links_info": { + "items": { + "type": "object" + }, + "type": "array" + }, + "physical_media_type": { + "type": "string" + }, + "has_copy": { + "type": "string" + }, + "has_subtitles": { + "type": "string" + }, + "storage_service": { + "type": "string" + }, + "file_size": { + "type": "string" + }, + "record_control_number": { + "type": "string" + }, + "record_id": { + "type": "string" + }, + "format_resolution": { + "type": "string" + }, + "subtitle_extension": { + "type": "string" + }, + "subtitle_path": { + "type": "string" + }, + "subtitle_language": { + "type": "string" + }, + "subtitle_note": { + "type": "string" + }, + "conference_cds_recid": { + "type": "string" + }, + "conference_cds_id": { + "type": "string" + }, + "deleted_cds_records": { + "type": "string" + } } } }, diff --git a/tests/test_videos_video.py b/tests/test_videos_video.py index ba4ca416..f682bb9d 100644 --- a/tests/test_videos_video.py +++ b/tests/test_videos_video.py @@ -209,6 +209,9 @@ def test_required_fields(app): 'resolution': '1920x1080', 'aspect_ratio': '16:9' }, + { + 'collection': 'publvideomovie' + }, { 'host_item_entry': 'AVW.project.2963', 'library_report_number': 'CERN-MOVIE-2017-023' @@ -473,6 +476,10 @@ def check_transformation(marcxml_body, json_body): 'related_links': [ {'name': 'Version anglaise', 'url': 'http://cds.cern.ch/record/43172'}, {'name': 'Version allemande', 'url': 'https://cds.cern.ch/record/2194933'}, + ], + '_digitization': [ + {'library_report_number': 'CERN-FILM-1965-44'}, + {'host_item_entry': 'AVW.project.111', 'library_report_number': 'CERN-MOVIE-1965-001'} ]} ) check_transformation( @@ -531,7 +538,10 @@ def check_transformation(marcxml_body, json_body): 16:9 """, { - 'duration': '00:00:00' + 'duration': '00:00:00', + '_digitization': [ + {'aspect_ratio': '16:9'} + ] }) check_transformation( """ @@ -540,7 +550,10 @@ def check_transformation(marcxml_body, json_body): 16:9 """, { - 'duration': '12:33:12' + 'duration': '12:33:12', + '_digitization': [ + {'aspect_ratio': '16:9'} + ] }) check_transformation( """ @@ -549,7 +562,10 @@ def check_transformation(marcxml_body, json_body): 16:9 """, { - 'duration': '00:00:00' + 'duration': '00:00:00', + '_digitization': [ + {'res_ar_fps': '16:9', 'aspect_ratio': '16:9'} + ] }) check_transformation( """ From 520c154cdf4d22c52bcb23abfb48aa16200c42a3 Mon Sep 17 00:00:00 2001 From: Luis Zerkowski Date: Wed, 23 Aug 2023 10:10:29 +0200 Subject: [PATCH 19/19] Development: updated deposit video schema and modified tests to include '_digitization' field --- .../records/videos/video/video-v1.0.0.json | 138 ++++++++++++++++++ .../videos/video/video_src-v1.0.0.json | 138 ++++++++++++++++++ tests/test_cli.py | 2 + tests/test_videos_video.py | 2 +- 4 files changed, 279 insertions(+), 1 deletion(-) diff --git a/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json b/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json index fe4256e8..b9ec4d11 100644 --- a/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json +++ b/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json @@ -416,6 +416,144 @@ } } }, + "_digitization": { + "type": "array", + "items": { + "type": "object", + "description": "Field with digitization information for old videos.", + "properties": { + "CERN_ID": { + "type": "string" + }, + "res_ar_fps": { + "type": "string" + }, + "FPS": { + "type": "string" + }, + "resolution": { + "type": "string" + }, + "aspect_ratio": { + "type": "string" + }, + "curated": { + "type": "string" + }, + "curator_name": { + "type": "string" + }, + "curator_title": { + "type": "string" + }, + "curation_date": { + "type": "string" + }, + "curation_time": { + "type": "string" + }, + "curation_quality_control": { + "type": "string" + }, + "curator_category": { + "type": "string" + }, + "curator_split_comment": { + "type": "string" + }, + "curator_split_time": { + "type": "string" + }, + "media_type": { + "type": "string" + }, + "director_info": { + "type": "string" + }, + "picturae_media_quality": { + "type": "string" + }, + "copyright": { + "type": "string" + }, + "quality_control_info": { + "items": { + "type": "object" + }, + "type": "array" + }, + "internal_note": { + "type": "string" + }, + "internal_note_datetime": { + "type": "string" + }, + "epfl_category": { + "type": "string" + }, + "collection": { + "type": "string" + }, + "host_item_entry": { + "type": "string" + }, + "library_report_number": { + "type": "string" + }, + "related_links_info": { + "items": { + "type": "object" + }, + "type": "array" + }, + "physical_media_type": { + "type": "string" + }, + "has_copy": { + "type": "string" + }, + "has_subtitles": { + "type": "string" + }, + "storage_service": { + "type": "string" + }, + "file_size": { + "type": "string" + }, + "record_control_number": { + "type": "string" + }, + "record_id": { + "type": "string" + }, + "format_resolution": { + "type": "string" + }, + "subtitle_extension": { + "type": "string" + }, + "subtitle_path": { + "type": "string" + }, + "subtitle_language": { + "type": "string" + }, + "subtitle_note": { + "type": "string" + }, + "conference_cds_recid": { + "type": "string" + }, + "conference_cds_id": { + "type": "string" + }, + "deleted_cds_records": { + "type": "string" + } + } + } + }, "translations": { "items": { "type": "object", diff --git a/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json b/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json index 8cbf3036..8c86cbe7 100644 --- a/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json +++ b/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json @@ -111,6 +111,144 @@ "_project_id": { "type": "string" }, + "_digitization": { + "type": "array", + "items": { + "type": "object", + "description": "Field with digitization information for old videos.", + "properties": { + "CERN_ID": { + "type": "string" + }, + "res_ar_fps": { + "type": "string" + }, + "FPS": { + "type": "string" + }, + "resolution": { + "type": "string" + }, + "aspect_ratio": { + "type": "string" + }, + "curated": { + "type": "string" + }, + "curator_name": { + "type": "string" + }, + "curator_title": { + "type": "string" + }, + "curation_date": { + "type": "string" + }, + "curation_time": { + "type": "string" + }, + "curation_quality_control": { + "type": "string" + }, + "curator_category": { + "type": "string" + }, + "curator_split_comment": { + "type": "string" + }, + "curator_split_time": { + "type": "string" + }, + "media_type": { + "type": "string" + }, + "director_info": { + "type": "string" + }, + "picturae_media_quality": { + "type": "string" + }, + "copyright": { + "type": "string" + }, + "quality_control_info": { + "items": { + "type": "object" + }, + "type": "array" + }, + "internal_note": { + "type": "string" + }, + "internal_note_datetime": { + "type": "string" + }, + "epfl_category": { + "type": "string" + }, + "collection": { + "type": "string" + }, + "host_item_entry": { + "type": "string" + }, + "library_report_number": { + "type": "string" + }, + "related_links_info": { + "items": { + "type": "object" + }, + "type": "array" + }, + "physical_media_type": { + "type": "string" + }, + "has_copy": { + "type": "string" + }, + "has_subtitles": { + "type": "string" + }, + "storage_service": { + "type": "string" + }, + "file_size": { + "type": "string" + }, + "record_control_number": { + "type": "string" + }, + "record_id": { + "type": "string" + }, + "format_resolution": { + "type": "string" + }, + "subtitle_extension": { + "type": "string" + }, + "subtitle_path": { + "type": "string" + }, + "subtitle_language": { + "type": "string" + }, + "subtitle_note": { + "type": "string" + }, + "conference_cds_recid": { + "type": "string" + }, + "conference_cds_id": { + "type": "string" + }, + "deleted_cds_records": { + "type": "string" + } + } + } + }, "_cds": { "type": "object", "properties": { diff --git a/tests/test_cli.py b/tests/test_cli.py index 43aacd93..c8c5fab6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -57,4 +57,6 @@ def test_cli(src, compiled): pkg_resources.resource_filename('cds_dojson.schemas', compiled), 'r') as f: compile_schema_expected = json.load(f) + print(compile_schema_expected) + print(compiled_schema_result) assert compile_schema_expected == compiled_schema_result diff --git a/tests/test_videos_video.py b/tests/test_videos_video.py index f682bb9d..8bafda03 100644 --- a/tests/test_videos_video.py +++ b/tests/test_videos_video.py @@ -564,7 +564,7 @@ def check_transformation(marcxml_body, json_body): """, { 'duration': '00:00:00', '_digitization': [ - {'res_ar_fps': '16:9', 'aspect_ratio': '16:9'} + {'res_ar_fps': '16:9,', 'aspect_ratio': '16:9'} ] }) check_transformation(