diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml
index 8ba42b45..1533bf95 100644
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -10,16 +10,16 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
         with:
-          python-version: 2.7
+          python-version: 3.6
 
       - name: Install dependencies
        run: |
-          python -m pip install --upgrade "pip>=20,<21" "setuptools>=40,<46" wheel
+          python -m pip install --upgrade "pip==21" "setuptools==40" wheel
 
       - name: Build package
         # Remove `compile_catalog` if the package has no translations.
         run: |
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index ed8af9ef..d0067a05 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -24,27 +24,27 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        #python-version: [2.7, 3.6]
-        python-version: [2.7]
-        requirements-level: [min, pypi]
+        python-version: [3.6]
+        #python-version: [2.7]
+        requirements-level: [pypi]
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
         with:
           python-version: ${{ matrix.python-version }}
 
       - name: Generate dependencies
         run: |
-          python -m pip install --upgrade "pip>=20,<21" "setuptools>=40,<46" py
+          python -m pip install --upgrade "pip==21" "setuptools==40" py
           python -m pip install wheel coveralls requirements-builder configparser
           requirements-builder --level=${{ matrix.requirements-level }} setup.py > .${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt
 
       - name: Cache pip
-        uses: actions/cache@v2
+        uses: actions/cache@v3
         with:
           path: ~/.cache/pip
           key: ${{ runner.os }}-pip-${{ hashFiles('.${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt') }}
diff --git a/cds_dojson/marc21/fields/base.py b/cds_dojson/marc21/fields/base.py
index a1a9e481..52f14288 100644
--- a/cds_dojson/marc21/fields/base.py
+++ b/cds_dojson/marc21/fields/base.py
@@ -24,7 +24,7 @@
                     ignore_value)
 
 from ..models.base import model
-from .utils import build_contributor, build_contributor_from_508
+from .utils import build_contributor, build_contributor_from_508, build_contributor_from_906
 
 
 @model.over('recid', '^001')
@@ -54,14 +54,22 @@ def report_number(self, key, value):
     return rn
 
 
-@model.over('contributors', '^(100|700|508)__')
+@model.over('contributors', '^(100|700|508|906)__')
 def contributors(self, key, value):
     """Contributors."""
     authors = self.get('contributors', [])
     if key in ['100__', '700__']:
         items = build_contributor(value)
-    else:
+    elif key == '508__':
         items = build_contributor_from_508(value)
+    else:
+        items = build_contributor_from_906(value)
+        if 'contributors' in self.keys():
+            names = [dic['name'] for dic in self['contributors']]
+            roles = [dic['role'] for dic in self['contributors']]
+            if items[0]['name'] in names:
+                if items[0]['role'] == roles[names.index(items[0]['name'])]:
+                    items = None
     # add only contributors that are not part of the authors
     if items:
         authors.extend(
@@ -87,6 +95,10 @@ def translations(self, key, value):
     translation = self.get('translations', [{}])[0]
     if key.startswith('246'):
         translation['title'] = {'title': value.get('a')}
+        if value.get('n'):
+            translation['description'] = value.get('n')
+        if value.get('p'):
+            translation['title']['subtitle'] = value.get('p')
     if key.startswith('590'):
         translation['description'] = value.get('a')
         translation['language'] = 'fr'
@@ -94,10 +106,32 @@ def translations(self, key, value):
     raise IgnoreKey('translations')
 
 
-@model.over('description', '^520__')
+@model.over('description', '(^511__)|(^5111_)|(^518__)|(^520__)')
 def description(self, key, value):
     """Description."""
-    return value.get('a')
+    if key == '511__' or key == '5111_':
+        if value.get('a'):
+            return 'Filmed people: ' + value.get('a')
+        elif value.get('1'):
+            return 'Filmed people: ' + value.get('1')
+        return ''
+
+    if key == '518__':
+        if value.get('a'):
+            if 'description' in self.keys():
+                return self['description'] + '\nPlace and/or date of event: ' + value.get('a')
+            return value.get('a')
+        if 'description' in self.keys():
+            return self['description']
+        return ''
+
+    if value.get('a'):
+        if 'description' in self.keys():
+            return self['description'] + '\nPlace and/or date of event: ' + value.get('a')
+        return value.get('a')
+    if 'description' in self.keys():
+        return self['description']
+    return ''
 
 
 @model.over('keywords', '^6531_')
diff --git a/cds_dojson/marc21/fields/utils.py b/cds_dojson/marc21/fields/utils.py
index 5cacc252..7a4529f6 100644
--- a/cds_dojson/marc21/fields/utils.py
+++ b/cds_dojson/marc21/fields/utils.py
@@ -79,6 +79,7 @@ def _get_correct_video_contributor_role(role):
     'autor': 'Creator',
     'camera': 'Camera Operator',
     'camera & sound': 'Camera Operator',
+    'chairperson': 'Chairperson',
     'co-produced by': 'Co-Producer',
     'co-production': 'Co-Producer',
     'commentaire': 'Comments by',
@@ -119,6 +120,7 @@ def _get_correct_video_contributor_role(role):
     'made by': 'Creator',
     'montage': 'Editor',
     'narrator': 'Narrator',
+    'organiser': 'Organiser',
     'presentator': 'Reporter',
     'presented by': 'Reporter',
     'presenter': 'Reporter',
@@ -146,6 +148,7 @@ def _get_correct_video_contributor_role(role):
     'shooting and editing': ('Camera Operator', 'Editor'),
     'son': 'Music by',
     'speaker': 'Speaker',
+    'sponsor': 'Sponsor',
     'writen by': 'Screenwriter',
     'writer and director': ('Screenwriter', 'Director'),
     'written & directed by': ('Screenwriter', 'Director'),
@@ -227,8 +230,16 @@ def build_contributor(value):
     # Avoids a few calls
     value = get_author_info_from_people_collection(value)
 
-    role = _get_correct_video_contributor_role(
-        value.get('e', 'producer'))  # always unicode
+    if value.get('e'):
+        role = _get_correct_video_contributor_role(
+            value.get('e', 'producer'))  # always unicode
+    else:
+        try:
+            role = _get_correct_video_contributor_role(
+                value.get('g', 'producer'))  # always unicode
+        except:
+            role = 'Producer'
+
     contributors = []
     contributor = {
         'ids': _extract_json_ids(value) or None,
@@ -270,3 +281,11 @@ def build_contributor_from_508(value):
             return contributors
         else:
             return build_contributor({'a': item.strip(), 'e': 'credits'})
+
+def build_contributor_from_906(value):
+    """Build contributors from field 906."""
+    contributor = {'name': value.get('p'), 'role': 'Speaker'}
+    if value.get('u'):
+        contributor['affiliations'] = value.get('u')
+
+    return [contributor]
\ No newline at end of file
diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index 714e7fdf..9180044c 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -45,6 +45,23 @@ def duration(self, key, value):
     i.e. '2 min.', we will extract it programatically later to avoid the
     hassle off dealing with more regex.
""" + data = {} + data['CERN_ID'] = value.get('2', '') + data['res_ar_fps'] = value.get('b', '') + data['FPS'] = value.get('c', '') + data['resolution'] = value.get('d', '') + data['aspect_ratio'] = value.get('e', '') + + empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == ''] + for aux_key in empty_keys: + data.pop(aux_key) + + if len(data.keys()) > 0: + if '_digitization' not in self.keys(): + self['_digitization'] = [data] + else: + self['_digitization'].append(data) + try: return re.match(r'(\d{2}:\d{2}:\d{2})(\.\d+)?', value.get('a')) \ .group(1) @@ -66,6 +83,21 @@ def language(self, key, value): @model.over('physical_medium', '(^340__)|(^852__)') def physical_medium(self, key, value): """Physical medium.""" + data = {} + data['physical_media_note'] = value.get('h', '') + data['has_copy'] = value.get('j', '') + data['physical_media_type'] = value.get('x', '') + + empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == ''] + for aux_key in empty_keys: + data.pop(aux_key) + + if len(data.keys()) > 0: + if '_digitization' not in self.keys(): + self['_digitization'] = [data] + else: + self['_digitization'].append(data) + def find_match(seq, copy): if not seq and not copy \ and key == '852__' and len(_physical_medium) == 1: @@ -112,11 +144,123 @@ def find_match(seq, copy): return [dict((k, v) for k, v in iteritems(i) if v is not None) for i in _physical_medium] +@model.over('related_links', '^775__') +def related_links(self, key, value): + """Related links.""" + related_link = {} + if value.get('b') and value.get('w'): + if value.get('c'): + related_link['name'] = value.get('b') + ' ' + value.get('c') + else: + related_link['name'] = value.get('b') + + related_link['url'] = 'https://cds.cern.ch/record/' + value.get('w') + return related_link + + +@model.over('_digitization', '(^336__)|(^337__)|(^5081_)|(^514__)|(^5831_)|(^583__)|(^594__)|(^597__)|(^65027)|(690C_)|(^7870_)|(^787__)|(^856_2)|(^961__)|(^962__)|(^981__)') +@for_each_value +@ignore_value +def digitization(self, key, value): + """Digitization field.""" + #import ipdb + #ipdb.set_trace() + + data = {} + try: + if key == '336__': + data['curator_split_comment'] = value.get('a', '') + data['curator_split_time'] = value.get('b', '') + + elif key == '337__': + data['media_type'] = value.get('a', '') + + elif key == '5081_': + data['director_info'] = value.get('a', '') + + elif key == '514__': + data['picturae_media_quality'] = value.get('a', '') + + elif key == '5831_': + data['quality_control_info'] = [value.get(code) for code in ['3', '5', '6', 'a', 'b', 'c', 'f', 'i', 'k', 'l', 'n', 'o', 'u', 'x', 'z'] if value.get(code)] + + elif key =='583__': + data['curated'] = value.get('a', '') + data['curation_date'] = value.get('c', '') + data['curation_quality_control'] = value.get('z', '') + + elif key =='594__': + data['curator_category'] = value.get('a', '') + + elif key == '597__': + data['internal_note'] = value.get('a', '') + + elif key == '65027': + data['epfl_category'] = value.get('a', '') + + elif key == '690C_': + data['collection'] = value.get('a', '') + + elif key == '7870_': + data['related_links_info'] = [value.get(code) for code in ['i', 'r', 'w'] if value.get(code)] + + elif key == '787__': + data['related_links_info'] = data['related_links_info'] = [value.get(code) for code in ['1', 'a', 'i', 'w'] if value.get(code)] + + elif key == '856_2': + data['subtitle_extension'] = value.get('q', '') + data['subtitle_path'] = value.get('u', '') + + if value.get('x'): + data['subtitle_language'] = 
value.get('x', '') + else: + data['subtitle_language'] = value.get('y', '') + + data['subtitle_note'] = value.get('z', '') + + elif key == '961__': + data['curator_name'] = value.get('a', '') + data['curator_title'] = value.get('b', '') + data['curation_time'] = value.get('h', '') + + elif key == '962__': + data['conference_cds_recid'] = value.get('b', '') + data['conference_cds_id'] = value.get('n', '') + + elif key == '981__': + data['deleted_cds_records'] = value.get('a', '') + + except Exception as exception: + #print(exception) + pass + + empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == ''] + for aux_key in empty_keys: + data.pop(aux_key) + + if len(data.keys()) > 0: + return data + + return None @model.over('_project_id', '^773__') @ignore_value def project_id(self, key, value): """Report number.""" + data = {} + data['host_item_entry'] = value.get('o', '') + data['library_report_number'] = value.get('r', '') + + empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == ''] + for aux_key in empty_keys: + data.pop(aux_key) + + if len(data.keys()) > 0: + if '_digitization' not in self.keys(): + self['_digitization'] = [data] + else: + self['_digitization'].append(data) + values = force_list(value) project_id = None related_links = self.get('related_links', []) @@ -135,9 +279,12 @@ def project_id(self, key, value): return project_id -@model.over('location', '^110__') +@model.over('location', '(^110__)|(^901__)') def location(self, key, value): """Location.""" + if key == '901__' and 'location' not in self.keys(): + return value.get('u') + return value.get('a') @@ -153,10 +300,20 @@ def internal_note(self, key, value): if v.get('a') in CATEGS: _internal_categories[v.get('a')].append(v.get('s')) else: - _internal_notes.append(v.get('a')) + if v.get('a') is not None: + _internal_notes.append(v.get('a')) + else: + _internal_notes.append('No Category') if _internal_categories: self['internal_categories'] = dict(_internal_categories) + + if value.get('d'): + if '_digitization' not in self.keys(): + self['_digitization'] = [{'internal_note_datetime': value.get('d')}] + else: + self['_digitization'].append({'internal_note_datetime': value.get('d')}) + return '\n'.join(_internal_notes) or None @@ -181,25 +338,90 @@ def accelerator_experiment(self, key, value): 'project': value.get('p'), } - -@model.over('date', '^269__') +@model.over('date', '(^269__)|(^260__)') def date(self, key, value): """Date.""" - return arrow.get(value.get('c')).strftime('%Y-%m-%d') + if value.get('c') is None: + return 'No Date' + if key == '269__': + try: + if type(value.get('c')) is tuple: + return arrow.get(value.get('c')[0]).strftime('%Y-%m-%d') + else: + return arrow.get(value.get('c')).strftime('%Y-%m-%d') + + except: + if type(value.get('c')) is tuple: + match = re.search(r'^(19|20)\d\d-(0[0-9]|1[012])-00', value.get('c')[0]) + else: + match = re.search(r'^(19|20)\d\d-(0[0-9]|1[012])-00', value.get('c')) + + if match is not None: + return match.string.replace('-00', '') + else: + return 'No Date' + + else: + try: + if type(value.get('c')) is tuple: + return arrow.get(value.get('c')[0]).strftime('%Y') + else: + return arrow.get(value.get('c')).strftime('%Y') + + except: + return 'No Date' -@model.over('copyright', '^542__') + +@model.over('copyright', '(^269__)|(^542__)|(^5421_)') @filter_values def copyright(self, key, value): """Copyright.""" - return { - 'holder': value.get('d'), - 'year': value.get('g'), - 'message': value.get('f'), - } + if key == '269__': + if value.get('b'): + 
+                'holder': value.get('b')
+            }
+        return {'holder': ''}
+
+    if key == '5421_':
+
+        if value.get('a'):
+            if '_digitization' not in self.keys():
+                self['_digitization'] = [{'copyright': value.get('a')}]
+            else:
+                self['_digitization'].append({'copyright': value.get('a')})
+
+        if 'copyright' not in self.keys():
+            try:
+                if value.get('a'):
+                    return {
+                        'holder': value.get('a'),
+                        'year': value.get('g')
+                    }
+                else:
+                    return {
+                        'holder': value.get('d'),
+                        'year': value.get('g')
+                    }
+            except:
+                return {'holder': ''}
+
+    if value.get('a'):
+        return {
+            'holder': value.get('a'),
+            'year': value.get('g'),
+            'message': value.get('f'),
+        }
+    else:
+        return {
+            'holder': value.get('d'),
+            'year': value.get('g'),
+            'message': value.get('f'),
+        }
 
 
-@model.over('_files', '^8567_')
+@model.over('_files', '^(8567|8564)_')
 @for_each_value
 @filter_values
 def _files(self, key, value):
@@ -246,9 +468,14 @@ def get_tags(context_type, value):
 
     def get_filepath(value):
         if value.get('d'):
-            return value.get('d')[
-                len('\\\\cern.ch\\dfs\\Services\\'):
-            ].replace('\\', '/')
+            if 'cern.ch\\dfs\\Services' in value.get('d'):
+                return value.get('d')[
+                    len('\\\\cern.ch\\dfs\\Services\\'):
+                ].replace('\\', '/')
+
+            else:
+                return 'http://cern.ch' + value.get('d').split('www')[-1]
+
         else:
             return re.sub(
                 'https?://mediaarchive.cern.ch/', '', value.get('u', '')
@@ -263,7 +490,7 @@ def get_tags_to_guess_preset(context_type, value):
 
     def get_tags_to_transform(context_type, value):
         if context_type in ['frame', 'poster']:
-            return {'timestamp': int(value.get('y').split(' ')[3])}
+            return {'timestamp': int(float(value.get('y').split(' ')[3]))}
 
     def get_frame_name(result):
         _, ext = os.path.splitext(result['key'])
@@ -296,18 +523,66 @@ def compute(value, context_type, media_type):
 
         return result
 
-    result = compute(deepcopy(value), *get_context_type(value))
+    if key == '8567_':
+        result = compute(deepcopy(value), *get_context_type(value))
+
+        # if it's the poster frame, make a copy for a frame!
+        if result['tags']['context_type'] == 'poster' and \
+                result['tags_to_transform']['timestamp'] == 5:
+            frame_5 = compute(value, 'frame', 'image')
+            if '_files' not in self:
+                self['_files'] = []
+            self['_files'].append(frame_5)
+            # update posterframe key name
+            _, ext = os.path.splitext(result['key'])
+            result['key'] = 'posterframe{0}'.format(ext)
+
+    else:
+        data = {}
+        if value.get('1'):
+            data['has_subtitles'] = value.get('1', '')
+        else:
+            data['has_subtitles'] = value.get('i', '')
+        data['storage_service'] = value.get('2', '')
+        data['file_size'] = value.get('s', '')
+        data['record_control_number'] = value.get('w', '')
+        data['record_id'] = value.get('y', '')
+        data['format_resolution'] = value.get('z', '')
+
+        empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == '']
+        for aux_key in empty_keys:
+            data.pop(aux_key)
+
+        if len(data.keys()) > 0:
+            if '_digitization' not in self.keys():
+                self['_digitization'] = [data]
+            else:
+                self['_digitization'].append(data)
 
-    # if it's the poster frame, make a copy for a frame!
-    if result['tags']['context_type'] == 'poster' and \
-            result['tags_to_transform']['timestamp'] == 5:
-        frame_5 = compute(value, 'frame', 'image')
-        if '_files' not in self:
-            self['_files'] = []
-        self['_files'].append(frame_5)
-        # update posterframe key name
-        _, ext = os.path.splitext(result['key'])
-        result['key'] = 'posterframe{0}'.format(ext)
+        result = {}
+        result['key'] = get_key(value)
+
+        result['tags'] = {}
+        if value.get('u') and value.get('q') is not None:
+            result['tags']['preview'] = True
+            result['tags']['context_type'] = 'master'
+            result['tags']['content_type'] = value.get('q').lower()
+
+        else:
+            result['tags']['preview'] = False
+            result['tags']['context_type'] = value.get('q')
+
+        if value.get('y') is None:
+            result['tags']['media_type'] = value.get('y')
+
+        else:
+            try:
+                result['tags']['media_type'] = value.get('y').split('-')[0].lower()
+            except:
+                result['tags']['media_type'] = None
+
+        result['filepath'] = value.get('u')
+        result['tags_to_transform'] = get_tags_to_transform(result['tags']['context_type'], value)
 
     return result
diff --git a/cds_dojson/marc21/models/videos/video.py b/cds_dojson/marc21/models/videos/video.py
index 0b505c28..d42daeba 100644
--- a/cds_dojson/marc21/models/videos/video.py
+++ b/cds_dojson/marc21/models/videos/video.py
@@ -38,54 +38,55 @@ class CDSVideo(OverdoJSONSchema):
         '035__9',
         '035__a',
         '100__9',
-        '260__c',
-        '269__b',
-        '300__b',
-        '300__c',
-        '300__d',
-        '300__e',
-        '337__a',
+        #'260__c',
+        #'269__b',
+        #'300__b',
+        #'300__c',
+        #'300__d',
+        #'300__e',
+        #'337__a',
         '5061_2',
         '5061_5',
         '5061_a',
         '5061_f',
         '5061_z',
         '542__e',
-        '690C_a',
+        #'690C_a',
         '700__0',
         '700__9',
-        '773__o',
-        '773__r',
-        '787__i',
-        '787__w',
-        '852__j',
-        '852__x',
+        #'773__o',
+        #'773__r',
+        #'787__i',
+        #'787__w',
+        #'852__j',
+        #'852__x',
         # FIXME need to double check (see #85)
-        '8564_8',
+        #'8564_8',
         '8564_d',
-        '8564_q',
-        '8564_s',
+        #'8564_q',
+        #'8564_s',
         '8564_u',
         '8564_x',
-        '8564_y',
-        '8564_z',
+        #'8564_y',
+        #'8564_z',
+        #'8564_2',
         '8567_2',
         '916__s',
         '916__w',
         '937__c',
         '960__a',
         '961__c',
-        '961__h',
+        #'961__h',
         '961__l',
         '961__x',
-        '962__b',
+        #'962__b',
         '962__l',
-        '962__n',
+        #'962__n',
         '962__t',
         '963__a',
         '980__a',
         '980__b',
-        '981__a',
+        #'981__a',
     }
diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py
index 8a5a6c79..317b5d33 100644
--- a/cds_dojson/marc21/utils.py
+++ b/cds_dojson/marc21/utils.py
@@ -21,6 +21,7 @@
 from dojson.contrib.marc21.utils import MARC21_DTD, split_stream
 from lxml import etree
 from six import StringIO, binary_type, text_type
+import copy
 
 from ..utils import MementoDict
 
@@ -54,12 +55,13 @@ def create_record(marcxml, correct=False, keep_singletons=True):
             record.append(('leader', text))
 
     controlfield_iterator = tree.iter(tag='{*}controlfield')
-    for controlfield in controlfield_iterator:
+    for index, controlfield in enumerate(controlfield_iterator):
         tag = controlfield.attrib.get('tag', '!')
         text = controlfield.text or ''
         if text or keep_singletons:
             record.append((tag, text))
 
+    multi_video = set()
     datafield_iterator = tree.iter(tag='{*}datafield')
     for datafield in datafield_iterator:
         tag = datafield.attrib.get('tag', '!')
@@ -72,6 +74,7 @@
         ind1 = ind1.replace(' ', '_')
         ind2 = ind2.replace(' ', '_')
 
+        multi_video_with_index = False
         fields = []
         subfield_iterator = datafield.iter(tag='{*}subfield')
         for subfield in subfield_iterator:
@@ -80,11 +83,92 @@
             if text or keep_singletons:
                 fields.append((code, text))
 
+                # Getting video indexes to create multiple records
+                if tag == '856' and code == '8':
+                    multi_video_with_index = True
+                    multi_video = multi_video.union({text})
+
+        # Handle the not indexed video
+        if tag == '856' and not multi_video_with_index:
+            multi_video = multi_video.union({'not_indexed'})
+
         if fields or keep_singletons:
             key = '{0}{1}{2}'.format(tag, ind1, ind2)
             record.append((key, MementoDict(fields)))
 
-    return MementoDict(record)
+    # Creating multiple records
+    tags_indexes = {video: {} for video in multi_video}
+    tags_counter = {video: 0 for video in multi_video}
+    multi_video_dict = {video: [] for video in multi_video}
+    for tag in record:
+        # Tags with no code or with codes, but no '8' code
+        if type(tag[1]) is not MementoDict or '8' not in tag[1].keys():
+
+            # Propagating non-indexed information to all videos
+            if tag[0][:3] != '856':
+                for video in multi_video:
+                    multi_video_dict[video].append(copy.deepcopy(tag))
+
+                    if not(tag[0] in tags_indexes[video]):
+                        tags_indexes[video][tag[0]] = tags_counter[video]
+
+                    tags_counter[video] += 1
+
+            # Video file special case
+            else:
+                multi_video_dict['not_indexed'].append(copy.deepcopy(tag))
+
+                if not(tag[0] in tags_indexes['not_indexed']):
+                    tags_indexes['not_indexed'][tag[0]] = tags_counter['not_indexed']
+
+                tags_counter['not_indexed'] += 1
+
+        # Tags with code '8'
+        else:
+            # Code 8 within the indexes of videos
+            try:
+                multi_video_dict[tag[1]['8']].append(copy.deepcopy(tag))
+
+                if not(tag[0] in tags_indexes[tag[1]['8']]):
+                    tags_indexes[tag[1]['8']][tag[0]] = tags_counter[tag[1]['8']]
+                tags_counter[tag[1]['8']] += 1
+
+            # Wrong code 8
+            except:
+                for video in multi_video:
+                    multi_video_dict[video].append(copy.deepcopy(tag))
+
+                    if not(tag[0] in tags_indexes[video]):
+                        tags_indexes[video][tag[0]] = tags_counter[video]
+
+                    tags_counter[video] += 1
+
+    # Removing redundant tags.
+    # Always use as (tag_to_be_removed, tag_to_be_maintained)
+    redundant_tags = [
+        ('260__', '269__')
+    ]
+
+    for redundant in redundant_tags:
+        for video in multi_video:
+            if tags_indexes[video].get(redundant[0]) is not None and tags_indexes[video].get(redundant[1]) is not None:
+
+                index_to_remove = tags_indexes[video][redundant[0]]
+                while multi_video_dict[video][index_to_remove][0] == redundant[0]:
+                    multi_video_dict[video].pop(tags_indexes[video][redundant[0]])
+
+    # MARCXML with no datafield - only controlfield
+    if len(multi_video) == 0:
+        return MementoDict(record)
+
+    # Single not indexed video
+    if len(multi_video_dict.keys()) == 1:
+        key = [i for i in multi_video_dict.keys()][0]
+        return MementoDict(multi_video_dict[key])
+
+    # Multiple indexed videos
+    return [MementoDict(video_record) for video_record in multi_video_dict.values()]
 
 
 def load(source):
diff --git a/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json b/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json
index fe4256e8..b9ec4d11 100644
--- a/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json
+++ b/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json
@@ -416,6 +416,144 @@
         }
       }
     },
+    "_digitization": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "description": "Field with digitization information for old videos.",
+        "properties": {
+          "CERN_ID": {
+            "type": "string"
+          },
+          "res_ar_fps": {
+            "type": "string"
+          },
+          "FPS": {
+            "type": "string"
+          },
+          "resolution": {
+            "type": "string"
+          },
+          "aspect_ratio": {
+            "type": "string"
+          },
+          "curated": {
+            "type": "string"
+          },
+          "curator_name": {
+            "type": "string"
+          },
+          "curator_title": {
+            "type": "string"
+          },
+          "curation_date": {
+            "type": "string"
+          },
+          "curation_time": {
+            "type": "string"
+          },
+          "curation_quality_control": {
+            "type": "string"
+          },
+          "curator_category": {
+            "type": "string"
+          },
+          "curator_split_comment": {
+            "type": "string"
+          },
+          "curator_split_time": {
+            "type": "string"
+          },
+          "media_type": {
+            "type": "string"
+          },
+          "director_info": {
+            "type": "string"
+          },
+          "picturae_media_quality": {
+            "type": "string"
+          },
+          "copyright": {
+            "type": "string"
+          },
+          "quality_control_info": {
+            "items": {
+              "type": "object"
+            },
+            "type": "array"
+          },
+          "internal_note": {
+            "type": "string"
+          },
+          "internal_note_datetime": {
+            "type": "string"
+          },
+          "epfl_category": {
+            "type": "string"
+          },
+          "collection": {
+            "type": "string"
+          },
+          "host_item_entry": {
+            "type": "string"
+          },
+          "library_report_number": {
+            "type": "string"
+          },
+          "related_links_info": {
+            "items": {
+              "type": "object"
+            },
+            "type": "array"
+          },
+          "physical_media_type": {
+            "type": "string"
+          },
+          "has_copy": {
+            "type": "string"
+          },
+          "has_subtitles": {
+            "type": "string"
+          },
+          "storage_service": {
+            "type": "string"
+          },
+          "file_size": {
+            "type": "string"
+          },
+          "record_control_number": {
+            "type": "string"
+          },
+          "record_id": {
+            "type": "string"
+          },
+          "format_resolution": {
+            "type": "string"
+          },
+          "subtitle_extension": {
+            "type": "string"
+          },
+          "subtitle_path": {
+            "type": "string"
+          },
+          "subtitle_language": {
+            "type": "string"
+          },
+          "subtitle_note": {
+            "type": "string"
+          },
+          "conference_cds_recid": {
+            "type": "string"
+          },
+          "conference_cds_id": {
+            "type": "string"
+          },
+          "deleted_cds_records": {
+            "type": "string"
+          }
+        }
+      }
+    },
     "translations": {
       "items": {
         "type": "object",
diff --git a/cds_dojson/schemas/records/videos/video/video-v1.0.0.json b/cds_dojson/schemas/records/videos/video/video-v1.0.0.json
index 647db46a..9eb170b5 100644
--- a/cds_dojson/schemas/records/videos/video/video-v1.0.0.json
+++ b/cds_dojson/schemas/records/videos/video/video-v1.0.0.json
@@ -151,6 +151,144 @@
         }
       }
     },
+    "_digitization": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "description": "Field with digitization information for old videos.",
+        "properties": {
+          "CERN_ID": {
+            "type": "string"
+          },
+          "res_ar_fps": {
+            "type": "string"
+          },
+          "FPS": {
+            "type": "string"
+          },
+          "resolution": {
+            "type": "string"
+          },
+          "aspect_ratio": {
+            "type": "string"
+          },
+          "curated": {
+            "type": "string"
+          },
+          "curator_name": {
+            "type": "string"
+          },
+          "curator_title": {
+            "type": "string"
+          },
+          "curation_date": {
+            "type": "string"
+          },
+          "curation_time": {
+            "type": "string"
+          },
+          "curation_quality_control": {
+            "type": "string"
+          },
+          "curator_category": {
+            "type": "string"
+          },
+          "curator_split_comment": {
+            "type": "string"
+          },
+          "curator_split_time": {
+            "type": "string"
+          },
+          "media_type": {
+            "type": "string"
+          },
+          "director_info": {
+            "type": "string"
+          },
+          "picturae_media_quality": {
+            "type": "string"
+          },
+          "copyright": {
+            "type": "string"
+          },
+          "quality_control_info": {
+            "items": {
+              "type": "object"
+            },
+            "type": "array"
+          },
+          "internal_note": {
+            "type": "string"
+          },
+          "internal_note_datetime": {
+            "type": "string"
+          },
+          "epfl_category": {
+            "type": "string"
+          },
+          "collection": {
+            "type": "string"
+          },
+          "host_item_entry": {
+            "type": "string"
+          },
+          "library_report_number": {
+            "type": "string"
+          },
+          "related_links_info": {
+            "items": {
+              "type": "object"
+            },
+            "type": "array"
+          },
+          "physical_media_type": {
+            "type": "string"
+          },
+          "has_copy": {
+            "type": "string"
+          },
+          "has_subtitles": {
+            "type": "string"
+          },
+          "storage_service": {
+            "type": "string"
+          },
+          "file_size": {
+            "type": "string"
+          },
+          "record_control_number": {
+            "type": "string"
+          },
+          "record_id": {
+            "type": "string"
+          },
+          "format_resolution": {
+            "type": "string"
+          },
+          "subtitle_extension": {
+            "type": "string"
+          },
+          "subtitle_path": {
+            "type": "string"
+          },
+          "subtitle_language": {
+            "type": "string"
+          },
+          "subtitle_note": {
+            "type": "string"
+          },
+          "conference_cds_recid": {
+            "type": "string"
+          },
+          "conference_cds_id": {
+            "type": "string"
+          },
+          "deleted_cds_records": {
+            "type": "string"
+          }
+        }
+      }
+    },
     "keywords": {
       "items": {
         "type": "object"
diff --git a/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json b/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json
index 8cbf3036..8c86cbe7 100644
--- a/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json
+++ b/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json
@@ -111,6 +111,144 @@
     "_project_id": {
       "type": "string"
     },
+    "_digitization": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "description": "Field with digitization information for old videos.",
+        "properties": {
+          "CERN_ID": {
+            "type": "string"
+          },
+          "res_ar_fps": {
+            "type": "string"
+          },
+          "FPS": {
+            "type": "string"
+          },
+          "resolution": {
+            "type": "string"
+          },
+          "aspect_ratio": {
+            "type": "string"
+          },
+          "curated": {
+            "type": "string"
+          },
+          "curator_name": {
+            "type": "string"
+          },
+          "curator_title": {
+            "type": "string"
+          },
+          "curation_date": {
+            "type": "string"
+          },
+          "curation_time": {
+            "type": "string"
+          },
+          "curation_quality_control": {
+            "type": "string"
+          },
+          "curator_category": {
+            "type": "string"
+          },
+          "curator_split_comment": {
+            "type": "string"
+          },
+          "curator_split_time": {
"curator_split_time": { + "type": "string" + }, + "media_type": { + "type": "string" + }, + "director_info": { + "type": "string" + }, + "picturae_media_quality": { + "type": "string" + }, + "copyright": { + "type": "string" + }, + "quality_control_info": { + "items": { + "type": "object" + }, + "type": "array" + }, + "internal_note": { + "type": "string" + }, + "internal_note_datetime": { + "type": "string" + }, + "epfl_category": { + "type": "string" + }, + "collection": { + "type": "string" + }, + "host_item_entry": { + "type": "string" + }, + "library_report_number": { + "type": "string" + }, + "related_links_info": { + "items": { + "type": "object" + }, + "type": "array" + }, + "physical_media_type": { + "type": "string" + }, + "has_copy": { + "type": "string" + }, + "has_subtitles": { + "type": "string" + }, + "storage_service": { + "type": "string" + }, + "file_size": { + "type": "string" + }, + "record_control_number": { + "type": "string" + }, + "record_id": { + "type": "string" + }, + "format_resolution": { + "type": "string" + }, + "subtitle_extension": { + "type": "string" + }, + "subtitle_path": { + "type": "string" + }, + "subtitle_language": { + "type": "string" + }, + "subtitle_note": { + "type": "string" + }, + "conference_cds_recid": { + "type": "string" + }, + "conference_cds_id": { + "type": "string" + }, + "deleted_cds_records": { + "type": "string" + } + } + } + }, "_cds": { "type": "object", "properties": { diff --git a/tests/test_cli.py b/tests/test_cli.py index 43aacd93..c8c5fab6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -57,4 +57,6 @@ def test_cli(src, compiled): pkg_resources.resource_filename('cds_dojson.schemas', compiled), 'r') as f: compile_schema_expected = json.load(f) + print(compile_schema_expected) + print(compiled_schema_result) assert compile_schema_expected == compiled_schema_result diff --git a/tests/test_videos_project.py b/tests/test_videos_project.py index f2f3f4e9..b10ccdfe 100644 --- a/tests/test_videos_project.py +++ b/tests/test_videos_project.py @@ -18,11 +18,11 @@ # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Video rules tests.""" import mock +from helpers import load_fixture_file, mock_contributor_fetch, validate from cds_dojson.marc21.fields.videos.utils import language_to_isocode from cds_dojson.marc21.models.videos.project import model from cds_dojson.marc21.utils import create_record -from helpers import load_fixture_file, mock_contributor_fetch, validate def test_required_fields(app): diff --git a/tests/test_videos_video.py b/tests/test_videos_video.py index d1bdfd45..8bafda03 100644 --- a/tests/test_videos_video.py +++ b/tests/test_videos_video.py @@ -18,11 +18,11 @@ # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""Video rules tests.""" import mock +from helpers import load_fixture_file, mock_contributor_fetch, validate from cds_dojson.marc21.fields.videos.utils import language_to_isocode from cds_dojson.marc21.models.videos.video import model from cds_dojson.marc21.utils import create_record -from helpers import load_fixture_file, mock_contributor_fetch, validate def test_required_fields(app): @@ -47,7 +47,7 @@ def test_required_fields(app): 'test-email@cern.ch', 'example@test.com'], 'update': ['another.user@cern.ch', - 'tuser@cern.ch']}, + 'tuser@cern.ch']}, '_files': [ { 'filepath': 'MediaArchive/Video/Masters/Movies/CERN/2017/CERN-MOVIE-2017-023/Final_Output/CERN-MOVIE-2017-023-001.mov', @@ -202,6 +202,21 @@ def test_required_fields(app): 'tags_to_transform': {'timestamp': 95} } ], + '_digitization': [ + { + 'res_ar_fps': '1920x1080 16/9, 25.00', + 'FPS': '25', + 'resolution': '1920x1080', + 'aspect_ratio': '16:9' + }, + { + 'collection': 'publvideomovie' + }, + { + 'host_item_entry': 'AVW.project.2963', + 'library_report_number': 'CERN-MOVIE-2017-023' + } + ], '_project_id': 'https://cds.cern.ch/record/1', 'category': 'CERN', 'contributors': [ @@ -461,6 +476,10 @@ def check_transformation(marcxml_body, json_body): 'related_links': [ {'name': 'Version anglaise', 'url': 'http://cds.cern.ch/record/43172'}, {'name': 'Version allemande', 'url': 'https://cds.cern.ch/record/2194933'}, + ], + '_digitization': [ + {'library_report_number': 'CERN-FILM-1965-44'}, + {'host_item_entry': 'AVW.project.111', 'library_report_number': 'CERN-MOVIE-1965-001'} ]} ) check_transformation( @@ -519,7 +538,10 @@ def check_transformation(marcxml_body, json_body): 16:9 """, { - 'duration': '00:00:00' + 'duration': '00:00:00', + '_digitization': [ + {'aspect_ratio': '16:9'} + ] }) check_transformation( """ @@ -528,7 +550,10 @@ def check_transformation(marcxml_body, json_body): 16:9 """, { - 'duration': '12:33:12' + 'duration': '12:33:12', + '_digitization': [ + {'aspect_ratio': '16:9'} + ] }) check_transformation( """ @@ -537,7 +562,10 @@ def check_transformation(marcxml_body, json_body): 16:9 """, { - 'duration': '00:00:00' + 'duration': '00:00:00', + '_digitization': [ + {'res_ar_fps': '16:9,', 'aspect_ratio': '16:9'} + ] }) check_transformation( """