From a9155f7fbc25609e9ebe5dc47e8d2d53db3f1a1c Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Thu, 20 Jul 2023 17:43:43 +0200
Subject: [PATCH 01/19] Development: updated 'date' tag matching and removed
 'date' tag redundancy

---
 cds_dojson/marc21/fields/videos/video.py | 24 ++++++++++++++++++++----
 cds_dojson/marc21/models/videos/video.py |  2 +-
 cds_dojson/marc21/utils.py               | 18 ++++++++++++++++--
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index 714e7fdf..e2289555 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -181,11 +181,27 @@ def accelerator_experiment(self, key, value):
         'project': value.get('p'),
     }
 
-
-@model.over('date', '^269__')
+@model.over('date', '(^269__)|(^260__)')
 def date(self, key, value):
     """Date."""
-    return arrow.get(value.get('c')).strftime('%Y-%m-%d')
+    if key == '269__':
+        try:
+            return arrow.get(value.get('c')).strftime('%Y-%m-%d')
+        
+        except:
+            match = re.search(r'^(19|20)\d\d-(0[0-9]|1[012])-00', value.get('c'))
+            if match is not None:
+                return match.string.replace('-00', '')
+            
+            else:
+                return 'No Date'
+            
+    else:
+        try:
+            return arrow.get(value.get('c')).strftime('%Y')
+        
+        except:
+            return 'No Date'
 
 
 @model.over('copyright', '^542__')
@@ -263,7 +279,7 @@ def get_tags_to_guess_preset(context_type, value):
 
     def get_tags_to_transform(context_type, value):
         if context_type in ['frame', 'poster']:
-            return {'timestamp': int(value.get('y').split(' ')[3])}
+            return {'timestamp': int(float(value.get('y').split(' ')[3]))}
 
     def get_frame_name(result):
         _, ext = os.path.splitext(result['key'])
diff --git a/cds_dojson/marc21/models/videos/video.py b/cds_dojson/marc21/models/videos/video.py
index 0b505c28..b1620c62 100644
--- a/cds_dojson/marc21/models/videos/video.py
+++ b/cds_dojson/marc21/models/videos/video.py
@@ -38,7 +38,7 @@ class CDSVideo(OverdoJSONSchema):
         '035__9',
         '035__a',
         '100__9',
-        '260__c',
+        #'260__c',
         '269__b',
         '300__b',
         '300__c',
diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py
index 8a5a6c79..a0ff9f85 100644
--- a/cds_dojson/marc21/utils.py
+++ b/cds_dojson/marc21/utils.py
@@ -53,15 +53,18 @@ def create_record(marcxml, correct=False, keep_singletons=True):
         text = leader.text or ''
         record.append(('leader', text))
 
+    index_offset = 0
     controlfield_iterator = tree.iter(tag='{*}controlfield')
-    for controlfield in controlfield_iterator:
+    for index, controlfield in enumerate(controlfield_iterator):
         tag = controlfield.attrib.get('tag', '!')
         text = controlfield.text or ''
         if text or keep_singletons:
             record.append((tag, text))
+            index_offset += 1
 
+    tags_indexes = {}
     datafield_iterator = tree.iter(tag='{*}datafield')
-    for datafield in datafield_iterator:
+    for index, datafield in enumerate(datafield_iterator):
         tag = datafield.attrib.get('tag', '!')
         ind1 = datafield.attrib.get('ind1', '!')
         ind2 = datafield.attrib.get('ind2', '!')
@@ -83,6 +86,17 @@ def create_record(marcxml, correct=False, keep_singletons=True):
         if fields or keep_singletons:
             key = '{0}{1}{2}'.format(tag, ind1, ind2)
             record.append((key, MementoDict(fields)))
+            tags_indexes[key] = index + index_offset
+
+    # Removing redundant tags.
+    # Always use as (tag_to_be_removed, tag_to_be_mantained)
+    redundant_tags = [
+        ('260__', '269__')
+    ]
+    
+    for redundant in redundant_tags:
+        if tags_indexes.get(redundant[0]) is not None and tags_indexes.get(redundant[1]) is not None:
+            record.pop(tags_indexes[redundant[0]])
 
     return MementoDict(record)
 

From 523a29ada06fbb7b64f561266ae3faab74db915a Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Fri, 21 Jul 2023 17:01:29 +0200
Subject: [PATCH 02/19] Development: CI updates and test corrections for isort

---
 .github/workflows/pypi-publish.yml |  8 ++++----
 .github/workflows/tests.yml        | 14 +++++++-------
 tests/test_videos_project.py       |  2 +-
 tests/test_videos_video.py         |  2 +-
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml
index 8ba42b45..8e2cf1f9 100644
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -10,16 +10,16 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
         with:
-          python-version: 2.7
+          python-version: 3.6
 
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade "pip>=20,<21" "setuptools>=40,<46" wheel
+          python -m pip install --upgrade pip setuptools wheel
       - name: Build package
         # Remove `compile_catalog` if the package has no translations.
         run: |
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index ed8af9ef..738ba592 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -24,27 +24,27 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        #python-version: [2.7, 3.6]
-        python-version: [2.7]
-        requirements-level: [min, pypi]
+        python-version: [3.6]
+        #python-version: [2.7]
+        requirements-level: [pypi]
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
         with:
           python-version: ${{ matrix.python-version }}
 
       - name: Generate dependencies
         run: |
-          python -m pip install --upgrade "pip>=20,<21" "setuptools>=40,<46" py
+          python -m pip install --upgrade pip setuptools py
           python -m pip install wheel coveralls requirements-builder configparser
           requirements-builder --level=${{ matrix.requirements-level }} setup.py > .${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt
 
       - name: Cache pip
-        uses: actions/cache@v2
+        uses: actions/cache@v3
         with:
           path: ~/.cache/pip
           key: ${{ runner.os }}-pip-${{ hashFiles('.${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt') }}
diff --git a/tests/test_videos_project.py b/tests/test_videos_project.py
index f2f3f4e9..b10ccdfe 100644
--- a/tests/test_videos_project.py
+++ b/tests/test_videos_project.py
@@ -18,11 +18,11 @@
 # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 """Video rules tests."""
 import mock
+from helpers import load_fixture_file, mock_contributor_fetch, validate
 
 from cds_dojson.marc21.fields.videos.utils import language_to_isocode
 from cds_dojson.marc21.models.videos.project import model
 from cds_dojson.marc21.utils import create_record
-from helpers import load_fixture_file, mock_contributor_fetch, validate
 
 
 def test_required_fields(app):
diff --git a/tests/test_videos_video.py b/tests/test_videos_video.py
index d1bdfd45..8e3a44a5 100644
--- a/tests/test_videos_video.py
+++ b/tests/test_videos_video.py
@@ -18,11 +18,11 @@
 # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 """Video rules tests."""
 import mock
+from helpers import load_fixture_file, mock_contributor_fetch, validate
 
 from cds_dojson.marc21.fields.videos.utils import language_to_isocode
 from cds_dojson.marc21.models.videos.video import model
 from cds_dojson.marc21.utils import create_record
-from helpers import load_fixture_file, mock_contributor_fetch, validate
 
 
 def test_required_fields(app):

From 69a52db7e25f436ba7a41a277b1e70a0f9db2111 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Fri, 21 Jul 2023 17:07:21 +0200
Subject: [PATCH 03/19] Development: CI updates

---
 .github/workflows/pypi-publish.yml | 2 +-
 .github/workflows/tests.yml        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml
index 8e2cf1f9..1533bf95 100644
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -19,7 +19,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip setuptools wheel
+          python -m pip install --upgrade "pip==21" "setuptools==40" wheel
       - name: Build package
         # Remove `compile_catalog` if the package has no translations.
         run: |
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 738ba592..d0067a05 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -39,7 +39,7 @@ jobs:
 
       - name: Generate dependencies
         run: |
-          python -m pip install --upgrade pip setuptools py
+          python -m pip install --upgrade "pip==21" "setuptools==40" py
           python -m pip install wheel coveralls requirements-builder configparser
           requirements-builder --level=${{ matrix.requirements-level }} setup.py > .${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt
 

From 1aa9d31f0b26b5d41f5d76ee7ca15950bc589441 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Mon, 24 Jul 2023 17:58:55 +0200
Subject: [PATCH 04/19] Development: fixing bugs on data extraction for
 existing tags.

---
 cds_dojson/marc21/fields/utils.py        | 11 +++++++++--
 cds_dojson/marc21/fields/videos/video.py | 24 ++++++++++++++++++++----
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/cds_dojson/marc21/fields/utils.py b/cds_dojson/marc21/fields/utils.py
index 5cacc252..47143e2f 100644
--- a/cds_dojson/marc21/fields/utils.py
+++ b/cds_dojson/marc21/fields/utils.py
@@ -79,6 +79,7 @@ def _get_correct_video_contributor_role(role):
         'autor': 'Creator',
         'camera': 'Camera Operator',
         'camera & sound': 'Camera Operator',
+        'chairperson': 'Chairperson',
         'co-produced by': 'Co-Producer',
         'co-production': 'Co-Producer',
         'commentaire': 'Comments by',
@@ -119,6 +120,7 @@ def _get_correct_video_contributor_role(role):
         'made by': 'Creator',
         'montage': 'Editor',
         'narrator': 'Narrator',
+        'organiser': 'Organiser',
         'presentator': 'Reporter',
         'presented by': 'Reporter',
         'presenter': 'Reporter',
@@ -146,6 +148,7 @@ def _get_correct_video_contributor_role(role):
         'shooting and editing': ('Camera Operator', 'Editor'),
         'son': 'Music by',
         'speaker': 'Speaker',
+        'sponsor': 'Sponsor',
         'writen by': 'Screenwriter',
         'writer and director': ('Screenwriter', 'Director'),
         'written & directed by': ('Screenwriter', 'Director'),
@@ -227,8 +230,12 @@ def build_contributor(value):
         # Avoids a few calls
         value = get_author_info_from_people_collection(value)
 
-    role = _get_correct_video_contributor_role(
-        value.get('e', 'producer'))  # always unicode
+    try:
+        role = _get_correct_video_contributor_role(
+                value.get('e', 'producer'))  # always unicode
+    except:
+        role = 'Not default: ' + value.get('e', 'producer')
+
     contributors = []
     contributor = {
         'ids': _extract_json_ids(value) or None,
diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index e2289555..d538842c 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -153,7 +153,10 @@ def internal_note(self, key, value):
         if v.get('a') in CATEGS:
             _internal_categories[v.get('a')].append(v.get('s'))
         else:
-            _internal_notes.append(v.get('a'))
+            if v.get('a') is not None:
+                _internal_notes.append(v.get('a'))
+            else:
+                _internal_notes.append('No Category')
 
     if _internal_categories:
         self['internal_categories'] = dict(_internal_categories)
@@ -184,12 +187,22 @@ def accelerator_experiment(self, key, value):
 @model.over('date', '(^269__)|(^260__)')
 def date(self, key, value):
     """Date."""
+    if value.get('c') is None:
+        return 'No Date'
+
     if key == '269__':
         try:
-            return arrow.get(value.get('c')).strftime('%Y-%m-%d')
+            if type(value.get('c')) is tuple:
+                return arrow.get(value.get('c')[0]).strftime('%Y-%m-%d')
+            else:
+                return arrow.get(value.get('c')).strftime('%Y-%m-%d')
         
         except:
-            match = re.search(r'^(19|20)\d\d-(0[0-9]|1[012])-00', value.get('c'))
+            if type(value.get('c')) is tuple:
+                match = re.search(r'^(19|20)\d\d-(0[0-9]|1[012])-00', value.get('c')[0])
+            else:
+                match = re.search(r'^(19|20)\d\d-(0[0-9]|1[012])-00', value.get('c'))
+
             if match is not None:
                 return match.string.replace('-00', '')
             
@@ -198,7 +211,10 @@ def date(self, key, value):
             
     else:
         try:
-            return arrow.get(value.get('c')).strftime('%Y')
+            if type(value.get('c')) is tuple:
+                return arrow.get(value.get('c')[0]).strftime('%Y')
+            else:
+                return arrow.get(value.get('c')).strftime('%Y')
         
         except:
             return 'No Date'

From daf410ac226fc692fd4e499d7a7d854619b3c79d Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Fri, 28 Jul 2023 17:04:59 +0200
Subject: [PATCH 05/19] Development: multiple JSONs for multiple videos inside
 the same record

---
 cds_dojson/marc21/fields/videos/video.py |  1 -
 cds_dojson/marc21/utils.py               | 67 ++++++++++++++++++++----
 2 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index d538842c..8e7f1237 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -205,7 +205,6 @@ def date(self, key, value):
 
             if match is not None:
                 return match.string.replace('-00', '')
-            
             else:
                 return 'No Date'
             
diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py
index a0ff9f85..69d4ca9e 100644
--- a/cds_dojson/marc21/utils.py
+++ b/cds_dojson/marc21/utils.py
@@ -21,6 +21,7 @@
 from dojson.contrib.marc21.utils import MARC21_DTD, split_stream
 from lxml import etree
 from six import StringIO, binary_type, text_type
+import copy
 
 from ..utils import MementoDict
 
@@ -53,18 +54,16 @@ def create_record(marcxml, correct=False, keep_singletons=True):
         text = leader.text or ''
         record.append(('leader', text))
 
-    index_offset = 0
     controlfield_iterator = tree.iter(tag='{*}controlfield')
     for index, controlfield in enumerate(controlfield_iterator):
         tag = controlfield.attrib.get('tag', '!')
         text = controlfield.text or ''
         if text or keep_singletons:
             record.append((tag, text))
-            index_offset += 1
 
-    tags_indexes = {}
+    multi_video = set()
     datafield_iterator = tree.iter(tag='{*}datafield')
-    for index, datafield in enumerate(datafield_iterator):
+    for datafield in datafield_iterator:
         tag = datafield.attrib.get('tag', '!')
         ind1 = datafield.attrib.get('ind1', '!')
         ind2 = datafield.attrib.get('ind2', '!')
@@ -75,6 +74,7 @@ def create_record(marcxml, correct=False, keep_singletons=True):
         ind1 = ind1.replace(' ', '_')
         ind2 = ind2.replace(' ', '_')
 
+        multi_video_with_index = False
         fields = []
         subfield_iterator = datafield.iter(tag='{*}subfield')
         for subfield in subfield_iterator:
@@ -83,11 +83,56 @@ def create_record(marcxml, correct=False, keep_singletons=True):
             if text or keep_singletons:
                 fields.append((code, text))
 
+                # Getting video indexes to create multiple records
+                if tag == '856' and code == '8':
+                    multi_video_with_index = True
+                    multi_video = multi_video.union({text})
+
+        # Handle the not indexed video
+        if not multi_video_with_index:
+            multi_video = multi_video.union({'not_indexed'})
+
         if fields or keep_singletons:
             key = '{0}{1}{2}'.format(tag, ind1, ind2)
             record.append((key, MementoDict(fields)))
-            tags_indexes[key] = index + index_offset
 
+    # Creating multiple records
+    tags_indexes = {video: {} for video in multi_video}
+    tags_counter = {video: 0 for video in multi_video}
+    multi_video_dict = {video: [] for video in multi_video}
+    for tag in record:
+        # Tags with no code or with codes, but no '8' code
+        if type(tag[1]) is not MementoDict or '8' not in tag[1].keys():
+            for video in multi_video:
+                multi_video_dict[video].append(copy.deepcopy(tag))
+                
+                if not(tag[0] in tags_indexes[video]):
+                    tags_indexes[video][tag[0]] = tags_counter[video]
+
+                tags_counter[video] += 1
+                    
+
+        # Tags with code '8'
+        else:
+            # Code 8 within the indexes of videos
+            try:
+                multi_video_dict[tag[1]['8']].append(copy.deepcopy(tag))
+
+                if not(tag[0] in tags_indexes[tag[1]['8']]):
+                    tags_indexes[tag[1]['8']][tag[0]] = tags_counter[tag[1]['8']]
+
+                tags_counter[tag[1]['8']] += 1
+
+            # Wrong code 8
+            except:
+                for video in multi_video:
+                    multi_video_dict[video].append(copy.deepcopy(tag))
+                    
+                    if not(tag[0] in tags_indexes[video]):
+                        tags_indexes[video][tag[0]] = tags_counter[video]
+
+                    tags_counter[video] += 1
+            
     # Removing redundant tags.
     # Always use as (tag_to_be_removed, tag_to_be_mantained)
     redundant_tags = [
@@ -95,10 +140,14 @@ def create_record(marcxml, correct=False, keep_singletons=True):
     ]
     
     for redundant in redundant_tags:
-        if tags_indexes.get(redundant[0]) is not None and tags_indexes.get(redundant[1]) is not None:
-            record.pop(tags_indexes[redundant[0]])
-
-    return MementoDict(record)
+        for video in multi_video:
+            if tags_indexes[video].get(redundant[0]) is not None and tags_indexes[video].get(redundant[1]) is not None:
+                
+                index_to_remove = tags_indexes[video][redundant[0]]
+                while multi_video_dict[video][index_to_remove][0] == redundant[0]:
+                    multi_video_dict[video].pop(tags_indexes[video][redundant[0]])
+
+    return [MementoDict(video_record) for video_record in multi_video_dict.values()]
 
 
 def load(source):

From 39c97d7e3c50d5cecd3e7cb99c67345ace3f7c43 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Fri, 28 Jul 2023 17:19:44 +0200
Subject: [PATCH 06/19] Git: recommitting because of wrong origin

---
 cds_dojson/marc21/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py
index 69d4ca9e..b0fd1fea 100644
--- a/cds_dojson/marc21/utils.py
+++ b/cds_dojson/marc21/utils.py
@@ -120,7 +120,6 @@ def create_record(marcxml, correct=False, keep_singletons=True):
 
                 if not(tag[0] in tags_indexes[tag[1]['8']]):
                     tags_indexes[tag[1]['8']][tag[0]] = tags_counter[tag[1]['8']]
-
                 tags_counter[tag[1]['8']] += 1
 
             # Wrong code 8

From 5d5eeb3ebd9e4e06a5e9bdd1213c93596155de68 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Fri, 28 Jul 2023 17:29:35 +0200
Subject: [PATCH 07/19] Development: fixing return type bug

---
 cds_dojson/marc21/utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py
index b0fd1fea..ff6399aa 100644
--- a/cds_dojson/marc21/utils.py
+++ b/cds_dojson/marc21/utils.py
@@ -146,6 +146,9 @@ def create_record(marcxml, correct=False, keep_singletons=True):
                 while multi_video_dict[video][index_to_remove][0] == redundant[0]:
                     multi_video_dict[video].pop(tags_indexes[video][redundant[0]])
 
+    if len(multi_video_dict.keys()) == 1:
+        return MementoDict(multi_video_dict['not_indexed'])
+    
     return [MementoDict(video_record) for video_record in multi_video_dict.values()]
 
 

From a59c37e1552d8d233c532895e5ab7daf78468a5d Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Mon, 31 Jul 2023 10:23:20 +0200
Subject: [PATCH 08/19] Development: bug fixed to handle controlfield only
 marcxml

---
 cds_dojson/marc21/utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py
index ff6399aa..8a35c7f9 100644
--- a/cds_dojson/marc21/utils.py
+++ b/cds_dojson/marc21/utils.py
@@ -146,9 +146,15 @@ def create_record(marcxml, correct=False, keep_singletons=True):
                 while multi_video_dict[video][index_to_remove][0] == redundant[0]:
                     multi_video_dict[video].pop(tags_indexes[video][redundant[0]])
 
+    # MARCXML with no datafield - only controlfield
+    if len(multi_video) == 0:
+        return MementoDict(record)
+
+    # Single not indexed video
     if len(multi_video_dict.keys()) == 1:
         return MementoDict(multi_video_dict['not_indexed'])
     
+    # Multiple indexed videos
     return [MementoDict(video_record) for video_record in multi_video_dict.values()]
 
 

From 573133021a4d7e964c1bb23344aa661114b17ec3 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Tue, 1 Aug 2023 17:02:45 +0200
Subject: [PATCH 09/19] Development: tag 8564 added to model

---
 cds_dojson/marc21/fields/videos/video.py | 50 +++++++++++++++++-------
 cds_dojson/marc21/models/videos/video.py | 17 ++++----
 2 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index 8e7f1237..a2fe4265 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -230,7 +230,7 @@ def copyright(self, key, value):
     }
 
 
-@model.over('_files', '^8567_')
+@model.over('_files', '^(8567|8564)_')
 @for_each_value
 @filter_values
 def _files(self, key, value):
@@ -277,9 +277,14 @@ def get_tags(context_type, value):
 
     def get_filepath(value):
         if value.get('d'):
-            return value.get('d')[
-                len('\\\\cern.ch\\dfs\\Services\\'):
-            ].replace('\\', '/')
+            if 'cern.ch\\dfs\\Services' in value.get('d'):
+                return value.get('d')[
+                    len('\\\\cern.ch\\dfs\\Services\\'):
+                ].replace('\\', '/')
+            
+            else:
+                return 'http://cern.ch' + value.get('d').split('www')[-1]
+        
         else:
             return re.sub(
                 'https?://mediaarchive.cern.ch/', '', value.get('u', '')
@@ -327,18 +332,33 @@ def compute(value, context_type, media_type):
 
         return result
 
-    result = compute(deepcopy(value), *get_context_type(value))
+    if key == '8567_':
+        result = compute(deepcopy(value), *get_context_type(value))
 
-    # if it's the poster frame, make a copy for a frame!
-    if result['tags']['context_type'] == 'poster' and \
-            result['tags_to_transform']['timestamp'] == 5:
-        frame_5 = compute(value, 'frame', 'image')
-        if '_files' not in self:
-            self['_files'] = []
-        self['_files'].append(frame_5)
-        # update posterframe key name
-        _, ext = os.path.splitext(result['key'])
-        result['key'] = 'posterframe{0}'.format(ext)
+        # if it's the poster frame, make a copy for a frame!
+        if result['tags']['context_type'] == 'poster' and \
+                result['tags_to_transform']['timestamp'] == 5:
+            frame_5 = compute(value, 'frame', 'image')
+            if '_files' not in self:
+                self['_files'] = []
+            self['_files'].append(frame_5)
+            # update posterframe key name
+            _, ext = os.path.splitext(result['key'])
+            result['key'] = 'posterframe{0}'.format(ext)
+
+    else:
+        result = {}
+        result['key'] = get_key(value)
+
+        result['tags'] = {}
+        if value.get('u'):
+            result['tags']['preview'] = True
+            result['tags']['context_type'] = 'master'
+        result['tags']['media_type'] = value.get('y').split('-')[0].lower()
+        result['tags']['content_type'] = value.get('q').lower()
+
+        result['filepath'] = value.get('u')
+        result['tags_to_transform'] = get_tags_to_transform(result['tags']['context_type'], value)
 
     return result
 
diff --git a/cds_dojson/marc21/models/videos/video.py b/cds_dojson/marc21/models/videos/video.py
index b1620c62..7dd59ae5 100644
--- a/cds_dojson/marc21/models/videos/video.py
+++ b/cds_dojson/marc21/models/videos/video.py
@@ -61,14 +61,15 @@ class CDSVideo(OverdoJSONSchema):
         '852__j',
         '852__x',
         # FIXME need to double check (see #85)
-        '8564_8',
-        '8564_d',
-        '8564_q',
-        '8564_s',
-        '8564_u',
-        '8564_x',
-        '8564_y',
-        '8564_z',
+        #'8564_8',
+        #'8564_d',
+        #'8564_q',
+        #'8564_s',
+        #'8564_u',
+        #'8564_x',
+        #'8564_y',
+        #'8564_z',
+        '8564_2',
         '8567_2',
         '916__s',
         '916__w',

From 393daf13ce5434d3456e723d6d17480a7cdc5dda Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Fri, 4 Aug 2023 17:48:28 +0200
Subject: [PATCH 10/19] Development: bug on tag 8564 fixed and made
 'not_indexed' video the master with all information

---
 cds_dojson/marc21/fields/videos/video.py | 15 ++++++++++++---
 cds_dojson/marc21/utils.py               |  5 +++++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index a2fe4265..05a74e8b 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -351,11 +351,20 @@ def compute(value, context_type, media_type):
         result['key'] = get_key(value)
 
         result['tags'] = {}
-        if value.get('u'):
+        if value.get('u') and value.get('q') is not None:
             result['tags']['preview'] = True
             result['tags']['context_type'] = 'master'
-        result['tags']['media_type'] = value.get('y').split('-')[0].lower()
-        result['tags']['content_type'] = value.get('q').lower()
+            result['tags']['content_type'] = value.get('q').lower()
+
+        else:
+            result['tags']['preview'] = False
+            result['tags']['context_type'] = value.get('q')
+
+        if value.get('y') is None:
+            result['tags']['media_type'] = value.get('y')
+        
+        else:
+            result['tags']['media_type'] = value.get('y').split('-')[0].lower()
 
         result['filepath'] = value.get('u')
         result['tags_to_transform'] = get_tags_to_transform(result['tags']['context_type'], value)
diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py
index 8a35c7f9..c5d0da44 100644
--- a/cds_dojson/marc21/utils.py
+++ b/cds_dojson/marc21/utils.py
@@ -117,11 +117,16 @@ def create_record(marcxml, correct=False, keep_singletons=True):
             # Code 8 within the indexes of videos
             try:
                 multi_video_dict[tag[1]['8']].append(copy.deepcopy(tag))
+                multi_video_dict['not_indexed'].append(copy.deepcopy(tag))
 
                 if not(tag[0] in tags_indexes[tag[1]['8']]):
                     tags_indexes[tag[1]['8']][tag[0]] = tags_counter[tag[1]['8']]
                 tags_counter[tag[1]['8']] += 1
 
+                if not(tag[0] in tags_indexes['not_indexed']):
+                    tags_indexes['not_indexed'][tag[0]] = tags_counter['not_indexed']
+                tags_counter['not_indexed'] += 1
+
             # Wrong code 8
             except:
                 for video in multi_video:

From 84df89103f6df84ae4a8674179cf85b0fc11bea3 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Wed, 16 Aug 2023 17:20:12 +0200
Subject: [PATCH 11/19] Development: changed not_indexed tag set and fixed a
 bug when reading '8564' tag

---
 cds_dojson/marc21/fields/videos/video.py |  5 +++-
 cds_dojson/marc21/utils.py               | 35 ++++++++++++++----------
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index 05a74e8b..3398d5fd 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -364,7 +364,10 @@ def compute(value, context_type, media_type):
             result['tags']['media_type'] = value.get('y')
         
         else:
-            result['tags']['media_type'] = value.get('y').split('-')[0].lower()
+            try:
+                result['tags']['media_type'] = value.get('y').split('-')[0].lower()
+            except:
+                result['tags']['media_type'] = None
 
         result['filepath'] = value.get('u')
         result['tags_to_transform'] = get_tags_to_transform(result['tags']['context_type'], value)
diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py
index c5d0da44..32b29405 100644
--- a/cds_dojson/marc21/utils.py
+++ b/cds_dojson/marc21/utils.py
@@ -89,7 +89,7 @@ def create_record(marcxml, correct=False, keep_singletons=True):
                     multi_video = multi_video.union({text})
 
         # Handle the not indexed video
-        if not multi_video_with_index:
+        if tag == '856' and not multi_video_with_index:
             multi_video = multi_video.union({'not_indexed'})
 
         if fields or keep_singletons:
@@ -103,13 +103,25 @@ def create_record(marcxml, correct=False, keep_singletons=True):
     for tag in record:
         # Tags with no code or with codes, but no '8' code
         if type(tag[1]) is not MementoDict or '8' not in tag[1].keys():
-            for video in multi_video:
-                multi_video_dict[video].append(copy.deepcopy(tag))
-                
-                if not(tag[0] in tags_indexes[video]):
-                    tags_indexes[video][tag[0]] = tags_counter[video]
+            
+            # Propagating non-ndexed information to all videos
+            if tag[0][:3] != '856':
+                for video in multi_video:
+                    multi_video_dict[video].append(copy.deepcopy(tag))
+                    
+                    if not(tag[0] in tags_indexes[video]):
+                        tags_indexes[video][tag[0]] = tags_counter[video]
+
+                    tags_counter[video] += 1
+            
+            # Video file special case
+            else:
+                multi_video_dict['not_indexed'].append(copy.deepcopy(tag))
+                    
+                if not(tag[0] in tags_indexes['not_indexed']):
+                    tags_indexes['not_indexed'][tag[0]] = tags_counter['not_indexed']
 
-                tags_counter[video] += 1
+                tags_counter['not_indexed'] += 1
                     
 
         # Tags with code '8'
@@ -117,16 +129,11 @@ def create_record(marcxml, correct=False, keep_singletons=True):
             # Code 8 within the indexes of videos
             try:
                 multi_video_dict[tag[1]['8']].append(copy.deepcopy(tag))
-                multi_video_dict['not_indexed'].append(copy.deepcopy(tag))
-
+                
                 if not(tag[0] in tags_indexes[tag[1]['8']]):
                     tags_indexes[tag[1]['8']][tag[0]] = tags_counter[tag[1]['8']]
                 tags_counter[tag[1]['8']] += 1
 
-                if not(tag[0] in tags_indexes['not_indexed']):
-                    tags_indexes['not_indexed'][tag[0]] = tags_counter['not_indexed']
-                tags_counter['not_indexed'] += 1
-
             # Wrong code 8
             except:
                 for video in multi_video:
@@ -136,7 +143,7 @@ def create_record(marcxml, correct=False, keep_singletons=True):
                         tags_indexes[video][tag[0]] = tags_counter[video]
 
                     tags_counter[video] += 1
-            
+
     # Removing redundant tags.
     # Always use as (tag_to_be_removed, tag_to_be_mantained)
     redundant_tags = [

From b7c9718a643caff630ec073605c03aff6c993787 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Thu, 17 Aug 2023 17:38:14 +0200
Subject: [PATCH 12/19] Development: fixing name bug for non-indexed videos and
 tag 246 added

---
 cds_dojson/marc21/fields/base.py  | 4 ++++
 cds_dojson/marc21/fields/utils.py | 2 +-
 cds_dojson/marc21/utils.py        | 3 ++-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/cds_dojson/marc21/fields/base.py b/cds_dojson/marc21/fields/base.py
index a1a9e481..eeb9802b 100644
--- a/cds_dojson/marc21/fields/base.py
+++ b/cds_dojson/marc21/fields/base.py
@@ -87,6 +87,10 @@ def translations(self, key, value):
     translation = self.get('translations', [{}])[0]
     if key.startswith('246'):
         translation['title'] = {'title': value.get('a')}
+        if value.get('n'):
+            translation['description'] = value.get('n')
+        if value.get('p'):
+            translation['title']['subtitle'] = value.get('p')
     if key.startswith('590'):
         translation['description'] = value.get('a')
     translation['language'] = 'fr'
diff --git a/cds_dojson/marc21/fields/utils.py b/cds_dojson/marc21/fields/utils.py
index 47143e2f..8413326d 100644
--- a/cds_dojson/marc21/fields/utils.py
+++ b/cds_dojson/marc21/fields/utils.py
@@ -234,7 +234,7 @@ def build_contributor(value):
         role = _get_correct_video_contributor_role(
                 value.get('e', 'producer'))  # always unicode
     except:
-        role = 'Not default: ' + value.get('e', 'producer')
+        role = 'Producer'
 
     contributors = []
     contributor = {
diff --git a/cds_dojson/marc21/utils.py b/cds_dojson/marc21/utils.py
index 32b29405..317b5d33 100644
--- a/cds_dojson/marc21/utils.py
+++ b/cds_dojson/marc21/utils.py
@@ -164,7 +164,8 @@ def create_record(marcxml, correct=False, keep_singletons=True):
 
     # Single not indexed video
     if len(multi_video_dict.keys()) == 1:
-        return MementoDict(multi_video_dict['not_indexed'])
+        key = [i for i in  multi_video_dict.keys()][0]
+        return MementoDict(multi_video_dict[key])
     
     # Multiple indexed videos
     return [MementoDict(video_record) for video_record in multi_video_dict.values()]

From f23b6f2b54f8ea18c25d4e1154ac265dd0cc27a9 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Fri, 18 Aug 2023 15:34:36 +0200
Subject: [PATCH 13/19] Development: new tags added to existing properties

---
 cds_dojson/marc21/fields/base.py         | 43 +++++++++++++++--
 cds_dojson/marc21/fields/utils.py        | 17 +++++--
 cds_dojson/marc21/fields/videos/video.py | 60 +++++++++++++++++++++---
 3 files changed, 105 insertions(+), 15 deletions(-)

diff --git a/cds_dojson/marc21/fields/base.py b/cds_dojson/marc21/fields/base.py
index eeb9802b..469e3734 100644
--- a/cds_dojson/marc21/fields/base.py
+++ b/cds_dojson/marc21/fields/base.py
@@ -24,7 +24,7 @@
                           ignore_value)
 
 from ..models.base import model
-from .utils import build_contributor, build_contributor_from_508
+from .utils import build_contributor, build_contributor_from_508, build_contributor_from_906
 
 
 @model.over('recid', '^001')
@@ -54,14 +54,25 @@ def report_number(self, key, value):
     return rn
 
 
-@model.over('contributors', '^(100|700|508)__')
+@model.over('contributors', '^(100|700|508|906)__')
 def contributors(self, key, value):
     """Contributors."""
     authors = self.get('contributors', [])
     if key in ['100__', '700__']:
         items = build_contributor(value)
-    else:
+    elif key == '508__':
         items = build_contributor_from_508(value)
+    else:
+        items = build_contributor_from_906(value)
+        if 'contributors' in self.keys():
+            import ipdb
+            ipdb.set_trace()
+
+            names = [dic['name'] for dic in self['contributors']]
+            roles = [dic['role'] for dic in self['contributors']]
+            if items[0]['name'] in names:
+                if items[0]['role'] == roles[names.index(items[0]['name'])]:
+                    items = None
     # add only contributors that are not part of the authors
     if items:
         authors.extend(
@@ -98,10 +109,32 @@ def translations(self, key, value):
     raise IgnoreKey('translations')
 
 
-@model.over('description', '^520__')
+@model.over('description', '(^511__)|(^5111_)|(^518__)|(^520__)')
 def description(self, key, value):
     """Description."""
-    return value.get('a')
+    if key == '511__' or key == '5111_':
+        if value.get('a'):
+            return 'Filmed people: ' + value.get('a')
+        elif value.get('1'):
+            return 'Filmed people: ' + value.get('1')
+        return ''
+    
+    if key == '518__':
+        if value.get('a'):
+            if 'description' in self.keys():
+                return self['description'] + '\nPlace and/or date of event: ' + value.get('a')
+            return value.get('a')
+        if 'description' in self.keys():
+            return self['description']
+        return ''
+
+    if value.get('a'):
+        if 'description' in self.keys():
+            return self['description'] + '\nPlace and/or date of event: ' + value.get('a')
+        return value.get('a')
+    if 'description' in self.keys():
+        return self['description']
+    return ''
 
 
 @model.over('keywords', '^6531_')
diff --git a/cds_dojson/marc21/fields/utils.py b/cds_dojson/marc21/fields/utils.py
index 8413326d..1eddf84a 100644
--- a/cds_dojson/marc21/fields/utils.py
+++ b/cds_dojson/marc21/fields/utils.py
@@ -230,11 +230,15 @@ def build_contributor(value):
         # Avoids a few calls
         value = get_author_info_from_people_collection(value)
 
-    try:
+    if value.get('e'):
         role = _get_correct_video_contributor_role(
                 value.get('e', 'producer'))  # always unicode
-    except:
-        role = 'Producer'
+    else:
+        try:
+            role = _get_correct_video_contributor_role(
+                    value.get('g', 'producer'))  # always unicode
+        except:
+            role = 'Producer'
 
     contributors = []
     contributor = {
@@ -277,3 +281,10 @@ def build_contributor_from_508(value):
             return contributors
     else:
         return build_contributor({'a': item.strip(), 'e': 'credits'})
+    
+def build_contributor_from_906(value):
+    contributor = {'name': value.get('p'), 'role': 'Speaker'}
+    if value.get('u'):
+        contributor['affiliations'] = (value.get('u'))
+
+    return [contributor]
\ No newline at end of file
diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index 3398d5fd..b75d63ed 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -112,6 +112,19 @@ def find_match(seq, copy):
     return [dict((k, v) for k, v in iteritems(i) if v is not None)
             for i in _physical_medium]
 
+@model.over('related_links', '^775__')
+def related_links(self, key, value):
+    
+    related_link = {}
+    if value.get('b') and value.get('w'):
+        if value.get('c'):
+            related_link['name'] = value.get('b') + ' ' + value.get('c')
+        else:
+            related_link['name'] = value.get('b')
+        
+        related_link['url'] = 'https://cds.cern.ch/record/' + value.get('w')
+    return related_link
+
 
 @model.over('_project_id', '^773__')
 @ignore_value
@@ -135,8 +148,11 @@ def project_id(self, key, value):
     return project_id
 
 
-@model.over('location', '^110__')
+@model.over('location', '(^110__)|(^901__)')
 def location(self, key, value):
+    if key == '901__' and 'location' not in self.keys():
+        return value.get('u')
+    
     """Location."""
     return value.get('a')
 
@@ -219,15 +235,45 @@ def date(self, key, value):
             return 'No Date'
 
 
-@model.over('copyright', '^542__')
+@model.over('copyright', '(^269__)|(^542__)|(^5421_)')
 @filter_values
 def copyright(self, key, value):
     """Copyright."""
-    return {
-        'holder': value.get('d'),
-        'year': value.get('g'),
-        'message': value.get('f'),
-    }
+    if key == '269__':
+        if value.get('b'):
+            return {
+                'holder': value.get('b')
+            }
+        return {'holder': ''}
+    
+    if key == '5421_':
+        if 'copyright' not in self.keys():
+            try:
+                if value.get('a'):
+                    return {
+                        'holder': value.get('a'),
+                        'year': value.get('g')
+                    }
+                else:
+                    return {
+                        'holder': value.get('d'),
+                        'year': value.get('g')
+                    }
+            except:
+                return {'holder': ''}
+    
+    if value.get('a'):
+        return {
+            'holder': value.get('a'),
+            'year': value.get('g'),
+            'message': value.get('f'),
+        }
+    else:
+        return {
+            'holder': value.get('d'),
+            'year': value.get('g'),
+            'message': value.get('f'),
+        }
 
 
 @model.over('_files', '^(8567|8564)_')

From 3fd87e6167186ea97c3061d7ae31fa70343aee3b Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Fri, 18 Aug 2023 15:39:59 +0200
Subject: [PATCH 14/19] Documentation: fixing missing docstring error

---
 cds_dojson/marc21/fields/utils.py        | 1 +
 cds_dojson/marc21/fields/videos/video.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/cds_dojson/marc21/fields/utils.py b/cds_dojson/marc21/fields/utils.py
index 1eddf84a..7a4529f6 100644
--- a/cds_dojson/marc21/fields/utils.py
+++ b/cds_dojson/marc21/fields/utils.py
@@ -283,6 +283,7 @@ def build_contributor_from_508(value):
         return build_contributor({'a': item.strip(), 'e': 'credits'})
     
 def build_contributor_from_906(value):
+    """Build contributors from field 906."""
     contributor = {'name': value.get('p'), 'role': 'Speaker'}
     if value.get('u'):
         contributor['affiliations'] = (value.get('u'))
diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index b75d63ed..337dacac 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -114,7 +114,7 @@ def find_match(seq, copy):
 
 @model.over('related_links', '^775__')
 def related_links(self, key, value):
-    
+    """Related links"""
     related_link = {}
     if value.get('b') and value.get('w'):
         if value.get('c'):
@@ -150,10 +150,10 @@ def project_id(self, key, value):
 
 @model.over('location', '(^110__)|(^901__)')
 def location(self, key, value):
+    """Location."""
     if key == '901__' and 'location' not in self.keys():
         return value.get('u')
     
-    """Location."""
     return value.get('a')
 
 

From 4e6bb8fcffd5dd2307610cda2768873f77788cc0 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Fri, 18 Aug 2023 15:43:15 +0200
Subject: [PATCH 15/19] Documentation: fixing missing docstring error

---
 cds_dojson/marc21/fields/videos/video.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index 337dacac..abbe77b2 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -114,7 +114,7 @@ def find_match(seq, copy):
 
 @model.over('related_links', '^775__')
 def related_links(self, key, value):
-    """Related links"""
+    """Related links."""
     related_link = {}
     if value.get('b') and value.get('w'):
         if value.get('c'):

From fa92d486ddb52b01944fb930d8a406890d3cbfca Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Mon, 21 Aug 2023 17:21:28 +0200
Subject: [PATCH 16/19] Development: '_digitization' field added to the schema
 and tags added to processing step

---
 cds_dojson/marc21/fields/base.py              |   3 -
 cds_dojson/marc21/fields/videos/video.py      | 111 ++++++++++++++
 .../records/videos/video/video-v1.0.0.json    | 135 ++++++++++++++++++
 3 files changed, 246 insertions(+), 3 deletions(-)

diff --git a/cds_dojson/marc21/fields/base.py b/cds_dojson/marc21/fields/base.py
index 469e3734..52f14288 100644
--- a/cds_dojson/marc21/fields/base.py
+++ b/cds_dojson/marc21/fields/base.py
@@ -65,9 +65,6 @@ def contributors(self, key, value):
     else:
         items = build_contributor_from_906(value)
         if 'contributors' in self.keys():
-            import ipdb
-            ipdb.set_trace()
-
             names = [dic['name'] for dic in self['contributors']]
             roles = [dic['role'] for dic in self['contributors']]
             if items[0]['name'] in names:
diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index abbe77b2..25c43d29 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -126,6 +126,117 @@ def related_links(self, key, value):
     return related_link
 
 
+@model.over('_digitization', '(^300__)|(^336__)|(^337__)|(^5081_)|(^514__)|(^5421_)|(^5831_)|(^583__)|(^594__)|(^595__)|(^597__)|(^65027)|(690C_)|(^773__)|(^7870_)|(^787__)|(^852__)|(^8564_)|(^856__)|(^961__)|(^962__)|(^981__)')
+@for_each_value
+def digitization(self, key, value):
+    """Digitization field."""
+    #import ipdb
+    #ipdb.set_trace()
+    
+    data = {}
+    try:
+        if key == '300__':
+            data['CERN_ID'] = value.get('2', '')
+            data['res_ar_fps'] = value.get('b', '')
+            data['FPS'] = value.get('c', '')
+            data['resolution'] = value.get('d', '')
+            data['aspect_ratio'] = value.get('e', '')
+        
+        elif key == '336__':
+            data['curator_split_comment'] = value.get('a', '')
+            data['curator_split_time'] = value.get('b', '')
+
+        elif key == '337__':
+            data['media_type'] = value.get('a', '')
+
+        elif key == '5081_':
+            data['director_info'] = value.get('a', '')
+
+        elif key == '514__':
+            data['picturae_media_quality'] = value.get('a', '')
+
+        elif key =='5421_':
+            data['copyright'] = value.get('a', '')
+                
+        elif key == '5831_':
+            data['quality_control_info'] = [value.get(code) for code in ['3', '5', '6', 'a', 'b', 'c', 'f', 'i', 'k', 'l', 'n', 'o', 'u', 'x', 'z'] if value.get(code)]
+            
+        elif key =='583__':
+            data['curated'] = value.get('a', '')
+            data['curation_date'] = value.get('c', '')
+            data['curation_quality_control'] = value.get('z', '')
+
+        elif key =='594__':
+            data['curator_category'] = value.get('a', '')
+        
+        elif key == '595__':
+            data['internal_note_datetime'] = value.get('d', '')
+            
+        elif key == '597__':
+            data['internal_note'] = value.get('a', '')
+
+        elif key == '65027':
+            data['epfl_category'] = value.get('a', '')
+
+        elif key == '690C_':
+            data['collection'] = value.get('a', '')
+
+        elif key == '773__':
+            data['host_item_entry'] = value.get('o', '')
+            data['library_report_number'] = value.get('r', '')
+
+        elif key == '7870_':
+            data['related_links_info'] = [value.get(code) for code in ['i', 'r', 'w'] if value.get(code)]
+            
+        elif key == '787__':
+            data['related_links_info'] = data['related_links_info'] = [value.get(code) for code in ['1', 'a', 'i', 'w'] if value.get(code)]
+            
+        elif key == '852__':
+            data['physical_media_note'] = value.get('h', '')
+            data['has_copy'] = value.get('j', '')
+            data['physical_media_type'] = value.get('x', '')
+
+        elif key == '8564_':
+            if value.get('1'):
+                data['has_subtitles'] = value.get('1', '')
+            else:
+                data['has_subtitles'] = value.get('i', '')
+
+            data['storage_service'] = value.get('2', '')
+            data['file_size'] = value.get('s', '')
+            data['record_control_number'] = value.get('w', '')
+            data['record_id'] = value.get('y', '')
+            data['format_resolution'] = value.get('z', '')
+
+        elif key == '856_2':
+            data['subtitle_extension'] = value.get('q', '')
+            data['subtitle_path'] = value.get('u', '')
+
+            if value.get('x'):
+                data['subtitle_language'] = value.get('x', '')
+            else:
+                data['subtitle_language'] = value.get('y', '')
+
+            data['subtitle_note'] = value.get('z', '')
+
+        elif key == '961__':
+            data['curator_name'] = value.get('a', '')
+            data['curator_title'] = value.get('b', '')
+            data['curation_time'] = value.get('h', '')
+
+        elif key == '962__':
+            data['conference_cds_recid'] = value.get('b', '')
+            data['conference_cds_id'] = value.get('n', '')
+
+        elif key == '962__':
+            data['deleted_cds_records'] = value.get('a', '')
+    
+    except Exception as exception:
+        #print(exception)
+        pass
+
+    return data
+
 @model.over('_project_id', '^773__')
 @ignore_value
 def project_id(self, key, value):
diff --git a/cds_dojson/schemas/records/videos/video/video-v1.0.0.json b/cds_dojson/schemas/records/videos/video/video-v1.0.0.json
index 647db46a..d9476528 100644
--- a/cds_dojson/schemas/records/videos/video/video-v1.0.0.json
+++ b/cds_dojson/schemas/records/videos/video/video-v1.0.0.json
@@ -151,6 +151,141 @@
         }
       }
     },
+    "_digitization": {
+      "type": "object",
+      "description": "Field with digitization information for old videos.",
+      "properties": {
+        "CERN_ID": {
+          "type": "string"
+        },
+        "res_ar_fps": {
+          "type": "string"
+        },
+        "FPS": {
+          "type": "string"
+        },
+        "resolution": {
+          "type": "string"
+        },
+        "aspect_ratio": {
+          "type": "string"
+        },
+        "curated": {
+          "type": "string"
+        },
+        "curator_name": {
+          "type": "string"
+        },
+        "curator_title": {
+          "type": "string"
+        },
+        "curation_date": {
+          "type": "string"
+        },
+        "curation_time": {
+          "type": "string"
+        },
+        "curation_quality_control": {
+          "type": "string"
+        },
+        "curator_category": {
+          "type": "string"
+        },
+        "curator_split_comment": {
+          "type": "string"
+        },
+        "curator_split_time": {
+          "type": "string"
+        },
+        "media_type": {
+          "type": "string"
+        },
+        "director_info": {
+          "type": "string"
+        },
+        "picturae_media_quality": {
+          "type": "string"
+        },
+        "copyright": {
+          "type": "string"
+        },
+        "quality_control_info": {
+          "items": {
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "internal_note": {
+          "type": "string"
+        },
+        "internal_note_datetime": {
+          "type": "string"
+        },
+        "epfl_category": {
+          "type": "string"
+        },
+        "collection": {
+          "type": "string"
+        },
+        "host_item_entry": {
+          "type": "string"
+        },
+        "library_report_number": {
+          "type": "string"
+        },
+        "related_links_info": {
+          "items": {
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "physical_media_type": {
+          "type": "string"
+        },
+        "has_copy": {
+          "type": "string"
+        },
+        "has_subtitles": {
+          "type": "string"
+        },
+        "storage_service": {
+          "type": "string"
+        },
+        "file_size": {
+          "type": "string"
+        },
+        "record_control_number": {
+          "type": "string"
+        },
+        "record_id": {
+          "type": "string"
+        },
+        "format_resolution": {
+          "type": "string"
+        },
+        "subtitle_extension": {
+          "type": "string"
+        },
+        "subtitle_path": {
+          "type": "string"
+        },
+        "subtitle_language": {
+          "type": "string"
+        },
+        "subtitle_note": {
+          "type": "string"
+        },
+        "conference_cds_recid": {
+          "type": "string"
+        },
+        "conference_cds_id": {
+          "type": "string"
+        },
+        "deleted_cds_records": {
+          "type": "string"
+        }
+      }
+    },
     "keywords": {
       "items": {
         "type": "object"

From 67131feb61ca418023ae8e2a8f654deffff7b916 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Tue, 22 Aug 2023 15:28:02 +0200
Subject: [PATCH 17/19] Development: video test revision, bug fixing on new
 read tags

---
 cds_dojson/marc21/fields/videos/video.py |  6 +++-
 cds_dojson/marc21/models/videos/video.py | 42 ++++++++++++------------
 tests/test_videos_video.py               | 14 +++++++-
 3 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index 25c43d29..266720bf 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -228,13 +228,17 @@ def digitization(self, key, value):
             data['conference_cds_recid'] = value.get('b', '')
             data['conference_cds_id'] = value.get('n', '')
 
-        elif key == '962__':
+        elif key == '981__':
             data['deleted_cds_records'] = value.get('a', '')
     
     except Exception as exception:
         #print(exception)
         pass
 
+    empty_keys = [key for key in data.keys() if data[key] == '']
+    for key in empty_keys:
+        data.pop(key)
+
     return data
 
 @model.over('_project_id', '^773__')
diff --git a/cds_dojson/marc21/models/videos/video.py b/cds_dojson/marc21/models/videos/video.py
index 7dd59ae5..d42daeba 100644
--- a/cds_dojson/marc21/models/videos/video.py
+++ b/cds_dojson/marc21/models/videos/video.py
@@ -39,54 +39,54 @@ class CDSVideo(OverdoJSONSchema):
         '035__a',
         '100__9',
         #'260__c',
-        '269__b',
-        '300__b',
-        '300__c',
-        '300__d',
-        '300__e',
-        '337__a',
+        #'269__b',
+        #'300__b',
+        #'300__c',
+        #'300__d',
+        #'300__e',
+        #'337__a',
         '5061_2',
         '5061_5',
         '5061_a',
         '5061_f',
         '5061_z',
         '542__e',
-        '690C_a',
+        #'690C_a',
         '700__0',
         '700__9',
-        '773__o',
-        '773__r',
-        '787__i',
-        '787__w',
-        '852__j',
-        '852__x',
+        #'773__o',
+        #'773__r',
+        #'787__i',
+        #'787__w',
+        #'852__j',
+        #'852__x',
         # FIXME need to double check (see #85)
         #'8564_8',
-        #'8564_d',
+        '8564_d',
         #'8564_q',
         #'8564_s',
-        #'8564_u',
-        #'8564_x',
+        '8564_u',
+        '8564_x',
         #'8564_y',
         #'8564_z',
-        '8564_2',
+        #'8564_2',
         '8567_2',
         '916__s',
         '916__w',
         '937__c',
         '960__a',
         '961__c',
-        '961__h',
+        #'961__h',
         '961__l',
         '961__x',
-        '962__b',
+        #'962__b',
         '962__l',
-        '962__n',
+        #'962__n',
         '962__t',
         '963__a',
         '980__a',
         '980__b',
-        '981__a',
+        #'981__a',
     }
 
 
diff --git a/tests/test_videos_video.py b/tests/test_videos_video.py
index 8e3a44a5..ba4ca416 100644
--- a/tests/test_videos_video.py
+++ b/tests/test_videos_video.py
@@ -47,7 +47,7 @@ def test_required_fields(app):
                                     'test-email@cern.ch',
                                     'example@test.com'],
                             'update': ['another.user@cern.ch',
-                                    'tuser@cern.ch']},
+                                    'tuser@cern.ch']},    
                 '_files': [
                     {
                         'filepath': 'MediaArchive/Video/Masters/Movies/CERN/2017/CERN-MOVIE-2017-023/Final_Output/CERN-MOVIE-2017-023-001.mov',
@@ -202,6 +202,18 @@ def test_required_fields(app):
                         'tags_to_transform': {'timestamp': 95}
                     }
                 ],
+                '_digitization': [
+                    {
+                        'res_ar_fps': '1920x1080 16/9, 25.00',
+                        'FPS': '25',
+                        'resolution': '1920x1080',
+                        'aspect_ratio': '16:9'
+                    },
+                    {
+                        'host_item_entry': 'AVW.project.2963',
+                        'library_report_number': 'CERN-MOVIE-2017-023'
+                    }
+                ],
                 '_project_id': 'https://cds.cern.ch/record/1',
                 'category': 'CERN',
                 'contributors': [

From 09eaa24b579b952be3a49642dca838cc6edfcba0 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Tue, 22 Aug 2023 17:45:10 +0200
Subject: [PATCH 18/19] Development: bug fixed on tag capturing and new version
 of tests for videos added

---
 cds_dojson/marc21/fields/videos/video.py      | 131 ++++++---
 .../records/videos/video/video-v1.0.0.json    | 261 +++++++++---------
 tests/test_videos_video.py                    |  22 +-
 3 files changed, 242 insertions(+), 172 deletions(-)

diff --git a/cds_dojson/marc21/fields/videos/video.py b/cds_dojson/marc21/fields/videos/video.py
index 266720bf..9180044c 100644
--- a/cds_dojson/marc21/fields/videos/video.py
+++ b/cds_dojson/marc21/fields/videos/video.py
@@ -45,6 +45,23 @@ def duration(self, key, value):
     i.e. '2 min.', we will extract it programatically later to avoid the hassle
     off dealing with more regex.
     """
+    data = {}
+    data['CERN_ID'] = value.get('2', '')
+    data['res_ar_fps'] = value.get('b', '')
+    data['FPS'] = value.get('c', '')
+    data['resolution'] = value.get('d', '')
+    data['aspect_ratio'] = value.get('e', '')
+
+    empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == '']
+    for aux_key in empty_keys:
+        data.pop(aux_key)
+
+    if len(data.keys()) > 0:
+        if '_digitization' not in self.keys():
+            self['_digitization'] = [data]
+        else:
+            self['_digitization'].append(data)
+
     try:
         return re.match(r'(\d{2}:\d{2}:\d{2})(\.\d+)?', value.get('a')) \
             .group(1)
@@ -66,6 +83,21 @@ def language(self, key, value):
 @model.over('physical_medium', '(^340__)|(^852__)')
 def physical_medium(self, key, value):
     """Physical medium."""
+    data = {}
+    data['physical_media_note'] = value.get('h', '')
+    data['has_copy'] = value.get('j', '')
+    data['physical_media_type'] = value.get('x', '')
+
+    empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == '']
+    for aux_key in empty_keys:
+        data.pop(aux_key)
+
+    if len(data.keys()) > 0:
+        if '_digitization' not in self.keys():
+            self['_digitization'] = [data]
+        else:
+            self['_digitization'].append(data)
+
     def find_match(seq, copy):
         if not seq and not copy \
                 and key == '852__' and len(_physical_medium) == 1:
@@ -126,8 +158,9 @@ def related_links(self, key, value):
     return related_link
 
 
-@model.over('_digitization', '(^300__)|(^336__)|(^337__)|(^5081_)|(^514__)|(^5421_)|(^5831_)|(^583__)|(^594__)|(^595__)|(^597__)|(^65027)|(690C_)|(^773__)|(^7870_)|(^787__)|(^852__)|(^8564_)|(^856__)|(^961__)|(^962__)|(^981__)')
+@model.over('_digitization', '(^336__)|(^337__)|(^5081_)|(^514__)|(^5831_)|(^583__)|(^594__)|(^597__)|(^65027)|(690C_)|(^7870_)|(^787__)|(^856_2)|(^961__)|(^962__)|(^981__)')
 @for_each_value
+@ignore_value
 def digitization(self, key, value):
     """Digitization field."""
     #import ipdb
@@ -135,14 +168,7 @@ def digitization(self, key, value):
     
     data = {}
     try:
-        if key == '300__':
-            data['CERN_ID'] = value.get('2', '')
-            data['res_ar_fps'] = value.get('b', '')
-            data['FPS'] = value.get('c', '')
-            data['resolution'] = value.get('d', '')
-            data['aspect_ratio'] = value.get('e', '')
-        
-        elif key == '336__':
+        if key == '336__':
             data['curator_split_comment'] = value.get('a', '')
             data['curator_split_time'] = value.get('b', '')
 
@@ -154,9 +180,6 @@ def digitization(self, key, value):
 
         elif key == '514__':
             data['picturae_media_quality'] = value.get('a', '')
-
-        elif key =='5421_':
-            data['copyright'] = value.get('a', '')
                 
         elif key == '5831_':
             data['quality_control_info'] = [value.get(code) for code in ['3', '5', '6', 'a', 'b', 'c', 'f', 'i', 'k', 'l', 'n', 'o', 'u', 'x', 'z'] if value.get(code)]
@@ -168,9 +191,6 @@ def digitization(self, key, value):
 
         elif key =='594__':
             data['curator_category'] = value.get('a', '')
-        
-        elif key == '595__':
-            data['internal_note_datetime'] = value.get('d', '')
             
         elif key == '597__':
             data['internal_note'] = value.get('a', '')
@@ -181,32 +201,11 @@ def digitization(self, key, value):
         elif key == '690C_':
             data['collection'] = value.get('a', '')
 
-        elif key == '773__':
-            data['host_item_entry'] = value.get('o', '')
-            data['library_report_number'] = value.get('r', '')
-
         elif key == '7870_':
             data['related_links_info'] = [value.get(code) for code in ['i', 'r', 'w'] if value.get(code)]
             
         elif key == '787__':
             data['related_links_info'] = data['related_links_info'] = [value.get(code) for code in ['1', 'a', 'i', 'w'] if value.get(code)]
-            
-        elif key == '852__':
-            data['physical_media_note'] = value.get('h', '')
-            data['has_copy'] = value.get('j', '')
-            data['physical_media_type'] = value.get('x', '')
-
-        elif key == '8564_':
-            if value.get('1'):
-                data['has_subtitles'] = value.get('1', '')
-            else:
-                data['has_subtitles'] = value.get('i', '')
-
-            data['storage_service'] = value.get('2', '')
-            data['file_size'] = value.get('s', '')
-            data['record_control_number'] = value.get('w', '')
-            data['record_id'] = value.get('y', '')
-            data['format_resolution'] = value.get('z', '')
 
         elif key == '856_2':
             data['subtitle_extension'] = value.get('q', '')
@@ -235,16 +234,33 @@ def digitization(self, key, value):
         #print(exception)
         pass
 
-    empty_keys = [key for key in data.keys() if data[key] == '']
-    for key in empty_keys:
-        data.pop(key)
+    empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == '']
+    for aux_key in empty_keys:
+        data.pop(aux_key)
 
-    return data
+    if len(data.keys()) > 0:
+        return data
+    
+    return None
 
 @model.over('_project_id', '^773__')
 @ignore_value
 def project_id(self, key, value):
     """Report number."""
+    data = {}
+    data['host_item_entry'] = value.get('o', '')
+    data['library_report_number'] = value.get('r', '')
+
+    empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == '']
+    for aux_key in empty_keys:
+        data.pop(aux_key)
+
+    if len(data.keys()) > 0:
+        if '_digitization' not in self.keys():
+            self['_digitization'] = [data]
+        else:
+            self['_digitization'].append(data)
+
     values = force_list(value)
     project_id = None
     related_links = self.get('related_links', [])
@@ -291,6 +307,13 @@ def internal_note(self, key, value):
 
     if _internal_categories:
         self['internal_categories'] = dict(_internal_categories)
+
+    if value.get('d'):
+        if '_digitization' not in self.keys():
+            self['_digitization'] = [{'internal_note_datetime': value.get('d')}]
+        else:
+            self['_digitization'].append({'internal_note_datetime': value.get('d')})
+
     return '\n'.join(_internal_notes) or None
 
 
@@ -362,6 +385,13 @@ def copyright(self, key, value):
         return {'holder': ''}
     
     if key == '5421_':
+        
+        if value.get('a'):
+            if '_digitization' not in self.keys():
+                self['_digitization'] = [{'copyright': value.get('a')}]
+            else:
+                self['_digitization'].append({'copyright': value.get('a')})
+
         if 'copyright' not in self.keys():
             try:
                 if value.get('a'):
@@ -508,6 +538,27 @@ def compute(value, context_type, media_type):
             result['key'] = 'posterframe{0}'.format(ext)
 
     else:
+        data = {}
+        if value.get('1'):
+            data['has_subtitles'] = value.get('1', '')
+        else:
+            data['has_subtitles'] = value.get('i', '')
+        data['storage_service'] = value.get('2', '')
+        data['file_size'] = value.get('s', '')
+        data['record_control_number'] = value.get('w', '')
+        data['record_id'] = value.get('y', '')
+        data['format_resolution'] = value.get('z', '')
+
+        empty_keys = [aux_key for aux_key in data.keys() if data[aux_key] == '']
+        for aux_key in empty_keys:
+            data.pop(aux_key)
+
+        if len(data.keys()) > 0:
+            if '_digitization' not in self.keys():
+                self['_digitization'] = [data]
+            else:
+                self['_digitization'].append(data)
+
         result = {}
         result['key'] = get_key(value)
 
diff --git a/cds_dojson/schemas/records/videos/video/video-v1.0.0.json b/cds_dojson/schemas/records/videos/video/video-v1.0.0.json
index d9476528..9eb170b5 100644
--- a/cds_dojson/schemas/records/videos/video/video-v1.0.0.json
+++ b/cds_dojson/schemas/records/videos/video/video-v1.0.0.json
@@ -152,137 +152,140 @@
       }
     },
     "_digitization": {
-      "type": "object",
-      "description": "Field with digitization information for old videos.",
-      "properties": {
-        "CERN_ID": {
-          "type": "string"
-        },
-        "res_ar_fps": {
-          "type": "string"
-        },
-        "FPS": {
-          "type": "string"
-        },
-        "resolution": {
-          "type": "string"
-        },
-        "aspect_ratio": {
-          "type": "string"
-        },
-        "curated": {
-          "type": "string"
-        },
-        "curator_name": {
-          "type": "string"
-        },
-        "curator_title": {
-          "type": "string"
-        },
-        "curation_date": {
-          "type": "string"
-        },
-        "curation_time": {
-          "type": "string"
-        },
-        "curation_quality_control": {
-          "type": "string"
-        },
-        "curator_category": {
-          "type": "string"
-        },
-        "curator_split_comment": {
-          "type": "string"
-        },
-        "curator_split_time": {
-          "type": "string"
-        },
-        "media_type": {
-          "type": "string"
-        },
-        "director_info": {
-          "type": "string"
-        },
-        "picturae_media_quality": {
-          "type": "string"
-        },
-        "copyright": {
-          "type": "string"
-        },
-        "quality_control_info": {
-          "items": {
-            "type": "object"
+      "type": "array",
+      "items": {
+        "type": "object",
+        "description": "Field with digitization information for old videos.",
+        "properties": {
+          "CERN_ID": {
+            "type": "string"
           },
-          "type": "array"
-        },
-        "internal_note": {
-          "type": "string"
-        },
-        "internal_note_datetime": {
-          "type": "string"
-        },
-        "epfl_category": {
-          "type": "string"
-        },
-        "collection": {
-          "type": "string"
-        },
-        "host_item_entry": {
-          "type": "string"
-        },
-        "library_report_number": {
-          "type": "string"
-        },
-        "related_links_info": {
-          "items": {
-            "type": "object"
+          "res_ar_fps": {
+            "type": "string"
           },
-          "type": "array"
-        },
-        "physical_media_type": {
-          "type": "string"
-        },
-        "has_copy": {
-          "type": "string"
-        },
-        "has_subtitles": {
-          "type": "string"
-        },
-        "storage_service": {
-          "type": "string"
-        },
-        "file_size": {
-          "type": "string"
-        },
-        "record_control_number": {
-          "type": "string"
-        },
-        "record_id": {
-          "type": "string"
-        },
-        "format_resolution": {
-          "type": "string"
-        },
-        "subtitle_extension": {
-          "type": "string"
-        },
-        "subtitle_path": {
-          "type": "string"
-        },
-        "subtitle_language": {
-          "type": "string"
-        },
-        "subtitle_note": {
-          "type": "string"
-        },
-        "conference_cds_recid": {
-          "type": "string"
-        },
-        "conference_cds_id": {
-          "type": "string"
-        },
-        "deleted_cds_records": {
-          "type": "string"
+          "FPS": {
+            "type": "string"
+          },
+          "resolution": {
+            "type": "string"
+          },
+          "aspect_ratio": {
+            "type": "string"
+          },
+          "curated": {
+            "type": "string"
+          },
+          "curator_name": {
+            "type": "string"
+          },
+          "curator_title": {
+            "type": "string"
+          },
+          "curation_date": {
+            "type": "string"
+          },
+          "curation_time": {
+            "type": "string"
+          },
+          "curation_quality_control": {
+            "type": "string"
+          },
+          "curator_category": {
+            "type": "string"
+          },
+          "curator_split_comment": {
+            "type": "string"
+          },
+          "curator_split_time": {
+            "type": "string"
+          },
+          "media_type": {
+            "type": "string"
+          },
+          "director_info": {
+            "type": "string"
+          },
+          "picturae_media_quality": {
+            "type": "string"
+          },
+          "copyright": {
+            "type": "string"
+          },
+          "quality_control_info": {
+            "items": {
+              "type": "object"
+            },
+            "type": "array"
+          },
+          "internal_note": {
+            "type": "string"
+          },
+          "internal_note_datetime": {
+            "type": "string"
+          },
+          "epfl_category": {
+            "type": "string"
+          },
+          "collection": {
+            "type": "string"
+          },
+          "host_item_entry": {
+            "type": "string"
+          },
+          "library_report_number": {
+            "type": "string"
+          },
+          "related_links_info": {
+            "items": {
+              "type": "object"
+            },
+            "type": "array"
+          },
+          "physical_media_type": {
+            "type": "string"
+          },
+          "has_copy": {
+            "type": "string"
+          },
+          "has_subtitles": {
+            "type": "string"
+          },
+          "storage_service": {
+            "type": "string"
+          },
+          "file_size": {
+            "type": "string"
+          },
+          "record_control_number": {
+            "type": "string"
+          },
+          "record_id": {
+            "type": "string"
+          },
+          "format_resolution": {
+            "type": "string"
+          },
+          "subtitle_extension": {
+            "type": "string"
+          },
+          "subtitle_path": {
+            "type": "string"
+          },
+          "subtitle_language": {
+            "type": "string"
+          },
+          "subtitle_note": {
+            "type": "string"
+          },
+          "conference_cds_recid": {
+            "type": "string"
+          },
+          "conference_cds_id": {
+            "type": "string"
+          },
+          "deleted_cds_records": {
+            "type": "string"
+          }
         }
       }
     },
diff --git a/tests/test_videos_video.py b/tests/test_videos_video.py
index ba4ca416..f682bb9d 100644
--- a/tests/test_videos_video.py
+++ b/tests/test_videos_video.py
@@ -209,6 +209,9 @@ def test_required_fields(app):
                         'resolution': '1920x1080',
                         'aspect_ratio': '16:9'
                     },
+                    {
+                        'collection': 'publvideomovie'
+                    },
                     {
                         'host_item_entry': 'AVW.project.2963',
                         'library_report_number': 'CERN-MOVIE-2017-023'
@@ -473,6 +476,10 @@ def check_transformation(marcxml_body, json_body):
                     'related_links': [
                         {'name': 'Version anglaise', 'url': 'http://cds.cern.ch/record/43172'},
                         {'name': 'Version allemande', 'url': 'https://cds.cern.ch/record/2194933'},
+                    ],
+                    '_digitization': [
+                        {'library_report_number': 'CERN-FILM-1965-44'},
+                        {'host_item_entry': 'AVW.project.111', 'library_report_number': 'CERN-MOVIE-1965-001'}
                     ]}
             )
             check_transformation(
@@ -531,7 +538,10 @@ def check_transformation(marcxml_body, json_body):
                     <subfield code="e">16:9</subfield>
                 </datafield>
                 """, {
-                    'duration': '00:00:00'
+                    'duration': '00:00:00',
+                    '_digitization': [
+                        {'aspect_ratio': '16:9'}
+                    ]
                 })
             check_transformation(
                 """
@@ -540,7 +550,10 @@ def check_transformation(marcxml_body, json_body):
                     <subfield code="e">16:9</subfield>
                 </datafield>
                 """, {
-                    'duration': '12:33:12'
+                    'duration': '12:33:12',
+                    '_digitization': [
+                        {'aspect_ratio': '16:9'}
+                    ]
                 })
             check_transformation(
                 """
@@ -549,7 +562,10 @@ def check_transformation(marcxml_body, json_body):
                     <subfield code="e">16:9</subfield>
                 </datafield>
                 """, {
-                    'duration': '00:00:00'
+                    'duration': '00:00:00',
+                    '_digitization': [
+                        {'res_ar_fps': '16:9', 'aspect_ratio': '16:9'}
+                    ]
                 })
             check_transformation(
                 """

From 520c154cdf4d22c52bcb23abfb48aa16200c42a3 Mon Sep 17 00:00:00 2001
From: Luis Zerkowski <luisvz@gmail.com>
Date: Wed, 23 Aug 2023 10:10:29 +0200
Subject: [PATCH 19/19] Development: updated deposit video schema and modified
 tests to include '_digitization' field

---
 .../records/videos/video/video-v1.0.0.json    | 138 ++++++++++++++++++
 .../videos/video/video_src-v1.0.0.json        | 138 ++++++++++++++++++
 tests/test_cli.py                             |   2 +
 tests/test_videos_video.py                    |   2 +-
 4 files changed, 279 insertions(+), 1 deletion(-)

diff --git a/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json b/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json
index fe4256e8..b9ec4d11 100644
--- a/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json
+++ b/cds_dojson/schemas/deposits/records/videos/video/video-v1.0.0.json
@@ -416,6 +416,144 @@
         }
       }
     },
+    "_digitization": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "description": "Field with digitization information for old videos.",
+        "properties": {
+          "CERN_ID": {
+            "type": "string"
+          },
+          "res_ar_fps": {
+            "type": "string"
+          },
+          "FPS": {
+            "type": "string"
+          },
+          "resolution": {
+            "type": "string"
+          },
+          "aspect_ratio": {
+            "type": "string"
+          },
+          "curated": {
+            "type": "string"
+          },
+          "curator_name": {
+            "type": "string"
+          },
+          "curator_title": {
+            "type": "string"
+          },
+          "curation_date": {
+            "type": "string"
+          },
+          "curation_time": {
+            "type": "string"
+          },
+          "curation_quality_control": {
+            "type": "string"
+          },
+          "curator_category": {
+            "type": "string"
+          },
+          "curator_split_comment": {
+            "type": "string"
+          },
+          "curator_split_time": {
+            "type": "string"
+          },
+          "media_type": {
+            "type": "string"
+          },
+          "director_info": {
+            "type": "string"
+          },
+          "picturae_media_quality": {
+            "type": "string"
+          },
+          "copyright": {
+            "type": "string"
+          },
+          "quality_control_info": {
+            "items": {
+              "type": "object"
+            },
+            "type": "array"
+          },
+          "internal_note": {
+            "type": "string"
+          },
+          "internal_note_datetime": {
+            "type": "string"
+          },
+          "epfl_category": {
+            "type": "string"
+          },
+          "collection": {
+            "type": "string"
+          },
+          "host_item_entry": {
+            "type": "string"
+          },
+          "library_report_number": {
+            "type": "string"
+          },
+          "related_links_info": {
+            "items": {
+              "type": "object"
+            },
+            "type": "array"
+          },
+          "physical_media_type": {
+            "type": "string"
+          },
+          "has_copy": {
+            "type": "string"
+          },
+          "has_subtitles": {
+            "type": "string"
+          },
+          "storage_service": {
+            "type": "string"
+          },
+          "file_size": {
+            "type": "string"
+          },
+          "record_control_number": {
+            "type": "string"
+          },
+          "record_id": {
+            "type": "string"
+          },
+          "format_resolution": {
+            "type": "string"
+          },
+          "subtitle_extension": {
+            "type": "string"
+          },
+          "subtitle_path": {
+            "type": "string"
+          },
+          "subtitle_language": {
+            "type": "string"
+          },
+          "subtitle_note": {
+            "type": "string"
+          },
+          "conference_cds_recid": {
+            "type": "string"
+          },
+          "conference_cds_id": {
+            "type": "string"
+          },
+          "deleted_cds_records": {
+            "type": "string"
+          }
+        }
+      }
+    },
     "translations": {
       "items": {
         "type": "object",
diff --git a/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json b/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json
index 8cbf3036..8c86cbe7 100644
--- a/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json
+++ b/cds_dojson/schemas/records/videos/video/video_src-v1.0.0.json
@@ -111,6 +111,144 @@
         "_project_id": {
           "type": "string"
         },
+        "_digitization": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "description": "Field with digitization information for old videos.",
+            "properties": {
+              "CERN_ID": {
+                "type": "string"
+              },
+              "res_ar_fps": {
+                "type": "string"
+              },
+              "FPS": {
+                "type": "string"
+              },
+              "resolution": {
+                "type": "string"
+              },
+              "aspect_ratio": {
+                "type": "string"
+              },
+              "curated": {
+                "type": "string"
+              },
+              "curator_name": {
+                "type": "string"
+              },
+              "curator_title": {
+                "type": "string"
+              },
+              "curation_date": {
+                "type": "string"
+              },
+              "curation_time": {
+                "type": "string"
+              },
+              "curation_quality_control": {
+                "type": "string"
+              },
+              "curator_category": {
+                "type": "string"
+              },
+              "curator_split_comment": {
+                "type": "string"
+              },
+              "curator_split_time": {
+                "type": "string"
+              },
+              "media_type": {
+                "type": "string"
+              },
+              "director_info": {
+                "type": "string"
+              },
+              "picturae_media_quality": {
+                "type": "string"
+              },
+              "copyright": {
+                "type": "string"
+              },
+              "quality_control_info": {
+                "items": {
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "internal_note": {
+                "type": "string"
+              },
+              "internal_note_datetime": {
+                "type": "string"
+              },
+              "epfl_category": {
+                "type": "string"
+              },
+              "collection": {
+                "type": "string"
+              },
+              "host_item_entry": {
+                "type": "string"
+              },
+              "library_report_number": {
+                "type": "string"
+              },
+              "related_links_info": {
+                "items": {
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "physical_media_type": {
+                "type": "string"
+              },
+              "has_copy": {
+                "type": "string"
+              },
+              "has_subtitles": {
+                "type": "string"
+              },
+              "storage_service": {
+                "type": "string"
+              },
+              "file_size": {
+                "type": "string"
+              },
+              "record_control_number": {
+                "type": "string"
+              },
+              "record_id": {
+                "type": "string"
+              },
+              "format_resolution": {
+                "type": "string"
+              },
+              "subtitle_extension": {
+                "type": "string"
+              },
+              "subtitle_path": {
+                "type": "string"
+              },
+              "subtitle_language": {
+                "type": "string"
+              },
+              "subtitle_note": {
+                "type": "string"
+              },
+              "conference_cds_recid": {
+                "type": "string"
+              },
+              "conference_cds_id": {
+                "type": "string"
+              },
+              "deleted_cds_records": {
+                "type": "string"
+              }
+            }
+          }
+        },
         "_cds": {
           "type": "object",
           "properties": {
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 43aacd93..c8c5fab6 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -57,4 +57,6 @@ def test_cli(src, compiled):
             pkg_resources.resource_filename('cds_dojson.schemas',
                                             compiled), 'r') as f:
         compile_schema_expected = json.load(f)
+    print(compile_schema_expected)
+    print(compiled_schema_result)
     assert compile_schema_expected == compiled_schema_result
diff --git a/tests/test_videos_video.py b/tests/test_videos_video.py
index f682bb9d..8bafda03 100644
--- a/tests/test_videos_video.py
+++ b/tests/test_videos_video.py
@@ -564,7 +564,7 @@ def check_transformation(marcxml_body, json_body):
                 """, {
                     'duration': '00:00:00',
                     '_digitization': [
-                        {'res_ar_fps': '16:9', 'aspect_ratio': '16:9'}
+                        {'res_ar_fps': '16:9,', 'aspect_ratio': '16:9'}
                     ]
                 })
             check_transformation(