diff --git a/ckanext/datajson/datajson.py b/ckanext/datajson/datajson.py index 2e44715b..26338a01 100644 --- a/ckanext/datajson/datajson.py +++ b/ckanext/datajson/datajson.py @@ -11,7 +11,7 @@ from ckanext.harvest.model import HarvestObject, HarvestObjectError, HarvestObjectExtra from ckanext.harvest.harvesters.base import HarvesterBase -from ckanext.datajson.exceptions import ParentNotHarvestedException +# from ckanext.datajson.exceptions import ParentNotHarvestedException import uuid import hashlib import json @@ -465,7 +465,11 @@ def is_part_of_to_package_id(self, ipo, harvest_object): harvest_object.save() except Exception: pass - raise ParentNotHarvestedException('Unable to find parent dataset. Raising error to allow re-run later') + + # This 'raise' was constantly crashing our harvesting process. + # To better accommodate our current infrastructure, the output + # of this function should be validated instead. + # raise ParentNotHarvestedException('Unable to find parent dataset. Raising error to allow re-run later') def import_stage(self, harvest_object): # The import stage actually creates the dataset. 
@@ -502,7 +506,10 @@ def import_stage(self, harvest_object): # check if parent is already harvested parent_identifier = parent_pkg_id.replace('IPO:', '') parent = self.is_part_of_to_package_id(parent_identifier, harvest_object) - parent_pkg_id = parent['id'] + if parent is not None: + parent_pkg_id = parent['id'] + else: + return None if extra.key.startswith('catalog_'): catalog_extras[extra.key] = extra.value diff --git a/ckanext/datajson/tests/test_datajson_ckan_all_harvester.py b/ckanext/datajson/tests/test_datajson_ckan_all_harvester.py index 5861b8f0..d1923acb 100644 --- a/ckanext/datajson/tests/test_datajson_ckan_all_harvester.py +++ b/ckanext/datajson/tests/test_datajson_ckan_all_harvester.py @@ -2,8 +2,6 @@ import json import logging -import pytest - import ckan.plugins as p import ckanext.harvest.model as harvest_model import ckanext.harvest.queue as queue @@ -11,7 +9,6 @@ from ckan import model from ckan.lib.munge import munge_title_to_name from ckanext.datajson.harvester_datajson import DataJsonHarvester -from ckanext.datajson.exceptions import ParentNotHarvestedException from .factories import HarvestJobObj, HarvestSourceObj from mock import Mock, patch @@ -382,8 +379,7 @@ def __init__(self, message): # first a child and assert to get an error r2 = json.dumps({"harvest_object_id": self.harvest_objects[1].id}) r0 = FakeMethod(r2) - with pytest.raises(ParentNotHarvestedException): - queue.fetch_callback(consumer_fetch, r0, None, r2) + queue.fetch_callback(consumer_fetch, r0, None, r2) assert self.harvest_objects[1].retry_times == 1 assert self.harvest_objects[1].state == "ERROR" @@ -391,7 +387,7 @@ def __init__(self, message): r2 = json.dumps({"harvest_object_id": self.harvest_objects[0].id}) r0 = FakeMethod(r2) queue.fetch_callback(consumer_fetch, r0, None, r2) - assert self.harvest_objects[0].retry_times == 1 + assert self.harvest_objects[0].retry_times == 0 assert self.harvest_objects[0].state == "COMPLETE" # Check status on harvest objects @@ -476,8 
+472,7 @@ def get_action(action_name): harvest_object.source = harvest_source harvester = DataJsonHarvester() - with pytest.raises(ParentNotHarvestedException): - harvester.is_part_of_to_package_id('custom-identifier', harvest_object) + assert harvester.is_part_of_to_package_id('custom-identifier', harvest_object) is None assert mock_get_action.called @@ -557,8 +552,7 @@ def get_action(action_name): mock_get_action.side_effect = get_action harvester = DataJsonHarvester() - with pytest.raises(ParentNotHarvestedException): - harvester.is_part_of_to_package_id('identifier', None) + assert harvester.is_part_of_to_package_id('identifier', None) is None def test_datajson_is_part_of_package_id(self): url = 'http://127.0.0.1:%s/collection-1-parent-2-children.data.json' % self.mock_port @@ -575,11 +569,9 @@ def test_datajson_is_part_of_package_id(self): assert dataset['title'] == 'Employee Relations Roundtables' if content['identifier'] in ['OPM-ERround-0001-AWOL', 'OPM-ERround-0001-Retire']: - with pytest.raises(ParentNotHarvestedException): - self.harvester.is_part_of_to_package_id(content['identifier'], harvest_object) + assert self.harvester.is_part_of_to_package_id(content['identifier'], harvest_object) is None - with pytest.raises(ParentNotHarvestedException): - self.harvester.is_part_of_to_package_id('bad identifier', harvest_object) + assert self.harvester.is_part_of_to_package_id('bad identifier', harvest_object) is None def test_datajson_non_federal(self): """ validate we get the coinfig we sent """