Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions ckanext/datajson/datajson.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from ckanext.harvest.model import HarvestObject, HarvestObjectError, HarvestObjectExtra
from ckanext.harvest.harvesters.base import HarvesterBase
from ckanext.datajson.exceptions import ParentNotHarvestedException
# from ckanext.datajson.exceptions import ParentNotHarvestedException
import uuid
import hashlib
import json
Expand Down Expand Up @@ -465,7 +465,11 @@ def is_part_of_to_package_id(self, ipo, harvest_object):
harvest_object.save()
except Exception:
pass
raise ParentNotHarvestedException('Unable to find parent dataset. Raising error to allow re-run later')

# This 'raise' was constantly crashing our harvesting process.
# To better accommodate our current infrastructure, the output
# of this function should be validated instead.
# raise ParentNotHarvestedException('Unable to find parent dataset. Raising error to allow re-run later')

def import_stage(self, harvest_object):
# The import stage actually creates the dataset.
Expand Down Expand Up @@ -502,7 +506,10 @@ def import_stage(self, harvest_object):
# check if parent is already harvested
parent_identifier = parent_pkg_id.replace('IPO:', '')
parent = self.is_part_of_to_package_id(parent_identifier, harvest_object)
parent_pkg_id = parent['id']
if parent is not None:
parent_pkg_id = parent['id']
else:
return None

if extra.key.startswith('catalog_'):
catalog_extras[extra.key] = extra.value
Expand Down
20 changes: 6 additions & 14 deletions ckanext/datajson/tests/test_datajson_ckan_all_harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,13 @@
import json
import logging

import pytest

import ckan.plugins as p
import ckanext.harvest.model as harvest_model
import ckanext.harvest.queue as queue
from . import mock_datajson_source
from ckan import model
from ckan.lib.munge import munge_title_to_name
from ckanext.datajson.harvester_datajson import DataJsonHarvester
from ckanext.datajson.exceptions import ParentNotHarvestedException
from .factories import HarvestJobObj, HarvestSourceObj
from mock import Mock, patch

Expand Down Expand Up @@ -382,16 +379,15 @@ def __init__(self, message):
# first a child and assert to get an error
r2 = json.dumps({"harvest_object_id": self.harvest_objects[1].id})
r0 = FakeMethod(r2)
with pytest.raises(ParentNotHarvestedException):
queue.fetch_callback(consumer_fetch, r0, None, r2)
queue.fetch_callback(consumer_fetch, r0, None, r2)
assert self.harvest_objects[1].retry_times == 1
assert self.harvest_objects[1].state == "ERROR"

# run the parent later, like in a different queue
r2 = json.dumps({"harvest_object_id": self.harvest_objects[0].id})
r0 = FakeMethod(r2)
queue.fetch_callback(consumer_fetch, r0, None, r2)
assert self.harvest_objects[0].retry_times == 1
assert self.harvest_objects[0].retry_times == 0
assert self.harvest_objects[0].state == "COMPLETE"

# Check status on harvest objects
Expand Down Expand Up @@ -476,8 +472,7 @@ def get_action(action_name):
harvest_object.source = harvest_source

harvester = DataJsonHarvester()
with pytest.raises(ParentNotHarvestedException):
harvester.is_part_of_to_package_id('custom-identifier', harvest_object)
assert harvester.is_part_of_to_package_id('custom-identifier', harvest_object) is None

assert mock_get_action.called

Expand Down Expand Up @@ -557,8 +552,7 @@ def get_action(action_name):
mock_get_action.side_effect = get_action

harvester = DataJsonHarvester()
with pytest.raises(ParentNotHarvestedException):
harvester.is_part_of_to_package_id('identifier', None)
assert harvester.is_part_of_to_package_id('identifier', None) is None

def test_datajson_is_part_of_package_id(self):
url = 'http://127.0.0.1:%s/collection-1-parent-2-children.data.json' % self.mock_port
Expand All @@ -575,11 +569,9 @@ def test_datajson_is_part_of_package_id(self):
assert dataset['title'] == 'Employee Relations Roundtables'

if content['identifier'] in ['OPM-ERround-0001-AWOL', 'OPM-ERround-0001-Retire']:
with pytest.raises(ParentNotHarvestedException):
self.harvester.is_part_of_to_package_id(content['identifier'], harvest_object)
assert self.harvester.is_part_of_to_package_id(content['identifier'], harvest_object) is None

with pytest.raises(ParentNotHarvestedException):
self.harvester.is_part_of_to_package_id('bad identifier', harvest_object)
assert self.harvester.is_part_of_to_package_id('bad identifier', harvest_object) is None

def test_datajson_non_federal(self):
""" validate we get the config we sent """
Expand Down