Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions ckanapi/cli/dump_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""
implementation of dump_metadata cli command
"""

import sys
import gzip
import json
from datetime import datetime
import os

from ckanapi.cli.utils import compact_json, \
quiet_int_pipe, pretty_json

DEFAULT_PAGINATION = 50


def dump_metadata(ckan, arguments, pagination=DEFAULT_PAGINATION,
        stdout=None, stderr=None):
    '''
    Dump all the JSON metadata records, one compact JSON document per line.

    This is often a better choice than using dump_things with
    thing=datasets, as sites like catalog.data.gov do not support the
    package_list api.

    The package_search API is used with pagination.

    :param ckan: object exposing ``call_action(name, data_dict)``
    :param arguments: mapping of parsed cli options; ``'--output'`` (path
        or falsy) and ``'--gzip'`` (truthy to compress) are read here
    :param pagination: number of records requested per package_search call
    :param stdout: binary stream used when no ``--output`` path is given;
        defaults to the process' standard output
    :param stderr: accepted for signature parity with the other cli
        commands; nothing is written to it by this function
    :returns: 1 when a 'pipe' error was recorded, 2 when an 'interrupt'
        was recorded, otherwise None
    :raises ValueError: if pagination is smaller than 1
    '''
    if pagination < 1:
        raise ValueError("Pagination size must be greater or equal to 1")
    if stdout is None:
        # python 3 exposes the binary stream as sys.stdout.buffer
        stdout = getattr(sys.stdout, 'buffer', sys.stdout)

    # Only streams created here are closed here; a caller supplied stdout
    # is left open.
    opened_file = None
    jsonl_output = stdout
    if arguments['--output']:
        opened_file = open(arguments['--output'], 'wb')
        jsonl_output = opened_file

    try:
        gzip_stream = None
        if arguments['--gzip']:
            # An explicit mode is required: without it GzipFile falls back
            # to read mode for streams that have no ``mode`` attribute
            # (e.g. io.BytesIO) and every write fails.
            gzip_stream = gzip.GzipFile(fileobj=jsonl_output, mode='wb')
            jsonl_output = gzip_stream

        with quiet_int_pipe() as errors:
            try:
                count = 0
                while True:
                    response = ckan.call_action("package_search", dict(
                        rows=pagination, start=count, sort="id asc"))
                    records = response["results"]
                    for record in records:
                        jsonl_output.write(
                            compact_json(record, sort_keys=True) + b'\n')
                    count += len(records)
                    # An empty page means no further progress is possible,
                    # even if the reported total has not been reached;
                    # stop instead of requesting the same page forever.
                    if not records or count >= response["count"]:
                        break
            finally:
                # Closing writes the gzip trailer. It happens inside the
                # quiet_int_pipe context so an error raised while flushing
                # goes through the same handling as the writes above
                # (presumably broken pipes are recorded there - confirm
                # against ckanapi.cli.utils).
                if gzip_stream is not None:
                    gzip_stream.close()
    finally:
        if opened_file is not None:
            opened_file.close()

    if 'pipe' in errors:
        return 1
    if 'interrupt' in errors:
        return 2

8 changes: 8 additions & 0 deletions ckanapi/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
(ID_OR_NAME ... | --all) ([-O JSONL_OUTPUT] | [-D DIRECTORY])
[-p PROCESSES] [-qwz]
[[-c CONFIG] [-u USER] | -r SITE_URL [-a APIKEY] [-g]]
ckanapi dump_datasets2
[-O JSONL_OUTPUT]
[-z]
[[-c CONFIG] [-u USER] | -r SITE_URL [-a APIKEY] [-g]]
ckanapi delete (datasets | groups | organizations | users | related)
(ID_OR_NAME ... | [-I JSONL_INPUT] [-s START] [-m MAX])
[-p PROCESSES] [-l LOG_FILE] [-qwz]
Expand Down Expand Up @@ -77,6 +81,7 @@
from ckanapi.cli.dump import dump_things
from ckanapi.cli.delete import delete_things
from ckanapi.cli.action import action
from ckanapi.cli.dump_metadata import dump_metadata


def parse_arguments():
Expand Down Expand Up @@ -130,6 +135,9 @@ def main(running_with_paster=False):
if arguments['delete']:
return delete_things(ckan, thing[0], arguments)

if arguments['dump_datasets2']:
return dump_metadata(ckan, arguments)

assert 0, arguments # we shouldn't be here


Expand Down
67 changes: 67 additions & 0 deletions ckanapi/tests/test_cli_dump_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from ckanapi.cli.dump_metadata import dump_metadata
from ckanapi.errors import NotFound
import json
import tempfile
import shutil
from os.path import exists

try:
import unittest2 as unittest
except ImportError:
import unittest
from io import BytesIO


class MockCKAN(object):
    """
    Minimal stand-in for a CKAN client that serves a fixed list of
    dataset records through a paginated call_action.
    """

    def __init__(self):
        # Fixture records; the last one also carries a resources list.
        self.data = [
            {'id': '12',
             'name': 'twelve',
             'title': "Twelve"},
            {'id': '34',
             'name': 'thirtyfour',
             'title': "Thirty-four"},
            {'id': '56',
             'name': 'fiftysix',
             'title': "Fifty-Six"},
            {'id': '67',
             'name': 'sixtyseven',
             'title': "Sixty-Seven"},
            {'id': 'dp',
             'name': 'dp',
             'title': 'Test for datapackage',
             'resources': [{'name': 'resource1',
                            'format': 'html',
                            'url': 'http://example.com/test-file'}]},
        ]

    def call_action(self, name, kwargs):
        """
        Return one page of the fixture records.

        :param name: action name; ignored, every action gets the same reply
        :param kwargs: dict with integer "start" (offset) and "rows"
            (page size) entries
        :returns: dict with the total record "count" and the "results"
            slice for the requested page
        """
        start = kwargs["start"]
        rows = kwargs["rows"]
        # The total is derived from the fixture so it cannot drift out of
        # sync when records are added or removed.
        return {"count": len(self.data),
                "results": self.data[start:start + rows]}


class TestCLIDumpMetadata(unittest.TestCase):
    """Exercise dump_metadata against the in-memory MockCKAN client."""

    def setUp(self):
        self.ckan = MockCKAN()
        # Binary buffers stand in for the process' stdout/stderr.
        self.stdout = BytesIO()
        self.stderr = BytesIO()

    def test_dump_metadata(self):
        """All five records are written as JSON lines across several pages."""
        arguments = {
            '--ckan-user': None,
            '--config': None,
            '--remote': None,
            '--apikey': None,
            '--output': None,
            '--gzip': False,
        }
        # A page size of 2 forces three package_search calls for 5 records.
        dump_metadata(self.ckan, arguments, 2,
                      stdout=self.stdout,
                      stderr=self.stderr)
        output = self.stdout.getvalue().decode("UTF-8")
        self.assertEqual(output.count("\n"), 5)
        # assertIn reports the actual output on failure, unlike
        # assertTrue(x in y).
        self.assertIn('{"id":"12","name":"twelve","title":"Twelve"}', output)
        self.assertIn(
            '{"id":"34","name":"thirtyfour","title":"Thirty-four"}', output)
        self.assertIn('id":"dp"', output)