-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathbkciuploader.py
More file actions
105 lines (93 loc) · 4.23 KB
/
bkciuploader.py
File metadata and controls
105 lines (93 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
"""
Retrieve and disseminate files and metadata to Clarivate Web of Science Book Citation Index (BKCI)
"""
import csv
import logging
import sys
from datetime import datetime, timezone
from ftplib import FTP, all_errors, error_perm
from io import BytesIO, TextIOWrapper

from errors import DisseminationError
from uploader import Uploader
class BKCIUploader(Uploader):
    """Dissemination logic for Clarivate Web of Science Book Citation Index (BKCI)"""

    def upload_to_platform(self):
        """
        Upload work in required format to Clarivate Web of Science Book Citation Index (BKCI).

        Content required: PDF work file
        Metadata required: CSV containing at minimum (1) titles provided, (2) ISBNs,
                           (3) file name per title, (4) publication date
        Naming convention: Not strictly controlled, but use simple/human-readable names e.g. ISBN
        Upload directory: per-upload subfolder (e.g. datestamped) within `INCOMING-BOOKS` folder

        Every failure (missing credentials, connection, folder creation, upload)
        is logged and terminates the process with exit status 1.
        """
        # Check that BKCI credentials have been provided for this publisher
        publisher_id = self.get_publisher_id()
        env_suffix = publisher_id.replace('-', '_')
        try:
            user = self.get_variable_from_env('bkci_ftp_user_' + env_suffix, 'BKCI')
            passwd = self.get_variable_from_env('bkci_ftp_pw_' + env_suffix, 'BKCI')
        except DisseminationError as error:
            logging.error(error)
            sys.exit(1)

        # Gather everything to be uploaded before opening the connection
        metadata_csv = self.parse_metadata()
        filename = self.get_isbn('PDF')
        # UTC timestamp doubles as the per-upload folder name and the CSV file name
        folder_name = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
        pdf = self.get_publication_details('PDF')

        # Connect and log in separately from the upload steps so that the
        # "could not connect" message is only emitted for genuine connection or
        # login failures. `all_errors` covers FTP reply errors as well as the
        # OSError/EOFError raised for DNS, refused-connection and timeout
        # problems, which `error_perm` alone does not.
        try:
            ftp = FTP(
                host='ftp.isinet.com',
                user=user,
                passwd=passwd,
            )
        except all_errors as error:
            logging.error(
                'Could not connect to BKCI FTP server: {}'.format(error))
            sys.exit(1)

        with ftp:
            # ftplib signals a missing/inaccessible directory with an FTP error
            # reply (never FileNotFoundError), so catch ftplib's own errors
            try:
                ftp.cwd('INCOMING-BOOKS')
            except all_errors:
                logging.error(
                    'Could not find folder "INCOMING-BOOKS" on BKCI FTP server')
                sys.exit(1)

            try:
                ftp.mkd(folder_name)
            except all_errors:
                logging.error(
                    'Could not create folder "{}" on BKCI FTP server'.format(folder_name))
                sys.exit(1)

            try:
                ftp.cwd(folder_name)
            except all_errors:
                logging.error(
                    'Could not find folder "{}" on BKCI FTP server'.format(folder_name))
                sys.exit(1)

            try:
                ftp.storbinary(
                    'STOR {}{}'.format(filename, pdf.file_ext), BytesIO(pdf.bytes))
            except all_errors as error:
                logging.error(
                    'Error uploading PDF to BKCI FTP server: {}'.format(error))
                sys.exit(1)

            try:
                ftp.storbinary('STOR {}.csv'.format(folder_name), metadata_csv)
            except all_errors as error:
                logging.error(
                    'Error uploading metadata to BKCI FTP server: {}'.format(error))
                # Avoid deleting any partially-uploaded items (as is done in other
                # workflows) because instructions forbid making changes post-upload
                sys.exit(1)

        logging.info('Successfully uploaded to BKCI FTP server')

    def parse_metadata(self):
        """
        Convert work metadata into Clarivate Web of Science Book Citation Index (BKCI) format

        Returns a BytesIO, positioned at its start, holding a UTF-8 encoded CSV
        with a header row and a single data row: title, PDF ISBN, publication
        date and PDF file name (`<ISBN>.pdf`).
        """
        title = self.get_title()
        isbn = self.get_isbn('PDF')
        filename = '{}.pdf'.format(isbn)
        pub_date = self.metadata.get('data').get('work').get('publicationDate')

        metadata_bytes = BytesIO()
        # Explicit encoding so non-ASCII titles do not depend on the host locale;
        # newline='' leaves line endings entirely under the csv writer's control
        metadata_text = TextIOWrapper(metadata_bytes, encoding='utf-8', newline='')
        # csv.writer quotes fields containing commas, quotes or newlines, which
        # plain string formatting would turn into a malformed row
        writer = csv.writer(metadata_text, lineterminator='\n')
        writer.writerow(['Title', 'ISBN', 'Publication date', 'Filename'])
        # NOTE: a missing publication date (None) is written as an empty field
        writer.writerow([title, isbn, pub_date, filename])
        # detach() flushes pending text and releases the BytesIO so that it is
        # not closed when the wrapper is garbage-collected
        metadata_text.detach()
        metadata_bytes.seek(0)
        return metadata_bytes