Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions harvestingkit/contrast_out.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
##
## This file is part of Harvesting Kit.
## Copyright (C) 2013, 2014 CERN.
## Copyright (C) 2013, 2014, 2016 CERN.
##
## Harvesting Kit is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -99,7 +99,9 @@ def connect(self):
try:
self.ftp = FtpHandler(self.config.ELSEVIER.URL,
self.config.ELSEVIER.LOGIN,
self.config.ELSEVIER.PASSWORD)
self.config.ELSEVIER.PASSWORD,
port = int(self.config.ELSEVIER.PORT),
sftp = self.config.ELSEVIER.SFTP)
self.logger.debug(('Successful connection to the '
'Elsevier server'))
return
Expand Down
9 changes: 4 additions & 5 deletions harvestingkit/elsevier_package.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Harvesting Kit.
# Copyright (C) 2013, 2014, 2015 CERN.
# Copyright (C) 2013, 2014, 2015, 2016 CERN.
#
# Harvesting Kit is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -38,7 +38,6 @@
from zipfile import ZipFile
from xml.dom.minidom import parse


try:
from invenio.errorlib import register_exception
except ImportError:
Expand Down Expand Up @@ -423,14 +422,14 @@ def get_abstract(self, xml_doc):

def get_keywords(self, xml_doc):
    """ Returns the non-empty keywords of an article.

    The keywords are searched under the first ``ja:head`` element,
    falling back to the first ``cja:head`` element. When the document
    has neither, every ``ce:keyword`` element of the document is used.

    :param xml_doc: the parsed XML document of the article
                    (presumably an xml.dom.minidom document -- confirm
                    against the callers).

    :returns: list with the ``ce:text`` value of every keyword,
              leaving out the keywords whose value is empty.
    """
    head = (xml_doc.getElementsByTagName("ja:head") or
            xml_doc.getElementsByTagName("cja:head"))
    scope = head[0] if head else xml_doc
    # Extract the text of each keyword once, instead of once for the
    # emptiness check and a second time for the returned value.
    texts = (get_value_in_tag(keyword, "ce:text")
             for keyword in scope.getElementsByTagName("ce:keyword"))
    return [text for text in texts if text]

def get_copyright(self, xml_doc):
Expand Down
176 changes: 130 additions & 46 deletions harvestingkit/ftp_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
##
## This file is part of Harvesting Kit.
## Copyright (C) 2014 CERN.
## Copyright (C) 2014, 2016 CERN.
##
## Harvesting Kit is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
Expand All @@ -28,6 +28,7 @@
from urlparse import urlparse
from netrc import netrc
from datetime import datetime
import paramiko


class FtpHandler(object):
Expand All @@ -44,25 +45,37 @@ class FtpHandler(object):
for authentication with the server.
:type netrc_file: string
"""
def __init__(self, server, username='', passwd='', netrc_file='',
             port=21, sftp=False):
    """ Creates the handler and connects to the server.

    :param server: URL or host name of the server.
    :type server: string
    :param username: the user name used to log in.
    :type username: string
    :param passwd: the password used to log in.
    :type passwd: string
    :param netrc_file: path to a netrc file; when given, the
                       credentials stored there for the server
                       replace username and passwd.
    :type netrc_file: string
    :param port: the port to connect to.
    :type port: int
    :param sftp: when True the connection is made over SFTP (through
                 paramiko) instead of plain FTP.
    :type sftp: bool
    """
    self.port = port
    self.sftp = sftp
    # Accept both full URLs ('ftp://host/') and bare host names.
    server = urlparse(server)
    if server.netloc:
        server = server.netloc
    elif server.path:
        server = server.path
    if self.sftp:
        # NOTE(review): the default port (21) is also used here; an
        # SFTP server normally needs the port passed explicitly.
        self._ftp = paramiko.Transport((server, self.port))
    else:
        # NOTE(review): ftplib connects right away (on its default
        # port) when a host is passed to the constructor; connect()
        # below then connects again on self.port.
        self._ftp = FTP(server)
    self._username = username
    self._passwd = passwd
    if netrc_file:
        logininfo = netrc(netrc_file).authenticators(server)
        self._username, _, self._passwd = logininfo
    self.connect()
    if self.sftp:
        # paramiko only emulates a working directory: getcwd() returns
        # None until chdir() has been called. Anchor the session on the
        # login directory so that the home folder is a real path.
        self._sftp_client.chdir('.')
        self._home = self._sftp_client.getcwd()
    else:
        self._home = self._ftp.pwd()

def connect(self):
    """ Opens the connection and authenticates against the server.

    In SFTP mode the paramiko transport is connected with the stored
    credentials and an SFTP client is created on top of it. Otherwise
    the FTP connection is opened on the configured port and the user
    is logged in.
    """
    if not self.sftp:
        self._ftp.connect(port=self.port)
        self._ftp.login(user=self._username, passwd=self._passwd)
        return
    credentials = {'username': self._username,
                   'password': self._passwd}
    self._ftp.connect(**credentials)
    self._sftp_client = paramiko.SFTPClient.from_transport(self._ftp)

def close(self):
""" Closes the connection to the server. """
Expand Down Expand Up @@ -99,13 +112,20 @@ def download(self, source_file, target_folder=''):
working directory.
:type target_folder: string
"""
current_folder = self._ftp.pwd()
if self.sftp:
current_folder = self._sftp_client.getcwd()
else:
current_folder = self._ftp.pwd()

if not target_folder.startswith('/'): # relative path
target_folder = join(getcwd(), target_folder)

folder = os.path.dirname(source_file)
self.cd(folder)
if folder:
if self.sftp:
self._sftp_client.chdir(folder)
else:
self.cd(folder)

if folder.startswith("/"):
folder = folder[1:]
Expand All @@ -118,14 +138,20 @@ def download(self, source_file, target_folder=''):
source_file = os.path.basename(source_file)
destination = join(destination_folder, source_file)
try:
with open(destination, 'wb') as result:
self._ftp.retrbinary('RETR %s' % (source_file,),
result.write)
if self.sftp:
self._sftp_client.get(source_file, destination)
else:
with open(destination, 'wb') as result:
self._ftp.retrbinary('RETR %s' % (source_file,),
result.write)
except error_perm as e: # source_file is a folder
print(e)
remove(join(target_folder, source_file))
raise
self._ftp.cwd(current_folder)
if self.sftp:
self._sftp_client.chdir(current_folder)
else:
self._ftp.cwd(current_folder)

def cd(self, folder):
    """ Changes the working directory on the server.

    An absolute path (one starting with '/') is entered in a single
    step. A relative path is entered one component at a time, and
    empty components are ignored.

    :param folder: the directory to move to.
    :type folder: string
    """
    if folder.startswith('/'):
        steps = [folder]
    else:
        steps = [part for part in folder.split('/') if part]
    for step in steps:
        if self.sftp:
            self._sftp_client.chdir(step)
        else:
            self._ftp.cwd(step)

def ls(self, folder=''):
    """ Lists the files and folders of a specific directory
    (default is the current working directory).

    The working directory on the server is the same after the call
    as it was before it.

    :param folder: the folder to be listed.
    :type folder: string

    :returns: a tuple with the list of files in the folder
              and the list of subfolders in the folder.
    """
    from stat import S_ISDIR, S_ISREG

    files = []
    folders = []
    if self.sftp:
        client = self._sftp_client
        current_folder = client.getcwd()
        client.chdir(folder or '.')
        # listdir() yields bare entry names, so they are classified
        # through their mode bits and returned untouched; they must not
        # go through the parsing used for the lines of an FTP listing.
        for name in client.listdir():
            mode = client.lstat(name).st_mode
            if mode is None:  # the server did not report a file type
                continue
            if S_ISREG(mode):
                files.append(name)
            elif S_ISDIR(mode):
                folders.append(name)
        client.chdir(current_folder)
    else:
        current_folder = self._ftp.pwd()
        self.cd(folder)
        contents = []
        self._ftp.retrlines('LIST', contents.append)
        for line in contents:
            fields = line.split()
            if not fields:  # blank line in the listing
                continue
            # The first field holds the permissions ('-' marks a file,
            # 'd' a directory); the name starts at the ninth field.
            name = ' '.join(fields[8:])
            if fields[0].startswith('-'):
                files.append(name)
            elif fields[0].startswith('d'):
                folders.append(name)
        self._ftp.cwd(current_folder)
    return files, folders

def dir(self, folder='', prefix=''):
Expand Down Expand Up @@ -192,15 +240,22 @@ def mkdir(self, folder):
:param folder: the folder to be created.
:type folder: string
"""
current_folder = self._ftp.pwd()

if self.sftp:
current_folder = self._sftp_client.getcwd()
else:
current_folder = self._ftp.pwd()
#creates the necessary folders on
#the server if they don't exist
folders = folder.split('/')
for fld in folders:
try:
self.cd(fld)
except error_perm: # folder does not exist
self._ftp.mkd(fld)
if self.sftp:
self._sftp_client.mkdir(fld)
else:
self._ftp.mkd(fld)
self.cd(fld)
self.cd(current_folder)

Expand All @@ -211,11 +266,17 @@ def rm(self, filename):
:type filename: string
"""
try:
self._ftp.delete(filename)
if self.sftp:
self._sftp_client.remove(filename)
else:
self._ftp.delete(filename)
except error_perm: # target is either a directory
# either it does not exist
try:
current_folder = self._ftp.pwd()
if self.sftp:
current_folder = self._sftp_client.getcwd()
else:
current_folder = self._ftp.pwd()
self.cd(filename)
except error_perm:
print('550 Delete operation failed %s '
Expand All @@ -232,7 +293,11 @@ def rmdir(self, foldername):
:param foldername: the folder to be deleted.
:type foldername: string
"""
current_folder = self._ftp.pwd()
if self.sftp:
current_folder = self._sftp_client.getcwd()
else:
current_folder = self._ftp.pwd()

try:
self.cd(foldername)
except error_perm:
Expand All @@ -241,16 +306,25 @@ def rmdir(self, foldername):
else:
self.cd(current_folder)
try:
self._ftp.rmd(foldername)
if self.sftp:
self._sftp_client.rmdir(foldername)
else:
self._ftp.rmd(foldername)
except error_perm: # folder not empty
self.cd(foldername)
contents = self.ls()
#delete the files
map(self._ftp.delete, contents[0])
if self.sftp:
map(self._sftp_client.remove, contents[0])
else:
map(self._ftp.delete, contents[0])
#delete the subfolders
map(self.rmdir, contents[1])
self.cd(current_folder)
self._ftp.rmd(foldername)
if self.sftp:
self._sftp_client.rmdir(foldername)
else:
self._ftp.rmd(foldername)

def get_filesize(self, filename):
    """ Returns the filesize of a file

    :param filename: the file on the server whose size is requested.
    :type filename: string

    :returns: in SFTP mode, the ``st_size`` value reported for the
              file; in FTP mode, the fifth whitespace-separated field
              of the first line of the directory listing, as a string.
    """
    if self.sftp:
        attributes = self._sftp_client.lstat(filename)
        return attributes.st_size

    sizes = []
    self._ftp.dir(filename,
                  lambda line: sizes.append(line.split()[4]))
    return sizes[0]

def get_datestamp(self, filename):
    """ Returns the last modification date of a file on the server.

    :param filename: the file on the server.
    :type filename: string

    :returns: the modification date as a 'YYYY-MM-DD' string.
    """
    if self.sftp:
        mtime = self._sftp_client.lstat(filename).st_mtime
        modified = datetime.fromtimestamp(mtime)
    else:
        reply = self._ftp.sendcmd('MDTM ' + filename)
        # The reply looks like '213 YYYYMMDDHHMMSS[.sss]': drop the
        # 4-character status prefix and any fraction of a second.
        stamp = reply[4:].strip()[:14]
        modified = datetime.strptime(stamp, "%Y%m%d%H%M%S")
    # '%m' is the month; the FTP branch used to format with '%M'
    # (minutes), which produced dates such as '2016-34-15'.
    return modified.strftime("%Y-%m-%d")

def check_pkgs_integrity(self, filelist, logger,
timeout=120, sleep_time=10):
Expand Down Expand Up @@ -324,11 +402,17 @@ def upload(self, filename, location=''):
be stored.
:type location: string
"""
current_folder = self._ftp.pwd()
if self.sftp:
current_folder = self._sftp_client.getcwd()
else:
current_folder = self._ftp.pwd()
self.mkdir(location)
self.cd(location)
fl = open(filename, 'rb')
filename = filename.split('/')[-1]
self._ftp.storbinary('STOR %s' % filename, fl)
fl.close()
if self.sftp:
self._sftp_client.put(filename, location)
else:
fl = open(filename, 'rb')
filename = filename.split('/')[-1]
self._ftp.storbinary('STOR %s' % filename, fl)
fl.close()
self.cd(current_folder)
2 changes: 2 additions & 0 deletions user_config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
login = empty
password = empty
url = empty
port = 21
sftp = True

[OXFORD]
login = empty
Expand Down