From e1f98a6eb3f9165bcf1a703ea8d8dda1fe27d511 Mon Sep 17 00:00:00 2001 From: Tom Boutell Date: Mon, 6 Jul 2020 08:34:09 -0400 Subject: [PATCH 1/4] beginnings of a catalog method to make implementing sync and export operations possible --- lib/storage/local.js | 59 +++++++++++++++++++++++++++++++++++++++++++ lib/storage/s3.js | 60 ++++++++++++++++++++++++++++++++++++++++++++ package.json | 3 ++- test/s3.js | 24 ++++++++++++++++++ uploadfs.js | 13 ++++++++++ 5 files changed, 158 insertions(+), 1 deletion(-) diff --git a/lib/storage/local.js b/lib/storage/local.js index 881019f..6cd8179 100644 --- a/lib/storage/local.js +++ b/lib/storage/local.js @@ -209,6 +209,65 @@ module.exports = function() { // Exported for unit testing only _testCopyFile: function(path1, path2, options, callback) { return copyFile(path1, path2, options, callback); + }, + + catalog: function(prefix, callback) { + var list = []; + return spelunk(prefix, function(err) { + if (err) { + return callback(err); + } + list.sort(function(a, b) { + if (a.path < b.path) { + return -1; + } else if (a.path > b.path) { + return 1; + } else { + return 0; + } + }); + return callback(null, list); + }); + function spelunk(folder, callback) { + return fs.readdir(uploadsPath + folder, function(err, files) { + if (err) { + return callback(err); + } + return async.eachLimit(files, 5, function(file, callback) { + var path = folder + '/' + file; + return fs.stat(uploadsPath + path, function(err, stats) { + var matches; + if (err) { + return callback(err); + } + if (stats.isDirectory()) { + return spelunk(path, callback); + } else { + if (self.options.disabledFileKey) { + matches = file.match(/^(.*)?-disabled-[0-9a-f]+$/); + if (matches && (utils.getDisabledPath(folder + '/' + matches[1]) === path)) { + list.push({ + path: folder + '/' + matches[1], + trash: true + }); + } else { + list.push({ + path: folder + '/' + file, + trash: false + }); + } + } else { + list.push({ + path: folder + '/' + file, + trash: (stats.mode & parseInt('777', 8)) === self.options.disablePermissions + }); + } + return callback(null); + } + }); + }, callback); + }); + } } }; diff --git a/lib/storage/s3.js b/lib/storage/s3.js index 87fbc62..dc37c24 100644 --- a/lib/storage/s3.js +++ b/lib/storage/s3.js @@ -7,6 +7,8 @@ var fs = require('fs'); var AWS = require('aws-sdk'); var extname = require('path').extname; var _ = require('lodash'); +var regexpQuote = require('regexp-quote'); +var async = require('async'); module.exports = function() { var contentTypes; @@ -172,6 +174,59 @@ module.exports = function() { }, callback); }, + catalog: function(prefix, callback) { + var continuationToken; + var list = []; + return pass(); + function pass() { + var args = { + Bucket: bucket + }; + if (prefix) { + args.Prefix = prefix; + } + if (continuationToken) { + args.ContinuationToken = continuationToken; + } + return client.listObjectsV2(args, function(err, data) { + if (err) { + return callback(err); + } + list = list.concat(data.Contents.map(function(item) { + return { + path: item.Key.replace(new RegExp('^' + regexpQuote(prefix)), ''), + size: item.Size, + updatedAt: item.LastModified + }; + })); + if (data.IsTruncated) { + return pass(); + } + // Boo, this is slow, we should switch to supporting disabledFileKey + // for s3 so we can avoid this + return async.eachLimit(list, 5, function(file, callback) { + return client.getObjectAcl({ + Bucket: bucket, + Key: prefixPath(prefix, file.path) + }, function(err, acl) { + if (err) { + return callback(err); + } + file.disabled = !_.find(acl.Grants || [], function(grant) { + return grant.Grantee && (grant.Grantee.URI === 'http://acs.amazonaws.com/groups/global/AllUsers') && (grant.Permission === 'READ'); + }); + return callback(null); + }); + }, function(err) { + if (err) { + return callback(err); + } + return callback(null, list); + }); + }); + } + }, + getUrl: function (path) { noProtoEndpoint = endpoint.replace(/^https?:\/\//i, ""); if (pathStyle) { @@ -196,3 +251,8 @@ module.exports = function() { function cleanKey(key) { return key.replace(/^\//, ''); } + +function prefixPath(prefix, path) { + // Resolve any double // that results from the prefix + return (prefix + path).replace(/\/\//g, '/'); +} diff --git a/package.json b/package.json index f911129..c63d839 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "jimp": "^0.9.6", "lodash": "^4.0.0", "mkdirp": "~0.3.4", + "regexp-quote": "0.0.0", "request": "^2.88.2", "rimraf": "^2.0.2", "xml2js": "^0.4.23" @@ -49,4 +50,4 @@ "request-promise": "^4.2.5", "stat-mode": "^0.2.2" } -} \ No newline at end of file +} diff --git a/test/s3.js b/test/s3.js index 310a1c4..da648e4 100644 --- a/test/s3.js +++ b/test/s3.js @@ -1,6 +1,7 @@ /* global describe, it */ const assert = require('assert'); const request = require('request'); +const _ = require('lodash'); describe('UploadFS S3', function () { this.timeout(50000); @@ -69,6 +70,16 @@ describe('UploadFS S3', function () { }); }); + it('catalog should work', function (done) { + return uploadfs.catalog(function(e, list) { + assert(!e); + assert(_.find(list, function(file) { + return (file.path === dstPath) && (!file.disabled); + })); + done(); + }); + }); + it('S3 CopyOut should work', done => { const cpOutPath = 'copy-out-test.txt'; return uploadfs.copyOut(dstPath, cpOutPath, e => { @@ -97,6 +108,19 @@ describe('UploadFS S3', function () { cb(null); }); }, + catalog: cb => { + uploadfs.catalog((e, list) => { + if (e) { + return cb(e); + } + if (!_.find(list, function(item) { + return (item.path === dstPath) && item.disabled + })) { + return cb('catalog does not show test.txt as disabled'); + } + return cb(null); + }); + }, enable: cb => { uploadfs.enable(dstPath, e => { assert(!e, 'uploadfs enable should not fail'); diff --git a/uploadfs.js b/uploadfs.js index a60e2cb..6578821 100644 --- a/uploadfs.js +++ b/uploadfs.js @@ -425,6 +425,19 @@ function Uploadfs() { return self._storage.disable(path, callback); }; + /** + * + * Callback receives `(null, list)` where `list` is an array + * of all files with `path` and `trash` properties. This + * method respects the `prefix` option, so only files matching + * the prefix are included. + * @param {*} path + * @param {*} callback + */ + self.catalog = function (callback) { + return self._storage.catalog(self.prefix, callback); + }; + /** * Identify a local image file. Normally you don't need to call * this yourself, it is mostly used by copyImageIn. But you may find it From 31b9524da5bfecb58ec8308c33de26590beef98d Mon Sep 17 00:00:00 2001 From: Tom Boutell Date: Mon, 13 Jul 2020 10:06:13 -0400 Subject: [PATCH 2/4] catalog and catalog tests pretty far down the road for local and azure in addition to s3, but local needs a disabledFileKey catalog test, and azure and s3 need tests exercising the continuation tokens code --- lib/storage/azure.js | 38 ++++++++++++++++++++++++++++++++++++++ lib/storage/local.js | 6 +++--- package.json | 10 +++++----- test/azure.js | 23 ++++++++++++++++++++--- test/local.js | 24 +++++++++++++++++++++++- 5 files changed, 89 insertions(+), 12 deletions(-) diff --git a/lib/storage/azure.js b/lib/storage/azure.js index bd96d92..61b43fa 100644 --- a/lib/storage/azure.js +++ b/lib/storage/azure.js @@ -8,6 +8,7 @@ var utils = require('../utils.js'); var defaultGzipBlacklist = require('../../defaultGzipBlacklist'); var verbose = false; var _ = require('lodash'); +var regexpQuote = require('regexp-quote'); var DEFAULT_MAX_AGE_IN_SECONDS = 500; var DEFAULT_MAX_CACHE = 2628000; @@ -443,6 +444,43 @@ module.exports = function() { }); }, + catalog: function(prefix, callback) { + var continuationToken; + var list = []; + return pass(); + function pass() { + const blobSvc = self.blobSvcs[0]; + return blobSvc.svc.listBlobsSegmentedWithPrefix(blobSvc.container, prefix, continuationToken, function(err, data) { + if (err) { + return callback(err); + } + data.entries.forEach(function(item) { + var matches = item.name.match(/^(.*)?-disabled-[0-9a-f]+$/); + if (matches && (utils.getDisabledPath(matches[1], self.options.disabledFileKey) === item.name)) { + list.push({ + path: matches[1].replace(new RegExp('^' + regexpQuote(prefix)), ''), + size: item.contentLength, + disabled: true, + updatedAt: item.lastModified + }); + } else { + list.push({ + path: item.name.replace(new RegExp('^' + regexpQuote(prefix)), ''), + size: item.contentLength, + updatedAt: item.lastModified, + disabled: false + }); + } + }); + if (data.continuationToken) { + continuationToken = data.continuationToken; + return pass(); + } + return callback(null, list); + }); + } + }, + getUrl: function(path) { var blob = self.blobSvcs[0]; var url = blob.svc.getUrl(blob.container, path); diff --git a/lib/storage/local.js b/lib/storage/local.js index 6cd8179..92a0c9b 100644 --- a/lib/storage/local.js +++ b/lib/storage/local.js @@ -248,18 +248,18 @@ module.exports = function() { if (matches && (utils.getDisabledPath(folder + '/' + matches[1]) === path)) { list.push({ path: folder + '/' + matches[1], - trash: true + disabled: true }); } else { list.push({ path: folder + '/' + file, - trash: false + disabled: false }); } } else { list.push({ path: folder + '/' + file, - trash: (stats.mode & parseInt('777', 8)) === self.options.disablePermissions + disabled: (stats.mode & parseInt('777', 8)) === self.getDisablePermissions() }); } return callback(null); diff --git a/package.json b/package.json index c63d839..feed6db 100644 --- a/package.json +++ b/package.json @@ -21,17 +21,17 @@ "author": "Apostrophe Technologies, Inc.", "license": "MIT", "dependencies": { - "@google-cloud/storage": "^4.6.0", + "@google-cloud/storage": "^4.7.0", "async": "^1.0.0", - "aws-sdk": "^2.645.0", + "aws-sdk": "^2.713.0", "azure-storage": "^2.8.2", "bluebird": "^3.7.2", "es6-promise": "^4.1.0", "fs-extra": "^5.0.0", "gm": "^1.9.0", "gzipme": "^0.1.1", - "jimp": "^0.9.6", - "lodash": "^4.0.0", + "jimp": "^0.9.8", + "lodash": "^4.17.19", "mkdirp": "~0.3.4", "regexp-quote": "0.0.0", "request": "^2.88.2", @@ -42,7 +42,7 @@ "eslint": "^4.19.1", "eslint-config-punkave": "^1.0.10", "eslint-config-standard": "^11.0.0-beta.0", - "eslint-plugin-import": "^2.20.1", + "eslint-plugin-import": "^2.22.0", "eslint-plugin-node": "^5.2.1", "eslint-plugin-promise": "^3.7.0", "eslint-plugin-standard": "^3.1.0", diff --git a/test/azure.js b/test/azure.js index 33e538d..eda2f25 100644 --- a/test/azure.js +++ b/test/azure.js @@ -91,6 +91,16 @@ describe('UploadFS Azure', function() { }); }); + it('catalog should work', function (done) { + return uploadfs.catalog(function(e, list) { + assert(!e); + assert(_.find(list, function(file) { + return (file.path === infile) && (!file.disabled); + })); + done(); + }); + }); + it('Azure test copyOut should work', function(done) { _getOutfile(infile, done); }); @@ -108,9 +118,6 @@ describe('UploadFS Azure', function() { it('Azure test copyOut after disable should fail', function(done) { setTimeout(function() { uploadfs.copyOut(infile, 'foo.bar', {}, function(e, res) { - if (e) { - console.log("error", e); - } assert(e); assert(e.name === 'StorageError'); assert(e.message === 'NotFound'); @@ -119,6 +126,16 @@ describe('UploadFS Azure', function() { }, 5000); }); + it('catalog should show file as disabled', function (done) { + return uploadfs.catalog(function(e, list) { + assert(!e); + assert(_.find(list, function(file) { + return (file.path === infile) && (file.disabled); + })); + done(); + }); + }); + it('Azure enable should work', function(done) { uploadfs.enable(infile, function(e, val) { if (e) { diff --git a/test/local.js b/test/local.js index 2622e21..e8f58bc 100644 --- a/test/local.js +++ b/test/local.js @@ -1,6 +1,7 @@ /* global describe, it */ var Mode = require('stat-mode'); var assert = require('assert'); +var _ = require('lodash'); describe('UploadFS Local', function () { this.timeout(4500); @@ -49,6 +50,16 @@ describe('UploadFS Local', function () { }); }); + it('catalog should work', function (done) { + return uploadfs.catalog(function(e, list) { + assert(!e); + assert(_.find(list, function(file) { + return (file.path === '/test_copy.txt') && (!file.disabled); + })); + done(); + }); + }); + it('copyOut should work for local filesystem', done => { return uploadfs.copyOut('/test_copy.txt', 'copy-out-test.txt', e => { assert(!e); @@ -91,7 +102,7 @@ describe('UploadFS Local', function () { return async.series({ disable: cb => { - assert(fs.existsSync(infile), 'copyIn file exissts'); + assert(fs.existsSync(infile), 'copyIn file exists'); uploadfs.disable(srcFile, e => { var stats = fs.statSync(infile); @@ -101,6 +112,17 @@ describe('UploadFS Local', function () { return cb(null); }); }, + catalog: cb => { + return uploadfs.catalog(function(e, list) { + if (e) { + return cb(e); + } + assert(_.find(list, function(file) { + return (file.path === '/test_copy.txt') && (file.disabled); + })); + return cb(null); + }); + }, enable: cb => { uploadfs.enable(srcFile, e => { var stats = fs.statSync(infile); From fadaf377644fa36f6e9e0f5d4f398873e70718b7 Mon Sep 17 00:00:00 2001 From: Tom Boutell Date: Tue, 14 Jul 2020 14:35:07 -0400 Subject: [PATCH 3/4] * catalog works in gcs * removed bogus path argument from getUrl, it never had a function and is not documented --- .gitignore | 1 + lib/storage/azure.js | 4 +-- lib/storage/gcs.js | 58 +++++++++++++++++++++++++++++++++++++++++--- lib/storage/s3.js | 2 +- test/gcs.js | 20 +++++++++++++++ 5 files changed, 79 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index b48360d..56f61ab 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ package-lock.json .jshintrc # an extra local test in my checkout test-jimp.js +gcs-credentials-uploadfstest.json diff --git a/lib/storage/azure.js b/lib/storage/azure.js index 61b43fa..1881378 100644 --- a/lib/storage/azure.js +++ b/lib/storage/azure.js @@ -481,9 +481,9 @@ module.exports = function() { } }, - getUrl: function(path) { + getUrl: function() { var blob = self.blobSvcs[0]; - var url = blob.svc.getUrl(blob.container, path); + var url = blob.svc.getUrl(blob.container, ''); return url; }, diff --git a/lib/storage/gcs.js b/lib/storage/gcs.js index aa7daae..af0a4cc 100644 --- a/lib/storage/gcs.js +++ b/lib/storage/gcs.js @@ -6,6 +6,8 @@ var storage = require('@google-cloud/storage'); var extname = require('path').extname; var _ = require('lodash'); +var regexpQuote = require('regexp-quote'); +var async = require('async'); module.exports = function() { let contentTypes; @@ -89,7 +91,16 @@ module.exports = function() { ContentType: contentType } }; - client.bucket(bucketName).upload(localPath, uploadOptions, callback); + return async.series([ + upload, + makePublic + ], callback); + function upload(callback) { + return client.bucket(bucketName).upload(localPath, uploadOptions, callback); + } + function makePublic(callback) { + return client.bucket(bucketName).file(path).makePublic(callback); + } }, copyOut: function(path, localPath, options, callback) { @@ -109,9 +120,50 @@ module.exports = function() { client.bucket(bucketName).file(path).makePrivate({}, callback); }, - getUrl: function (path) { + catalog: function(prefix, callback) { + var list = []; + var rawList = []; + return async.series([ + getList, + getDisabled + ], function(err) { + if (err) { + return callback(err); + } + return callback(null, list); + }); + function getList(callback) { + return client.bucket(bucketName).getFiles({ + prefix + }, function(err, files) { + if (err) { + return callback(err); + } + rawList = files; + return callback(null); + }); + } + function getDisabled(callback) { + return async.eachLimit(rawList, 5, function(item, callback) { + return client.bucket(bucketName).file(item.name).isPublic(function(err, p) { + if (err) { + return callback(err); + } + list.push({ + path: item.name.replace(new RegExp('^' + regexpQuote(prefix)), ''), + size: item.metadata.size, + updatedAt: item.metadata.updated, + disabled: !p + }); + return callback(null); + }); + }, callback); + } + }, + + getUrl: function () { noProtoEndpoint = endpoint.replace(/^https?:\/\//i, ""); - return (https ? 'https://' : 'http://') + bucketName + '.' + noProtoEndpoint; + return (https ? 'https://' : 'http://') + bucketName + '.' + noProtoEndpoint + '/'; }, destroy: function(callback) { diff --git a/lib/storage/s3.js b/lib/storage/s3.js index dc37c24..14dc8f4 100644 --- a/lib/storage/s3.js +++ b/lib/storage/s3.js @@ -227,7 +227,7 @@ module.exports = function() { } }, - getUrl: function (path) { + getUrl: function () { noProtoEndpoint = endpoint.replace(/^https?:\/\//i, ""); if (pathStyle) { return (https ? 'https://' : 'http://') + noProtoEndpoint + "/" + bucket; diff --git a/test/gcs.js b/test/gcs.js index e00963b..57e8f20 100644 --- a/test/gcs.js +++ b/test/gcs.js @@ -1,6 +1,7 @@ /* global describe, it */ const assert = require('assert'); const request = require('request'); +const _ = require('lodash'); describe('UploadFS GCS', function () { this.timeout(20000); @@ -54,6 +55,16 @@ describe('UploadFS GCS', function () { }); }); + it('catalog should work', function (done) { + return uploadfs.catalog(function(e, list) { + assert(!e); + assert(_.find(list, function(file) { + return (file.path === dstPath) && (!file.disabled); + })); + done(); + }); + }); + it('CopyIn file should be available via gcs', function (done) { const url = uploadfs.getUrl() + '/one/two/three/test.txt'; const og = fs.readFileSync('test.txt', 'utf8'); @@ -91,6 +102,15 @@ describe('UploadFS GCS', function () { cb(null); }); }, + catalog: cb => { + return uploadfs.catalog(function(e, list) { + assert(!e); + assert(_.find(list, function(file) { + return (file.path === dstPath) && (file.disabled); + })); + return cb(null); + }); + }, enable: cb => { uploadfs.enable(dstPath, e => { assert(!e, 'uploadfs enable should not fail'); From 20efc458c7db99833011a31f25231abcf6994a86 Mon Sep 17 00:00:00 2001 From: Tom Boutell Date: Tue, 14 Jul 2020 15:01:59 -0400 Subject: [PATCH 4/4] I'm wrecked in azure somehow after discovering and trying to fix the horrible mess with getUrl that somehow crept in --- lib/storage/azure.js | 3 +++ test/azure.js | 6 +++--- uploadfs.js | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/storage/azure.js b/lib/storage/azure.js index 1881378..88bbac0 100644 --- a/lib/storage/azure.js +++ b/lib/storage/azure.js @@ -455,6 +455,9 @@ module.exports = function() { return callback(err); } data.entries.forEach(function(item) { + if (item.name.charAt(0) !== '/') { + item.name = '/' + item.name; + } var matches = item.name.match(/^(.*)?-disabled-[0-9a-f]+$/); if (matches && (utils.getDisabledPath(matches[1], self.options.disabledFileKey) === item.name)) { list.push({ diff --git a/test/azure.js b/test/azure.js index eda2f25..5505e3b 100644 --- a/test/azure.js +++ b/test/azure.js @@ -6,7 +6,7 @@ var uploadfs = require('../uploadfs.js')(); // A JPEG is not a good default because it is exempt from GZIP so // we get less coverage. -Tom var srcFile = process.env.AZURE_TEST_FILE || 'test.txt'; -var infilePath = 'one/two/three/'; +var infilePath = '/one/two/three/'; var infile = infilePath + srcFile; var _ = require('underscore'); @@ -151,9 +151,9 @@ describe('UploadFS Azure', function() { }); it('Uploadfs should return valid web-servable url pointing to uploaded file', function() { - var url = uploadfs.getUrl(infile); + var url = uploadfs.getUrl() + infile; + console.log('**', url); var ogFile = fs.readFileSync(srcFile, {encoding: 'utf8'}); - return rp({uri: url, gzip: true}) .then(function(res) { assert(ogFile === res, "Web servable file contents equal original text file contents"); diff --git a/uploadfs.js b/uploadfs.js index 6578821..19a950c 100644 --- a/uploadfs.js +++ b/uploadfs.js @@ -373,11 +373,11 @@ function Uploadfs() { }); }; - self.getUrl = function (options, callback) { + self.getUrl = function () { if (self.cdn && self.cdn.enabled) { return self.cdn.url; } - return self._storage.getUrl(options, callback) + self.prefix; + return self._storage.getUrl() + self.prefix; }; self.remove = function (path, callback) {