From fd28ba8ff7e09a7987835e93db0d1210097e56cd Mon Sep 17 00:00:00 2001 From: Diogo Silva Date: Wed, 5 Oct 2016 00:18:19 -0300 Subject: [PATCH 1/3] url to download file from inside the DarwinCore-Archive --- .../ipt/action/portal/ResourceFileAction.java | 61 +++++++++++++++++-- .../java/org/gbif/ipt/config/Constants.java | 1 + src/main/resources/struts-portal.xml | 8 +++ 3 files changed, 65 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java b/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java index efa791af70..fe1f3ac3a9 100644 --- a/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java +++ b/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java @@ -1,5 +1,11 @@ package org.gbif.ipt.action.portal; +import com.google.common.base.Strings; +import com.google.inject.Inject; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.lang3.StringUtils; +import org.apache.log4j.Logger; import org.gbif.ipt.config.AppConfig; import org.gbif.ipt.config.Constants; import org.gbif.ipt.config.DataDir; @@ -13,11 +19,7 @@ import java.io.FileNotFoundException; import java.io.InputStream; import java.math.BigDecimal; - -import com.google.common.base.Strings; -import com.google.inject.Inject; -import org.apache.commons.lang3.StringUtils; -import org.apache.log4j.Logger; +import java.util.Enumeration; /** * The Action responsible for serving datadir resource files. @@ -42,6 +44,55 @@ public ResourceFileAction(SimpleTextProvider textProvider, AppConfig cfg, Regist this.dataDir = dataDir; } + /** + * Handles DwC-A internal file download request. 
+ * + * @return Struts2 result string + */ + public String dwcaIn() { + if (resource == null) { + return NOT_FOUND; + } + String internalFilename = StringUtils.trimToNull(req.getParameter(Constants.REQ_PARAM_FILE)); + + // if no specific version is requested, use the latest published version + if (version == null) { + BigDecimal latestVersion = resource.getLastPublishedVersionsVersion(); + if (latestVersion == null) { + return NOT_FOUND; + } else { + version = latestVersion; + } + } + + // serve file + File dwcaFile = dataDir.resourceDwcaFile(resource.getShortname(), version); + try { + ZipFile dwcaZip = new ZipFile(dwcaFile); + ZipArchiveEntry entry = dwcaZip.getEntry(internalFilename); + if(entry != null) { + inputStream = dwcaZip.getInputStream(entry); + } else { + return NOT_FOUND; + } + }catch(Exception e) { + LOG.warn("failed to get internal file", e); + return ERROR; + } + + // construct download filename + StringBuilder sb = new StringBuilder(); + sb.append("dwca-" + resource.getShortname()); + if (version != null) { + sb.append("-v" + version.toPlainString()); + } + sb.append("-"+internalFilename); + filename = sb.toString(); + + mimeType = "text/plain"; + return SUCCESS; + } + /** * Handles DwC-A file download request. The method checks if the request is a conditional get with If-Modified-Since * header. If the If-Modified-Since date is greater than the last published date, the NOT_MODIFIED string is returned. 
diff --git a/src/main/java/org/gbif/ipt/config/Constants.java b/src/main/java/org/gbif/ipt/config/Constants.java index ba39acde69..0d7231632e 100644 --- a/src/main/java/org/gbif/ipt/config/Constants.java +++ b/src/main/java/org/gbif/ipt/config/Constants.java @@ -19,6 +19,7 @@ public final class Constants { public static final String REQ_PATH_DWCA = "archive.do"; public static final String REQ_PATH_LOGO = "logo.do"; public static final String REQ_PARAM_RESOURCE = "r"; + public static final String REQ_PARAM_FILE = "f"; public static final String REQ_PARAM_ID = "id"; public static final String REQ_PARAM_SOURCE = "s"; public static final String REQ_PARAM_VERSION = "v"; diff --git a/src/main/resources/struts-portal.xml b/src/main/resources/struts-portal.xml index 99fe3de2fd..b40cde4e44 100644 --- a/src/main/resources/struts-portal.xml +++ b/src/main/resources/struts-portal.xml @@ -88,6 +88,14 @@ 1024 + + + ${mimeType} + inputStream + filename="${filename}" + 1024 + + true From ed0b4eb01b8688277c2815282127128ff9bffad8 Mon Sep 17 00:00:00 2001 From: Diogo Silva Date: Wed, 5 Oct 2016 02:42:57 -0300 Subject: [PATCH 2/3] Can download dwca internal file compressed --- .../java/org/gbif/ipt/action/BaseAction.java | 9 +++- .../ipt/action/portal/ResourceFileAction.java | 41 +++++++++++++++++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/gbif/ipt/action/BaseAction.java b/src/main/java/org/gbif/ipt/action/BaseAction.java index cef74075e1..656c156983 100644 --- a/src/main/java/org/gbif/ipt/action/BaseAction.java +++ b/src/main/java/org/gbif/ipt/action/BaseAction.java @@ -16,6 +16,7 @@ import java.util.ResourceBundle; import javax.annotation.Nullable; import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; import com.google.common.base.Strings; import com.google.inject.Inject; @@ -25,13 +26,14 @@ import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; import 
org.apache.struts2.interceptor.ServletRequestAware; +import org.apache.struts2.interceptor.ServletResponseAware; import org.apache.struts2.interceptor.SessionAware; /** * The base of all IPT actions. This handles conditions such as menu items, a custom text provider, sessions, currently * logged in user, and hosting organization information. */ -public class BaseAction extends ActionSupport implements SessionAware, Preparable, ServletRequestAware { +public class BaseAction extends ActionSupport implements SessionAware, Preparable, ServletRequestAware , ServletResponseAware { // logging private static final Logger LOG = Logger.getLogger(BaseAction.class); @@ -48,6 +50,7 @@ public class BaseAction extends ActionSupport implements SessionAware, Preparabl protected List warnings = new ArrayList(); protected Map session; protected HttpServletRequest req; + protected HttpServletResponse res; // a generic identifier for loading an object BEFORE the param interceptor sets values protected String id; @@ -264,6 +267,10 @@ public void setServletRequest(HttpServletRequest req) { this.req = req; } + public void setServletResponse(HttpServletResponse res) { + this.res = res; + } + public void setSession(Map session) { this.session = session; // always keep sth in the session otherwise the session is not maintained and e.g. 
the message redirect interceptor diff --git a/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java b/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java index fe1f3ac3a9..39fafa3d27 100644 --- a/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java +++ b/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java @@ -4,6 +4,9 @@ import com.google.inject.Inject; import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; import org.gbif.ipt.config.AppConfig; @@ -20,6 +23,9 @@ import java.io.InputStream; import java.math.BigDecimal; import java.util.Enumeration; +import java.util.zip.GZIPOutputStream; + +import javax.servlet.ServletOutputStream; /** * The Action responsible for serving datadir resource files. 
@@ -55,6 +61,10 @@ public String dwcaIn() { } String internalFilename = StringUtils.trimToNull(req.getParameter(Constants.REQ_PARAM_FILE)); + if(internalFilename == null || internalFilename.trim().isEmpty()) { + return NOT_FOUND; + } + // if no specific version is requested, use the latest published version if (version == null) { BigDecimal latestVersion = resource.getLastPublishedVersionsVersion(); @@ -65,13 +75,18 @@ public String dwcaIn() { } } + boolean deflate = internalFilename.endsWith(".gz"); + if(deflate) { + internalFilename = internalFilename.replace(".gz",""); + } + // serve file File dwcaFile = dataDir.resourceDwcaFile(resource.getShortname(), version); try { ZipFile dwcaZip = new ZipFile(dwcaFile); ZipArchiveEntry entry = dwcaZip.getEntry(internalFilename); if(entry != null) { - inputStream = dwcaZip.getInputStream(entry); + inputStream = dwcaZip.getInputStream(entry); } else { return NOT_FOUND; } @@ -87,10 +102,30 @@ public String dwcaIn() { sb.append("-v" + version.toPlainString()); } sb.append("-"+internalFilename); + if(deflate) sb.append(".gz"); filename = sb.toString(); - mimeType = "text/plain"; - return SUCCESS; + if(deflate){ + try { + res.setContentType("application/x-gzip"); + res.addHeader("Content-Disposition", "attachment; filename=\""+filename+"\""); + + ServletOutputStream sos = res.getOutputStream(); + GZIPOutputStream zos = new GZIPOutputStream(sos); + IOUtils.copy(inputStream, zos); + zos.flush(); + inputStream.close(); + zos.close(); + sos.close(); + } catch (Exception e){ + LOG.warn("error sending gzip",e); + } + return NONE; + } else { + mimeType = "application/octet-stream"; + return SUCCESS; + + } } /** From 299b41f311878ae22f0f9ba269c6c900d7041b1b Mon Sep 17 00:00:00 2001 From: Diogo Silva Date: Fri, 7 Oct 2016 18:13:02 -0300 Subject: [PATCH 3/3] Better compression detection using headers for file download --- .../ipt/action/portal/ResourceFileAction.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java b/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java index 39fafa3d27..67e2d8dd9c 100644 --- a/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java +++ b/src/main/java/org/gbif/ipt/action/portal/ResourceFileAction.java @@ -75,10 +75,11 @@ public String dwcaIn() { } } - boolean deflate = internalFilename.endsWith(".gz"); - if(deflate) { + boolean gz = internalFilename.endsWith(".gz"); + if(gz) { internalFilename = internalFilename.replace(".gz",""); } + boolean compress = gz || (req.getHeader("Accept-Encoding") != null && req.getHeader("Accept-Encoding").contains("gzip")); // serve file File dwcaFile = dataDir.resourceDwcaFile(resource.getShortname(), version); @@ -102,12 +103,18 @@ public String dwcaIn() { sb.append("-v" + version.toPlainString()); } sb.append("-"+internalFilename); - if(deflate) sb.append(".gz"); + if(gz) sb.append(".gz"); filename = sb.toString(); - if(deflate){ + if(compress){ try { - res.setContentType("application/x-gzip"); + if(gz) { + res.setContentType("application/x-gzip"); + } else { + res.setContentType("application/octet-stream"); + res.addHeader("Content-Encoding", "gzip"); + + } res.addHeader("Content-Disposition", "attachment; filename=\""+filename+"\""); ServletOutputStream sos = res.getOutputStream();