diff --git a/pom-common.xml b/pom-common.xml index 3e39c2a..63c659f 100755 --- a/pom-common.xml +++ b/pom-common.xml @@ -164,17 +164,17 @@ org.apache.tika tika-parsers - 1.14 + 1.15 org.apache.tika tika-core - 1.14 + 1.15 org.apache.pdfbox pdfbox - 1.8.1 + 2.0.6 diff --git a/src/java/edu/stanford/muse/datacache/Blob.java b/src/java/edu/stanford/muse/datacache/Blob.java index 19150d0..e952541 100755 --- a/src/java/edu/stanford/muse/datacache/Blob.java +++ b/src/java/edu/stanford/muse/datacache/Blob.java @@ -19,12 +19,14 @@ import edu.stanford.muse.util.Util; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.AutoDetectParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.sax.BodyContentHandler; import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; import java.io.IOException; import java.io.InputStream; @@ -132,9 +134,15 @@ public Pair getContent(BlobStore store) try { // skip mp3 files, tika has trouble with it and hangs if (!Util.nullOrEmpty(this.filename) && !this.filename.toLowerCase().endsWith(".mp3")) - parser.parse(stream, handler, metadata, context); - - String[] names = metadata.names(); + try { + parser.parse(stream, handler, metadata, context); + } catch (Exception e) { + log.error(e.getMessage(), e); + log.error(filename); + throw new RuntimeException(e.getMessage(), e); + } + + String[] names = metadata.names(); //Arrays.sort(names); for (String name : names) { // some metadata tags are problematic and result in large hex strings... ignore them. (caused memory problems with Henry's archive) diff --git a/src/java/edu/stanford/muse/datacache/BlobStore.java b/src/java/edu/stanford/muse/datacache/BlobStore.java index 960d22b..44c8daf 100755 --- a/src/java/edu/stanford/muse/datacache/BlobStore.java +++ b/src/java/edu/stanford/muse/datacache/BlobStore.java @@ -468,7 +468,7 @@ public void generate_thumbnail(Blob b) throws IOException { tnFilename = tmp_filename.substring(0, tmp_filename.length() - ".pdf".length()); // strip the ".pdf" tnFilename += "1.png"; String[] args = new String[]{"-imageType", "png", "-startPage", "1", "-endPage", "1", tmp_filename}; - org.apache.pdfbox.PDFToImage.main(args); + org.apache.pdfbox.tools.PDFToImage.main(args); log.info("Saving PDF thumbnail to " + tnFilename); filename = filename + ".png"; // make sure the suffix for the thumbnail is named with a .png suffix in the cache } catch (Throwable e) { diff --git a/src/java/edu/stanford/muse/email/EmailFetcherThread.java b/src/java/edu/stanford/muse/email/EmailFetcherThread.java index ebff296..e89a7ef 100755 --- a/src/java/edu/stanford/muse/email/EmailFetcherThread.java +++ b/src/java/edu/stanford/muse/email/EmailFetcherThread.java @@ -17,6 +17,7 @@ import com.sun.mail.imap.IMAPFolder; import edu.stanford.muse.datacache.Blob; +import edu.stanford.muse.email.json.ArchiveSaver; import edu.stanford.muse.index.*; import edu.stanford.muse.util.EmailUtils; import edu.stanford.muse.util.JSONUtils; @@ -24,6 +25,7 @@ import edu.stanford.muse.webapp.HTMLUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.james.mime4j.codec.DecoderUtil; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -472,12 +474,18 @@ private List processMessagePart(int messageNum, Message m, Part p, List< String content; String type = p.getContentType(); // new InputStreamReader(p.getInputStream(), "UTF-8"); try { - // if forced encoding is set, we read the string with that encoding, otherwise we just use whatever p.getContent gives us - if (FORCED_ENCODING != null) { + if (type.contains("charset=")) { byte b[] = Util.getBytesFromStream(p.getInputStream()); - content = new String(b, FORCED_ENCODING); - } else - content = (String) p.getContent(); + content = new String(b, type.substring(type.indexOf("charset=") + "charset=".length())); + } else { + // if forced encoding is set, we read the string with that encoding, otherwise we just use whatever p.getContent gives us + if (FORCED_ENCODING != null) { + byte b[] = Util.getBytesFromStream(p.getInputStream()); + content = new String(b, FORCED_ENCODING); + } else { + content = (String) p.getContent(); + } + } } catch (UnsupportedEncodingException uee) { dataErrors.add("Unsupported encoding: " + folder_name() + " Message #" + messageNum + " type " + type + ", using brute force conversion"); // a particularly nasty issue:javamail can't handle utf-7 encoding which is common with hotmail and exchange servers. @@ -496,6 +504,7 @@ private List processMessagePart(int messageNum, Message m, Part p, List< // rfc822 mime type is for embedded mbox format or some such (appears for things like // forwarded messages). the content appears to be just a multipart. Object o = p.getContent(); + System.setProperty("mail.mime.multipart.allowempty", "true"); if (o instanceof Multipart) { Multipart allParts = (Multipart) o; if (p.isMimeType("multipart/alternative")) { @@ -582,6 +591,9 @@ private void handleAttachments(int idx, Message m, Part p, List textList String filename = null; try { filename = p.getFileName(); + if (filename != null) { + filename = DecoderUtil.decodeEncodedWords(filename, null); + } } catch (Exception e) { // seen this happen with: // Folders__gmail-sent Message #12185 Expected ';', got "Message" @@ -1089,7 +1101,17 @@ private void fetchAndIndexMessages(Folder folder, Message[] messages, int offset } if (contents == null) - contents = processMessagePart(messageNum, originalMessage, mm, attachmentsList); + try { + contents = processMessagePart(messageNum, originalMessage, mm, attachmentsList); + } catch (Exception e) { + log.error(e.getMessage(), e); + try { + log.error("MessageId: " + originalMessage.getMessageID()); + } catch (MessagingException e1) { + log.error(e.getMessage(), e); + } + throw e; + } // if mm is not prefetched, it is the same as original_mm // will also work, but will be slow as javamail accesses and fetches each mm separately, instead of using the bulk prefetched version @@ -1272,14 +1294,22 @@ public void run() { // this is a special for mbox'es because we run out of memory if we try to openFolderAndGetMessages() // so we process in batches //TODO: Ideally, should cap on buffer size rather than on number of messages. - final int BATCH = 10000; + int nMessagesperbathc = 10000; + long maxMemory = Runtime.getRuntime().maxMemory(); + if (maxMemory <= 4294967296L ) { nMessagesperbathc = 100; } + else { + if (maxMemory<= 8294967296L) { nMessagesperbathc = 1000; } + } + final int BATCH = nMessagesperbathc; //gradual decrease of batch size due to memory size int nbatches = nMessages / BATCH; nMessagesProcessedSuccess = 0; long st = System.currentTimeMillis(); int b; for (b = 0; b < nbatches + 1; b++) { begin_msg_index = b * BATCH + 1; - end_msg_index = Math.min((b + 1) * BATCH, nMessages) + 1; + end_msg_index = Math.min((b + 1) * BATCH, nMessages); + log.info("begin_msg_index: " + begin_msg_index); + log.info("end_msg_index: " + end_msg_index); log.info("Fetching messages in index [" + begin_msg_index + ", " + end_msg_index + "] batch: " + b + "/" + nbatches + "\nTotal Messages: " + nMessages); Message[] messages = openFolderAndGetMessages(); currentStatus = JSONUtils.getStatusJSON(""); @@ -1349,6 +1379,7 @@ public void run() { } log.info("Read #" + nMessages + " messages in in " + (System.currentTimeMillis() - st) + "ms"); } + new ArchiveSaver(archive.archiveTitle).save(archive); } catch (Throwable t) { if (t instanceof OutOfMemoryError) this.mayHaveRunOutOfMemory = true; diff --git a/src/java/edu/stanford/muse/email/json/ArchiveSaver.java b/src/java/edu/stanford/muse/email/json/ArchiveSaver.java new file mode 100644 index 0000000..9fd3abf --- /dev/null +++ b/src/java/edu/stanford/muse/email/json/ArchiveSaver.java @@ -0,0 +1,107 @@ +package edu.stanford.muse.email.json; + +import edu.stanford.muse.index.Archive; +import edu.stanford.muse.index.Document; +import edu.stanford.muse.index.EmailDocument; +import edu.stanford.muse.util.Util; + +import javax.mail.Address; +import javax.mail.internet.InternetAddress; +import java.io.*; +import java.util.Base64; +import java.util.List; +import java.util.Map; + +/** + * Created by sunchise on 04.06.17. + */ +public class ArchiveSaver { + + private final String archiveName; + + public ArchiveSaver(String archiveName) { + this.archiveName = archiveName; + } + + public void save(Archive archive) { + String folderName = new String(Base64.getEncoder().encode(archiveName.getBytes())); + String folderPath = System.getProperty("user.home") + File.separator + "epadd-data"; + File folder = new File(folderPath); + if (!folder.exists()) { + folder.mkdir(); + } + folderPath += File.separator + folderName; + folder = new File(folderPath); + if (!folder.exists()) { + folder.mkdir(); + } + String fileName = folderPath + File.separator + "archive.json"; + File file = new File(fileName); + if (file.exists()) { + file.delete(); + } + try { + file.createNewFile(); + } catch (IOException e) { + throw new RuntimeException(e.getMessage(), e); + } + List allDocs = archive.getAllDocs(); + EmailNameAgregator emailNameAgregator = new EmailNameAgregator(allDocs); + emailNameAgregator.save(folderPath + File.separator + "email-names.json"); + int i = 1; + try (BufferedWriter stream = new BufferedWriter(new FileWriter(file))) { + append(stream, "["); + boolean fail = false; + for (Document doc : allDocs) { + if (i > 1 && !fail) { + append(stream, ","); + } + fail = false; + final EmailDocument emailDocument = (EmailDocument) doc; + String messageID = Util.hash (emailDocument.getSignature()); + Email email = new Email(messageID, + emailDocument.date, + true, + emailDocument.getSubject(), + emailDocument.from == null || emailDocument.from.length == 0 ? null : emailNameAgregator.getName(emailDocument.getFromEmailAddress()), + emailDocument.getFromEmailAddress()); + if (emailDocument.cc != null) { + for (Address address : emailDocument.cc) { + InternetAddress internetAddress = (InternetAddress) address; + email.addCc(emailNameAgregator.getName(internetAddress.getAddress()), internetAddress.getAddress()); + } + } + if (emailDocument.bcc != null) { + for (Address address : emailDocument.bcc) { + InternetAddress internetAddress = (InternetAddress) address; + email.addCc(emailNameAgregator.getName(internetAddress.getAddress()), internetAddress.getAddress()); + } + } + if (emailDocument.to != null) { + for (Address address : emailDocument.to) { + InternetAddress internetAddress = (InternetAddress) address; + email.addCc(emailNameAgregator.getName(internetAddress.getAddress()), internetAddress.getAddress()); + } + } + if (email.check()) { + append(stream, email.toJson()); + } else { + fail = true; + } + i++; + } + append(stream, "]"); + stream.flush(); + stream.close(); + } catch (IOException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + + private void append(Writer stream, String string) throws IOException { + string = string.trim(); + stream.append(string); + } + +} diff --git a/src/java/edu/stanford/muse/email/json/Email.java b/src/java/edu/stanford/muse/email/json/Email.java new file mode 100644 index 0000000..414845c --- /dev/null +++ b/src/java/edu/stanford/muse/email/json/Email.java @@ -0,0 +1,192 @@ +package edu.stanford.muse.email.json; + +import org.json.JSONException; +import org.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.mail.Address; +import javax.mail.internet.InternetAddress; +import java.io.*; +import java.util.Calendar; +import java.util.Collection; +import java.util.Date; +import java.util.HashSet; +/* +{ + "emailId": 3, + "dateField": "1496222800", + "isSent": true, + "toField": [ + [ + "Александр Игоревич", + "Александр Игоревич" + ] + ], + "ccField": [ + [ + "ccPlaceholder", + "ccPlaceholder" + ] + ], + "fromField": [ + "WWF России", + "WWF России" + ], + "subject": "Барс по имени Крюк" + } + */ + + +public class Email { + private final Logger log = LoggerFactory.getLogger(Email.class); + + private final String id; + + private final Date date; + + private final boolean isSent; + + private final Collection to = new HashSet<>(); + + private final Collection cc = new HashSet<>(); + + private final EmailAddress from; + + private final String subject; + + private String toJson; + + public Email(String id, Date date, boolean isSent, EmailAddress from, String subject) { + this.id = id; + Calendar calendar = Calendar.getInstance(); + calendar.set(Calendar.YEAR, 1999); + calendar.set(Calendar.MONTH, Calendar.SEPTEMBER); + calendar.set(Calendar.DAY_OF_MONTH, 11); + Date minDate = calendar.getTime(); + if (date == null || minDate.compareTo(date) > 0) { + date = minDate; + } + this.date = date; + this.isSent = isSent; + this.from = from; + this.subject = subject; + } + + + public Email(int id, Date date, boolean isSent, String subject, String fromName, String fromEmail) { + this(String.valueOf(id), date, isSent, subject, fromName, fromEmail); + } + + public Email(String id, Date date, boolean isSent, String subject, String fromName, String fromEmail) { + this(id, date, isSent, new EmailAddress(fromName, fromEmail), subject); + } + + public void addTo(EmailAddress emailAddress) { + toJson = null; + to.add(emailAddress); + } + + public void addTo(String name, String email) { + toJson = null; + addTo(new EmailAddress(name, email)); + } + + public void addCc(EmailAddress emailAddress) { + toJson = null; + to.add(emailAddress); + } + + public void addCc(String name, String email) { + toJson = null; + addCc(new EmailAddress(name, email)); + } + + public String toJson() { + if (toJson == null) { + StringBuilder stream = new StringBuilder(); + stream.append("{"); + stream.append("\"emailId\": \"").append(id).append("\","); + stream.append("\"dateField\": ").append(date.getTime() / 1000).append(","); + stream.append("\"isSent\": ").append(isSent).append(","); + stream.append("\"toField\": ["); + stream.append(to.stream().map(EmailAddress::toJson).reduce((s, s2) -> s + "," + s2).orElse("")); + stream.append("],"); + stream.append("\"ccField\": ["); + if (cc.isEmpty()) { + stream.append(new EmailAddress("ccPlaceholder", "ccPlaceholder").toJson()); + } else { + stream.append(cc.stream().map(EmailAddress::toJson).reduce((s, s2) -> s + "," + s2).orElse("")); + } + stream.append("],"); + stream.append("\"fromField\": "); + if (from == null) { + stream.append(new EmailAddress("fromPlaceholder", "fromPlaceholder").toJson()); + } else { + stream.append(from.toJson()); + } + stream.append(","); + stream.append("\"subject\": \""); + String formatedSubject = subject == null ? "Without subject" : String.valueOf(subject).replaceAll("\"", "'").replace("Subject: ", "").trim(); + if ("null".equals(formatedSubject)) { + formatedSubject = "Without subject"; + } + append(stream, formatedSubject).append("\""); + stream.append("}"); + toJson = stream.toString(); + } + return toJson; + } + + public boolean check() { + try { + new JSONObject(toJson()); + } catch (JSONException e) { + log.error("Not right format of json\n\n" + toJson + "\n\n" + e.getMessage()); + return false; + } + return true; + } + + public static class EmailAddress { + private final String name; + private final String email; + + public EmailAddress(String name, String email) { + this.name = name; + this.email = email; + } + + public String getName() { + return name == null ? email : name.replaceAll("\"", "'"); + } + + public String getEmail() { + return email.replaceAll("\"", "'"); + } + + public String toJson() { + StringBuilder stream = new StringBuilder(); + append(stream, "["); + append(stream, "\"" + getName() + "\""); + append(stream, ","); + append(stream, "\"" + getEmail() + "\""); + append(stream, "]"); + return stream.toString(); + } + } + + private static StringBuilder append(StringBuilder stream, String string) { + string = string.replaceAll("\\s", " "); + string = string.replaceAll("\\n", " "); + string = string.replaceAll("\\\\", "\\\\\\\\"); + string = string.replaceAll("\\r", " "); + string = string.replaceAll(" {2,}", " "); + string = string.replaceAll("\" ", "\""); + string = string.replaceAll(" \"", "\""); + string = string.replaceAll("[^\\w\\d\\sёЁА-Яа-я.,:\\\\\\[\\]|'\";()*?!#$%{}@+\\-]", ""); + string = string.trim(); + stream.append(string); + return stream; + } +} diff --git a/src/java/edu/stanford/muse/email/json/EmailInfo.java b/src/java/edu/stanford/muse/email/json/EmailInfo.java new file mode 100644 index 0000000..5af3927 --- /dev/null +++ b/src/java/edu/stanford/muse/email/json/EmailInfo.java @@ -0,0 +1,56 @@ +package edu.stanford.muse.email.json; + +import java.io.Serializable; + +/** + * Created by sunchise on 03.06.17. + */ +public class EmailInfo implements Serializable { + + private final int emailId; + private final String dateField; + private final boolean isSent; + private final String[][] toField; + private final String[][] ccField; + private final Object[] fromField; + private final String subject; + + + public EmailInfo(int emailId, String dateField, boolean isSent, String[][] toField, String[][] ccField, Object[] fromField, String subject) { + this.emailId = emailId; + this.dateField = dateField; + this.isSent = isSent; + this.toField = toField; + this.ccField = ccField; + this.fromField = fromField; + this.subject = subject; + } + + public int getEmailId() { + return emailId; + } + + public String getDateField() { + return dateField; + } + + public boolean isSent() { + return isSent; + } + + public String[][] getToField() { + return toField; + } + + public String[][] getCcField() { + return ccField; + } + + public Object[] getFromField() { + return fromField; + } + + public String getSubject() { + return subject; + } +} diff --git a/src/java/edu/stanford/muse/email/json/EmailNameAgregator.java b/src/java/edu/stanford/muse/email/json/EmailNameAgregator.java new file mode 100644 index 0000000..1090ec9 --- /dev/null +++ b/src/java/edu/stanford/muse/email/json/EmailNameAgregator.java @@ -0,0 +1,138 @@ +package edu.stanford.muse.email.json; + +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; +import edu.stanford.muse.index.Document; +import edu.stanford.muse.index.EmailDocument; +import org.json.JSONObject; + +import javax.mail.Address; +import javax.mail.internet.InternetAddress; +import java.io.*; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class EmailNameAgregator { + private List allDocs; + final Map emailNameMap = new HashMap<>(); + + public EmailNameAgregator(List allDocs) { + this.allDocs = allDocs; + init(); + } + + + public EmailNameAgregator(List allDocs, String fileName) { + this.allDocs = allDocs; + if (fileName == null) { + init(); + } else { + File file = new File(fileName); + if (file.exists()) { + load(fileName); + } else { + init(); + } + } + } + + private void init() { + emailNameMap.clear(); + allDocs.forEach(document -> { + EmailDocument emailDocument = (EmailDocument) document; + if (emailDocument.to != null) { + for (Address address : emailDocument.to) { + appendToEmailNameMap(emailNameMap, (InternetAddress) address); + } + } + if (emailDocument.cc != null) { + for (Address address : emailDocument.cc) { + appendToEmailNameMap(emailNameMap, (InternetAddress) address); + } + } + if (emailDocument.bcc != null) { + for (Address address : emailDocument.bcc) { + appendToEmailNameMap(emailNameMap, (InternetAddress) address); + } + } + }); + } + + public String getName(String email) { + return emailNameMap.get(email); + } + + + private void appendToEmailNameMap(Map emailNameMap, InternetAddress internetAddress) { + String email = internetAddress.getAddress(); + String personal = internetAddress.getPersonal(); + if (personal == null) { + return; + } + personal = removeWildChars(personal); + String name = emailNameMap.get(email); + if (name != null) { + if (name.length() < personal.length()) { + if (personal.contains(" ") || (!name.contains(" "))) { + emailNameMap.put(email, personal); + } + } else if (!name.contains(" ") && personal.contains(" ")) { + emailNameMap.put(email, personal); + } else if (name.contains(" ") && personal.contains(" ")) { + int nameWordsCount = name.split(" ").length; + int personalWordsCount = personal.split(" ").length; + if (personalWordsCount < 4 && personalWordsCount < nameWordsCount) { + emailNameMap.put(email, personal); + } + } + } else { + emailNameMap.put(email, personal); + } + } + + private String removeWildChars(String string) { + string = string.replaceAll("\\s", " "); + string = string.replaceAll("\\n", " "); + string = string.replaceAll("\\\\", "\\\\\\\\"); + string = string.replaceAll("\\r", " "); + string = string.replaceAll(" {2,}", " "); + string = string.replaceAll("\" ", "\""); + string = string.replaceAll(" \"", "\""); + string = string.replaceAll("[^\\w\\d\\sёЁА-Яа-я.,:\\\\\\[\\]|'\";()*?!#$%{}@+\\-]", ""); + if (string.endsWith("'")) { + string = string.substring(0, string.length() - 1); + } + return string.trim(); + } + + public void save(String fileName) { + File file = new File(fileName); + if (file.exists()) { + file.delete(); + } + JSONObject json = new JSONObject(emailNameMap); + try (Writer writer = new FileWriter(file)) { + json.write(writer); + writer.close(); + } catch (IOException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + public void load(String fileName) { + File file = new File(fileName); + if (!file.exists()) { + file.delete(); + } + try (FileReader fileReader = new FileReader(file)) { + Map tempMap = new Gson().fromJson(fileReader, new TypeToken>() {}.getType()); + if (tempMap != null) { + emailNameMap.putAll(tempMap); + } + fileReader.close(); + } catch (IOException e) { + throw new RuntimeException(e.getMessage(), e); + } + } +} diff --git a/src/java/edu/stanford/muse/index/EmailDocument.java b/src/java/edu/stanford/muse/index/EmailDocument.java index 9daa68a..03875e2 100755 --- a/src/java/edu/stanford/muse/index/EmailDocument.java +++ b/src/java/edu/stanford/muse/index/EmailDocument.java @@ -51,7 +51,7 @@ public class EmailDocument extends DatedDocument implements Serializable public String folderName, emailSource; public Set folderNames = new LinkedHashSet<>(), emailSources = new LinkedHashSet<>(); // email can now belong to multiple folders, folderName field also maintained for backward compatibility - public Address[] to, from, cc, bcc; + public Address[] to, from, cc, bcc; public String messageID; public String sentToMailingLists[]; diff --git a/src/java/edu/stanford/muse/util/Util.java b/src/java/edu/stanford/muse/util/Util.java index 3991b95..5c0a8b4 100755 --- a/src/java/edu/stanford/muse/util/Util.java +++ b/src/java/edu/stanford/muse/util/Util.java @@ -2412,9 +2412,8 @@ public static void test_tail() * actual file in the URL. * returns null if the input is null. */ - public static String URLtail(String url) - { - return tail(url, "/"); + public static String URLtail(String url) { + return URLEncode(tail(url, "/")); } /** diff --git a/src/java/edu/stanford/muse/webapp/EmailRenderer.java b/src/java/edu/stanford/muse/webapp/EmailRenderer.java index 6ffa3b2..f56ccb3 100755 --- a/src/java/edu/stanford/muse/webapp/EmailRenderer.java +++ b/src/java/edu/stanford/muse/webapp/EmailRenderer.java @@ -1,11 +1,5 @@ package edu.stanford.muse.webapp; -import java.io.IOException; -import java.util.*; - -import javax.mail.Address; -import javax.mail.internet.InternetAddress; - import edu.stanford.muse.datacache.Blob; import edu.stanford.muse.datacache.BlobStore; import edu.stanford.muse.email.AddressBook; @@ -17,6 +11,11 @@ import edu.stanford.muse.util.Span; import edu.stanford.muse.util.Util; +import javax.mail.Address; +import javax.mail.internet.InternetAddress; +import java.io.IOException; +import java.util.*; + /** This class has util methods to display an email message in an html page */ public class EmailRenderer { @@ -168,7 +167,7 @@ public static String formatAddressesAsHTML(Address addrs[], AddressBook addressB InternetAddress ia = (InternetAddress) a; Pair p = JSPHelper.getNameAndURL((InternetAddress) a, addressBook); String url = p.getSecond(); - String str = ia.toString(); + String str = ia.getPersonal() == null ? ia.getAddress() : ia.getPersonal() + "<" + ia.getAddress() + ">"; String addr = ia.getAddress(); boolean match = false; if(str!=null) { @@ -337,7 +336,7 @@ public static Pair htmlForDocument(Document d, Archive archive, // d.hashCode() is just something to identify this // page/message page.append(""); - page.append(leader + "href=\"" + attachmentURL + "\" src=\"" + thumbnailURL + "\">\n"); + page.append(leader + "href=\"" + attachmentURL + "\" download src=\"" + thumbnailURL + "\">\n"); page.append("\n"); } else diff --git a/src/java/edu/stanford/muse/webapp/JSPHelper.java b/src/java/edu/stanford/muse/webapp/JSPHelper.java index f6a05bb..67c6139 100755 --- a/src/java/edu/stanford/muse/webapp/JSPHelper.java +++ b/src/java/edu/stanford/muse/webapp/JSPHelper.java @@ -44,6 +44,8 @@ import javax.servlet.http.HttpSession; import javax.xml.transform.TransformerException; import java.io.*; +import java.net.URLDecoder; +import java.net.URLEncoder; import java.util.*; import java.util.stream.Collectors; @@ -267,17 +269,17 @@ public static String[] convertRequestParamsToUTF8(String params[]) throws Unsupp // converts an array of strings from iso-8859-1 to utf8. useful for converting i18n chars in http request parameters public static String convertRequestParamToUTF8(String param) throws UnsupportedEncodingException { - if (RUNNING_ON_JETTY) - { - log.info("running on jetty: no conversion for " + param); + // if (RUNNING_ON_JETTY) + // { + // log.info("running on jetty: no conversion for " + param); return param; - } - if (param == null) - return null; - String newParam = new String(param.getBytes("ISO-8859-1"), "UTF-8"); - if (!newParam.equals(param)) - log.info("Converted to utf-8: " + param + " -> " + newParam); - return newParam; + // } + // if (param == null) + // return null; + // String newParam = new String(param.getBytes("ISO-8859-1"), "UTF-8"); + // if (!newParam.equals(param)) + // log.info("Converted to utf-8: " + param + " -> " + newParam); + // return newParam; } public static boolean runningOnLocalhost(HttpServletRequest request) @@ -1589,7 +1591,11 @@ public static void serveBlob(HttpServletRequest request, HttpServletResponse res { HttpSession session = request.getSession(); String filename = request.getParameter("file"); - filename = convertRequestParamToUTF8(filename); + try { + filename = URLDecoder.decode(filename, "utf-8"); + } catch (Exception e) { + throw new RuntimeException(e.getMessage(), e); + } String baseDir = (String) getSessionAttribute(session, "cacheDir"); if (filename.indexOf(".." + File.separator) >= 0) // avoid file injection! @@ -1663,7 +1669,8 @@ public static void writeFileToResponse(HttpSession session, HttpServletResponse if (asAttachment) { response.setHeader("Content-Length", String.valueOf(file.length())); - response.setHeader("Content-Disposition", "attachment; filename=\"" + file.getName() + "\""); + String fileName = URLEncoder.encode(file.getName(), "utf-8").replace("+", "%20"); + response.setHeader("Content-Disposition", "attachment; filename=\"" + fileName + "\""); } // Prepare streams. BufferedInputStream input = null;