Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 103 additions & 29 deletions emm/src/main/java/whelk/Dump.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import whelk.util.FresnelUtil;
import whelk.util.Unicode;
import whelk.util.http.HttpTools;

Expand Down Expand Up @@ -79,13 +80,6 @@ public static void sendDumpResponse(Whelk whelk, TargetVocabMapper targetVocabMa
return;
}

boolean isDownload = Collections.list(req.getParameterNames()).contains("download");
String offset = req.getParameter("offset");
if (offset == null && !isDownload) {
sendDumpEntryPoint(apiBaseUrl, selection, res);
return;
}

String profile = req.getParameter("profile"); // May be null, meaning default (kbv)
Document profileDoc = null;
if (profile != null) {
Expand All @@ -96,6 +90,20 @@ public static void sendDumpResponse(Whelk whelk, TargetVocabMapper targetVocabMa
}
}

String computedLabelLocale = req.getParameter(JsonLd.Platform.COMPUTED_LABEL);
if (computedLabelLocale != null && !whelk.getLocales().contains(computedLabelLocale)) {
HttpTools.sendError(res, HttpServletResponse.SC_BAD_REQUEST,
String.format("Bad value for %s: %s", JsonLd.Platform.COMPUTED_LABEL, computedLabelLocale));
return;
}

boolean isDownload = Collections.list(req.getParameterNames()).contains("download");
String offset = req.getParameter("offset");
if (offset == null && !isDownload) {
sendDumpEntryPoint(apiBaseUrl, selection, profile, computedLabelLocale, res);
return;
}

String tmpDir = System.getProperty("java.io.tmpdir");
Path dumpsPath = Paths.get(tmpDir, "dumps");
Files.createDirectories(dumpsPath);
Expand All @@ -107,24 +115,24 @@ public static void sendDumpResponse(Whelk whelk, TargetVocabMapper targetVocabMa
}

if (isDownload) {
sendDumpDownloadResponse(whelk, targetVocabMapper, profile, profileDoc, selection, dumpFilePath, res);
sendDumpDownloadResponse(whelk, targetVocabMapper, profile, profileDoc, selection, computedLabelLocale, dumpFilePath, res);
} else {
long offsetNumeric = Long.parseLong(offset);
sendDumpPageResponse(whelk, targetVocabMapper, profile, profileDoc, apiBaseUrl, selection, dumpFilePath, offsetNumeric, res);
sendDumpPageResponse(whelk, targetVocabMapper, profile, profileDoc, apiBaseUrl, selection, computedLabelLocale, dumpFilePath, offsetNumeric, res);
}
}

private static void sendDumpEntryPoint(String apiBaseUrl, String selection, HttpServletResponse res) throws IOException {
private static void sendDumpEntryPoint(String apiBaseUrl, String selection, String profile, String computedLabelLocale, HttpServletResponse res) throws IOException {
var responseObject = new LinkedHashMap<>();
var contexts = new ArrayList<>();
contexts.add("https://www.w3.org/ns/activitystreams");
responseObject.put("@context", contexts);
responseObject.put("type", "Collection");
responseObject.put("id", apiBaseUrl + "?selection=" + selection);
responseObject.put("url", apiBaseUrl + "?selection=" + selection + "&download=" + ND_JSON_LD_GZ_EXT);
responseObject.put("id", addParams(apiBaseUrl + "?selection=" + selection, profile, computedLabelLocale));
responseObject.put("url", addParams(apiBaseUrl + "?selection=" + selection, profile, computedLabelLocale) + "&download=" + ND_JSON_LD_GZ_EXT);
var first = new LinkedHashMap<>();
first.put("type", "CollectionPage");
first.put("id", apiBaseUrl + "?selection=" + selection + "&offset=0");
first.put("id", addParams(apiBaseUrl + "?selection=" + selection + "&offset=0", profile, computedLabelLocale));
responseObject.put("first", first);

HttpTools.sendResponse(res, responseObject, AS2_CONTENT_TYPE);
Expand Down Expand Up @@ -159,7 +167,18 @@ private static void sendDumpIndexResponse(String apiBaseUrl, HttpServletResponse
HttpTools.sendResponse(res, responseObject, JSON_CONTENT_TYPE);
}

private static void sendDumpPageResponse(Whelk whelk, TargetVocabMapper targetVocabMapper, String profile, Document profileDoc, String apiBaseUrl, String selection, Path dumpFilePath, long offsetLines, HttpServletResponse res) throws IOException {
private static void sendDumpPageResponse(
Whelk whelk,
TargetVocabMapper targetVocabMapper,
String profile,
Document profileDoc,
String apiBaseUrl,
String selection,
String computedLabelLocale,
Path dumpFilePath,
long offsetLines,
HttpServletResponse res
) throws IOException {
ArrayList<String> recordIdsOnPage = new ArrayList<>(EmmChangeSet.TARGET_HITS_PER_PAGE);
Long totalEntityCount = null;

Expand Down Expand Up @@ -221,14 +240,30 @@ private static void sendDumpPageResponse(Whelk whelk, TargetVocabMapper targetVo
}

Instant dumpCreationTime = getDumpCreationTime(dumpFilePath);
sendFormattedResponse(whelk, targetVocabMapper, profile, profileDoc, apiBaseUrl, selection, recordIdsOnPage, res, offsetLines, totalEntityCount, dumpCreationTime);
sendFormattedResponse(whelk, targetVocabMapper, profile, profileDoc, apiBaseUrl, selection, computedLabelLocale, res, offsetLines, totalEntityCount, dumpCreationTime, recordIdsOnPage);
}

private static void sendFormattedResponse(Whelk whelk, TargetVocabMapper targetVocabMapper, String profile, Document profileDoc, String apiBaseUrl, String selection, ArrayList<String> recordIdsOnPage, HttpServletResponse res, long offset, Long totalEntityCount, Instant dumpCreationTime) throws IOException{
private static void sendFormattedResponse(
Whelk whelk,
TargetVocabMapper targetVocabMapper,
String profile,
Document profileDoc,
String apiBaseUrl,
String selection,
String computedLabelLocale,
HttpServletResponse res,
long offset,
Long totalEntityCount,
Instant dumpCreationTime,
ArrayList<String> recordIdsOnPage
) throws IOException {
var responseObject = new LinkedHashMap<>();

responseObject.put(JsonLd.CONTEXT_KEY, "https://www.w3.org/ns/activitystreams");
responseObject.put(JsonLd.ID_KEY, apiBaseUrl+"?selection="+selection+"&offset="+offset);

var id = apiBaseUrl+"?selection="+selection+"&offset="+offset;
responseObject.put(JsonLd.ID_KEY, addParams(id, profile, computedLabelLocale));

responseObject.put("type", "CollectionPage");
responseObject.put("startTime", ZonedDateTime.ofInstant(dumpCreationTime, ZoneOffset.UTC).toString());
if (totalEntityCount == null)
Expand All @@ -240,15 +275,13 @@ private static void sendFormattedResponse(Whelk whelk, TargetVocabMapper targetV

var partOf = new LinkedHashMap<>();
partOf.put("type", "Collection");
partOf.put("id", apiBaseUrl + "?selection=" + selection);
partOf.put("id", addParams(apiBaseUrl + "?selection=" + selection, profile, computedLabelLocale));
responseObject.put("partOf", partOf);

long nextOffset = offset + EmmChangeSet.TARGET_HITS_PER_PAGE;
if (totalEntityCount == null || nextOffset < totalEntityCount) {
if (profile != null)
responseObject.put("next", apiBaseUrl+"?selection="+selection+"&offset="+nextOffset+"&profile="+profile);
else
responseObject.put("next", apiBaseUrl+"?selection="+selection+"&offset="+nextOffset);
var next = apiBaseUrl+"?selection="+selection+"&offset="+nextOffset;
responseObject.put("next", addParams(next, profile, computedLabelLocale));
}

var items = new ArrayList<>(EmmChangeSet.TARGET_HITS_PER_PAGE);
Expand Down Expand Up @@ -279,13 +312,22 @@ private static void sendFormattedResponse(Whelk whelk, TargetVocabMapper targetV
}

for (Document doc : docs) {
items.add(formatDoc(doc, contextDoc, targetVocabMapper, profile, profileDoc));
items.add(formatDoc(doc, contextDoc, targetVocabMapper, whelk.getFresnelUtil(), computedLabelLocale, profileDoc, profile));
}

HttpTools.sendResponse(res, responseObject, JSON_CONTENT_TYPE);
}

private static void sendDumpDownloadResponse(Whelk whelk, TargetVocabMapper targetVocabMapper, String profile, Document profileDoc, String selection, Path dumpFilePath, HttpServletResponse res) {
private static void sendDumpDownloadResponse(
Whelk whelk,
TargetVocabMapper targetVocabMapper,
String profile,
Document profileDoc,
String selection,
String computedLabelLocale,
Path dumpFilePath,
HttpServletResponse res
) {
String filename = Unicode.stripSuffix(dumpFilePath.getFileName().toString(), ".dump") + ND_JSON_LD_GZ_EXT;
res.setHeader("Content-Disposition", "attachment; filename=" + filename);
res.setHeader("Content-Type", "application/octet-stream");
Expand Down Expand Up @@ -331,11 +373,11 @@ private static void sendDumpDownloadResponse(Whelk whelk, TargetVocabMapper targ
batch.add(line.trim());

if (batch.size() >= batchSize) {
writeJsonLdLines(whelk, targetVocabMapper, profile, profileDoc, selection, batch, contextDoc, os);
writeJsonLdLines(whelk, targetVocabMapper, profile, profileDoc, selection, computedLabelLocale, contextDoc, os, batch);
batch = new ArrayList<>(batchSize);
}
}
writeJsonLdLines(whelk, targetVocabMapper, profile, profileDoc, selection, batch, contextDoc, os);
writeJsonLdLines(whelk, targetVocabMapper, profile, profileDoc, selection, computedLabelLocale, contextDoc, os, batch);
res.flushBuffer();
}
} catch (Exception e) {
Expand All @@ -347,7 +389,17 @@ private static void sendDumpDownloadResponse(Whelk whelk, TargetVocabMapper targ
}
}

private static void writeJsonLdLines(Whelk whelk, TargetVocabMapper targetVocabMapper, String profile, Document profileDoc, String selection, Collection<String> ids, Document contextDoc, OutputStream os) throws IOException {
private static void writeJsonLdLines(
Whelk whelk,
TargetVocabMapper targetVocabMapper,
String profile,
Document profileDoc,
String selection,
String computedLabelLocale,
Document contextDoc,
OutputStream os,
Collection<String> ids
) throws IOException {
var docs = whelk.bulkLoad(ids).values();
docs.removeIf(Document::getDeleted);

Expand All @@ -356,7 +408,7 @@ private static void writeJsonLdLines(Whelk whelk, TargetVocabMapper targetVocabM
}

for (Document doc : docs) {
writeJsonLdLine(formatDoc(doc, contextDoc, targetVocabMapper, profile, profileDoc), os);
writeJsonLdLine(formatDoc(doc, contextDoc, targetVocabMapper, whelk.getFresnelUtil(), computedLabelLocale, profileDoc, profile), os);
}
os.flush();
}
Expand Down Expand Up @@ -405,7 +457,15 @@ private static void writeJsonLdLine(Object object, OutputStream os) throws IOExc
os.write("\n".getBytes(StandardCharsets.UTF_8));
}

private static Object formatDoc(Document doc, Document contextDoc, TargetVocabMapper targetVocabMapper, String profile, Document profileDoc) {
private static Object formatDoc(
Document doc,
Document contextDoc,
TargetVocabMapper targetVocabMapper,
FresnelUtil fresnelUtil,
String computedLabelLocale,
Document profileDoc,
String profile
) {
var context = new ArrayList<>();
context.add(null);
context.add(contextDoc.getRecordIdentifiers().getFirst());
Expand All @@ -415,6 +475,10 @@ private static Object formatDoc(Document doc, Document contextDoc, TargetVocabMa
formattedDoc = new Document((Map) targetVocabMapper.applyTargetVocabularyMap(profile, profileDoc.data, doc.data));
}

if (computedLabelLocale != null) {
fresnelUtil.insertComputedLabels(doc.data, new FresnelUtil.LangCode(computedLabelLocale));
}

Map data = Map.of(
JsonLd.ID_KEY, doc.getRecordIdentifiers().getFirst(),
JsonLd.CONTEXT_KEY, context,
Expand Down Expand Up @@ -531,6 +595,16 @@ private static void generateDump(Whelk whelk, String dump, Path dumpFilePath) {
}).start();
}

/**
 * Appends the optional {@code profile} and computed-label locale query parameters to a URL.
 *
 * <p>The parameter values are percent-encoded (UTF-8) before being appended, so that values
 * containing reserved characters such as {@code &}, {@code #}, {@code +} or spaces cannot
 * corrupt the generated link or be mistaken for additional parameters.
 *
 * @param url the URL to extend; every parameter is appended with a leading {@code &}, so the
 *            URL is expected to already contain a query string
 * @param profile the value for the {@code profile} parameter, or {@code null} to omit it
 * @param computedLabelLocale the value for the {@code JsonLd.Platform.COMPUTED_LABEL}
 *                            parameter, or {@code null} to omit it
 * @return {@code url} with the non-null parameters appended, in the order profile, locale
 */
private static String addParams(String url, String profile, String computedLabelLocale) {
    if (profile != null) {
        // profile comes straight from the request, so it must be encoded before reuse in a link.
        url += "&profile=" + java.net.URLEncoder.encode(profile, StandardCharsets.UTF_8);
    }
    if (computedLabelLocale != null) {
        url += "&" + JsonLd.Platform.COMPUTED_LABEL + "="
                + java.net.URLEncoder.encode(computedLabelLocale, StandardCharsets.UTF_8);
    }
    return url;
}

private static PreparedStatement getAllDumpStatement(Connection connection) throws SQLException {
String sql = """
SELECT id
Expand Down
5 changes: 2 additions & 3 deletions rest/src/main/groovy/whelk/rest/api/Crud.java
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,8 @@ private Object getFormattedResponseBody(CrudGetRequest request, Document doc, St

private Object getNegotiatedDataBody(CrudGetRequest request, Object contextData, Map<String, Object> data, String uri) {
if (request.shouldComputeLabels()) {
if (!JsonLd.isFramed(data)) {
// TODO? should we support this? Requires more work in FresnelUtil
throw new BadRequestException("Cannot compute labels when not framed");
if (request.shouldEmbellish() && !JsonLd.isFramed(data)) {
throw new BadRequestException("Cannot compute labels when embellished but not framed");
}

// FIXME FresnelUtil can't handle the whole search response because of @container @index in stats
Expand Down
Loading