Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/main/java/edu/harvard/iq/dataverse/api/Index.java
Original file line number Diff line number Diff line change
Expand Up @@ -372,8 +372,8 @@ public Response indexMod(@QueryParam("partitions") long partitions, @QueryParam(
/**
 * Starts a re-index of permissions for all objects and returns immediately.
 *
 * The work is handed to {@code solrIndexService.asyncIndexAllPermissions()};
 * this endpoint does not wait for it, so the response only confirms that the
 * job was started. Progress and failures are reported in the server logs.
 *
 * @return an OK response whose message states that indexing has been started
 */
@GET
@Path("perms")
public Response indexAllPermissions() {
    solrIndexService.asyncIndexAllPermissions();
    return ok("Asynchronous indexing of all permissions has been started. Check the server logs for progress.");
}

@GET
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.logging.Logger;
import java.util.stream.Stream;

import jakarta.ejb.Asynchronous;
import jakarta.ejb.EJB;
import jakarta.ejb.Stateless;
import jakarta.ejb.TransactionAttribute;
Expand Down Expand Up @@ -103,20 +104,6 @@ public List<DvObjectSolrDoc> determineSolrDocs(DvObject dvObject) {
return solrDocs;
}

/**
 * Collects the file-level permission Solr documents for the dataset whose id
 * is the key of the given entry.
 *
 * @param datasetHash entry keyed by dataset id; only the key is read here
 * @return a new list of file documents; empty when the id does not resolve to
 *         a DvObject or the resolved object is not a dataset
 */
private List<DvObjectSolrDoc> determineSolrDocsForFilesFromDataset(Map.Entry<Long, List<Long>> datasetHash) {
    List<DvObjectSolrDoc> fileDocs = new ArrayList<>();
    DvObject owner = dvObjectService.findDvObject(datasetHash.getKey());
    if (owner != null && owner.isInstanceofDataset()) {
        fileDocs.addAll(constructDatafileSolrDocsFromDataset((Dataset) owner));
    }
    return fileDocs;
}

/**
* @todo should this method return a List? The equivalent methods for
* datasets and files return lists.
Expand Down Expand Up @@ -155,29 +142,6 @@ private DvObjectSolrDoc constructDatafileSolrDoc(DataFileProxy fileProxy, List<S
return new DvObjectSolrDoc(fileProxy.getFileId().toString(), solrId, versionId, fileProxy.getName(), perms);
}

/**
 * Builds one permission Solr document per file metadata entry, for every
 * dataset version returned by {@code datasetVersionsToBuildCardsFor}.
 *
 * A released version gets a permission list containing only the public group
 * string; any other version uses the permissions reported by
 * {@code searchPermissionsService.findDatasetVersionPerms}. All files of one
 * version share that version's permission list.
 *
 * @param dataset the dataset whose files are to be described
 * @return the file documents, in version order and then file-metadata order
 */
private List<DvObjectSolrDoc> constructDatafileSolrDocsFromDataset(Dataset dataset) {
    List<DvObjectSolrDoc> fileDocs = new ArrayList<>();
    for (DatasetVersion version : datasetVersionsToBuildCardsFor(dataset)) {
        List<String> versionPerms;
        if (version.isReleased()) {
            versionPerms = new ArrayList<>();
            versionPerms.add(IndexServiceBean.getPublicGroupString());
        } else {
            versionPerms = searchPermissionsService.findDatasetVersionPerms(version);
        }

        for (FileMetadata metadata : version.getFileMetadatas()) {
            Long id = metadata.getDataFile().getId();
            // Solr id = file prefix + file id + suffix derived from the version state.
            String solrId = IndexServiceBean.solrDocIdentifierFile + id
                    + getDatasetOrDataFileSolrEnding(version.getVersionState());
            DvObjectSolrDoc doc = new DvObjectSolrDoc(id.toString(), solrId, version.getId(), metadata.getLabel(), versionPerms);
            logger.finest("adding fileid " + id);
            fileDocs.add(doc);
        }
    }
    return fileDocs;
}

/** Find the versions to index. The overall logic is
* If there is only one version, or no released version (all non-draft versions are deaccessioned)
* then index it regardless of its versionstate
Expand Down Expand Up @@ -227,60 +191,58 @@ private String getDatasetOrDataFileSolrEnding(DatasetVersion.VersionState versio
}
}

public IndexResponse indexAllPermissions() {
Collection<SolrInputDocument> docs = new ArrayList<>();

List<DvObjectSolrDoc> definitionPoints = new ArrayList<>();
Map<Long, List<Long>> filesPerDataset = new HashMap<>();
List<DvObject> allExceptFiles = dvObjectService.findAll();
for (DvObject dvObject : allExceptFiles) {
logger.fine("determining definition points for dvobject id " + dvObject.getId());
if (dvObject.isInstanceofDataFile()) {
Long dataset = dvObject.getOwner().getId();
Long datafile = dvObject.getId();

List<Long> files = filesPerDataset.get(dataset);
if (files == null) {
files = new ArrayList<>();
filesPerDataset.put(dataset, files);
}
files.add(datafile);
} else {
definitionPoints.addAll(determineSolrDocs(dvObject));
}
}

List<DvObject> all = allExceptFiles;
for (Map.Entry<Long, List<Long>> filePerDataset : filesPerDataset.entrySet()) {
definitionPoints.addAll(determineSolrDocsForFilesFromDataset(filePerDataset));
for (long fileId : filePerDataset.getValue()) {
DvObject file = dvObjectService.findDvObject(fileId);
if (file != null) {
all.add(file);
}
}
}

for (DvObjectSolrDoc dvObjectSolrDoc : definitionPoints) {
logger.fine("creating solr doc in memory for " + dvObjectSolrDoc.getSolrId());
SolrInputDocument solrInputDocument = SearchUtil.createSolrDoc(dvObjectSolrDoc);
logger.fine("adding to list of docs to index " + dvObjectSolrDoc.getSolrId());
docs.add(solrInputDocument);
}
@Asynchronous
public void asyncIndexAllPermissions() {
logger.info("Starting asynchronous indexing of all permissions");
long startTime = System.currentTimeMillis();

try {
persistToSolr(docs);
/**
* @todo Do we need a separate permissionIndexTime timestamp?
* Probably. Update it here.
*/
for (DvObject dvObject : all) {
dvObjectService.updatePermissionIndexTime(dvObject);

// Get ALL dataverses in the system
List<Long> allDataverseIds = em.createQuery(
"SELECT d.id FROM Dataverse d ORDER BY d.id", Long.class)
.getResultList();

logger.info("Found " + allDataverseIds.size() + " dataverses to index (each will index its datasets and files)");

int processedCount = 0;

// Index each dataverse (which will automatically index all its datasets and files)
for (Long dataverseId : allDataverseIds) {
try {
Dataverse dataverse = dataverseService.find(dataverseId);
if (dataverse == null) {
logger.warning("Dataverse not found: " + dataverseId);
continue;
}

logger.fine("Indexing permissions for Dataverse " + dataverseId +
" (" + dataverse.getName() + ") and all its datasets/files");

// This will index the dataverse itself and all its direct dataset children (with their files)
IndexResponse response = indexPermissionsOnSelfAndChildren(dataverse);
processedCount++;

logger.fine("Indexed Dataverse " + dataverseId + ": " + response.getMessage());

// Clear persistence context periodically to free memory
if (processedCount % 10 == 0) {
em.clear();
logger.info("Processed " + processedCount + "/" + allDataverseIds.size() + " dataverses");
}

} catch (Exception e) {
logger.log(Level.WARNING, "Error indexing permissions for dataverse " + dataverseId, e);
}
}
return new IndexResponse("indexed all permissions");
} catch (SolrServerException | IOException ex) {
return new IndexResponse("problem indexing");

long duration = System.currentTimeMillis() - startTime;
logger.info("Completed asynchronous indexing of all permissions. Processed " +
processedCount + " dataverses (with all their datasets and files) in " + duration + "ms");

} catch (Exception e) {
logger.log(Level.SEVERE, "Error during asynchronous permission indexing", e);
}

}

public IndexResponse indexPermissionsForOneDvObject(DvObject dvObject) {
Expand Down