diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Index.java b/src/main/java/edu/harvard/iq/dataverse/api/Index.java index f83506c7e27..39e93d445b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Index.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Index.java @@ -372,8 +372,8 @@ public Response indexMod(@QueryParam("partitions") long partitions, @QueryParam( @GET @Path("perms") public Response indexAllPermissions() { - IndexResponse indexResponse = solrIndexService.indexAllPermissions(); - return ok(indexResponse.getMessage()); + solrIndexService.asyncIndexAllPermissions(); + return ok("Asynchronous indexing of all permissions has been started. Check the server logs for progress."); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index dfcb61438e6..65997a1bc7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -27,6 +27,7 @@ import java.util.logging.Logger; import java.util.stream.Stream; +import jakarta.ejb.Asynchronous; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; @@ -103,20 +104,6 @@ public List determineSolrDocs(DvObject dvObject) { return solrDocs; } - private List determineSolrDocsForFilesFromDataset(Map.Entry> datasetHash) { - List emptyList = new ArrayList<>(); - List solrDocs = emptyList; - DvObject dvObject = dvObjectService.findDvObject(datasetHash.getKey()); - if (dvObject == null) { - return emptyList; - } - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - solrDocs.addAll(constructDatafileSolrDocsFromDataset(dataset)); - } - return solrDocs; - } - /** * @todo should this method return a List? The equivalent methods for * datasets and files return lists. @@ -155,29 +142,6 @@ private DvObjectSolrDoc constructDatafileSolrDoc(DataFileProxy fileProxy, List constructDatafileSolrDocsFromDataset(Dataset dataset) { - List datafileSolrDocs = new ArrayList<>(); - for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) { - List perms = new ArrayList<>(); - if (datasetVersionFileIsAttachedTo.isReleased()) { - perms.add(IndexServiceBean.getPublicGroupString()); - } else { - perms = searchPermissionsService.findDatasetVersionPerms(datasetVersionFileIsAttachedTo); - } - - for (FileMetadata fileMetadata : datasetVersionFileIsAttachedTo.getFileMetadatas()) { - Long fileId = fileMetadata.getDataFile().getId(); - String solrIdStart = IndexServiceBean.solrDocIdentifierFile + fileId; - String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState()); - String solrId = solrIdStart + solrIdEnd; - DvObjectSolrDoc dataFileSolrDoc = new DvObjectSolrDoc(fileId.toString(), solrId, datasetVersionFileIsAttachedTo.getId(), fileMetadata.getLabel(), perms); - logger.finest("adding fileid " + fileId); - datafileSolrDocs.add(dataFileSolrDoc); - } - } - return datafileSolrDocs; - } - /** Find the versions to index. The overall logic is * If there is only one version, or no released version (all non-draft versions are deaccessioned) * then index it regardless of it's versionstate @@ -227,60 +191,58 @@ private String getDatasetOrDataFileSolrEnding(DatasetVersion.VersionState versio } } - public IndexResponse indexAllPermissions() { - Collection docs = new ArrayList<>(); - - List definitionPoints = new ArrayList<>(); - Map> filesPerDataset = new HashMap<>(); - List allExceptFiles = dvObjectService.findAll(); - for (DvObject dvObject : allExceptFiles) { - logger.fine("determining definition points for dvobject id " + dvObject.getId()); - if (dvObject.isInstanceofDataFile()) { - Long dataset = dvObject.getOwner().getId(); - Long datafile = dvObject.getId(); - - List files = filesPerDataset.get(dataset); - if (files == null) { - files = new ArrayList<>(); - filesPerDataset.put(dataset, files); - } - files.add(datafile); - } else { - definitionPoints.addAll(determineSolrDocs(dvObject)); - } - } - - List all = allExceptFiles; - for (Map.Entry> filePerDataset : filesPerDataset.entrySet()) { - definitionPoints.addAll(determineSolrDocsForFilesFromDataset(filePerDataset)); - for (long fileId : filePerDataset.getValue()) { - DvObject file = dvObjectService.findDvObject(fileId); - if (file != null) { - all.add(file); - } - } - } - - for (DvObjectSolrDoc dvObjectSolrDoc : definitionPoints) { - logger.fine("creating solr doc in memory for " + dvObjectSolrDoc.getSolrId()); - SolrInputDocument solrInputDocument = SearchUtil.createSolrDoc(dvObjectSolrDoc); - logger.fine("adding to list of docs to index " + dvObjectSolrDoc.getSolrId()); - docs.add(solrInputDocument); - } + @Asynchronous + public void asyncIndexAllPermissions() { + logger.info("Starting asynchronous indexing of all permissions"); + long startTime = System.currentTimeMillis(); + try { - persistToSolr(docs); - /** - * @todo Do we need a separate permissionIndexTime timestamp? - * Probably. Update it here. - */ - for (DvObject dvObject : all) { - dvObjectService.updatePermissionIndexTime(dvObject); + + // Get ALL dataverses in the system + List allDataverseIds = em.createQuery( + "SELECT d.id FROM Dataverse d ORDER BY d.id", Long.class) + .getResultList(); + + logger.info("Found " + allDataverseIds.size() + " dataverses to index (each will index its datasets and files)"); + + int processedCount = 0; + + // Index each dataverse (which will automatically index all its datasets and files) + for (Long dataverseId : allDataverseIds) { + try { + Dataverse dataverse = dataverseService.find(dataverseId); + if (dataverse == null) { + logger.warning("Dataverse not found: " + dataverseId); + continue; + } + + logger.fine("Indexing permissions for Dataverse " + dataverseId + + " (" + dataverse.getName() + ") and all its datasets/files"); + + // This will index the dataverse itself and all its direct dataset children (with their files) + IndexResponse response = indexPermissionsOnSelfAndChildren(dataverse); + processedCount++; + + logger.fine("Indexed Dataverse " + dataverseId + ": " + response.getMessage()); + + // Clear persistence context periodically to free memory + if (processedCount % 10 == 0) { + em.clear(); + logger.info("Processed " + processedCount + "/" + allDataverseIds.size() + " dataverses"); + } + + } catch (Exception e) { + logger.log(Level.WARNING, "Error indexing permissions for dataverse " + dataverseId, e); + } } - return new IndexResponse("indexed all permissions"); - } catch (SolrServerException | IOException ex) { - return new IndexResponse("problem indexing"); + + long duration = System.currentTimeMillis() - startTime; + logger.info("Completed asynchronous indexing of all permissions. Processed " + + processedCount + " dataverses (with all their datasets and files) in " + duration + "ms"); + + } catch (Exception e) { + logger.log(Level.SEVERE, "Error during asynchronous permission indexing", e); } - } public IndexResponse indexPermissionsForOneDvObject(DvObject dvObject) {