diff --git a/datastore/db/management/commands/create_data_package.py b/datastore/db/management/commands/create_data_package.py index 57833eae..1cc48c8a 100644 --- a/datastore/db/management/commands/create_data_package.py +++ b/datastore/db/management/commands/create_data_package.py @@ -55,6 +55,7 @@ def handle(self, *args, **options): data_all_file = "%s/data_all.json" % options["dir"] recipients_file = "%s/recipients.jl" % options["dir"] funders_file = "%s/funders.jl" % options["dir"] + publishers_file = "%s/publishers.jl" % options["dir"] with open(funders_file, "w") as funders_fp: create_orgs_list("funder", funders_fp) @@ -62,6 +63,9 @@ def handle(self, *args, **options): with open(recipients_file, "w") as recipients_fp: create_orgs_list("recipient", recipients_fp) + with open(publishers_file, "w") as publishers_fp: + create_orgs_list("publisher", publishers_fp) + def flatten_grant(in_grant): """Add the additional_data inside grant object""" out_grant = {} diff --git a/datastore/db/management/commands/manage_entities_data.py b/datastore/db/management/commands/manage_entities_data.py index 3ab0c654..83821c96 100644 --- a/datastore/db/management/commands/manage_entities_data.py +++ b/datastore/db/management/commands/manage_entities_data.py @@ -87,19 +87,35 @@ def create_orgs_list(entity_type, output=sys.stdout): entity_type: publisher, recipient, funder output: io """ + + extra_select = "" + end_clause = "" + + if entity_type == "publisher": + extra_select = "db_publisher.prefix," + # Limit the publisher entities to only the latest ones from the datagetter + end_clause = "WHERE db_publisher.getter_run_id = (SELECT id FROM db_getterrun ORDER BY datetime desc LIMIT 1)" + + # TODO To be removed when GN switch over to publisher org data + if entity_type == "funder": + extra_select = """ + db_publisher.name as "publisherName", + db_publisher.prefix as "publisherPrefix", + """ + end_clause = "LEFT OUTER JOIN db_publisher on db_funder.org_id = db_publisher.org_id OR db_publisher.org_id = ANY(db_funder.non_primary_org_ids)" + query = f""" SELECT DISTINCT db_{entity_type}.org_id as "id", + {extra_select} db_{entity_type}.non_primary_org_ids as "non_primary_org_ids", db_{entity_type}.name as name, db_{entity_type}."aggregate" as "aggregate", db_{entity_type}.additional_data as "additionalData", - additional_data_orginfocache.data as "ftcData", - db_publisher.name as "publisherName", - db_publisher.prefix as "publisherPrefix" + additional_data_orginfocache.data as "ftcData" FROM db_{entity_type} LEFT OUTER JOIN additional_data_orginfocache on db_{entity_type}.org_id = additional_data_orginfocache.org_id - LEFT OUTER JOIN db_publisher on db_{entity_type}.org_id = db_publisher.org_id OR db_publisher.org_id = ANY(db_{entity_type}.non_primary_org_ids) + {end_clause} """ def parse_data_in_result(result, col_types): @@ -132,7 +148,7 @@ def add_arguments(self, parser): nargs="+", action="store", dest="entity_type", - help="The entity type to output. One of: recipient, funder", + help="The entity type to output. One of: recipient, funder or publisher", ) parser.add_argument( @@ -149,7 +165,7 @@ def handle(self, *args, **options): if options.get("entity_type"): for entity_type in options["entity_type"]: - if entity_type != "recipient" and entity_type != "funder": + if entity_type not in ["recipient", "funder", "publisher"]: raise CommandError(f"{entity_type} is an unknown entity type") create_orgs_list(entity_type) diff --git a/datastore/db/migrations/0025_publisher_non_primary_org_ids_and_more.py b/datastore/db/migrations/0025_publisher_non_primary_org_ids_and_more.py new file mode 100644 index 00000000..df357fa0 --- /dev/null +++ b/datastore/db/migrations/0025_publisher_non_primary_org_ids_and_more.py @@ -0,0 +1,35 @@ +# Generated by Django 4.2.21 on 2025-10-21 13:36 + +import django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("db", "0024_auto_20240610_1847"), + ] + + operations = [ + migrations.AddField( + model_name="publisher", + name="non_primary_org_ids", + field=django.contrib.postgres.fields.ArrayField( + base_field=models.TextField(), default=list, size=None + ), + ), + migrations.AlterField( + model_name="funder", + name="non_primary_org_ids", + field=django.contrib.postgres.fields.ArrayField( + base_field=models.TextField(), default=list, size=None + ), + ), + migrations.AlterField( + model_name="recipient", + name="non_primary_org_ids", + field=django.contrib.postgres.fields.ArrayField( + base_field=models.TextField(), default=list, size=None + ), + ), + ] diff --git a/datastore/db/models.py b/datastore/db/models.py index 1af78ac5..a0c50686 100644 --- a/datastore/db/models.py +++ b/datastore/db/models.py @@ -252,6 +252,7 @@ class Meta: PUBLISHER = "PUBLISHER" SOURCES_CHOICES = [(GRANT, "Grant"), (PUBLISHER, "Publisher")] source = models.TextField(choices=SOURCES_CHOICES) + non_primary_org_ids = ArrayField(models.TextField(), default=list) def __str__(self): return "%s %s)" % (self.org_id, self.name) @@ -400,8 +401,6 @@ class Meta: Index(fields=["org_id", "name"]), ] - non_primary_org_ids = ArrayField(models.TextField()) - class Funder(Entity): class Meta: @@ -413,8 +412,6 @@ class Meta: Index(fields=["org_id", "name"]), ] - non_primary_org_ids = ArrayField(models.TextField()) - class Grant(models.Model): grant_id = models.CharField(max_length=300)