Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions backend/audit/intake_to_dissemination.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import logging
import pytz
from django.db import IntegrityError
from hashlib import sha1
from dateutil.parser import parse
from datetime import datetime

from django.forms.models import model_to_dict
from audit.intakelib.transforms.xform_resize_award_references import _format_reference
from audit.models.constants import RESUBMISSION_STATUS
from audit.utils import Util
Expand All @@ -20,6 +24,7 @@
SecondaryAuditor,
)

from dissemination.summary_reports import field_name_ordered

logger = logging.getLogger(__name__)

Expand All @@ -29,6 +34,61 @@ def omit(remove, d) -> dict:
return {k: d[k] for k in d if k not in remove}


# Date-type things need to be converted from datetimes to dates.
def convert_to_string(o):
    """Render a value as a string for hashing.

    Datetimes are reduced to their date portion, None becomes the empty
    string, and everything else uses its default string form.
    """
    if isinstance(o, datetime):
        return f"{o.date()}"
    return "" if o is None else f"{o}"


def hashable_types(o):
    """Debug passthrough: log a value's type and the value, then return it unchanged.

    NOTE(review): nothing visible in this module calls this helper — it looks
    like leftover instrumentation from developing the hashing code. Confirm
    before removing.
    """
    # Lazy %-formatting so the message is only built when INFO is enabled.
    logger.info("%s %s", type(o), o)
    return o


# This is used to calculate a hash of the data for both internal and external integrity.
def hash_dissemination_object(obj):
    """Compute a SHA1 hex digest over the values of ``obj.HASH_FIELDS``.

    Given a Django model instance, the values of the fields named in the
    model's ``HASH_FIELDS`` are stringified, sorted by field name, and
    concatenated (keys excluded) before hashing.

    See https://stackoverflow.com/a/22003440 for reference. It isn't obvious
    how to do this well, and in particular, while leaving the JSON object
    keys out of the hash.

    Why strip the keys? We don't want our field names to impact the hashing
    value. We want to make sure the values in the object, in a consistent
    sort order, are what get hashed. If we change field names, yes, the hash
    will change. But our object field names are very consistent.

    It is unclear if we're going to get consistent, cross-language hashing
    here. It depends on how Python chooses to represent values as strings.
    If we don't quite get this right the first time, it will have to be
    improved, and the full dataset re-disseminated.
    """
    # 1. Get the fields we're going to hash from the object.
    fields_to_hash = obj.HASH_FIELDS
    # 2. We are given a Django object. Convert it to a dictionary.
    d = model_to_dict(obj)
    # 3. Sort by field name (keys are unique, so tuple comparison never
    #    falls through to the values) and keep only the fields we hash,
    #    discarding the keys themselves.
    just_values = [
        convert_to_string(value)
        for key, value in sorted(d.items())
        if key in fields_to_hash
    ]
    # 4. Append the values with no separator. Strings must be encoded to
    #    bytes before hashing; non-ASCII characters are dropped so the
    #    digest does not depend on a text encoding choice.
    smooshed = "".join(just_values).strip().encode("ascii", "ignore")
    # 5. This is now hashable. Run a SHA1.
    return sha1(smooshed).hexdigest()


class IntakeToDissemination(object):
DISSEMINATION = "dissemination"
PRE_CERTIFICATION_REVIEW = "pre_certification_review"
Expand Down Expand Up @@ -75,6 +135,10 @@ def save_dissemination_objects(self):
for key, object_list in self.loaded_objects.items():
try:
if object_list:
                    # Add the hashes at the last possible moment.
for obj in object_list:
sha = hash_dissemination_object(obj)
obj.hash = sha
model_class = type(object_list[0])
model_class.objects.bulk_create(object_list)
except IntegrityError as e:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Generated by Django 5.2.3 on 2025-09-12 11:12

from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds nullable "hash" columns used for data-integrity digests:
    # one on SingleAuditChecklist (a hash of the row's data) and one on
    # SingleAuditReportFile (a hash of the report file). Both are
    # blank/null so existing rows remain valid; values are populated
    # later (at dissemination time).

    dependencies = [
        ("audit", "0029_alter_submissionevent_event"),
    ]

    operations = [
        migrations.AddField(
            model_name="singleauditchecklist",
            name="hash",
            field=models.CharField(
                blank=True, help_text="A hash of the row", null=True
            ),
        ),
        migrations.AddField(
            model_name="singleauditreportfile",
            name="hash",
            field=models.CharField(
                blank=True, help_text="A hash of the report", null=True
            ),
        ),
    ]
12 changes: 12 additions & 0 deletions backend/audit/models/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,18 @@ class SingleAuditReportFile(models.Model):
component_page_numbers = models.JSONField(
blank=True, null=True, validators=[validate_component_page_numbers]
)
# TODO: This value probably wants to be calculated at the point that the file is uploaded.
# If we do it on save(), it means we have to pull the object from S3 for hashing. It might
# be that we want to use the S3 checksum, however, instead of computing it ourselves:
# https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
    # That would have the benefit of storing the S3 hash in our application, and being something
# we can then verify between the DB and the store. However, it would be good if users could verify
# the checksum on the PDFs as well.
hash = models.CharField(
help_text="A hash of the report",
blank=True,
null=True,
)

def save(self, *args, **kwargs):
report_id = self.sac.report_id
Expand Down
19 changes: 19 additions & 0 deletions backend/audit/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,20 +255,24 @@ def disseminate(self):
intake_to_dissem.load_all()
intake_to_dissem.save_dissemination_objects()
if intake_to_dissem.errors:
logger.info(f"I2D ERRORS: {intake_to_dissem.errors}")
return {"errors": intake_to_dissem.errors}
except TransactionManagementError as err:
# We want to re-raise this to catch at the view level because we
# think it's due to a race condition where the user's submission
# has been disseminated successfully; see
# https://github.com/GSA-TTS/FAC/issues/3347
logger.error("Possible race in disseminate")
raise err
# TODO: figure out what narrower exceptions to catch here
except Exception as err:
logger.error(f"Unknown error in disseminate: {err}")
return {"errors": [err]}

return None

def redisseminate(self):

named_models = {
"AdditionalEins": AdditionalEin,
"AdditionalUeis": AdditionalUei,
Expand Down Expand Up @@ -585,6 +589,21 @@ def get_statuses(self) -> type[STATUS]:
blank=True, null=True, help_text="Resubmission JSON structure"
)

# Data hash for integrity
# This can be empty/null while data is being created, but it must be not-null
# at the point of dissemination. It should be the case that it is a hash of data, not fields,
# and the hash should match when calculated both on the internal table/data as well as the external data.
# That is, it should be possible to verify the hash via the API. Therefore, we compute this after
# submission, or as part of intake->dissemination. We store it in the internal table because it is then
# something that we expect to NOT CHANGE over time. (Resubmission metadata is not part of the hash.) Why?
# Because it is internal/administrative data tracking the connectedness of audits, not the audit data itself.
# (A later group/team may decide this is an incorrect decision.)
hash = models.CharField(
help_text="A hash of the row",
blank=True,
null=True,
)

def validate_full(self):
"""
Full validation, intended for use when the user indicates that the
Expand Down
5 changes: 4 additions & 1 deletion backend/audit/views/submission_progress_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,10 @@ def get(self, request, *args, **kwargs):
shaped_audit = None
audit_subcheck = None

_compare_progress_check(subcheck, audit_subcheck)
# MCJ 20250911 this should have been gated behind an `if`.
if audit:
_compare_progress_check(subcheck, audit_subcheck)

# Update with the view-specific info from SECTIONS_BASE:
for key, value in SECTIONS_BASE.items():
subcheck[key] = subcheck[key] | value
Expand Down
45 changes: 34 additions & 11 deletions backend/dissemination/api/api_v1_1_0/create_views.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ create view api_v1_1_0.findings_text as
gen.report_id,
gen.auditee_uei,
gen.audit_year,
gen.fac_accepted_date,
---
ft.finding_ref_number,
ft.contains_chart_or_table,
ft.finding_text
ft.finding_text,
ft.hash
from
dissemination_findingtext ft,
dissemination_general gen
Expand All @@ -30,8 +33,10 @@ create view api_v1_1_0.additional_ueis as
gen.report_id,
gen.auditee_uei,
gen.audit_year,
gen.fac_accepted_date,
---
uei.additional_uei
uei.additional_uei,
uei.hash
from
dissemination_general gen,
dissemination_additionaluei uei
Expand All @@ -48,6 +53,8 @@ create view api_v1_1_0.findings as
gen.report_id,
gen.auditee_uei,
gen.audit_year,
gen.fac_accepted_date,
---
finding.award_reference,
finding.reference_number,
finding.is_material_weakness,
Expand All @@ -58,7 +65,8 @@ create view api_v1_1_0.findings as
finding.is_questioned_costs,
finding.is_repeat_finding,
finding.is_significant_deficiency,
finding.type_requirement
finding.type_requirement,
finding.hash
from
dissemination_finding finding,
dissemination_general gen
Expand All @@ -75,6 +83,7 @@ create view api_v1_1_0.federal_awards as
award.report_id,
gen.auditee_uei,
gen.audit_year,
gen.fac_accepted_date,
---
award.award_reference,
award.federal_agency_prefix,
Expand All @@ -94,7 +103,8 @@ create view api_v1_1_0.federal_awards as
award.audit_report_type,
award.findings_count,
award.is_passthrough_award,
award.passthrough_amount
award.passthrough_amount,
award.hash
from
dissemination_federalaward award,
dissemination_general gen
Expand All @@ -112,10 +122,12 @@ create view api_v1_1_0.corrective_action_plans as
gen.report_id,
gen.auditee_uei,
gen.audit_year,
gen.fac_accepted_date,
---
ct.finding_ref_number,
ct.contains_chart_or_table,
ct.planned_action
ct.planned_action,
ct.hash
from
dissemination_CAPText ct,
dissemination_General gen
Expand All @@ -135,13 +147,15 @@ create view api_v1_1_0.notes_to_sefa as
gen.report_id,
gen.auditee_uei,
gen.audit_year,
gen.fac_accepted_date,
---
note.note_title as title,
note.accounting_policies,
note.is_minimis_rate_used,
note.rate_explained,
note.content,
note.contains_chart_or_table
note.contains_chart_or_table,
note.hash
from
dissemination_general gen,
dissemination_note note
Expand All @@ -161,10 +175,12 @@ create view api_v1_1_0.passthrough as
gen.report_id,
gen.auditee_uei,
gen.audit_year,
gen.fac_accepted_date,
---
pass.award_reference,
pass.passthrough_id,
pass.passthrough_name
pass.passthrough_name,
pass.hash
from
dissemination_general as gen,
dissemination_passthrough as pass
Expand Down Expand Up @@ -254,7 +270,8 @@ create view api_v1_1_0.general as
CASE EXISTS(SELECT aud.report_id FROM dissemination_secondaryauditor aud WHERE aud.report_id = gen.report_id)
WHEN FALSE THEN 'No'
ELSE 'Yes'
END AS is_secondary_auditors
END AS is_secondary_auditors,
gen.hash
from
dissemination_general gen
order by gen.id
Expand All @@ -268,6 +285,7 @@ create view api_v1_1_0.secondary_auditors as
gen.report_id,
gen.auditee_uei,
gen.audit_year,
gen.fac_accepted_date,
---
sa.auditor_ein,
sa.auditor_name,
Expand All @@ -278,7 +296,8 @@ create view api_v1_1_0.secondary_auditors as
sa.address_street,
sa.address_city,
sa.address_state,
sa.address_zipcode
sa.address_zipcode,
sa.hash
from
dissemination_General gen,
dissemination_SecondaryAuditor sa
Expand All @@ -292,8 +311,10 @@ create view api_v1_1_0.additional_eins as
gen.report_id,
gen.auditee_uei,
gen.audit_year,
gen.fac_accepted_date,
---
ein.additional_ein
ein.additional_ein,
ein.hash
from
dissemination_general gen,
dissemination_additionalein ein
Expand All @@ -307,11 +328,13 @@ create view api_v1_1_0.resubmission as
gen.report_id,
gen.auditee_uei,
gen.audit_year,
gen.fac_accepted_date,
---
resub.version,
resub.status,
resub.previous_report_id,
resub.next_report_id
resub.next_report_id,
resub.hash
from
dissemination_general gen,
dissemination_resubmission resub
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def handle(self, *args, **_kwargs):
try:
sac = SingleAuditChecklist.objects.get(report_id=report_id)
sac.redisseminate()
logger.info(f"Redisseminated: {report_id}")
exit(0)
except SingleAuditChecklist.DoesNotExist:
logger.info(f"No report with report_id found: {report_id}")
Expand Down
Loading
Loading