From ba09db3e69ef492136fb03a4a1527ab9283ef955 Mon Sep 17 00:00:00 2001 From: JoneKone Date: Sat, 25 Oct 2025 15:11:10 +0300 Subject: [PATCH] Refactor flush paths to avoid cache deadlocks --- Ext4Fsd/ext3/generic.c | 142 +++++++++++++++++++++++++---------------- Ext4Fsd/flush.c | 121 ++++++++++++++++++++++------------- Ext4Fsd/shutdown.c | 64 +++++++++++-------- 3 files changed, 201 insertions(+), 126 deletions(-) diff --git a/Ext4Fsd/ext3/generic.c b/Ext4Fsd/ext3/generic.c index 5056c6d..70180d0 100644 --- a/Ext4Fsd/ext3/generic.c +++ b/Ext4Fsd/ext3/generic.c @@ -315,64 +315,96 @@ Ext2FlushRange(IN PEXT2_VCB Vcb, LARGE_INTEGER s, LARGE_INTEGER e) NTSTATUS Ext2FlushVcb(IN PEXT2_VCB Vcb) { - LARGE_INTEGER s = {0}, o; - struct ext4_sb_info *sbi = &Vcb->sbi; - struct rb_node *node; - struct buffer_head *bh; - - if (!IsFlagOn(Vcb->Flags, VCB_GD_LOADED)) { - CcFlushCache(&Vcb->SectionObject, NULL, 0, NULL); - goto errorout; - } - - ASSERT(ExIsResourceAcquiredExclusiveLite(&Vcb->MainResource)); - - __try { - - /* acqurie gd block */ - ExAcquireResourceExclusiveLite(&Vcb->sbi.s_gd_lock, TRUE); - + LARGE_INTEGER s = {0}, o; + struct rb_node *node; + struct buffer_head *bh; + + typedef struct _EXT2_BUSY_RANGE { + LARGE_INTEGER Start; + ULONG Length; + } EXT2_BUSY_RANGE, *PEXT2_BUSY_RANGE; + + PEXT2_BUSY_RANGE BusyRanges = NULL; + ULONG BusyCount = 0; + ULONG BusyIndex = 0; + + if (!IsFlagOn(Vcb->Flags, VCB_GD_LOADED)) { + CcFlushCache(&Vcb->SectionObject, NULL, 0, NULL); + goto errorout; + } + + __try { + + /* acquire gd lock */ + ExAcquireResourceExclusiveLite(&Vcb->sbi.s_gd_lock, TRUE); + /* acquire bd lock to avoid bh creation */ ExAcquireResourceExclusiveLite(&Vcb->bd.bd_bh_lock, TRUE); - /* drop unused bh */ - Ext2DropBH(Vcb); - - /* flush volume with all outstanding bh skipped */ - - node = rb_first(&Vcb->bd.bd_bh_root); - while (node) { - - bh = container_of(node, struct buffer_head, b_rb_node); - node = rb_next(node); - - o.QuadPart = bh->b_blocknr << BLOCK_BITS; - 
ASSERT(o.QuadPart >= s.QuadPart); - - if (o.QuadPart == s.QuadPart) { - s.QuadPart = s.QuadPart + bh->b_size; - continue; - } - - if (o.QuadPart > s.QuadPart) { - Ext2FlushRange(Vcb, s, o); - s.QuadPart = (bh->b_blocknr << BLOCK_BITS) + bh->b_size; - continue; - } - } - - o = Vcb->PartitionInformation.PartitionLength; - Ext2FlushRange(Vcb, s, o); - - } __finally { - - ExReleaseResourceLite(&Vcb->bd.bd_bh_lock); - ExReleaseResourceLite(&Vcb->sbi.s_gd_lock); - } - -errorout: - return STATUS_SUCCESS; -} + /* drop unused bh */ + Ext2DropBH(Vcb); + + /* count outstanding bh entries */ + for (node = rb_first(&Vcb->bd.bd_bh_root); + node != NULL; + node = rb_next(node)) { + BusyCount++; + } + + if (BusyCount) { + BusyRanges = Ext2AllocatePool( + PagedPool, + BusyCount * sizeof(EXT2_BUSY_RANGE), + 'rF2E'); + if (BusyRanges) { + for (node = rb_first(&Vcb->bd.bd_bh_root); + node != NULL && BusyIndex < BusyCount; + node = rb_next(node)) { + + bh = container_of(node, struct buffer_head, b_rb_node); + BusyRanges[BusyIndex].Start.QuadPart = bh->b_blocknr << BLOCK_BITS; + BusyRanges[BusyIndex].Length = bh->b_size; + BusyIndex++; + } + } else { + BusyCount = 0; + } + } + + } __finally { + + ExReleaseResourceLite(&Vcb->bd.bd_bh_lock); + ExReleaseResourceLite(&Vcb->sbi.s_gd_lock); + } + + s.QuadPart = 0; + + if (BusyRanges && BusyIndex) { + ULONG i; + for (i = 0; i < BusyIndex; i++) { + o = BusyRanges[i].Start; + if (o.QuadPart > s.QuadPart) { + Ext2FlushRange(Vcb, s, o); + s.QuadPart = BusyRanges[i].Start.QuadPart + BusyRanges[i].Length; + } else { + LONGLONG BusyEnd = BusyRanges[i].Start.QuadPart + BusyRanges[i].Length; + if (BusyEnd > s.QuadPart) { + s.QuadPart = BusyEnd; + } + } + } + } + + o = Vcb->PartitionInformation.PartitionLength; + Ext2FlushRange(Vcb, s, o); + + if (BusyRanges) { + Ext2FreePool(BusyRanges, 'rF2E'); + } + +errorout: + return STATUS_SUCCESS; +} BOOLEAN Ext2SaveGroup( diff --git a/Ext4Fsd/flush.c b/Ext4Fsd/flush.c index 5da995e..477d7d1 100644 --- 
a/Ext4Fsd/flush.c +++ b/Ext4Fsd/flush.c @@ -55,12 +55,14 @@ Ext2FlushFile ( IN PEXT2_CCB Ccb ) { - IO_STATUS_BLOCK IoStatus = {0}; - - ASSERT(Fcb != NULL); - ASSERT((Fcb->Identifier.Type == EXT2FCB) && - (Fcb->Identifier.Size == sizeof(EXT2_FCB))); - + IO_STATUS_BLOCK IoStatus = {0}; + SECTION_OBJECT_POINTERS *SectionObject = NULL; + BOOLEAN ResourceReleased = FALSE; + + ASSERT(Fcb != NULL); + ASSERT((Fcb->Identifier.Type == EXT2FCB) && + (Fcb->Identifier.Size == sizeof(EXT2_FCB))); + __try { /* do nothing if target fie was deleted */ @@ -83,24 +85,36 @@ Ext2FlushFile ( } } - if (IsDirectory(Fcb)) { - IoStatus.Status = STATUS_SUCCESS; - __leave; - } - - DEBUG(DL_INF, ( "Ext2FlushFile: Flushing File Inode=%xh %S ...\n", - Fcb->Inode->i_ino, Fcb->Mcb->ShortName.Buffer)); - - CcFlushCache(&(Fcb->SectionObject), NULL, 0, &IoStatus); - ClearFlag(Fcb->Flags, FCB_FILE_MODIFIED); - - } __finally { - - /* do cleanup here */ - } - - return IoStatus.Status; -} + if (IsDirectory(Fcb)) { + IoStatus.Status = STATUS_SUCCESS; + __leave; + } + + DEBUG(DL_INF, ( "Ext2FlushFile: Flushing File Inode=%xh %S ...\n", + Fcb->Inode->i_ino, Fcb->Mcb->ShortName.Buffer)); + + SectionObject = &Fcb->SectionObject; + + ExReleaseResourceLite(&Fcb->MainResource); + ResourceReleased = TRUE; + + CcFlushCache(SectionObject, NULL, 0, &IoStatus); + + } __finally { + + /* do cleanup here */ + + if (ResourceReleased) { + ExAcquireResourceExclusiveLite(&Fcb->MainResource, TRUE); + + if (NT_SUCCESS(IoStatus.Status)) { + ClearFlag(Fcb->Flags, FCB_FILE_MODIFIED); + } + } + } + + return IoStatus.Status; +} NTSTATUS Ext2FlushFiles( @@ -153,7 +167,8 @@ Ext2Flush (IN PEXT2_IRP_CONTEXT IrpContext) PDEVICE_OBJECT DeviceObject = NULL; - BOOLEAN MainResourceAcquired = FALSE; + BOOLEAN MainResourceAcquired = FALSE; + BOOLEAN VcbResourceReleased = FALSE; __try { @@ -202,22 +217,35 @@ Ext2Flush (IN PEXT2_IRP_CONTEXT IrpContext) DEBUG(DL_INF, ("Ext2Flush-pre: total mcb records=%u\n", 
FsRtlNumberOfRunsInLargeMcb(&Vcb->Extents))); - if (FcbOrVcb->Identifier.Type == EXT2VCB) { - - Ext2VerifyVcb(IrpContext, Vcb); - Status = Ext2FlushFiles(IrpContext, (PEXT2_VCB)(FcbOrVcb), FALSE); - if (NT_SUCCESS(Status)) { - __leave; - } - - /* TO INVESTIGATE: Ext2FlushFiles will always return STATUS_SUCCESS so Ext2FlushVolume will never be called? */ - - Status = Ext2FlushVolume(IrpContext, (PEXT2_VCB)(FcbOrVcb), FALSE); - - if (NT_SUCCESS(Status) && IsFlagOn(Vcb->Volume->Flags, FO_FILE_MODIFIED)) { - ClearFlag(Vcb->Volume->Flags, FO_FILE_MODIFIED); - } - + if (FcbOrVcb->Identifier.Type == EXT2VCB) { + + Ext2VerifyVcb(IrpContext, Vcb); + ExReleaseResourceLite(&FcbOrVcb->MainResource); + VcbResourceReleased = TRUE; + + Status = Ext2FlushFiles(IrpContext, (PEXT2_VCB)(FcbOrVcb), FALSE); + + ExAcquireResourceExclusiveLite(&FcbOrVcb->MainResource, TRUE); + VcbResourceReleased = FALSE; + + if (NT_SUCCESS(Status)) { + __leave; + } + + /* TO INVESTIGATE: Ext2FlushFiles will always return STATUS_SUCCESS so Ext2FlushVolume will never be called? 
*/ + + ExReleaseResourceLite(&FcbOrVcb->MainResource); + VcbResourceReleased = TRUE; + + Status = Ext2FlushVolume(IrpContext, (PEXT2_VCB)(FcbOrVcb), FALSE); + + ExAcquireResourceExclusiveLite(&FcbOrVcb->MainResource, TRUE); + VcbResourceReleased = FALSE; + + if (NT_SUCCESS(Status) && IsFlagOn(Vcb->Volume->Flags, FO_FILE_MODIFIED)) { + ClearFlag(Vcb->Volume->Flags, FO_FILE_MODIFIED); + } + } else if (FcbOrVcb->Identifier.Type == EXT2FCB) { Fcb = (PEXT2_FCB)(FcbOrVcb); @@ -236,9 +264,14 @@ Ext2Flush (IN PEXT2_IRP_CONTEXT IrpContext) } __finally { - if (MainResourceAcquired) { - ExReleaseResourceLite(&FcbOrVcb->MainResource); - } + if (VcbResourceReleased) { + ExAcquireResourceExclusiveLite(&FcbOrVcb->MainResource, TRUE); + VcbResourceReleased = FALSE; + } + + if (MainResourceAcquired) { + ExReleaseResourceLite(&FcbOrVcb->MainResource); + } if (!IrpContext->ExceptionInProgress) { diff --git a/Ext4Fsd/shutdown.c b/Ext4Fsd/shutdown.c index f331b5d..0a9f272 100644 --- a/Ext4Fsd/shutdown.c +++ b/Ext4Fsd/shutdown.c @@ -60,33 +60,43 @@ Ext2ShutDown (IN PEXT2_IRP_CONTEXT IrpContext) Vcb = CONTAINING_RECORD(ListEntry, EXT2_VCB, Next); - if (ExAcquireResourceExclusiveLite( - &Vcb->MainResource, - TRUE )) { - - if (IsMounted(Vcb)) { - - /* update fs write time */ - KeQuerySystemTime(&SysTime); - Ext2TimeToSecondsSince1970(&SysTime, &LinuxTime.LowPart, &LinuxTime.HighPart); - Vcb->SuperBlock->s_wtime = LinuxTime.LowPart; - Vcb->SuperBlock->s_wtime_hi = (UCHAR)LinuxTime.HighPart; - - /* update mount count */ - Vcb->SuperBlock->s_mnt_count++; - Ext2SaveSuper(IrpContext, Vcb); - - /* flush dirty cache for all files */ - Ext2FlushFiles(IrpContext, Vcb, TRUE); - - /* flush volume stream's cache to disk */ - Ext2FlushVolume(IrpContext, Vcb, TRUE); - - /* send shutdown request to underlying disk */ - Ext2DiskShutDown(Vcb); - } - - ExReleaseResourceLite(&Vcb->MainResource); + if (ExAcquireResourceExclusiveLite( + &Vcb->MainResource, + TRUE )) { + + if (IsMounted(Vcb)) { + + BOOLEAN 
VcbMainReleased = FALSE; + + /* update fs write time */ + KeQuerySystemTime(&SysTime); + Ext2TimeToSecondsSince1970(&SysTime, &LinuxTime.LowPart, &LinuxTime.HighPart); + Vcb->SuperBlock->s_wtime = LinuxTime.LowPart; + Vcb->SuperBlock->s_wtime_hi = (UCHAR)LinuxTime.HighPart; + + /* update mount count */ + Vcb->SuperBlock->s_mnt_count++; + Ext2SaveSuper(IrpContext, Vcb); + + ExReleaseResourceLite(&Vcb->MainResource); + VcbMainReleased = TRUE; + + /* flush dirty cache for all files */ + Ext2FlushFiles(IrpContext, Vcb, TRUE); + + /* flush volume stream's cache to disk */ + Ext2FlushVolume(IrpContext, Vcb, TRUE); + + if (VcbMainReleased) { + ExAcquireResourceExclusiveLite(&Vcb->MainResource, TRUE); + VcbMainReleased = FALSE; + } + + /* send shutdown request to underlying disk */ + Ext2DiskShutDown(Vcb); + } + + ExReleaseResourceLite(&Vcb->MainResource); } }