From 13ac4a1a930abd84559701da096a36d84f948b60 Mon Sep 17 00:00:00 2001 From: Kui Wen Date: Thu, 9 Oct 2025 05:30:25 +0000 Subject: [PATCH] x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem commit 594902c986e269660302f09df9ec4bf1cf017b77 upstream. In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain structure representing a NUMA node relies on the cacheinfo interface (rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map) for monitoring. The L3 cache information of a SNC NUMA node determines which domains are summed for the "top level" L3-scoped events. rdt_mon_domain::ci is initialized using the first online CPU of a NUMA node. When this CPU goes offline, its shared_cpu_map is cleared to contain only the offline CPU itself. Subsequently, attempting to read counters via smp_call_on_cpu(offline_cpu) fails (and error ignored), returning zero values for "top-level events" without any error indication. Replace the cacheinfo references in struct rdt_mon_domain and struct rmid_read with the cacheinfo ID (a unique identifier for the L3 cache). rdt_domain_hdr::cpu_mask contains the online CPUs associated with that domain. When reading "top-level events", select a CPU from rdt_domain_hdr::cpu_mask and utilize its L3 shared_cpu_map to determine valid CPUs for reading RMID counter via the MSR interface. Considering all CPUs associated with the L3 cache improves the chances of picking a housekeeping CPU on which the counter reading work can be queued, avoiding an unnecessary IPI. Intel-SIG: commit 594902c986e x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem backport to RDT driver for CWF Fixes: 328ea68874642 ("x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files") Signed-off-by: Qinyun Tan Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Tony Luck Link: https://lore.kernel.org/20250530182053.37502-2-qinyuntan@linux.alibaba.com Signed-off-by: Kui Wen --- arch/x86/kernel/cpu/resctrl/core.c | 6 ++++-- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 15 ++++++++++----- arch/x86/kernel/cpu/resctrl/internal.h | 4 ++-- arch/x86/kernel/cpu/resctrl/monitor.c | 6 ++++-- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 +++--- include/linux/resctrl.h | 4 ++-- 6 files changed, 25 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index ee2eaebe9daa1..9c2fa95f56f73 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -588,6 +588,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) struct rdt_hw_mon_domain *hw_dom; struct rdt_domain_hdr *hdr; struct rdt_mon_domain *d; + struct cacheinfo *ci; int err; lockdep_assert_held(&domain_list_lock); @@ -615,12 +616,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d = &hw_dom->d_resctrl; d->hdr.id = id; d->hdr.type = RESCTRL_MON_DOMAIN; - d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); - if (!d->ci) { + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) { pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); mon_domain_free(hw_dom); return; } + d->ci_id = ci->id; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); arch_mon_domain_online(r, d); diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 50fa1fe9a073f..f44160482759d 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -558,11 +558,12 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) struct rdt_domain_hdr *hdr; struct rmid_read rr = {0}; struct rdt_mon_domain *d; - u32 resid, evtid, domid; + u32 resid, evtid; struct rdtgroup *rdtgrp; + int domid, cpu, ret = 0; struct rdt_resource *r; + struct cacheinfo *ci; union mon_data_bits md; - int ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { @@ -584,10 +585,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) * one that matches this cache id. */ list_for_each_entry(d, &r->mon_domains, hdr.list) { - if (d->ci->id == domid) { - rr.ci = d->ci; + if (d->ci_id == domid) { + rr.ci_id = d->ci_id; + cpu = cpumask_any(&d->hdr.cpu_mask); + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) + continue; mon_event_read(&rr, r, NULL, rdtgrp, - &d->ci->shared_cpu_map, evtid, false); + &ci->shared_cpu_map, evtid, false); goto checkresult; } } diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 955999aecfca9..efe8ae4d41a51 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -160,7 +160,7 @@ union mon_data_bits { * domains in @r sharing L3 @ci.id * @evtid: Which monitor event to read. * @first: Initialize MBM counter when true. - * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. + * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains. * @err: Error encountered when reading counter. * @val: Returned value of event counter. If @rgrp is a parent resource group, * @val includes the sum of event counts from its child resource groups. @@ -174,7 +174,7 @@ struct rmid_read { struct rdt_mon_domain *d; enum resctrl_event_id evtid; bool first; - struct cacheinfo *ci; + unsigned int ci_id; int err; u64 val; void *arch_mon_ctx; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 97b5302c85e4b..866981ae71e3a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -594,6 +594,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) { int cpu = smp_processor_id(); struct rdt_mon_domain *d; + struct cacheinfo *ci; struct mbm_state *m; int err, ret; u64 tval = 0; @@ -621,7 +622,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) } /* Summing domains that share a cache, must be on a CPU for that cache. */ - if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map)) + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci || ci->id != rr->ci_id) return -EINVAL; /* @@ -633,7 +635,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) */ ret = -EINVAL; list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { - if (d->ci->id != rr->ci->id) + if (d->ci_id != rr->ci_id) continue; err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, rr->evtid, &tval, rr->arch_mon_ctx); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 37bececcd2a66..1075fc783e90e 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, char name[32]; snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); if (snc_mode) sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); @@ -3061,7 +3061,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, return -EPERM; priv.u.rid = r->rid; - priv.u.domid = do_sum ? d->ci->id : d->hdr.id; + priv.u.domid = do_sum ? d->ci_id : d->hdr.id; priv.u.sum = do_sum; list_for_each_entry(mevt, &r->evt_list, list) { priv.u.evtid = mevt->evtid; @@ -3088,7 +3088,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, lockdep_assert_held(&rdtgroup_mutex); snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); kn = kernfs_find_and_get(parent_kn, name); if (kn) { /* diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index d94abba1c716e..f8433f8ee7644 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -97,7 +97,7 @@ struct rdt_ctrl_domain { /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types - * @ci: cache info for this domain + * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -108,7 +108,7 @@ struct rdt_ctrl_domain { */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; - struct cacheinfo *ci; + unsigned long ci_id; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local;