From 7d04aecbcc9552f458bf4b0789ca8a05552e22a1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:33 +0200 Subject: [PATCH 001/109] hwmon: (fam15h_power) Use topology_core_id() commit 94f0b3978ea87c180b7e989e54faedd0a097f7ea upstream. Use the provided topology helper function instead of fiddling in cpu_data. Intel-SIG: commit 94f0b3978ea8 hwmon: (fam15h_power) Use topology_core_id(). x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Acked-by: Guenter Roeck Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085112.506988471@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- drivers/hwmon/fam15h_power.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index 521534d5c1e5f..6307112c2c0c6 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -134,15 +135,13 @@ static DEVICE_ATTR_RO(power1_crit); static void do_read_registers_on_cu(void *_data) { struct fam15h_power_data *data = _data; - int cpu, cu; - - cpu = smp_processor_id(); + int cu; /* * With the new x86 topology modelling, cpu core id actually * is compute unit id. */ - cu = cpu_data(cpu).cpu_core_id; + cu = topology_core_id(smp_processor_id()); rdmsrl_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]); rdmsrl_safe(MSR_F15H_PTSC, &data->cpu_sw_pwr_ptsc[cu]); From e28484fad0e899d0b992280089f7cba8835f62fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:34 +0200 Subject: [PATCH 002/109] x86/cpu: Move cpu_core_id into topology info commit e95256335d45cc965cd12c423535002974313340 upstream. Rename it to core_id and stick it to the other ID fields. No functional change. Intel-SIG: commit e95256335d45 x86/cpu: Move cpu_core_id into topology info. 
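For reference, and not part of the patch itself: topology_core_id() is the same accessor that backs the core_id attribute exported under sysfs, so the value the fam15h_power driver now uses can be cross-checked from userspace. A minimal standalone sketch, assuming the standard /sys/devices/system/cpu layout:

    #include <stdio.h>

    int main(void)
    {
            /* core_id as the kernel's topology_core_id() reports it for CPU0 */
            FILE *f = fopen("/sys/devices/system/cpu/cpu0/topology/core_id", "r");
            int core_id;

            if (f && fscanf(f, "%d", &core_id) == 1)
                    printf("cpu0 core id: %d\n", core_id);
            if (f)
                    fclose(f);
            return 0;
    }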
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085112.566519388@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/processor.h | 3 ++- arch/x86/include/asm/topology.h | 2 +- arch/x86/kernel/amd_nb.c | 4 ++-- arch/x86/kernel/cpu/amd.c | 8 ++++---- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/cpu/hygon.c | 4 ++-- arch/x86/kernel/cpu/proc.c | 2 +- arch/x86/kernel/cpu/topology.c | 2 +- arch/x86/kernel/smpboot.c | 6 +++--- 9 files changed, 18 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 209ef8f95ebf6..0a778d14060ba 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -92,6 +92,8 @@ struct cpuinfo_topology { // AMD Node ID and Nodes per Package info u32 amd_node_id; + // Core ID relative to the package + u32 core_id; }; struct cpuinfo_x86 { @@ -160,7 +162,6 @@ struct cpuinfo_x86 { /* Logical processor id: */ u16 logical_proc_id; /* Core id: */ - u16 cpu_core_id; u16 logical_die_id; /* Index into per_cpu list: */ u16 cpu_index; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 741ba4fb1cfe6..feb2b12ed2308 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -109,7 +109,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id) #define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id) #define topology_die_id(cpu) (cpu_data(cpu).topo.die_id) -#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id) #define topology_ppin(cpu) (cpu_data(cpu).ppin) #define topology_amd_node_id(cpu) (cpu_data(cpu).topo.die_id) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index ea8ed84240414..6d12a9b694327 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -179,7 +179,7 @@ int amd_get_subcaches(int cpu) pci_read_config_dword(link, 0x1d4, &mask); - return (mask >> (4 * cpu_data(cpu).cpu_core_id)) & 0xf; + return (mask >> (4 * cpu_data(cpu).topo.core_id)) & 0xf; } int amd_set_subcaches(int cpu, unsigned long mask) @@ -205,7 +205,7 @@ int amd_set_subcaches(int cpu, unsigned long mask) pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); } - cuid = cpu_data(cpu).cpu_core_id; + cuid = cpu_data(cpu).topo.core_id; mask <<= 4 * cuid; mask |= (0xf ^ (1 << cuid)) << 26; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2f1f82b720e6d..32f1aafe8a7a2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -302,7 +302,7 @@ static int nearby_node(int apicid) #endif /* - * Fix up cpu_core_id for pre-F17h systems to be in the + * Fix up topo::core_id for pre-F17h systems to be in the * [0 .. cores_per_node - 1] range. Not really needed but * kept so as not to break existing setups. 
*/ @@ -314,7 +314,7 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c) return; cus_per_node = c->x86_max_cores / nodes_per_socket; - c->cpu_core_id %= cus_per_node; + c->topo.core_id %= cus_per_node; } /* @@ -340,7 +340,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) c->cu_id = ebx & 0xff; if (c->x86 >= 0x17) { - c->cpu_core_id = ebx & 0xff; + c->topo.core_id = ebx & 0xff; if (smp_num_siblings > 1) c->x86_max_cores /= smp_num_siblings; @@ -383,7 +383,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->topo.initial_apicid & ((1 << bits)-1); + c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1); /* Convert the initial APIC ID into the socket ID */ c->topo.pkg_id = c->topo.initial_apicid >> bits; /* use socket ID also for last level cache */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 93104a77aff2c..deca45c9e97d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -923,8 +923,8 @@ void detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); - c->cpu_core_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb) & - ((1 << core_bits) - 1); + c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb) & + ((1 << core_bits) - 1); #endif } diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 1f4b5bd0bcb70..d98d915f79cb4 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -74,7 +74,7 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) c->topo.die_id = ecx & 0xff; - c->cpu_core_id = ebx & 0xff; + c->topo.core_id = ebx & 0xff; if (smp_num_siblings > 1) c->x86_max_cores /= smp_num_siblings; @@ -120,7 +120,7 @@ static void hygon_detect_cmp(struct cpuinfo_x86 *c) bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->topo.initial_apicid & ((1 << bits)-1); + c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1); /* Convert the initial APIC ID into the socket ID */ c->topo.pkg_id = c->topo.initial_apicid >> bits; /* use socket ID also for last level cache */ diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 366af2e392d2b..e65fae63660e3 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -23,7 +23,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, seq_printf(m, "physical id\t: %d\n", c->topo.pkg_id); seq_printf(m, "siblings\t: %d\n", cpumask_weight(topology_core_cpumask(cpu))); - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "core id\t\t: %d\n", c->topo.core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); seq_printf(m, "apicid\t\t: %d\n", c->topo.apicid); seq_printf(m, "initial apicid\t: %d\n", c->topo.initial_apicid); diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 3b48bc324dfd3..dc136703566f3 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -146,7 +146,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) die_select_mask = (~(-1 << die_plus_mask_width)) >> core_plus_mask_width; - c->cpu_core_id = apic->phys_pkg_id(c->topo.initial_apicid, + c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, ht_mask_width) & core_select_mask; if (die_level_present) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1f56b846785c4..b04cf7c959f55 100644 --- 
a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -476,7 +476,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) if (c->topo.pkg_id == o->topo.pkg_id && c->topo.die_id == o->topo.die_id && per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) { - if (c->cpu_core_id == o->cpu_core_id) + if (c->topo.core_id == o->topo.core_id) return topology_sane(c, o, "smt"); if ((c->cu_id != 0xff) && @@ -487,7 +487,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) } else if (c->topo.pkg_id == o->topo.pkg_id && c->topo.die_id == o->topo.die_id && - c->cpu_core_id == o->cpu_core_id) { + c->topo.core_id == o->topo.core_id) { return topology_sane(c, o, "smt"); } @@ -1395,7 +1395,7 @@ static void remove_siblinginfo(int cpu) cpumask_clear(topology_sibling_cpumask(cpu)); cpumask_clear(topology_core_cpumask(cpu)); cpumask_clear(topology_die_cpumask(cpu)); - c->cpu_core_id = 0; + c->topo.core_id = 0; c->booted_cores = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); recompute_smt_state(); From 234b9f484919f9e224a1d9cb79015fa94c376641 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:35 +0200 Subject: [PATCH 003/109] x86/cpu: Move cu_id into topology info commit e3c0c5d52ad34ab2c97f93ca4a0c2e9ca2fdc06b upstream. No functional change. Intel-SIG: commit e3c0c5d52ad3 x86/cpu: Move cu_id into topology info. x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085112.628405546@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/processor.h | 4 +++- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/smpboot.c | 6 +++--- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0a778d14060ba..880ee8be8ccc7 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -92,6 +92,9 @@ struct cpuinfo_topology { // AMD Node ID and Nodes per Package info u32 amd_node_id; + // Compute unit ID - AMD specific + u32 cu_id; + // Core ID relative to the package u32 core_id; }; @@ -126,7 +129,6 @@ struct cpuinfo_x86 { __u8 x86_phys_bits; /* CPUID returned core id bits: */ __u8 x86_coreid_bits; - __u8 cu_id; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 32f1aafe8a7a2..9790e8e5523d7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -337,7 +337,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) c->topo.die_id = ecx & 0xff; if (c->x86 == 0x15) - c->cu_id = ebx & 0xff; + c->topo.cu_id = ebx & 0xff; if (c->x86 >= 0x17) { c->topo.core_id = ebx & 0xff; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index deca45c9e97d3..880a995a8efa5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1935,7 +1935,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; c->x86_coreid_bits = 0; - c->cu_id = 0xff; + c->topo.cu_id = 0xff; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 
b04cf7c959f55..51e02e27741f8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -479,9 +479,9 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) if (c->topo.core_id == o->topo.core_id) return topology_sane(c, o, "smt"); - if ((c->cu_id != 0xff) && - (o->cu_id != 0xff) && - (c->cu_id == o->cu_id)) + if ((c->topo.cu_id != 0xff) && + (o->topo.cu_id != 0xff) && + (c->topo.cu_id == o->topo.cu_id)) return topology_sane(c, o, "smt"); } From 426bae97109fe17d416ed98bf5dea460f35cc465 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:36 +0200 Subject: [PATCH 004/109] x86/cpu: Remove pointless evaluation of x86_coreid_bits commit 594957d723a0674ca15bfefb755b3403624b8239 upstream. cpuinfo_x86::x86_coreid_bits is only used by the AMD numa topology code. No point in evaluating it on non AMD systems. No functional change. Intel-SIG: commit 594957d723a0 x86/cpu: Remove pointless evaluation of x86_coreid_bits. x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Reviewed-by: Arjan van de Ven Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085112.687588373@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/intel.c | 13 ------------- arch/x86/kernel/cpu/zhaoxin.c | 14 -------------- 2 files changed, 27 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 06ad5362bcec8..066cfa133f294 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -391,19 +391,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_PGE); } - if (c->cpuid_level >= 0x00000001) { - u32 eax, ebx, ecx, edx; - - cpuid(0x00000001, &eax, &ebx, &ecx, &edx); - /* - * If HTT (EDX[28]) is set EBX[16:23] contain the number of - * apicids which are reserved per package. Store the resulting - * shift value for the package management code. - */ - if (edx & (1U << 28)) - c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); - } - check_memory_type_self_snoop_errata(c); /* diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index 05fa4ef634902..415564a6523b6 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -65,20 +65,6 @@ static void early_init_zhaoxin(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); } - - if (c->cpuid_level >= 0x00000001) { - u32 eax, ebx, ecx, edx; - - cpuid(0x00000001, &eax, &ebx, &ecx, &edx); - /* - * If HTT (EDX[28]) is set EBX[16:23] contain the number of - * apicids which are reserved per package. Store the resulting - * shift value for the package management code. - */ - if (edx & (1U << 28)) - c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); - } - } static void init_zhaoxin(struct cpuinfo_x86 *c) From 5a7a9f005922b0d89a794d650e74b3e0c6314821 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:37 +0200 Subject: [PATCH 005/109] x86/cpu: Move logical package and die IDs into topology info commit 22dc9631625352426cd665f4e3f8fe0d793b2bf5 upstream. Yet another topology related data pair. Rename logical_proc_id to logical_pkg_id so it fits the common naming conventions. No functional change. Intel-SIG: commit 22dc96316253 x86/cpu: Move logical package and die IDs into topology info. 
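The amd_detect_cmp()/hygon_detect_cmp() hunks in the patches above all rely on the same arithmetic: the low x86_coreid_bits of the initial APIC ID select the core within the package and the remaining high bits select the package, with the bit count derived via get_count_order(). A standalone model of that split, using made-up example values rather than real CPUID data:

    #include <stdio.h>

    /* Rough stand-in for the kernel's get_count_order(): the smallest
     * shift such that (1 << shift) >= n, for n >= 1. */
    static int count_order(unsigned int n)
    {
            int order = 0;

            while ((1u << order) < n)
                    order++;
            return order;
    }

    int main(void)
    {
            unsigned int apicid = 0x23;     /* made-up initial APIC ID */
            unsigned int ids_per_pkg = 16;  /* e.g. CPUID.1:EBX[23:16] */
            int bits = count_order(ids_per_pkg);

            printf("core id: %u\n", apicid & ((1u << bits) - 1)); /* -> 3 */
            printf("pkg  id: %u\n", apicid >> bits);              /* -> 2 */
            return 0;
    }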
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085112.745139505@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- Documentation/arch/x86/topology.rst | 2 +- arch/x86/events/intel/uncore.c | 2 +- arch/x86/include/asm/processor.h | 8 ++++---- arch/x86/include/asm/topology.h | 4 ++-- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/smpboot.c | 8 ++++---- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index ff36e52091d94..88f29bab3ba1c 100644 --- a/Documentation/arch/x86/topology.rst +++ b/Documentation/arch/x86/topology.rst @@ -67,7 +67,7 @@ Package-related topology information in the kernel: Modern systems use this value for the socket. There may be multiple packages within a socket. This value may differ from topo.die_id. - - cpuinfo_x86.logical_proc_id: + - cpuinfo_x86.topo.logical_pkg_id: The logical ID of the package. As we do not trust BIOSes to enumerate the packages in a consistent way, we introduced the concept of logical package diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 4d856b51307f3..14c4875e5867d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -74,7 +74,7 @@ int uncore_device_to_die(struct pci_dev *dev) struct cpuinfo_x86 *c = &cpu_data(cpu); if (c->initialized && cpu_to_node(cpu) == node) - return c->logical_die_id; + return c->topo.logical_die_id; } return -1; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 880ee8be8ccc7..b96bb94d67c78 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -97,6 +97,10 @@ struct cpuinfo_topology { // Core ID relative to the package u32 core_id; + + // Logical ID mappings + u32 logical_pkg_id; + u32 logical_die_id; }; struct cpuinfo_x86 { @@ -161,10 +165,6 @@ struct cpuinfo_x86 { u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; - /* Logical processor id: */ - u16 logical_proc_id; - /* Core id: */ - u16 logical_die_id; /* Index into per_cpu list: */ u16 cpu_index; /* Is SMT active on this core? 
*/ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index feb2b12ed2308..b10015d49976c 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -105,9 +105,9 @@ static inline void setup_node_to_cpumask_map(void) { } extern const struct cpumask *cpu_coregroup_mask(int cpu); extern const struct cpumask *cpu_clustergroup_mask(int cpu); -#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id) +#define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id) #define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id) -#define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id) +#define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id) #define topology_die_id(cpu) (cpu_data(cpu).topo.die_id) #define topology_core_id(cpu) (cpu_data(cpu).topo.core_id) #define topology_ppin(cpu) (cpu_data(cpu).ppin) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 880a995a8efa5..37ceee7013b50 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1916,7 +1916,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c) BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu)); BUG_ON(topology_update_die_map(c->topo.die_id, cpu)); #else - c->logical_proc_id = 0; + c->topo.logical_pkg_id = 0; #endif } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 51e02e27741f8..ba46631e1c1e5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -337,7 +337,7 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg) struct cpuinfo_x86 *c = &cpu_data(cpu); if (c->initialized && c->topo.pkg_id == phys_pkg) - return c->logical_proc_id; + return c->topo.logical_pkg_id; } return -1; } @@ -359,7 +359,7 @@ static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cp if (c->initialized && c->topo.die_id == die_id && c->topo.pkg_id == proc_id) - return c->logical_die_id; + return c->topo.logical_die_id; } return -1; } @@ -384,7 +384,7 @@ int topology_update_package_map(unsigned int pkg, unsigned int cpu) cpu, pkg, new); } found: - cpu_data(cpu).logical_proc_id = new; + cpu_data(cpu).topo.logical_pkg_id = new; return 0; } /** @@ -407,7 +407,7 @@ int topology_update_die_map(unsigned int die, unsigned int cpu) cpu, die, new); } found: - cpu_data(cpu).logical_die_id = new; + cpu_data(cpu).topo.logical_die_id = new; return 0; } From 67b9a75241cf48240d5b9760921d617c98569721 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:38 +0200 Subject: [PATCH 006/109] x86/cpu: Move cpu_l[l2]c_id into topology info commit 6e29032340b60f7aa7475c8234b17273e4424007 upstream. The topology IDs which identify the LLC and L2 domains clearly belong to the per CPU topology information. Move them into cpuinfo_x86::cpuinfo_topo and get rid of the extra per CPU data and the related exports. This also paves the way to do proper topology evaluation during early boot because it removes the only per CPU dependency for that. No functional change. Intel-SIG: commit 6e29032340b6 x86/cpu: Move cpu_l[l2]c_id into topology info. 
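The point of the logical IDs handled by topology_update_package_map() above is that firmware-assigned physical package IDs are not guaranteed to be dense or to start at zero, so the kernel hands out its own sequential numbering in first-seen order. A simplified standalone model of that mapping (the physical IDs below are made up):

    #include <stdio.h>

    #define NR_CPUS 8

    int main(void)
    {
            /* Physical package IDs as firmware might report them: sparse */
            unsigned int phys_pkg[NR_CPUS] = { 3, 3, 3, 3, 7, 7, 7, 7 };
            unsigned int seen[NR_CPUS];
            unsigned int nr_seen = 0;

            for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                    unsigned int i;

                    for (i = 0; i < nr_seen; i++)
                            if (seen[i] == phys_pkg[cpu])
                                    break;
                    if (i == nr_seen)
                            seen[nr_seen++] = phys_pkg[cpu];

                    printf("cpu%d: phys pkg %u -> logical pkg %u\n",
                           cpu, phys_pkg[cpu], i);
            }
            return 0;
    }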
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Reviewed-by: Arjan van de Ven Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085112.803864641@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- Documentation/arch/x86/topology.rst | 4 +--- arch/x86/events/amd/uncore.c | 2 +- arch/x86/include/asm/cacheinfo.h | 3 --- arch/x86/include/asm/processor.h | 14 +++++++++++- arch/x86/include/asm/smp.h | 2 -- arch/x86/include/asm/topology.h | 2 +- arch/x86/kernel/apic/apic_numachip.c | 2 +- arch/x86/kernel/cpu/amd.c | 12 ++++------ arch/x86/kernel/cpu/cacheinfo.c | 33 ++++++++++------------------ arch/x86/kernel/cpu/common.c | 14 ++---------- arch/x86/kernel/cpu/cpu.h | 3 +++ arch/x86/kernel/cpu/hygon.c | 14 +++++------- arch/x86/kernel/smpboot.c | 10 ++++----- 13 files changed, 48 insertions(+), 67 deletions(-) diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index 88f29bab3ba1c..08ebf9edbfc1e 100644 --- a/Documentation/arch/x86/topology.rst +++ b/Documentation/arch/x86/topology.rst @@ -79,9 +79,7 @@ Package-related topology information in the kernel: The maximum possible number of packages in the system. Helpful for per package facilities to preallocate per package information. - - cpu_llc_id: - - A per-CPU variable containing: + - cpuinfo_x86.topo.llc_id: - On Intel, the first APIC ID of the list of CPUs sharing the Last Level Cache diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index e82d8970dcd75..7669f819fda04 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -775,7 +775,7 @@ void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) info.split.aux_data = 0; info.split.num_pmcs = NUM_COUNTERS_L2; info.split.gid = 0; - info.split.cid = get_llc_id(cpu); + info.split.cid = per_cpu_llc_id(cpu); if (boot_cpu_data.x86 >= 0x17) info.split.num_pmcs = NUM_COUNTERS_L3; diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h index ce9685fc78d8f..5aa0611998665 100644 --- a/arch/x86/include/asm/cacheinfo.h +++ b/arch/x86/include/asm/cacheinfo.h @@ -7,9 +7,6 @@ extern unsigned int memory_caching_control; #define CACHE_MTRR 0x01 #define CACHE_PAT 0x02 -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu); -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu); - void cache_disable(void); void cache_enable(void); void set_cache_aps_delayed_init(bool val); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b96bb94d67c78..621b7358aeb2c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -101,6 +101,10 @@ struct cpuinfo_topology { // Logical ID mappings u32 logical_pkg_id; u32 logical_die_id; + + // Cache level topology IDs + u32 llc_id; + u32 l2c_id; }; struct cpuinfo_x86 { @@ -706,7 +710,15 @@ extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); -extern u16 get_llc_id(unsigned int cpu); +static inline u16 per_cpu_llc_id(unsigned int cpu) +{ + return per_cpu(cpu_info.topo.llc_id, cpu); +} + +static inline u16 per_cpu_l2c_id(unsigned int cpu) +{ + return per_cpu(cpu_info.topo.l2c_id, cpu); +} #ifdef CONFIG_CPU_SUP_AMD extern u32 amd_get_nodes_per_socket(void); diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 
614092cf19398..9138ab64ae0f5 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -17,8 +17,6 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); /* cpus sharing the last level cache: */ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); -DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); -DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index b10015d49976c..25889e05dbfbf 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -117,7 +117,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); extern unsigned int __max_die_per_package; #ifdef CONFIG_SMP -#define topology_cluster_id(cpu) (per_cpu(cpu_l2c_id, cpu)) +#define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id) #define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) #define topology_cluster_cpumask(cpu) (cpu_clustergroup_mask(cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 578ded9abcabc..32a5e03238944 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -161,7 +161,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) u64 val; u32 nodes = 1; - this_cpu_write(cpu_llc_id, node); + c->topo.llc_id = node; /* Account for nodes per socket in multi-core-module processors */ if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9790e8e5523d7..0ce7bac212477 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -325,8 +325,6 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c) */ static void amd_get_topology(struct cpuinfo_x86 *c) { - int cpu = smp_processor_id(); - /* get information required for multi-node processors */ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { int err; @@ -354,15 +352,14 @@ static void amd_get_topology(struct cpuinfo_x86 *c) if (!err) c->x86_coreid_bits = get_count_order(c->x86_max_cores); - cacheinfo_amd_init_llc_id(c, cpu); + cacheinfo_amd_init_llc_id(c); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); c->topo.die_id = value & 7; - - per_cpu(cpu_llc_id, cpu) = c->topo.die_id; + c->topo.llc_id = c->topo.die_id; } else return; @@ -379,7 +376,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) static void amd_detect_cmp(struct cpuinfo_x86 *c) { unsigned bits; - int cpu = smp_processor_id(); bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ @@ -387,7 +383,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) /* Convert the initial APIC ID into the socket ID */ c->topo.pkg_id = c->topo.initial_apicid >> bits; /* use socket ID also for last level cache */ - per_cpu(cpu_llc_id, cpu) = c->topo.die_id = c->topo.pkg_id; + c->topo.llc_id = c->topo.die_id = c->topo.pkg_id; } u32 amd_get_nodes_per_socket(void) @@ -405,7 +401,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c) node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE) - node = get_llc_id(cpu); + node = per_cpu_llc_id(cpu); /* * On multi-fabric platform (e.g. 
Numascale NumaChip) a diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 80dbfa27977fe..b2ab7aabbe2ec 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -661,7 +661,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) return i; } -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu) +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c) { /* * We may have multiple LLCs if L3 caches exist, so check if we @@ -672,13 +672,13 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu) if (c->x86 < 0x17) { /* LLC is at the node level. */ - per_cpu(cpu_llc_id, cpu) = c->topo.die_id; + c->topo.llc_id = c->topo.die_id; } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* * LLC is at the core complex level. * Core complex ID is ApicId[3] for these processors. */ - per_cpu(cpu_llc_id, cpu) = c->topo.apicid >> 3; + c->topo.llc_id = c->topo.apicid >> 3; } else { /* * LLC ID is calculated from the number of threads sharing the @@ -694,12 +694,12 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu) if (num_sharing_cache) { int bits = get_count_order(num_sharing_cache); - per_cpu(cpu_llc_id, cpu) = c->topo.apicid >> bits; + c->topo.llc_id = c->topo.apicid >> bits; } } } -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu) +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c) { /* * We may have multiple LLCs if L3 caches exist, so check if we @@ -712,7 +712,7 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu) * LLC is at the core complex level. * Core complex ID is ApicId[3] for these processors. */ - per_cpu(cpu_llc_id, cpu) = c->topo.apicid >> 3; + c->topo.llc_id = c->topo.apicid >> 3; } void init_amd_cacheinfo(struct cpuinfo_x86 *c) @@ -740,9 +740,6 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; -#ifdef CONFIG_SMP - unsigned int cpu = c->cpu_index; -#endif if (c->cpuid_level > 3) { static int is_initialized; @@ -856,30 +853,24 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) if (new_l2) { l2 = new_l2; -#ifdef CONFIG_SMP - per_cpu(cpu_llc_id, cpu) = l2_id; - per_cpu(cpu_l2c_id, cpu) = l2_id; -#endif + c->topo.llc_id = l2_id; + c->topo.l2c_id = l2_id; } if (new_l3) { l3 = new_l3; -#ifdef CONFIG_SMP - per_cpu(cpu_llc_id, cpu) = l3_id; -#endif + c->topo.llc_id = l3_id; } -#ifdef CONFIG_SMP /* - * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in + * If llc_id is not yet set, this means cpuid_level < 4 which in * turns means that the only possibility is SMT (as indicated in * cpuid1). Since cpuid2 doesn't specify shared caches, and we know * that SMT shares all caches, we can unconditionally set cpu_llc_id to * c->topo.pkg_id. */ - if (per_cpu(cpu_llc_id, cpu) == BAD_APICID) - per_cpu(cpu_llc_id, cpu) = c->topo.pkg_id; -#endif + if (c->topo.llc_id == BAD_APICID) + c->topo.llc_id = c->topo.pkg_id; c->x86_cache_size = l3 ? l3 : (l2 ? 
l2 : (l1i+l1d)); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 37ceee7013b50..660ad14e4bb79 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -75,18 +75,6 @@ u32 elf_hwcap2 __read_mostly; int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); -/* Last level cache ID of each logical CPU */ -DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; - -u16 get_llc_id(unsigned int cpu) -{ - return per_cpu(cpu_llc_id, cpu); -} -EXPORT_SYMBOL_GPL(get_llc_id); - -/* L2 cache ID of each logical CPU */ -DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID; - static struct ppin_info { int feature; int msr_ppin_ctl; @@ -1936,6 +1924,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_max_cores = 1; c->x86_coreid_bits = 0; c->topo.cu_id = 0xff; + c->topo.llc_id = BAD_APICID; + c->topo.l2c_id = BAD_APICID; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 1dcd7d4e38ef1..885281ae79a54 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -78,6 +78,9 @@ extern int detect_ht_early(struct cpuinfo_x86 *c); extern void detect_ht(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c); +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c); + unsigned int aperfmperf_get_khz(int cpu); void cpu_select_mitigations(void); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index d98d915f79cb4..e81464801afb3 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -63,8 +63,6 @@ static void hygon_get_topology_early(struct cpuinfo_x86 *c) */ static void hygon_get_topology(struct cpuinfo_x86 *c) { - int cpu = smp_processor_id(); - /* get information required for multi-node processors */ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { int err; @@ -94,14 +92,13 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && c->x86_model <= 0x3) c->topo.pkg_id = c->topo.apicid >> APICID_SOCKET_ID_BIT; - cacheinfo_hygon_init_llc_id(c, cpu); + cacheinfo_hygon_init_llc_id(c); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); c->topo.die_id = value & 7; - - per_cpu(cpu_llc_id, cpu) = c->topo.die_id; + c->topo.llc_id = c->topo.die_id; } else return; @@ -116,15 +113,14 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) static void hygon_detect_cmp(struct cpuinfo_x86 *c) { unsigned int bits; - int cpu = smp_processor_id(); bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1); /* Convert the initial APIC ID into the socket ID */ c->topo.pkg_id = c->topo.initial_apicid >> bits; - /* use socket ID also for last level cache */ - per_cpu(cpu_llc_id, cpu) = c->topo.die_id = c->topo.pkg_id; + /* Use package ID also for last level cache */ + c->topo.llc_id = c->topo.die_id = c->topo.pkg_id; } static void srat_detect_node(struct cpuinfo_x86 *c) @@ -136,7 +132,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c) node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE) - node = per_cpu(cpu_llc_id, cpu); + node = c->topo.llc_id; /* * On multi-fabric platform (e.g. 
Numascale NumaChip) a diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ba46631e1c1e5..4a60e2a3309b4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -475,7 +475,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) if (c->topo.pkg_id == o->topo.pkg_id && c->topo.die_id == o->topo.die_id && - per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) { + per_cpu_llc_id(cpu1) == per_cpu_llc_id(cpu2)) { if (c->topo.core_id == o->topo.core_id) return topology_sane(c, o, "smt"); @@ -507,11 +507,11 @@ static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) int cpu1 = c->cpu_index, cpu2 = o->cpu_index; /* If the arch didn't set up l2c_id, fall back to SMT */ - if (per_cpu(cpu_l2c_id, cpu1) == BAD_APICID) + if (per_cpu_l2c_id(cpu1) == BAD_APICID) return match_smt(c, o); /* Do not match if L2 cache id does not match: */ - if (per_cpu(cpu_l2c_id, cpu1) != per_cpu(cpu_l2c_id, cpu2)) + if (per_cpu_l2c_id(cpu1) != per_cpu_l2c_id(cpu2)) return false; return topology_sane(c, o, "l2c"); @@ -557,11 +557,11 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) bool intel_snc = id && id->driver_data; /* Do not match if we do not have a valid APICID for cpu: */ - if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID) + if (per_cpu_llc_id(cpu1) == BAD_APICID) return false; /* Do not match if LLC id does not match: */ - if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2)) + if (per_cpu_llc_id(cpu1) != per_cpu_llc_id(cpu2)) return false; /* From 349c3e9ea647f4a048844e9e5a5ff2b8fe643e16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:39 +0200 Subject: [PATCH 007/109] x86/apic: Use BAD_APICID consistently commit 9ff4275bc8fd7bd5ac4677e2724397f8db3209bf upstream. APIC ID checks compare with BAD_APICID all over the place, but some initializers and some code which fiddles with global data structure use -1[U] instead. That simply cannot work at all. Fix it up and use BAD_APICID consistently all over the place. Intel-SIG: commit 9ff4275bc8fd x86/apic: Use BAD_APICID consistently. 
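In the cacheinfo_amd_init_llc_id() hunk above, the LLC domain is still identified by bits of the APIC ID; only where those bits start depends on the CPU generation. A simplified standalone model of that selection, with made-up inputs (in the kernel, num_sharing_cache comes from CPUID cache topology data):

    #include <stdio.h>

    static int count_order(unsigned int n)
    {
            int order = 0;

            while ((1u << order) < n)
                    order++;
            return order;
    }

    /* Simplified model of cacheinfo_amd_init_llc_id() */
    static unsigned int llc_id(unsigned int family, unsigned int model,
                               unsigned int die_id, unsigned int apicid,
                               unsigned int num_sharing_cache)
    {
            if (family < 0x17)
                    return die_id;          /* LLC is at the node level */
            if (family == 0x17 && model <= 0x1f)
                    return apicid >> 3;     /* core complex ID starts at APIC ID bit 3 */
            return apicid >> count_order(num_sharing_cache);
    }

    int main(void)
    {
            printf("llc id: %u\n", llc_id(0x19, 0x01, 0, 0x1a, 16)); /* -> 1 */
            return 0;
    }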
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085112.862835121@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/apic/apic.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3d56da48624b5..96699b5323253 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -799,7 +799,7 @@ int acpi_unmap_cpu(int cpu) set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE); #endif - per_cpu(x86_cpu_to_apicid, cpu) = -1; + per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; set_cpu_present(cpu, false); num_processors--; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 00ca9c3c1d8bf..3ed108bd5d062 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -72,7 +72,7 @@ unsigned int num_processors; unsigned disabled_cpus; /* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid __ro_after_init = -1U; +unsigned int boot_cpu_physical_apicid __ro_after_init = BAD_APICID; EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); u8 boot_cpu_apic_version __ro_after_init; @@ -2337,9 +2337,7 @@ static int nr_logical_cpuids = 1; /* * Used to store mapping between logical CPU IDs and APIC IDs. */ -int cpuid_to_apicid[] = { - [0 ... NR_CPUS - 1] = -1, -}; +int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = BAD_APICID, }; bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { From 9e98b59e6def59884a71c00cedcbeca690f002cd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:40 +0200 Subject: [PATCH 008/109] x86/apic: Use u32 for APIC IDs in global data commit 4705243d237ab6abd8696e9672ad3fb36587c6f6 upstream. APIC IDs are used with random data types u16, u32, int, unsigned int, unsigned long. Make it all consistently use u32 because that reflects the hardware register width and fixup the most obvious usage sites of that. The APIC callbacks will be addressed separately. Intel-SIG: commit 4705243d237a x86/apic: Use u32 for APIC IDs in global data. 
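Why mixing -1[U] initializers with BAD_APICID "cannot work at all" is easiest to see when the stored value and the constant end up with different widths: the equality test that is supposed to detect an unused slot silently stops matching. A standalone illustration (BAD_ID_16 is a stand-in marker for the example, not the kernel's actual constant):

    #include <stdio.h>

    #define BAD_ID_16 0xFFFFu       /* stand-in 16-bit "invalid" marker */

    int main(void)
    {
            unsigned int id = -1;   /* initializer stores 0xFFFFFFFF */

            /* 0xFFFFFFFF != 0x0000FFFF: the "slot unused?" check fails */
            printf("%s\n", id == BAD_ID_16 ? "matches" : "does not match");
            return 0;
    }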
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Reviewed-by: Arjan van de Ven Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085112.922905727@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 8 ++++---- arch/x86/include/asm/mpspec.h | 2 +- arch/x86/include/asm/processor.h | 4 ++-- arch/x86/include/asm/smp.h | 2 +- arch/x86/kernel/apic/apic.c | 16 ++++++++-------- arch/x86/kernel/apic/ipi.c | 5 +++-- arch/x86/kernel/kvm.c | 6 +++--- arch/x86/mm/numa.c | 4 ++-- 8 files changed, 24 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a2258c894244a..240c9b23a954d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -55,7 +55,7 @@ extern int local_apic_timer_c2_ok; extern bool apic_is_disabled; extern unsigned int lapic_timer_period; -extern int cpuid_to_apicid[]; +extern u32 cpuid_to_apicid[]; extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { @@ -519,9 +519,9 @@ extern void generic_bigsmp_probe(void); extern struct apic apic_noop; -static inline unsigned int read_apic_id(void) +static inline u32 read_apic_id(void) { - unsigned int reg = apic_read(APIC_ID); + u32 reg = apic_read(APIC_ID); return apic->get_apic_id(reg); } @@ -548,7 +548,7 @@ void apic_send_nmi_to_offline_cpu(unsigned int cpu); #else /* CONFIG_X86_LOCAL_APIC */ -static inline unsigned int read_apic_id(void) { return 0; } +static inline u32 read_apic_id(void) { return 0; } #endif /* !CONFIG_X86_LOCAL_APIC */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index f46df8349e86d..4b0f98a8d338d 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -37,7 +37,7 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES]; extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -extern unsigned int boot_cpu_physical_apicid; +extern u32 boot_cpu_physical_apicid; extern u8 boot_cpu_apic_version; #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 621b7358aeb2c..35542e0faccbe 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -710,12 +710,12 @@ extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); -static inline u16 per_cpu_llc_id(unsigned int cpu) +static inline u32 per_cpu_llc_id(unsigned int cpu) { return per_cpu(cpu_info.topo.llc_id, cpu); } -static inline u16 per_cpu_l2c_id(unsigned int cpu) +static inline u32 per_cpu_l2c_id(unsigned int cpu) { return per_cpu(cpu_info.topo.l2c_id, cpu); } diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 9138ab64ae0f5..390d53fd34f94 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -18,7 +18,7 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); -DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); struct task_struct; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3ed108bd5d062..883caaf50413b 100644 --- a/arch/x86/kernel/apic/apic.c +++ 
b/arch/x86/kernel/apic/apic.c @@ -72,7 +72,7 @@ unsigned int num_processors; unsigned disabled_cpus; /* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid __ro_after_init = BAD_APICID; +u32 boot_cpu_physical_apicid __ro_after_init = BAD_APICID; EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); u8 boot_cpu_apic_version __ro_after_init; @@ -87,7 +87,7 @@ physid_mask_t phys_cpu_present_map; * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to * avoid undefined behaviour caused by sending INIT from AP to BSP. */ -static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID; +static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID; /* * This variable controls which CPUs receive external NMIs. By default, @@ -111,7 +111,7 @@ static inline bool apic_accessible(void) /* * Map cpu index to physical APIC ID */ -DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID); DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); @@ -1777,7 +1777,7 @@ static void __x2apic_enable(void) static int __init setup_nox2apic(char *str) { if (x2apic_enabled()) { - int apicid = native_apic_msr_read(APIC_ID); + u32 apicid = native_apic_msr_read(APIC_ID); if (apicid >= 255) { pr_warn("Apicid: %08x, cannot enforce nox2apic\n", @@ -2337,11 +2337,11 @@ static int nr_logical_cpuids = 1; /* * Used to store mapping between logical CPU IDs and APIC IDs. */ -int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = BAD_APICID, }; +u32 cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = BAD_APICID, }; bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { - return phys_id == cpuid_to_apicid[cpu]; + return phys_id == (u64)cpuid_to_apicid[cpu]; } #ifdef CONFIG_SMP @@ -2410,7 +2410,7 @@ static int allocate_logical_cpuid(int apicid) return nr_logical_cpuids++; } -static void cpu_update_apic(int cpu, int apicid) +static void cpu_update_apic(int cpu, u32 apicid) { #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; @@ -2563,7 +2563,7 @@ static struct { */ int active; /* r/w apic fields */ - unsigned int apic_id; + u32 apic_id; unsigned int apic_taskpri; unsigned int apic_ldr; unsigned int apic_dfr; diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index edad86f32e38c..5da693d633b78 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -289,7 +289,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) } #ifdef CONFIG_SMP -static int convert_apicid_to_cpu(int apic_id) +static int convert_apicid_to_cpu(u32 apic_id) { int i; @@ -302,7 +302,8 @@ static int convert_apicid_to_cpu(int apic_id) int safe_smp_processor_id(void) { - int apicid, cpuid; + u32 apicid; + int cpuid; if (!boot_cpu_has(X86_FEATURE_APIC)) return 0; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index ff44945df25a3..30bacea78ec95 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -500,13 +500,13 @@ static bool pv_sched_yield_supported(void) static void __send_ipi_mask(const struct cpumask *mask, int vector) { unsigned long flags; - int cpu, apic_id, icr; - int min = 0, max = 0; + int cpu, min = 0, max = 0; #ifdef CONFIG_X86_64 __uint128_t ipi_bitmap = 0; #else u64 ipi_bitmap = 0; #endif + u32 apic_id, icr; long ret; if (cpumask_empty(mask)) @@ -1044,8 +1044,8 @@ arch_initcall(activate_jump_labels); /* Kick a cpu by its 
apicid. Used to wake up a halted vcpu */ static void kvm_kick_cpu(int cpu) { - int apicid; unsigned long flags = 0; + u32 apicid; apicid = per_cpu(x86_cpu_to_apicid, cpu); kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 8b374c6919e60..df959deef5b24 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -57,7 +57,7 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] = { int numa_cpu_node(int cpu) { - int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); + u32 apicid = early_per_cpu(x86_cpu_to_apicid, cpu); if (apicid != BAD_APICID) return __apicid_to_node[apicid]; @@ -750,7 +750,7 @@ void __init init_gi_nodes(void) void __init init_cpu_to_node(void) { int cpu; - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + u32 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); BUG_ON(cpu_to_apicid == NULL); From 44ba40ba2c7e05e8cedfb746e18eeb2731ef5848 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:41 +0200 Subject: [PATCH 009/109] x86/apic: Use u32 for check_apicid_used() commit 5d376b8fb165eca33ea166ee43a637c8e912abd4 upstream. APIC IDs are used with random data types u16, u32, int, unsigned int, unsigned long. Make it all consistently use u32 because that reflects the hardware register width and move the default implementation to local.h as there are no users outside the apic directory. Intel-SIG: commit 5d376b8fb165 x86/apic: Use u32 for check_apicid_used(). x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Reviewed-by: Arjan van de Ven Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085112.981956102@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 3 +-- arch/x86/kernel/apic/apic_common.c | 2 +- arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/apic_noop.c | 2 ++ arch/x86/kernel/apic/bigsmp_32.c | 2 +- arch/x86/kernel/apic/local.h | 1 + 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 240c9b23a954d..440187d69c1ea 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -294,7 +294,7 @@ struct apic { int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); bool (*apic_id_registered)(void); - bool (*check_apicid_used)(physid_mask_t *map, int apicid); + bool (*check_apicid_used)(physid_mask_t *map, u32 apicid); void (*init_apic_ldr)(void); void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); int (*cpu_present_to_apicid)(int mps_cpu); @@ -540,7 +540,6 @@ extern int default_apic_id_valid(u32 apicid); extern u32 apic_default_calc_apicid(unsigned int cpu); extern u32 apic_flat_calc_apicid(unsigned int cpu); -extern bool default_check_apicid_used(physid_mask_t *map, int apicid); extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); extern int default_cpu_present_to_apicid(int mps_cpu); diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index 7bc5d9bf59cd1..ca73c3a8cb3cc 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -18,7 +18,7 @@ u32 apic_flat_calc_apicid(unsigned int cpu) return 1U << cpu; } -bool default_check_apicid_used(physid_mask_t *map, int apicid) +bool default_check_apicid_used(physid_mask_t *map, u32 apicid) { 
return physid_isset(apicid, *map); } diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index cd16228611ce8..53ea989a0c1be 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -159,8 +159,6 @@ static struct apic apic_physflat __ro_after_init = { .disable_esr = 0, - .check_apicid_used = NULL, - .ioapic_phys_id_map = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .phys_pkg_id = flat_phys_pkg_id, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 966d7cf10b958..f834a2064d736 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -18,6 +18,8 @@ #include +#include "local.h" + static void noop_send_IPI(int cpu, int vector) { } static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { } static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { } diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 0e5535add4b5f..52ac447b568e0 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -18,7 +18,7 @@ static unsigned bigsmp_get_apic_id(unsigned long x) return (x >> 24) & 0xFF; } -static bool bigsmp_check_apicid_used(physid_mask_t *map, int apicid) +static bool bigsmp_check_apicid_used(physid_mask_t *map, u32 apicid) { return false; } diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index ec219c659c7dd..860d90b6bd506 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -64,6 +64,7 @@ void default_send_IPI_all(int vector); void default_send_IPI_self(int vector); bool default_apic_id_registered(void); +bool default_check_apicid_used(physid_mask_t *map, u32 apicid); #ifdef CONFIG_X86_32 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); From e29b3e146829c6a6d153cf2e062498b97a6c45e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:42 +0200 Subject: [PATCH 010/109] x86/apic: Use u32 for cpu_present_to_apicid() commit 8aa2a4178dc535591ac0c17de45b14fe7f6a05c7 upstream. APIC IDs are used with random data types u16, u32, int, unsigned int, unsigned long. Make it all consistently use u32 because that reflects the hardware register width and fixup a few related usage sites for consistency sake. Intel-SIG: commit 8aa2a4178dc5 x86/apic: Use u32 for cpu_present_to_apicid(). 
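A related subtlety motivates both the u32 storage and the explicit (u64) cast in the arch_match_cpu_phys_id() hunk above: a 32-bit all-ones APIC ID pattern compares differently against a 64-bit phys_id depending on whether it is stored signed or unsigned. A standalone demonstration:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t phys_id = 0xFFFFFFFFull;   /* 32-bit "all ones" pattern */
            int      as_int  = -1;              /* old signed storage of the same bits */
            uint32_t as_u32  = 0xFFFFFFFFu;     /* new u32 storage */

            /* -1 sign-extends to 0xFFFFFFFFFFFFFFFF before the compare: 0 */
            printf("int compare: %d\n", phys_id == as_int);
            /* u32 zero-extends to 0x00000000FFFFFFFF: 1 */
            printf("u32 compare: %d\n", phys_id == (uint64_t)as_u32);
            return 0;
    }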
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Reviewed-by: Arjan van de Ven Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085113.054064391@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 4 ++-- arch/x86/kernel/apic/apic_common.c | 2 +- arch/x86/kernel/cpu/common.c | 3 ++- arch/x86/kernel/smpboot.c | 8 ++++---- arch/x86/xen/apic.c | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 440187d69c1ea..4b8890317ec33 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -297,7 +297,7 @@ struct apic { bool (*check_apicid_used)(physid_mask_t *map, u32 apicid); void (*init_apic_ldr)(void); void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); - int (*cpu_present_to_apicid)(int mps_cpu); + u32 (*cpu_present_to_apicid)(int mps_cpu); int (*phys_pkg_id)(int cpuid_apic, int index_msb); u32 (*get_apic_id)(unsigned long x); @@ -541,7 +541,7 @@ extern u32 apic_default_calc_apicid(unsigned int cpu); extern u32 apic_flat_calc_apicid(unsigned int cpu); extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); -extern int default_cpu_present_to_apicid(int mps_cpu); +extern u32 default_cpu_present_to_apicid(int mps_cpu); void apic_send_nmi_to_offline_cpu(unsigned int cpu); diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index ca73c3a8cb3cc..8a00141073ea8 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -28,7 +28,7 @@ void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) *retmap = *phys_map; } -int default_cpu_present_to_apicid(int mps_cpu) +u32 default_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) return (int)per_cpu(x86_cpu_to_apicid, mps_cpu); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 660ad14e4bb79..a9eedda9fbe47 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1893,7 +1893,8 @@ static void generic_identify(struct cpuinfo_x86 *c) static void validate_apic_and_package_id(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - unsigned int apicid, cpu = smp_processor_id(); + unsigned int cpu = smp_processor_id(); + u32 apicid; apicid = apic->cpu_present_to_apicid(cpu); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4a60e2a3309b4..4be4349ddce21 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -799,7 +799,7 @@ static void __init smp_quirk_init_udelay(void) /* * Wake up AP by INIT, INIT, STARTUP sequence. */ -static void send_init_sequence(int phys_apicid) +static void send_init_sequence(u32 phys_apicid) { int maxlvt = lapic_get_maxlvt(); @@ -825,7 +825,7 @@ static void send_init_sequence(int phys_apicid) /* * Wake up AP by INIT, INIT, STARTUP sequence. 
*/ -static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) +static int wakeup_secondary_cpu_via_init(u32 phys_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; int num_starts, j, maxlvt; @@ -972,7 +972,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) * Returns zero if startup was successfully sent, else error code from * ->wakeup_secondary_cpu. */ -static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) +static int do_boot_cpu(u32 apicid, int cpu, struct task_struct *idle) { unsigned long start_ip = real_mode_header->trampoline_start; int ret; @@ -1040,7 +1040,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) int native_kick_ap(unsigned int cpu, struct task_struct *tidle) { - int apicid = apic->cpu_present_to_apicid(cpu); + u32 apicid = apic->cpu_present_to_apicid(cpu); int err; lockdep_assert_irqs_enabled(); diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 958b747cc82f8..c830ad4175a88 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -115,7 +115,7 @@ static int xen_phys_pkg_id(int initial_apic_id, int index_msb) return initial_apic_id >> index_msb; } -static int xen_cpu_present_to_apicid(int cpu) +static u32 xen_cpu_present_to_apicid(int cpu) { if (cpu_present(cpu)) return cpu_data(cpu).topo.apicid; From 4eca1b0471cf8e54bfa93d7d36110ccd8c169ed2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:43 +0200 Subject: [PATCH 011/109] x86/apic: Use u32 for phys_pkg_id() commit 01ccf9bbd25fa32baa6ed8ae16700cf2e4487cf5 upstream. APIC IDs are used with random data types u16, u32, int, unsigned int, unsigned long. Make it all consistently use u32 because that reflects the hardware register width even if that callback going to be removed soonish. Intel-SIG: commit 01ccf9bbd25f x86/apic: Use u32 for phys_pkg_id(). 
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Reviewed-by: Arjan van de Ven Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085113.113097126@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 2 +- arch/x86/kernel/apic/apic_flat_64.c | 2 +- arch/x86/kernel/apic/apic_noop.c | 2 +- arch/x86/kernel/apic/apic_numachip.c | 2 +- arch/x86/kernel/apic/bigsmp_32.c | 2 +- arch/x86/kernel/apic/local.h | 2 +- arch/x86/kernel/apic/probe_32.c | 2 +- arch/x86/kernel/apic/x2apic_phys.c | 2 +- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- arch/x86/kernel/vsmp_64.c | 2 +- arch/x86/xen/apic.c | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 4b8890317ec33..7628cd2344673 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -298,7 +298,7 @@ struct apic { void (*init_apic_ldr)(void); void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); u32 (*cpu_present_to_apicid)(int mps_cpu); - int (*phys_pkg_id)(int cpuid_apic, int index_msb); + u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); u32 (*get_apic_id)(unsigned long x); u32 (*set_apic_id)(unsigned int id); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 53ea989a0c1be..1aec3251bd144 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -66,7 +66,7 @@ static u32 set_apic_id(unsigned int id) return (id & 0xFF) << 24; } -static int flat_phys_pkg_id(int initial_apic_id, int index_msb) +static u32 flat_phys_pkg_id(u32 initial_apic_id, int index_msb) { return initial_apic_id >> index_msb; } diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index f834a2064d736..28ea3427a893e 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -29,7 +29,7 @@ static void noop_send_IPI_self(int vector) { } static void noop_apic_icr_write(u32 low, u32 id) { } static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) { return -1; } static u64 noop_apic_icr_read(void) { return 0; } -static int noop_phys_pkg_id(int cpuid_apic, int index_msb) { return 0; } +static u32 noop_phys_pkg_id(u32 cpuid_apic, int index_msb) { return 0; } static unsigned int noop_get_apic_id(unsigned long x) { return 0; } static void noop_apic_eoi(void) { } diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 32a5e03238944..c35181c4aa144 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -56,7 +56,7 @@ static u32 numachip2_set_apic_id(unsigned int id) return id << 24; } -static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) +static u32 numachip_phys_pkg_id(u32 initial_apic_id, int index_msb) { return initial_apic_id >> index_msb; } diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 52ac447b568e0..3c95e0b03e863 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -29,7 +29,7 @@ static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *re physids_promote(0xFFL, retmap); } -static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) +static u32 bigsmp_phys_pkg_id(u32 cpuid_apic, int index_msb) { 
return cpuid_apic >> index_msb; } diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index 860d90b6bd506..2bf0d1c78d71e 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -17,7 +17,7 @@ void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); unsigned int x2apic_get_apic_id(unsigned long id); u32 x2apic_set_apic_id(unsigned int id); -int x2apic_phys_pkg_id(int initial_apicid, int index_msb); +u32 x2apic_phys_pkg_id(u32 initial_apicid, int index_msb); void x2apic_send_IPI_all(int vector); void x2apic_send_IPI_allbutself(int vector); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 9a06df6cdd680..2867fa61c4cdd 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -18,7 +18,7 @@ #include "local.h" -static int default_phys_pkg_id(int cpuid_apic, int index_msb) +static u32 default_phys_pkg_id(u32 cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index c8ac1b12b8ac6..d44ca06def5b9 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -134,7 +134,7 @@ u32 x2apic_set_apic_id(unsigned int id) return id; } -int x2apic_phys_pkg_id(int initial_apicid, int index_msb) +u32 x2apic_phys_pkg_id(u32 initial_apicid, int index_msb) { return initial_apicid >> index_msb; } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 205cee5676298..acfdb8dec805c 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -789,7 +789,7 @@ static unsigned int uv_read_apic_id(void) return x2apic_get_apic_id(apic_read(APIC_ID)); } -static int uv_phys_pkg_id(int initial_apicid, int index_msb) +static u32 uv_phys_pkg_id(u32 initial_apicid, int index_msb) { return uv_read_apic_id() >> index_msb; } diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 65e96b76c4232..d3fc017705587 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -127,7 +127,7 @@ static void __init vsmp_cap_cpus(void) #endif } -static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) +static u32 apicid_phys_pkg_id(u32 initial_apic_id, int index_msb) { return read_apic_id() >> index_msb; } diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index c830ad4175a88..40538b23e20b4 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -110,7 +110,7 @@ static int xen_madt_oem_check(char *oem_id, char *oem_table_id) return xen_pv_domain(); } -static int xen_phys_pkg_id(int initial_apic_id, int index_msb) +static u32 xen_phys_pkg_id(u32 initial_apic_id, int index_msb) { return initial_apic_id >> index_msb; } From 655645a32b409e0dbaf842679622f6d66f70067e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:44 +0200 Subject: [PATCH 012/109] x86/apic: Use u32 for [gs]et_apic_id() commit 59f7928cd46316371bee71b0cb34e133567e5b35 upstream. APIC IDs are used with random data types u16, u32, int, unsigned int, unsigned long. Make it all consistently use u32 because that reflects the hardware register width. Intel-SIG: commit 59f7928cd463 x86/apic: Use u32 for [gs]et_apic_id(). 
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085113.172569282@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 14 ++------------ arch/x86/kernel/apic/apic_flat_64.c | 4 ++-- arch/x86/kernel/apic/apic_noop.c | 2 +- arch/x86/kernel/apic/apic_numachip.c | 8 ++++---- arch/x86/kernel/apic/bigsmp_32.c | 2 +- arch/x86/kernel/apic/local.h | 4 ++-- arch/x86/kernel/apic/probe_32.c | 10 ++++++++++ arch/x86/kernel/apic/x2apic_phys.c | 4 ++-- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- arch/x86/xen/apic.c | 4 ++-- 10 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 7628cd2344673..b2ef8c5d93e05 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -300,8 +300,8 @@ struct apic { u32 (*cpu_present_to_apicid)(int mps_cpu); u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); - u32 (*get_apic_id)(unsigned long x); - u32 (*set_apic_id)(unsigned int id); + u32 (*get_apic_id)(u32 id); + u32 (*set_apic_id)(u32 apicid); /* wakeup_secondary_cpu */ int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); @@ -495,16 +495,6 @@ static inline bool lapic_vector_set_in_irr(unsigned int vector) return !!(irr & (1U << (vector % 32))); } -static inline unsigned default_get_apic_id(unsigned long x) -{ - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - - if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) - return (x >> 24) & 0xFF; - else - return (x >> 24) & 0x0F; -} - /* * Warm reset vector position: */ diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 1aec3251bd144..7139867d69cd2 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -56,12 +56,12 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) _flat_send_IPI_mask(mask, vector); } -static unsigned int flat_get_apic_id(unsigned long x) +static u32 flat_get_apic_id(u32 x) { return (x >> 24) & 0xFF; } -static u32 set_apic_id(unsigned int id) +static u32 set_apic_id(u32 id) { return (id & 0xFF) << 24; } diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 28ea3427a893e..f1114861938c5 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -30,7 +30,7 @@ static void noop_apic_icr_write(u32 low, u32 id) { } static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) { return -1; } static u64 noop_apic_icr_read(void) { return 0; } static u32 noop_phys_pkg_id(u32 cpuid_apic, int index_msb) { return 0; } -static unsigned int noop_get_apic_id(unsigned long x) { return 0; } +static u32 noop_get_apic_id(u32 apicid) { return 0; } static void noop_apic_eoi(void) { } static u32 noop_apic_read(u32 reg) diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index c35181c4aa144..af350b4dad564 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -25,7 +25,7 @@ static const struct apic apic_numachip1; static const struct apic apic_numachip2; static void (*numachip_apic_icr_write)(int apicid, unsigned int val) __read_mostly; -static unsigned int numachip1_get_apic_id(unsigned long x) +static u32 
numachip1_get_apic_id(u32 x) { unsigned long value; unsigned int id = (x >> 24) & 0xff; @@ -38,12 +38,12 @@ static unsigned int numachip1_get_apic_id(unsigned long x) return id; } -static u32 numachip1_set_apic_id(unsigned int id) +static u32 numachip1_set_apic_id(u32 id) { return (id & 0xff) << 24; } -static unsigned int numachip2_get_apic_id(unsigned long x) +static u32 numachip2_get_apic_id(u32 x) { u64 mcfg; @@ -51,7 +51,7 @@ static unsigned int numachip2_get_apic_id(unsigned long x) return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24); } -static u32 numachip2_set_apic_id(unsigned int id) +static u32 numachip2_set_apic_id(u32 id) { return id << 24; } diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 3c95e0b03e863..7ee3c486cb336 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -13,7 +13,7 @@ #include "local.h" -static unsigned bigsmp_get_apic_id(unsigned long x) +static u32 bigsmp_get_apic_id(u32 x) { return (x >> 24) & 0xFF; } diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index 2bf0d1c78d71e..9ea6186ea88c2 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -15,8 +15,8 @@ /* X2APIC */ void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); -unsigned int x2apic_get_apic_id(unsigned long id); -u32 x2apic_set_apic_id(unsigned int id); +u32 x2apic_get_apic_id(u32 id); +u32 x2apic_set_apic_id(u32 id); u32 x2apic_phys_pkg_id(u32 initial_apicid, int index_msb); void x2apic_send_IPI_all(int vector); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 2867fa61c4cdd..5eb3fbe472da9 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -23,6 +23,16 @@ static u32 default_phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } +static u32 default_get_apic_id(u32 x) +{ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + + if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) + return (x >> 24) & 0xFF; + else + return (x >> 24) & 0x0F; +} + /* should be called last. 
*/ static int probe_default(void) { diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index d44ca06def5b9..558a4a8824f47 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -124,12 +124,12 @@ static int x2apic_phys_probe(void) return apic == &apic_x2apic_phys; } -unsigned int x2apic_get_apic_id(unsigned long id) +u32 x2apic_get_apic_id(u32 id) { return id; } -u32 x2apic_set_apic_id(unsigned int id) +u32 x2apic_set_apic_id(u32 id) { return id; } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index acfdb8dec805c..72c3b3148d282 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -779,7 +779,7 @@ static void uv_send_IPI_all(int vector) uv_send_IPI_mask(cpu_online_mask, vector); } -static u32 set_apic_id(unsigned int id) +static u32 set_apic_id(u32 id) { return id; } diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 40538b23e20b4..9dd5490b33189 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -33,13 +33,13 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) return 0xfd; } -static u32 xen_set_apic_id(unsigned int x) +static u32 xen_set_apic_id(u32 x) { WARN_ON(1); return x; } -static unsigned int xen_get_apic_id(unsigned long x) +static u32 xen_get_apic_id(u32 x) { return ((x)>>24) & 0xFFu; } From c139358a2b08b0103365fbebc8d7b145d5f1fd6a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Oct 2023 12:14:27 +0200 Subject: [PATCH 013/109] x86/apic, x86/hyperv: Use u32 in hv_snp_boot_ap() too commit 92fe9bb77b0c9fade150350fdb0629a662f0923f upstream. The data type for APIC IDs was standardized to 'u32' in the following recent commit: db4a4086a223 ("x86/apic: Use u32 for wakeup_secondary_cpu[_64]()") Which changed the function arguments type signature of the apic->wakeup_secondary_cpu() APIC driver function. Propagate this to hv_snp_boot_ap() as well, which also addresses a 'assignment from incompatible pointer type' build warning that triggers under the -Werror=incompatible-pointer-types GCC warning. Fixes: db4a4086a223 ("x86/apic: Use u32 for wakeup_secondary_cpu[_64]()") Intel-SIG: commit 92fe9bb77b0c x86/apic, x86/hyperv: Use u32 in hv_snp_boot_ap() too. 
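[ Illustration, not part of the upstream change: a minimal stand-alone
  sketch of the warning class this fixes. The struct and function names
  below (apic_cbs, old_boot_ap) are made up for the example; the point is
  that assigning a function whose APIC ID parameter is still "int" to a
  callback slot that now takes "u32" is exactly what GCC rejects under
  -Werror=incompatible-pointer-types. ]

	/* sketch.c: gcc -c -Werror=incompatible-pointer-types sketch.c */
	typedef unsigned int u32;

	struct apic_cbs {
		/* callback signature after the u32 conversion */
		int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_ip);
	};

	/* still using the old "int apicid" signature */
	static int old_boot_ap(int apicid, unsigned long start_ip)
	{
		(void)apicid;
		(void)start_ip;
		return 0;
	}

	struct apic_cbs cbs = {
		/* error: initialization from incompatible pointer type */
		.wakeup_secondary_cpu = old_boot_ap,
	};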
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Ingo Molnar Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230814085113.233274223@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/hyperv/ivm.c | 2 +- arch/x86/include/asm/mshyperv.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index ee54f2add6993..095ba43a03ec0 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -289,7 +289,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) free_page((unsigned long)vmsa); } -int hv_snp_boot_ap(int cpu, unsigned long start_ip) +int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) { struct sev_es_save_area *vmsa = (struct sev_es_save_area *) __get_free_page(GFP_KERNEL | __GFP_ZERO); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ec95d6e9f1682..b31d0b643b3a2 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -275,11 +275,11 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); #ifdef CONFIG_AMD_MEM_ENCRYPT bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); -int hv_snp_boot_ap(int cpu, unsigned long start_ip); +int hv_snp_boot_ap(u32 cpu, unsigned long start_ip); #else static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} -static inline int hv_snp_boot_ap(int cpu, unsigned long start_ip) { return 0; } +static inline int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) { return 0; } #endif #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) From 537ea55fb3a43344de9ca118a47d6ace95220188 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:45 +0200 Subject: [PATCH 014/109] x86/apic: Use u32 for wakeup_secondary_cpu[_64]() commit db4a4086a223bd5cbfa5a66701c493124d808d55 upstream. APIC IDs are used with random data types u16, u32, int, unsigned int, unsigned long. Make it all consistently use u32 because that reflects the hardware register width. Intel-SIG: commit db4a4086a223 x86/apic: Use u32 for wakeup_secondary_cpu[_64](). 
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Reviewed-by: Arjan van de Ven Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085113.233274223@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/hyperv/hv_vtl.c | 2 +- arch/x86/include/asm/apic.h | 8 ++++---- arch/x86/kernel/acpi/madt_wakeup.c | 2 +- arch/x86/kernel/apic/apic_noop.c | 2 +- arch/x86/kernel/apic/apic_numachip.c | 2 +- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- arch/x86/kernel/sev.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index b12bef0ff7bb6..48058bafecc39 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -207,7 +207,7 @@ static int hv_vtl_apicid_to_vp_id(u32 apic_id) return ret; } -static int hv_vtl_wakeup_secondary_cpu(int apicid, unsigned long start_eip) +static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) { int vp_id, cpu; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b2ef8c5d93e05..b6a47be78375d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -304,9 +304,9 @@ struct apic { u32 (*set_apic_id)(u32 apicid); /* wakeup_secondary_cpu */ - int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip); /* wakeup secondary CPU using 64-bit wakeup point */ - int (*wakeup_secondary_cpu_64)(int apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip); char *name; }; @@ -324,8 +324,8 @@ struct apic_override { void (*send_IPI_self)(int vector); u64 (*icr_read)(void); void (*icr_write)(u32 low, u32 high); - int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); - int (*wakeup_secondary_cpu_64)(int apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip); }; /* diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index fe0133833e331..d5ef6215583bc 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -169,7 +169,7 @@ static int __init acpi_mp_setup_reset(u64 reset_vector) return 0; } -static int acpi_wakeup_cpu(int apicid, unsigned long start_ip) +static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip) { if (!acpi_mp_wake_mailbox_paddr) { pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. 
Booting with kexec?\n"); diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index f1114861938c5..b00d52ae84fa9 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -27,7 +27,7 @@ static void noop_send_IPI_allbutself(int vector) { } static void noop_send_IPI_all(int vector) { } static void noop_send_IPI_self(int vector) { } static void noop_apic_icr_write(u32 low, u32 id) { } -static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) { return -1; } +static int noop_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) { return -1; } static u64 noop_apic_icr_read(void) { return 0; } static u32 noop_phys_pkg_id(u32 cpuid_apic, int index_msb) { return 0; } static u32 noop_get_apic_id(u32 apicid) { return 0; } diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index af350b4dad564..456a14c44f67a 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -71,7 +71,7 @@ static void numachip2_apic_icr_write(int apicid, unsigned int val) numachip2_write32_lcsr(NUMACHIP2_APIC_ICR, (apicid << 12) | val); } -static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) +static int numachip_wakeup_secondary(u32 phys_apicid, unsigned long start_rip) { numachip_apic_icr_write(phys_apicid, APIC_DM_INIT); numachip_apic_icr_write(phys_apicid, APIC_DM_STARTUP | diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 72c3b3148d282..3ca49dfb653c3 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -701,7 +701,7 @@ static __init void build_uv_gr_table(void) } } -static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) +static int uv_wakeup_secondary(u32 phys_apicid, unsigned long start_rip) { unsigned long val; int pnode; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 24e72027019a7..a6f04b7180785 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1104,7 +1104,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) free_page((unsigned long)vmsa); } -static int wakeup_cpu_via_vmgexit(int apic_id, unsigned long start_ip) +static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) { struct sev_es_save_area *cur_vmsa, *vmsa; struct ghcb_state state; From eaa55afc87806f7b4c258da527dd94e8bfec2ec5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:46 +0200 Subject: [PATCH 015/109] x86/cpu/topology: Cure the abuse of cpuinfo for persisting logical ids commit 90781f0c4c41a41043e39d9acbc66cc3644769ba upstream. Per CPU cpuinfo is used to persist the logical package and die IDs. That's really not the right place simply because cpuinfo is subject to be reinitialized when a CPU goes through an offline/online cycle. This works by chance today, but that's far from correct and neither obvious nor documented. Add a per cpu datastructure which persists those logical IDs, which allows to cleanup the CPUID evaluation code. This is a temporary workaround until the larger topology management is in place, which makes all of this logical management mechanics obsolete. Intel-SIG: commit 90781f0c4c41 x86/cpu/topology: Cure the abuse of cpuinfo for persisting logical ids. 
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085113.292947071@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/smpboot.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4be4349ddce21..7d5fb45dd53e0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -125,7 +125,20 @@ struct mwait_cpu_dead { */ static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead); -/* Logical package management. We might want to allocate that dynamically */ +/* Logical package management. */ +struct logical_maps { + u32 phys_pkg_id; + u32 phys_die_id; + u32 logical_pkg_id; + u32 logical_die_id; +}; + +/* Temporary workaround until the full topology mechanics is in place */ +static DEFINE_PER_CPU_READ_MOSTLY(struct logical_maps, logical_maps) = { + .phys_pkg_id = U32_MAX, + .phys_die_id = U32_MAX, +}; + unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); unsigned int logical_packages __read_mostly; @@ -334,10 +347,8 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg) int cpu; for_each_possible_cpu(cpu) { - struct cpuinfo_x86 *c = &cpu_data(cpu); - - if (c->initialized && c->topo.pkg_id == phys_pkg) - return c->topo.logical_pkg_id; + if (per_cpu(logical_maps.phys_pkg_id, cpu) == phys_pkg) + return per_cpu(logical_maps.logical_pkg_id, cpu); } return -1; } @@ -355,11 +366,9 @@ static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cp int cpu, proc_id = cpu_data(cur_cpu).topo.pkg_id; for_each_possible_cpu(cpu) { - struct cpuinfo_x86 *c = &cpu_data(cpu); - - if (c->initialized && c->topo.die_id == die_id && - c->topo.pkg_id == proc_id) - return c->topo.logical_die_id; + if (per_cpu(logical_maps.phys_pkg_id, cpu) == proc_id && + per_cpu(logical_maps.phys_die_id, cpu) == die_id) + return per_cpu(logical_maps.logical_die_id, cpu); } return -1; } @@ -384,6 +393,8 @@ int topology_update_package_map(unsigned int pkg, unsigned int cpu) cpu, pkg, new); } found: + per_cpu(logical_maps.phys_pkg_id, cpu) = pkg; + per_cpu(logical_maps.logical_pkg_id, cpu) = new; cpu_data(cpu).topo.logical_pkg_id = new; return 0; } @@ -407,6 +418,8 @@ int topology_update_die_map(unsigned int die, unsigned int cpu) cpu, die, new); } found: + per_cpu(logical_maps.phys_die_id, cpu) = die; + per_cpu(logical_maps.logical_die_id, cpu) = new; cpu_data(cpu).topo.logical_die_id = new; return 0; } From d67fe5c54faa07b2cf0e6128c52ca3fefceca38a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Aug 2023 10:18:47 +0200 Subject: [PATCH 016/109] x86/cpu: Provide debug interface commit 48525fd1ea1cfa059a580e77b10ea8790914efa2 upstream. Provide debug files which dump the topology related information of cpuinfo_x86. This is useful to validate the upcoming conversion of the topology evaluation for correctness or bug compatibility. Intel-SIG: commit 48525fd1ea1c x86/cpu: Provide debug interface. 
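[ Illustration, not part of the upstream change: a minimal user-space
  sketch of consuming the new files. It assumes debugfs is mounted at
  /sys/kernel/debug and that arch_debugfs_dir is the usual "x86"
  directory, which places the per-CPU dump for CPU 0 at
  /sys/kernel/debug/x86/topo/cpus/0. ]

	/* topo_dump.c: print the topology debug file for CPU 0 (run as root) */
	#include <stdio.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/sys/kernel/debug/x86/topo/cpus/0", "r");

		if (!f) {
			perror("fopen");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* online, apicid, pkg_id, ... */
		fclose(f);
		return 0;
	}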
x86/cpu: Rework the topology evaluation - part1 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085113.353191313@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/Makefile | 2 ++ arch/x86/kernel/cpu/debugfs.c | 58 +++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 arch/x86/kernel/cpu/debugfs.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4350f6bfc0641..93eabf5440318 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -54,6 +54,8 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o obj-$(CONFIG_ACRN_GUEST) += acrn.o +obj-$(CONFIG_DEBUG_FS) += debugfs.o + quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $@ $^ diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c new file mode 100644 index 0000000000000..0c179d684b3b6 --- /dev/null +++ b/arch/x86/kernel/cpu/debugfs.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include +#include + +static int cpu_debug_show(struct seq_file *m, void *p) +{ + unsigned long cpu = (unsigned long)m->private; + struct cpuinfo_x86 *c = per_cpu_ptr(&cpu_info, cpu); + + seq_printf(m, "online: %d\n", cpu_online(cpu)); + if (!c->initialized) + return 0; + + seq_printf(m, "initial_apicid: %x\n", c->topo.initial_apicid); + seq_printf(m, "apicid: %x\n", c->topo.apicid); + seq_printf(m, "pkg_id: %u\n", c->topo.pkg_id); + seq_printf(m, "die_id: %u\n", c->topo.die_id); + seq_printf(m, "cu_id: %u\n", c->topo.cu_id); + seq_printf(m, "core_id: %u\n", c->topo.core_id); + seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id); + seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id); + seq_printf(m, "llc_id: %u\n", c->topo.llc_id); + seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id); + seq_printf(m, "max_cores: %u\n", c->x86_max_cores); + seq_printf(m, "max_die_per_pkg: %u\n", __max_die_per_package); + seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings); + return 0; +} + +static int cpu_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, cpu_debug_show, inode->i_private); +} + +static const struct file_operations dfs_cpu_ops = { + .open = cpu_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static __init int cpu_init_debugfs(void) +{ + struct dentry *dir, *base = debugfs_create_dir("topo", arch_debugfs_dir); + unsigned long id; + char name[24]; + + dir = debugfs_create_dir("cpus", base); + for_each_possible_cpu(id) { + sprintf(name, "%lu", id); + debugfs_create_file(name, 0444, dir, (void *)id, &dfs_cpu_ops); + } + return 0; +} +late_initcall(cpu_init_debugfs); From fc47edb8293632ae8ac61621fd34bdbe7c0f4530 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Feb 2024 21:29:39 +0100 Subject: [PATCH 017/109] x86/cpu: Provide cpuid_read() et al. commit 43d86e3cd9a77912772cf7ad37ad94211bf7351d upstream. Provide a few helper functions to read CPUID leafs or individual registers into a data structure without requiring unions. Intel-SIG: commit 43d86e3cd9a7 x86/cpu: Provide cpuid_read() et al.. 
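[ Illustration, not part of the upstream change: the helpers added below
  let callers read a CPUID leaf, or a single register of it, straight
  into a struct with named bitfields instead of juggling unions and
  open-coded shifts. The stand-alone user-space sketch shows the same
  pattern with the compiler's <cpuid.h> wrapper; struct leaf1_ebx is a
  made-up name for the example. ]

	#include <cpuid.h>	/* __get_cpuid(), provided by GCC/clang */
	#include <stdio.h>
	#include <string.h>

	/* Named view of CPUID(1).EBX, the same idea the kernel code later
	 * uses with cpuid_leaf_reg(1, CPUID_EBX, &ebx). */
	struct leaf1_ebx {
		unsigned int brand_index	: 8;
		unsigned int clflush_size	: 8;	/* in 8-byte units */
		unsigned int nproc		: 8;	/* EBX[23:16]	   */
		unsigned int initial_apicid	: 8;	/* EBX[31:24]	   */
	};

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;
		struct leaf1_ebx b;

		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
			return 1;

		memcpy(&b, &ebx, sizeof(b));	/* raw register -> named fields */
		printf("initial APIC ID %u, leaf 1 nproc %u\n",
		       b.initial_apicid, b.nproc);
		return 0;
	}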
x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/878r3mg570.ffs@tglx [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/cpuid.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h index 9bee3e7bf9736..6b122a31da065 100644 --- a/arch/x86/include/asm/cpuid.h +++ b/arch/x86/include/asm/cpuid.h @@ -127,6 +127,42 @@ static inline unsigned int cpuid_edx(unsigned int op) return edx; } +static inline void __cpuid_read(unsigned int leaf, unsigned int subleaf, u32 *regs) +{ + regs[CPUID_EAX] = leaf; + regs[CPUID_ECX] = subleaf; + __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX); +} + +#define cpuid_subleaf(leaf, subleaf, regs) { \ + static_assert(sizeof(*(regs)) == 16); \ + __cpuid_read(leaf, subleaf, (u32 *)(regs)); \ +} + +#define cpuid_leaf(leaf, regs) { \ + static_assert(sizeof(*(regs)) == 16); \ + __cpuid_read(leaf, 0, (u32 *)(regs)); \ +} + +static inline void __cpuid_read_reg(unsigned int leaf, unsigned int subleaf, + enum cpuid_regs_idx regidx, u32 *reg) +{ + u32 regs[4]; + + __cpuid_read(leaf, subleaf, regs); + *reg = regs[regidx]; +} + +#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \ + static_assert(sizeof(*(reg)) == 4); \ + __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \ +} + +#define cpuid_leaf_reg(leaf, regidx, reg) { \ + static_assert(sizeof(*(reg)) == 4); \ + __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \ +} + static __always_inline bool cpuid_function_is_indexed(u32 function) { switch (function) { From ff8cd1706679603ec94447ff869452d7369d67aa Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:15 +0100 Subject: [PATCH 018/109] x86/cpuid: Include in commit 97c7d5723537de08e076892e07d6089ae9777965 upstream. uses static_assert() at multiple locations but it does not include the CPP macro's definition at linux/build_bug.h. Include the needed header to make self-sufficient. This gets triggered when cpuid.h is included in new C files, which is to be done in further commits. Fixes: 43d86e3cd9a7 ("x86/cpu: Provide cpuid_read() et al.") Intel-SIG: commit 97c7d5723537 x86/cpuid: Include in . x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250304085152.51092-5-darwi@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/cpuid.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h index 6b122a31da065..d6b466e91e6d7 100644 --- a/arch/x86/include/asm/cpuid.h +++ b/arch/x86/include/asm/cpuid.h @@ -6,6 +6,7 @@ #ifndef _ASM_X86_CPUID_H #define _ASM_X86_CPUID_H +#include #include struct cpuid_regs { From db3b630c46efe392952539bab3497ad55dcfcfb2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:01 +0100 Subject: [PATCH 019/109] x86/cpu: Provide cpu_init/parse_topology() commit ebdb20361059b3c4fd7b23cfa10c28e798b7a3d2 upstream. Topology evaluation is a complete disaster and impenetrable mess. It's scattered all over the place with some vendor implementations doing early evaluation and some not. 
The most horrific part is the permanent overwriting of smt_max_siblings and __max_die_per_package, instead of establishing them once on the boot CPU and validating the result on the APs. The goals are: - One topology evaluation entry point - Proper sharing of pointlessly duplicated code - Proper structuring of the evaluation logic and preferences. - Evaluating important system wide information only once on the boot CPU - Making the 0xb/0x1f leaf parsing less convoluted and actually fixing the short comings of leaf 0x1f evaluation. Start to consolidate the topology evaluation code by providing the entry points for the early boot CPU evaluation and for the final parsing on the boot CPU and the APs. Move the trivial pieces into that new code: - The initialization of cpuinfo_x86::topo - The evaluation of CPUID leaf 1, which presets topo::initial_apicid - topo_apicid is set to topo::initial_apicid when invoked from early boot. When invoked for the final evaluation on the boot CPU it reads the actual APIC ID, which makes apic_get_initial_apicid() obsolete once everything is converted over. Provide a temporary helper function topo_converted() which shields off the not yet converted CPU vendors from invoking code which would break them. This shielding covers all vendor CPUs which support SMP, but not the historical pure UP ones as they only need the topology info init and eventually the initial APIC initialization. Provide two new members in cpuinfo_x86::topo to store the maximum number of SMT siblings and the number of dies per package and add them to the debugfs readout. These two members will be used to populate this information on the boot CPU and to validate the APs against it. Intel-SIG: commit ebdb20361059 x86/cpu: Provide cpu_init/parse_topology(). 
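[ Illustration, not part of the upstream change: once the domain shifts
  are established, every topology ID is derived by shifting and masking
  the APIC ID, as topo_set_ids() below does. A stand-alone sketch of
  that arithmetic with assumed shifts (1 SMT bit, 3 core bits, the rest
  being the package, i.e. 2 threads per core and 8 cores per package): ]

	#include <stdio.h>

	#define SMT_SHIFT	1	/* assumed: 1 thread-select bit	     */
	#define PKG_SHIFT	4	/* assumed: 4 bits below the package */

	int main(void)
	{
		unsigned int apicid = 0x35;	/* example CPU, 0b110101 */

		unsigned int smt_id  = apicid & ((1u << SMT_SHIFT) - 1);
		unsigned int core_id = (apicid & ((1u << PKG_SHIFT) - 1)) >> SMT_SHIFT;
		unsigned int pkg_id  = apicid >> PKG_SHIFT;

		/* prints: smt 1 core 2 pkg 3 */
		printf("smt %u core %u pkg %u\n", smt_id, core_id, pkg_id);
		return 0;
	}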
x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240212153624.581436579@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 19 +++ arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/kernel/cpu/common.c | 24 +--- arch/x86/kernel/cpu/cpu.h | 6 + arch/x86/kernel/cpu/debugfs.c | 38 ++++++ arch/x86/kernel/cpu/topology.h | 36 +++++ arch/x86/kernel/cpu/topology_common.c | 188 ++++++++++++++++++++++++++ 7 files changed, 296 insertions(+), 18 deletions(-) create mode 100644 arch/x86/kernel/cpu/topology.h create mode 100644 arch/x86/kernel/cpu/topology_common.c diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 25889e05dbfbf..ebdc06cc9732a 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -102,6 +102,25 @@ static inline void setup_node_to_cpumask_map(void) { } #include +/* Topology information */ +enum x86_topology_domains { + TOPO_SMT_DOMAIN, + TOPO_CORE_DOMAIN, + TOPO_MODULE_DOMAIN, + TOPO_TILE_DOMAIN, + TOPO_DIE_DOMAIN, + TOPO_DIEGRP_DOMAIN, + TOPO_PKG_DOMAIN, + TOPO_MAX_DOMAIN, +}; + +struct x86_topology_system { + unsigned int dom_shifts[TOPO_MAX_DOMAIN]; + unsigned int dom_size[TOPO_MAX_DOMAIN]; +}; + +extern struct x86_topology_system x86_topo_system; + extern const struct cpumask *cpu_coregroup_mask(int cpu); extern const struct cpumask *cpu_clustergroup_mask(int cpu); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 93eabf5440318..b1c655e8689d6 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -17,7 +17,8 @@ KMSAN_SANITIZE_common.o := n # As above, instrumenting secondary CPU boot code causes boot hangs. 
KCSAN_SANITIZE_common.o := n -obj-y := cacheinfo.o scattered.o topology.o +obj-y := cacheinfo.o scattered.o +obj-y += topology_common.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a9eedda9fbe47..22d0a518d769e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1692,6 +1692,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_address_sizes(c); cpu_parse_early_param(); + cpu_init_topology(c); + if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -1703,6 +1705,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) } else { setup_clear_cpu_cap(X86_FEATURE_CPUID); get_cpu_address_sizes(c); + cpu_init_topology(c); } setup_force_cpu_cap(X86_FEATURE_ALWAYS); @@ -1854,18 +1857,6 @@ static void generic_identify(struct cpuinfo_x86 *c) get_cpu_address_sizes(c); - if (c->cpuid_level >= 0x00000001) { - c->topo.initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_32 -# ifdef CONFIG_SMP - c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); -# else - c->topo.apicid = c->topo.initial_apicid; -# endif -#endif - c->topo.pkg_id = c->topo.initial_apicid; - } - get_model_name(c); /* Default name */ /* @@ -1924,9 +1915,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; c->x86_coreid_bits = 0; - c->topo.cu_id = 0xff; - c->topo.llc_id = BAD_APICID; - c->topo.l2c_id = BAD_APICID; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; @@ -1945,6 +1933,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) generic_identify(c); + cpu_parse_topology(c); + if (this_cpu->c_identify) this_cpu->c_identify(c); @@ -1952,10 +1942,10 @@ static void identify_cpu(struct cpuinfo_x86 *c) apply_forced_caps(c); #ifdef CONFIG_X86_64 - c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); + if (!topo_is_converted(c)) + c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); #endif - /* * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and * Hygon will clear it in ->c_init() below. 
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 885281ae79a54..2a446ecb2cc04 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -2,6 +2,11 @@ #ifndef ARCH_X86_CPU_H #define ARCH_X86_CPU_H +#include +#include + +#include "topology.h" + /* attempt to consolidate cpu attributes */ struct cpu_dev { const char *c_vendor; @@ -96,4 +101,5 @@ static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) mode == SPECTRE_V2_EIBRS_RETPOLINE || mode == SPECTRE_V2_EIBRS_LFENCE; } + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index 0c179d684b3b6..be8ca21ecd6a0 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -5,6 +5,8 @@ #include #include +#include "cpu.h" + static int cpu_debug_show(struct seq_file *m, void *p) { unsigned long cpu = (unsigned long)m->private; @@ -42,12 +44,48 @@ static const struct file_operations dfs_cpu_ops = { .release = single_release, }; +static int dom_debug_show(struct seq_file *m, void *p) +{ + static const char *domain_names[TOPO_MAX_DOMAIN] = { + [TOPO_SMT_DOMAIN] = "Thread", + [TOPO_CORE_DOMAIN] = "Core", + [TOPO_MODULE_DOMAIN] = "Module", + [TOPO_TILE_DOMAIN] = "Tile", + [TOPO_DIE_DOMAIN] = "Die", + [TOPO_DIEGRP_DOMAIN] = "DieGrp", + [TOPO_PKG_DOMAIN] = "Package", + }; + unsigned int dom, nthreads = 1; + + for (dom = 0; dom < TOPO_MAX_DOMAIN; dom++) { + nthreads *= x86_topo_system.dom_size[dom]; + seq_printf(m, "domain: %-10s shift: %u dom_size: %5u max_threads: %5u\n", + domain_names[dom], x86_topo_system.dom_shifts[dom], + x86_topo_system.dom_size[dom], nthreads); + } + return 0; +} + +static int dom_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, dom_debug_show, inode->i_private); +} + +static const struct file_operations dfs_dom_ops = { + .open = dom_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static __init int cpu_init_debugfs(void) { struct dentry *dir, *base = debugfs_create_dir("topo", arch_debugfs_dir); unsigned long id; char name[24]; + debugfs_create_file("domains", 0444, base, NULL, &dfs_dom_ops); + dir = debugfs_create_dir("cpus", base); for_each_possible_cpu(id) { sprintf(name, "%lu", id); diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h new file mode 100644 index 0000000000000..99c94c519b5dc --- /dev/null +++ b/arch/x86/kernel/cpu/topology.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_X86_TOPOLOGY_H +#define ARCH_X86_TOPOLOGY_H + +struct topo_scan { + struct cpuinfo_x86 *c; + unsigned int dom_shifts[TOPO_MAX_DOMAIN]; + unsigned int dom_ncpus[TOPO_MAX_DOMAIN]; +}; + +bool topo_is_converted(struct cpuinfo_x86 *c); +void cpu_init_topology(struct cpuinfo_x86 *c); +void cpu_parse_topology(struct cpuinfo_x86 *c); +void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, + unsigned int shift, unsigned int ncpus); + +static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom) +{ + if (dom == TOPO_SMT_DOMAIN) + return apicid; + return apicid >> x86_topo_system.dom_shifts[dom - 1]; +} + +static inline u32 topo_relative_domain_id(u32 apicid, enum x86_topology_domains dom) +{ + if (dom != TOPO_SMT_DOMAIN) + apicid >>= x86_topo_system.dom_shifts[dom - 1]; + return apicid & (x86_topo_system.dom_size[dom] - 1); +} + +static inline u32 topo_domain_mask(enum x86_topology_domains dom) +{ + return (1U << x86_topo_system.dom_shifts[dom]) - 1; +} + +#endif 
/* ARCH_X86_TOPOLOGY_H */ diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c new file mode 100644 index 0000000000000..873d7c3fe7acb --- /dev/null +++ b/arch/x86/kernel/cpu/topology_common.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include + +#include +#include +#include + +#include "cpu.h" + +struct x86_topology_system x86_topo_system __ro_after_init; + +void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, + unsigned int shift, unsigned int ncpus) +{ + tscan->dom_shifts[dom] = shift; + tscan->dom_ncpus[dom] = ncpus; + + /* Propagate to the upper levels */ + for (dom++; dom < TOPO_MAX_DOMAIN; dom++) { + tscan->dom_shifts[dom] = tscan->dom_shifts[dom - 1]; + tscan->dom_ncpus[dom] = tscan->dom_ncpus[dom - 1]; + } +} + +bool topo_is_converted(struct cpuinfo_x86 *c) +{ + /* Temporary until everything is converted over. */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + case X86_VENDOR_CENTAUR: + case X86_VENDOR_INTEL: + case X86_VENDOR_HYGON: + case X86_VENDOR_ZHAOXIN: + return false; + default: + /* Let all UP systems use the below */ + return true; + } +} + +static bool fake_topology(struct topo_scan *tscan) +{ + /* + * Preset the CORE level shift for CPUID less systems and XEN_PV, + * which has useless CPUID information. + */ + topology_set_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, 1, 1); + + return tscan->c->cpuid_level < 1 || xen_pv_domain(); +} + +static void parse_topology(struct topo_scan *tscan, bool early) +{ + const struct cpuinfo_topology topo_defaults = { + .cu_id = 0xff, + .llc_id = BAD_APICID, + .l2c_id = BAD_APICID, + }; + struct cpuinfo_x86 *c = tscan->c; + struct { + u32 unused0 : 16, + nproc : 8, + apicid : 8; + } ebx; + + c->topo = topo_defaults; + + if (fake_topology(tscan)) + return; + + /* Preset Initial APIC ID from CPUID leaf 1 */ + cpuid_leaf_reg(1, CPUID_EBX, &ebx); + c->topo.initial_apicid = ebx.apicid; + + /* + * The initial invocation from early_identify_cpu() happens before + * the APIC is mapped or X2APIC enabled. For establishing the + * topology, that's not required. Use the initial APIC ID. + */ + if (early) + c->topo.apicid = c->topo.initial_apicid; + else + c->topo.apicid = read_apic_id(); + + /* The above is sufficient for UP */ + if (!IS_ENABLED(CONFIG_SMP)) + return; +} + +static void topo_set_ids(struct topo_scan *tscan) +{ + struct cpuinfo_x86 *c = tscan->c; + u32 apicid = c->topo.apicid; + + c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN); + c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN); + + /* Package relative core ID */ + c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> + x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; +} + +static void topo_set_max_cores(struct topo_scan *tscan) +{ + /* + * Bug compatible for now. This is broken on hybrid systems: + * 8 cores SMT + 8 cores w/o SMT + * tscan.dom_ncpus[TOPO_DIEGRP_DOMAIN] = 24; 24 / 2 = 12 !! + * + * Cannot be fixed without further topology enumeration changes. 
+ */ + tscan->c->x86_max_cores = tscan->dom_ncpus[TOPO_DIEGRP_DOMAIN] >> + x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; +} + +void cpu_parse_topology(struct cpuinfo_x86 *c) +{ + unsigned int dom, cpu = smp_processor_id(); + struct topo_scan tscan = { .c = c, }; + + parse_topology(&tscan, false); + + if (!topo_is_converted(c)) + return; + + for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) { + if (tscan.dom_shifts[dom] == x86_topo_system.dom_shifts[dom]) + continue; + pr_err(FW_BUG "CPU%d: Topology domain %u shift %u != %u\n", cpu, dom, + tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]); + } + + /* Bug compatible with the existing parsers */ + if (tscan.dom_ncpus[TOPO_SMT_DOMAIN] > smp_num_siblings) { + if (system_state == SYSTEM_BOOTING) { + pr_warn_once("CPU%d: SMT detected and enabled late\n", cpu); + smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; + } else { + pr_warn_once("CPU%d: SMT detected after init. Too late!\n", cpu); + } + } + + topo_set_ids(&tscan); + topo_set_max_cores(&tscan); +} + +void __init cpu_init_topology(struct cpuinfo_x86 *c) +{ + struct topo_scan tscan = { .c = c, }; + unsigned int dom, sft; + + parse_topology(&tscan, true); + + if (!topo_is_converted(c)) + return; + + /* Copy the shift values and calculate the unit sizes. */ + memcpy(x86_topo_system.dom_shifts, tscan.dom_shifts, sizeof(x86_topo_system.dom_shifts)); + + dom = TOPO_SMT_DOMAIN; + x86_topo_system.dom_size[dom] = 1U << x86_topo_system.dom_shifts[dom]; + + for (dom++; dom < TOPO_MAX_DOMAIN; dom++) { + sft = x86_topo_system.dom_shifts[dom] - x86_topo_system.dom_shifts[dom - 1]; + x86_topo_system.dom_size[dom] = 1U << sft; + } + + topo_set_ids(&tscan); + topo_set_max_cores(&tscan); + + /* + * Bug compatible with the existing code. If the boot CPU does not + * have SMT this ends up with one sibling. This needs way deeper + * changes further down the road to get it right during early boot. + */ + smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; + + /* + * Neither it's clear whether there are as many dies as the APIC + * space indicating die level is. But assume that the actual number + * of CPUs gives a proper indication for now to stay bug compatible. + */ + __max_die_per_package = tscan.dom_ncpus[TOPO_DIE_DOMAIN] / + tscan.dom_ncpus[TOPO_DIE_DOMAIN - 1]; +} From a686ce207d312665fd8319845043988f2628e680 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:03 +0100 Subject: [PATCH 020/109] x86/cpu: Add legacy topology parser commit bda74aae20086c044b31ca0dcdab7deaaf23d0e8 upstream. The legacy topology detection via CPUID leaf 4, which provides the number of cores in the package and CPUID leaf 1 which provides the number of logical CPUs in case that FEATURE_HT is enabled and the CMP_LEGACY feature is not set, is shared for Intel, Centaur and Zhaoxin CPUs. Lift the code from common.c without the early detection hack and provide it as common fallback mechanism. Will be utilized in later changes. Intel-SIG: commit bda74aae2008 x86/cpu: Add legacy topology parser. 
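[ Illustration, not part of the upstream change: a worked example of the
  fallback arithmetic in parse_legacy() below, assuming a package with
  4 cores and SMT enabled. CPUID leaf 4 reports 4 cores, so
  core_shift = get_count_order(4) = 2; CPUID leaf 1 EBX[23:16] reports
  8 logical processors, so ebx1_nproc_shift = 3 and
  smt_shift = 3 - 2 = 1 (two threads per core). Because the 0xb/0x1f
  convention counts threads at the core level, the SMT domain ends up
  with shift 1 and 2 CPUs, and the CORE domain with shift 2 + 1 = 3 and
  8 CPUs. ]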
x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.644448852@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/common.c | 3 ++ arch/x86/kernel/cpu/topology.h | 3 ++ arch/x86/kernel/cpu/topology_common.c | 46 ++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 22d0a518d769e..07ead3bc41201 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -899,6 +899,9 @@ void detect_ht(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP int index_msb, core_bits; + if (topo_is_converted(c)) + return; + if (detect_ht_early(c) < 0) return; diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 99c94c519b5dc..934a100b9fe63 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -6,6 +6,9 @@ struct topo_scan { struct cpuinfo_x86 *c; unsigned int dom_shifts[TOPO_MAX_DOMAIN]; unsigned int dom_ncpus[TOPO_MAX_DOMAIN]; + + /* Legacy CPUID[1]:EBX[23:16] number of logical processors */ + unsigned int ebx1_nproc_shift; }; bool topo_is_converted(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 873d7c3fe7acb..b0ff1fc84a13d 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -24,6 +24,48 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, } } +static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c) +{ + struct { + u32 cache_type : 5, + unused : 21, + ncores : 6; + } eax; + + if (c->cpuid_level < 4) + return 1; + + cpuid_subleaf_reg(4, 0, CPUID_EAX, &eax); + if (!eax.cache_type) + return 1; + + return eax.ncores + 1; +} + +static void __maybe_unused parse_legacy(struct topo_scan *tscan) +{ + unsigned int cores, core_shift, smt_shift = 0; + struct cpuinfo_x86 *c = tscan->c; + + cores = parse_num_cores_legacy(c); + core_shift = get_count_order(cores); + + if (cpu_has(c, X86_FEATURE_HT)) { + if (!WARN_ON_ONCE(tscan->ebx1_nproc_shift < core_shift)) + smt_shift = tscan->ebx1_nproc_shift - core_shift; + /* + * The parser expects leaf 0xb/0x1f format, which means + * the number of logical processors at core level is + * counting threads. + */ + core_shift += smt_shift; + cores <<= smt_shift; + } + + topology_set_dom(tscan, TOPO_SMT_DOMAIN, smt_shift, 1U << smt_shift); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, core_shift, cores); +} + bool topo_is_converted(struct cpuinfo_x86 *c) { /* Temporary until everything is converted over. */ @@ -47,7 +89,7 @@ static bool fake_topology(struct topo_scan *tscan) * which has useless CPUID information. 
*/ topology_set_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); - topology_set_dom(tscan, TOPO_CORE_DOMAIN, 1, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, 0, 1); return tscan->c->cpuid_level < 1 || xen_pv_domain(); } @@ -88,6 +130,8 @@ static void parse_topology(struct topo_scan *tscan, bool early) /* The above is sufficient for UP */ if (!IS_ENABLED(CONFIG_SMP)) return; + + tscan->ebx1_nproc_shift = get_count_order(ebx.nproc); } static void topo_set_ids(struct topo_scan *tscan) From 18c15ccf0989a12a35ba6848033a462d0b90229a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:04 +0100 Subject: [PATCH 021/109] x86/cpu: Use common topology code for Centaur and Zhaoxin commit 598e719c40d67b1473d78423e941bed4ea6c726d upstream. Centaur and Zhaoxin CPUs use only the legacy SMP detection. Remove the invocations from their 32bit path and exclude them from the 64-bit call path. No functional change intended. Intel-SIG: commit 598e719c40d6 x86/cpu: Use common topology code for Centaur and Zhaoxin. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.706794189@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/centaur.c | 4 ---- arch/x86/kernel/cpu/topology_common.c | 11 ++++++++--- arch/x86/kernel/cpu/zhaoxin.c | 4 ---- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 345f7d905db67..a3b55db35c961 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -128,10 +128,6 @@ static void init_centaur(struct cpuinfo_x86 *c) #endif early_init_centaur(c); init_intel_cacheinfo(c); - detect_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif if (c->cpuid_level > 9) { unsigned int eax = cpuid_eax(10); diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index b0ff1fc84a13d..bcaaeecaf1a63 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -42,7 +42,7 @@ static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c) return eax.ncores + 1; } -static void __maybe_unused parse_legacy(struct topo_scan *tscan) +static void parse_legacy(struct topo_scan *tscan) { unsigned int cores, core_shift, smt_shift = 0; struct cpuinfo_x86 *c = tscan->c; @@ -71,10 +71,8 @@ bool topo_is_converted(struct cpuinfo_x86 *c) /* Temporary until everything is converted over. 
*/ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - case X86_VENDOR_CENTAUR: case X86_VENDOR_INTEL: case X86_VENDOR_HYGON: - case X86_VENDOR_ZHAOXIN: return false; default: /* Let all UP systems use the below */ @@ -132,6 +130,13 @@ static void parse_topology(struct topo_scan *tscan, bool early) return; tscan->ebx1_nproc_shift = get_count_order(ebx.nproc); + + switch (c->x86_vendor) { + case X86_VENDOR_CENTAUR: + case X86_VENDOR_ZHAOXIN: + parse_legacy(tscan); + break; + } } static void topo_set_ids(struct topo_scan *tscan) diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index 415564a6523b6..90eba7eb53357 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -71,10 +71,6 @@ static void init_zhaoxin(struct cpuinfo_x86 *c) { early_init_zhaoxin(c); init_intel_cacheinfo(c); - detect_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif if (c->cpuid_level > 9) { unsigned int eax = cpuid_eax(10); From d077676164f76ed198041d43e42dab70f96b07d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:05 +0100 Subject: [PATCH 022/109] x86/cpu: Move __max_die_per_package to common.c commit 92853a7774f942e3692dbd83bace82333a2b47bd upstream. In preparation of a complete replacement for the topology leaf 0xb/0x1f evaluation, move __max_die_per_package into the common code. Will be removed once everything is converted over. Intel-SIG: commit 92853a7774f9 x86/cpu: Move __max_die_per_package to common.c. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.768188958@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/topology.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 07ead3bc41201..371a32abe47e5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -75,6 +75,9 @@ u32 elf_hwcap2 __read_mostly; int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); +unsigned int __max_die_per_package __read_mostly = 1; +EXPORT_SYMBOL(__max_die_per_package); + static struct ppin_info { int feature; int msr_ppin_ctl; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index dc136703566f3..208e17a772269 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -25,9 +25,6 @@ #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -unsigned int __max_die_per_package __read_mostly = 1; -EXPORT_SYMBOL(__max_die_per_package); - #ifdef CONFIG_SMP /* * Check if given CPUID extended topology "leaf" is implemented From a37ed92f5d069c8c8768b69505d439ef0dc2837d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:07 +0100 Subject: [PATCH 023/109] x86/cpu: Provide a sane leaf 0xb/0x1f parser commit 3d41009425225ca5e09016c634ecee513b4713bb upstream. 
detect_extended_topology() along with it's early() variant is a classic example for duct tape engineering: - It evaluates an array of subleafs with a boatload of local variables for the relevant topology levels instead of using an array to save the enumerated information and propagate it to the right level - It has no boundary checks for subleafs - It prevents updating the die_id with a crude workaround instead of checking for leaf 0xb which does not provide die information. - It's broken vs. the number of dies evaluation as it uses: num_processors[DIE_LEVEL] / num_processors[CORE_LEVEL] which "works" only correctly if there is none of the intermediate topology levels (MODULE/TILE) enumerated. There is zero value in trying to "fix" that code as the only proper fix is to rewrite it from scratch. Implement a sane parser with proper code documentation, which will be used for the consolidated topology evaluation in the next step. Intel-SIG: commit 3d4100942522 x86/cpu: Provide a sane leaf 0xb/0x1f parser. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.830571770@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/topology.h | 12 +++ arch/x86/kernel/cpu/topology_ext.c | 130 +++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/topology_ext.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index b1c655e8689d6..45b735b8e1df6 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -18,7 +18,7 @@ KMSAN_SANITIZE_common.o := n KCSAN_SANITIZE_common.o := n obj-y := cacheinfo.o scattered.o -obj-y += topology_common.o topology.o +obj-y += topology_common.o topology_ext.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 934a100b9fe63..2b100cd11f176 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -16,6 +16,7 @@ void cpu_init_topology(struct cpuinfo_x86 *c); void cpu_parse_topology(struct cpuinfo_x86 *c); void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, unsigned int shift, unsigned int ncpus); +bool cpu_parse_topology_ext(struct topo_scan *tscan); static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom) { @@ -36,4 +37,15 @@ static inline u32 topo_domain_mask(enum x86_topology_domains dom) return (1U << x86_topo_system.dom_shifts[dom]) - 1; } +/* + * Update a domain level after the fact without propagating. Used to fixup + * broken CPUID enumerations. 
+ */ +static inline void topology_update_dom(struct topo_scan *tscan, enum x86_topology_domains dom, + unsigned int shift, unsigned int ncpus) +{ + tscan->dom_shifts[dom] = shift; + tscan->dom_ncpus[dom] = ncpus; +} + #endif /* ARCH_X86_TOPOLOGY_H */ diff --git a/arch/x86/kernel/cpu/topology_ext.c b/arch/x86/kernel/cpu/topology_ext.c new file mode 100644 index 0000000000000..e477228cd5b2b --- /dev/null +++ b/arch/x86/kernel/cpu/topology_ext.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include +#include +#include + +#include "cpu.h" + +enum topo_types { + INVALID_TYPE = 0, + SMT_TYPE = 1, + CORE_TYPE = 2, + MAX_TYPE_0B = 3, + MODULE_TYPE = 3, + TILE_TYPE = 4, + DIE_TYPE = 5, + DIEGRP_TYPE = 6, + MAX_TYPE_1F = 7, +}; + +/* + * Use a lookup table for the case that there are future types > 6 which + * describe an intermediate domain level which does not exist today. + */ +static const unsigned int topo_domain_map_0b_1f[MAX_TYPE_1F] = { + [SMT_TYPE] = TOPO_SMT_DOMAIN, + [CORE_TYPE] = TOPO_CORE_DOMAIN, + [MODULE_TYPE] = TOPO_MODULE_DOMAIN, + [TILE_TYPE] = TOPO_TILE_DOMAIN, + [DIE_TYPE] = TOPO_DIE_DOMAIN, + [DIEGRP_TYPE] = TOPO_DIEGRP_DOMAIN, +}; + +static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf, + unsigned int *last_dom) +{ + unsigned int dom, maxtype; + const unsigned int *map; + struct { + // eax + u32 x2apic_shift : 5, // Number of bits to shift APIC ID right + // for the topology ID at the next level + : 27; // Reserved + // ebx + u32 num_processors : 16, // Number of processors at current level + : 16; // Reserved + // ecx + u32 level : 8, // Current topology level. Same as sub leaf number + type : 8, // Level type. If 0, invalid + : 16; // Reserved + // edx + u32 x2apic_id : 32; // X2APIC ID of the current logical processor + } sl; + + switch (leaf) { + case 0x0b: maxtype = MAX_TYPE_0B; map = topo_domain_map_0b_1f; break; + case 0x1f: maxtype = MAX_TYPE_1F; map = topo_domain_map_0b_1f; break; + default: return false; + } + + cpuid_subleaf(leaf, subleaf, &sl); + + if (!sl.num_processors || sl.type == INVALID_TYPE) + return false; + + if (sl.type >= maxtype) { + pr_err_once("Topology: leaf 0x%x:%d Unknown domain type %u\n", + leaf, subleaf, sl.type); + /* + * It really would have been too obvious to make the domain + * type space sparse and leave a few reserved types between + * the points which might change instead of following the + * usual "this can be fixed in software" principle. + */ + dom = *last_dom + 1; + } else { + dom = map[sl.type]; + *last_dom = dom; + } + + if (!dom) { + tscan->c->topo.initial_apicid = sl.x2apic_id; + } else if (tscan->c->topo.initial_apicid != sl.x2apic_id) { + pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf %d APIC ID mismatch %x != %x\n", + leaf, subleaf, tscan->c->topo.initial_apicid, sl.x2apic_id); + } + + topology_set_dom(tscan, dom, sl.x2apic_shift, sl.num_processors); + return true; +} + +static bool parse_topology_leaf(struct topo_scan *tscan, u32 leaf) +{ + unsigned int last_dom; + u32 subleaf; + + /* Read all available subleafs and populate the levels */ + for (subleaf = 0, last_dom = 0; topo_subleaf(tscan, leaf, subleaf, &last_dom); subleaf++); + + /* If subleaf 0 failed to parse, give up */ + if (!subleaf) + return false; + + /* + * There are machines in the wild which have shift 0 in the subleaf + * 0, but advertise 2 logical processors at that level. They are + * truly SMT. 
+ */ + if (!tscan->dom_shifts[TOPO_SMT_DOMAIN] && tscan->dom_ncpus[TOPO_SMT_DOMAIN] > 1) { + unsigned int sft = get_count_order(tscan->dom_ncpus[TOPO_SMT_DOMAIN]); + + pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf 0 has shift level 0 but %u CPUs. Fixing it up.\n", + leaf, tscan->dom_ncpus[TOPO_SMT_DOMAIN]); + topology_update_dom(tscan, TOPO_SMT_DOMAIN, sft, tscan->dom_ncpus[TOPO_SMT_DOMAIN]); + } + + set_cpu_cap(tscan->c, X86_FEATURE_XTOPOLOGY); + return true; +} + +bool cpu_parse_topology_ext(struct topo_scan *tscan) +{ + /* Intel: Try leaf 0x1F first. */ + if (tscan->c->cpuid_level >= 0x1f && parse_topology_leaf(tscan, 0x1f)) + return true; + + /* Intel/AMD: Fall back to leaf 0xB if available */ + return tscan->c->cpuid_level >= 0x0b && parse_topology_leaf(tscan, 0x0b); +} From 8721dcd1e161c29b0895c0dc45d2e741bd72e92c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:08 +0100 Subject: [PATCH 024/109] x86/cpu: Use common topology code for Intel commit 22d63660c35eb751c63a709bf901a64c1726592a upstream. Intel CPUs use either topology leaf 0xb/0x1f evaluation or the legacy SMP/HT evaluation based on CPUID leaf 0x1/0x4. Move it over to the consolidated topology code and remove the random topology hacks which are sprinkled into the Intel and the common code. No functional change intended. Intel-SIG: commit 22d63660c35e x86/cpu: Use common topology code for Intel. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.893644349@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/common.c | 65 --------------------------- arch/x86/kernel/cpu/cpu.h | 4 -- arch/x86/kernel/cpu/intel.c | 25 ----------- arch/x86/kernel/cpu/topology.c | 22 --------- arch/x86/kernel/cpu/topology_common.c | 5 ++- 5 files changed, 4 insertions(+), 117 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 371a32abe47e5..fd41dd910b9cf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -800,19 +800,6 @@ static void get_model_name(struct cpuinfo_x86 *c) *(s + 1) = '\0'; } -void detect_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - c->x86_max_cores = 1; - if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) - return; - - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - c->x86_max_cores = (eax >> 26) + 1; -} - void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -874,54 +861,6 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); } -int detect_ht_early(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - u32 eax, ebx, ecx, edx; - - if (!cpu_has(c, X86_FEATURE_HT)) - return -1; - - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return -1; - - if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) - return -1; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - smp_num_siblings = (ebx & 0xff0000) >> 16; - if (smp_num_siblings == 1) - pr_info_once("CPU0: Hyper-Threading is disabled\n"); -#endif - return 0; -} - -void detect_ht(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - int index_msb, core_bits; - - if (topo_is_converted(c)) - return; - - if (detect_ht_early(c) < 0) - return; - - index_msb = get_count_order(smp_num_siblings); - c->topo.pkg_id = 
apic->phys_pkg_id(c->topo.initial_apicid, index_msb); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings); - - core_bits = get_count_order(c->x86_max_cores); - - c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb) & - ((1 << core_bits) - 1); -#endif -} - void get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; @@ -2005,10 +1944,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86, c->x86_model); } -#ifdef CONFIG_X86_64 - detect_ht(c); -#endif - x86_init_rdrand(c); setup_pku(c); setup_cet(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2a446ecb2cc04..5a790f12dc280 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -76,11 +76,7 @@ extern void init_intel_cacheinfo(struct cpuinfo_x86 *c); extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c); -extern void detect_num_cpu_cores(struct cpuinfo_x86 *c); -extern int detect_extended_topology_early(struct cpuinfo_x86 *c); extern int detect_extended_topology(struct cpuinfo_x86 *c); -extern int detect_ht_early(struct cpuinfo_x86 *c); -extern void detect_ht(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 066cfa133f294..b82c2cf2026e8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -393,13 +393,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) check_memory_type_self_snoop_errata(c); - /* - * Get the number of SMT siblings early from the extended topology - * leaf, if available. Otherwise try the legacy SMT detection. - */ - if (detect_extended_topology_early(c) < 0) - detect_ht_early(c); - /* * Adjust the number of physical bits early because it affects the * valid bits of the MTRR mask registers. @@ -603,24 +596,6 @@ static void init_intel(struct cpuinfo_x86 *c) intel_workarounds(c); - /* - * Detect the extended topology information if available. This - * will reinitialise the initial_apicid which will be used - * in init_intel_cacheinfo() - */ - detect_extended_topology(c); - - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - detect_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } - init_intel_cacheinfo(c); if (c->cpuid_level > 9) { diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 208e17a772269..2b68f26a99a44 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -59,28 +59,6 @@ static int detect_extended_topology_leaf(struct cpuinfo_x86 *c) } #endif -int detect_extended_topology_early(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int eax, ebx, ecx, edx; - int leaf; - - leaf = detect_extended_topology_leaf(c); - if (leaf < 0) - return -1; - - set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); - - cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - /* - * initial apic id, which also represents 32-bit extended x2apic id. 
- */ - c->topo.initial_apicid = edx; - smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); -#endif - return 0; -} - /* * Check for extended topology enumeration cpuid leaf, and if it * exists, use it for populating initial_apicid and cpu topology diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index bcaaeecaf1a63..ef99499c9bbe9 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -71,7 +71,6 @@ bool topo_is_converted(struct cpuinfo_x86 *c) /* Temporary until everything is converted over. */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - case X86_VENDOR_INTEL: case X86_VENDOR_HYGON: return false; default: @@ -136,6 +135,10 @@ static void parse_topology(struct topo_scan *tscan, bool early) case X86_VENDOR_ZHAOXIN: parse_legacy(tscan); break; + case X86_VENDOR_INTEL: + if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan)) + parse_legacy(tscan); + break; } } From 2fa04010be16d980782e341625871756e303c39f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 30 May 2024 17:29:18 +0200 Subject: [PATCH 025/109] x86/topology/intel: Unlock CPUID before evaluating anything commit 0c2f6d04619ec2b53ad4b0b591eafc9389786e86 upstream. Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If this bit is set by the BIOS then CPUID evaluation including topology enumeration does not work correctly as the evaluation code does not try to analyze any leaf greater than two. This went unnoticed before because the original topology code just repeated evaluation several times and managed to overwrite the initial limited information with the correct one later. The new evaluation code does it once and therefore ends up with the limited and wrong information. Cure this by unlocking CPUID right before evaluating anything which depends on the maximum CPUID leaf being greater than two instead of rereading stuff after unlock. Fixes: 22d63660c35e ("x86/cpu: Use common topology code for Intel") Reported-by: Peter Schneider Intel-SIG: commit 0c2f6d04619e x86/topology/intel: Unlock CPUID before evaluating anything. 
x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Peter Schneider Cc: Link: https://lore.kernel.org/r/fd3f73dc-a86f-4bcf-9c60-43556a21eb42@googlemail.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/common.c | 3 ++- arch/x86/kernel/cpu/cpu.h | 2 ++ arch/x86/kernel/cpu/intel.c | 25 ++++++++++++++++--------- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fd41dd910b9cf..08626f21918b3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1632,6 +1632,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (have_cpuid_p()) { cpu_detect(c); get_cpu_vendor(c); + intel_unlock_cpuid_leafs(c); get_cpu_cap(c); setup_force_cpu_cap(X86_FEATURE_CPUID); get_cpu_address_sizes(c); @@ -1797,7 +1798,7 @@ static void generic_identify(struct cpuinfo_x86 *c) cpu_detect(c); get_cpu_vendor(c); - + intel_unlock_cpuid_leafs(c); get_cpu_cap(c); get_cpu_address_sizes(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 5a790f12dc280..08beca97ad17b 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -61,9 +61,11 @@ extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; extern void __init tsx_init(void); void tsx_ap_init(void); +void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c); #else static inline void tsx_init(void) { } static inline void tsx_ap_init(void) { } +static inline void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) { } #endif /* CONFIG_CPU_SUP_INTEL */ extern void init_spectral_chicken(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b82c2cf2026e8..8851f981693b8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -265,19 +265,26 @@ static void detect_tme_early(struct cpuinfo_x86 *c) c->x86_phys_bits -= keyid_bits; } +void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return; + + if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd)) + return; + + /* + * The BIOS can have limited CPUID to leaf 2, which breaks feature + * enumeration. Unlock it and update the maximum leaf info. + */ + if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) + c->cpuid_level = cpuid_eax(0); +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; - /* Unmask CPUID levels if masked: */ - if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { - if (msr_clear_bit(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) { - c->cpuid_level = cpuid_eax(0); - get_cpu_cap(c); - } - } - if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); From 9ec594b56fe0ca9e76d94ec2f01d66e49f478871 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:11 +0100 Subject: [PATCH 026/109] x86/cpu: Provide an AMD/HYGON specific topology parser commit f7fb3b2dd92c633871b7037773b89531c488a371 upstream. AMD/HYGON uses various methods for topology evaluation: - Leaf 0x80000008 and 0x8000001e based with an optional leaf 0xb, which is the preferred variant for modern CPUs. Leaf 0xb will be superseded by leaf 0x80000026 soon, which is just another variant of the Intel 0x1f leaf for whatever reasons. 
- Subleaf 0x80000008 and NODEID_MSR base - Legacy fallback That code is following the principle of random bits and pieces all over the place which results in multiple evaluations and impenetrable code flows in the same way as the Intel parsing did. Provide a sane implementation by clearly separating the three variants and bringing them in the proper preference order in one place. This provides the parsing for both AMD and HYGON because there is no point in having a separate HYGON parser which only differs by 3 lines of code. Any further divergence between AMD and HYGON can be handled in different functions, while still sharing the existing parsers. Intel-SIG: commit f7fb3b2dd92c x86/cpu: Provide an AMD/HYGON specific topology parser. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.020038641@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 2 + arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/cacheinfo.c | 4 +- arch/x86/kernel/cpu/cpu.h | 2 +- arch/x86/kernel/cpu/debugfs.c | 2 + arch/x86/kernel/cpu/topology.h | 6 + arch/x86/kernel/cpu/topology_amd.c | 183 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/topology_common.c | 19 ++- 9 files changed, 215 insertions(+), 7 deletions(-) create mode 100644 arch/x86/kernel/cpu/topology_amd.c diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index ebdc06cc9732a..fadc19ae972ad 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -164,6 +164,8 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); +extern unsigned int __amd_nodes_per_pkg; + static inline unsigned int topology_amd_nodes_per_pkg(void) { return __max_die_per_package; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 45b735b8e1df6..3ee24ff62cc42 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -18,7 +18,7 @@ KMSAN_SANITIZE_common.o := n KCSAN_SANITIZE_common.o := n obj-y := cacheinfo.o scattered.o -obj-y += topology_common.o topology_ext.o topology.o +obj-y += topology_common.o topology_ext.o topology_amd.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0ce7bac212477..3634d877b5080 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -352,7 +352,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) if (!err) c->x86_coreid_bits = get_count_order(c->x86_max_cores); - cacheinfo_amd_init_llc_id(c); + cacheinfo_amd_init_llc_id(c, c->topo.die_id); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index b2ab7aabbe2ec..522204ee195b1 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -661,7 +661,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) return i; } -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c) +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id) { /* * We may have multiple LLCs if L3 caches exist, so check if we @@ -672,7 +672,7 @@ void 
cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c) if (c->x86 < 0x17) { /* LLC is at the node level. */ - c->topo.llc_id = c->topo.die_id; + c->topo.llc_id = die_id; } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* * LLC is at the core complex level. diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 08beca97ad17b..f3f0ccc52f03c 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -81,7 +81,7 @@ extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c); extern int detect_extended_topology(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c); +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id); void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index be8ca21ecd6a0..86de544cdb14c 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -26,6 +26,8 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id); seq_printf(m, "llc_id: %u\n", c->topo.llc_id); seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id); + seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id); + seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); seq_printf(m, "max_cores: %u\n", c->x86_max_cores); seq_printf(m, "max_die_per_pkg: %u\n", __max_die_per_package); seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings); diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 2b100cd11f176..7eead546c20e8 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -9,6 +9,10 @@ struct topo_scan { /* Legacy CPUID[1]:EBX[23:16] number of logical processors */ unsigned int ebx1_nproc_shift; + + /* AMD specific node ID which cannot be mapped into APIC space. 
*/ + u16 amd_nodes_per_pkg; + u16 amd_node_id; }; bool topo_is_converted(struct cpuinfo_x86 *c); @@ -17,6 +21,8 @@ void cpu_parse_topology(struct cpuinfo_x86 *c); void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, unsigned int shift, unsigned int ncpus); bool cpu_parse_topology_ext(struct topo_scan *tscan); +void cpu_parse_topology_amd(struct topo_scan *tscan); +void cpu_topology_fixup_amd(struct topo_scan *tscan); static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom) { diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c new file mode 100644 index 0000000000000..1a8b3ad493afe --- /dev/null +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include +#include +#include + +#include "cpu.h" + +static bool parse_8000_0008(struct topo_scan *tscan) +{ + struct { + // ecx + u32 cpu_nthreads : 8, // Number of physical threads - 1 + : 4, // Reserved + apicid_coreid_len : 4, // Number of thread core ID bits (shift) in APIC ID + perf_tsc_len : 2, // Performance time-stamp counter size + : 14; // Reserved + } ecx; + unsigned int sft; + + if (tscan->c->extended_cpuid_level < 0x80000008) + return false; + + cpuid_leaf_reg(0x80000008, CPUID_ECX, &ecx); + + /* If the thread bits are 0, then get the shift value from ecx.cpu_nthreads */ + sft = ecx.apicid_coreid_len; + if (!sft) + sft = get_count_order(ecx.cpu_nthreads + 1); + + topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1); + return true; +} + +static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id) +{ + /* + * Starting with Fam 17h the DIE domain could probably be used to + * retrieve the node info on AMD/HYGON. Analysis of CPUID dumps + * suggests it's the topmost bit(s) of the CPU cores area, but + * that's guess work and neither enumerated nor documented. + * + * Up to Fam 16h this does not work at all and the legacy node ID + * has to be used. + */ + tscan->amd_nodes_per_pkg = nr_nodes; + tscan->amd_node_id = node_id; +} + +static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) +{ + struct { + // eax + u32 ext_apic_id : 32; // Extended APIC ID + // ebx + u32 core_id : 8, // Unique per-socket logical core unit ID + core_nthreads : 8, // #Threads per core (zero-based) + : 16; // Reserved + // ecx + u32 node_id : 8, // Node (die) ID of invoking logical CPU + nnodes_per_socket : 3, // #nodes in invoking logical CPU's package/socket + : 21; // Reserved + // edx + u32 : 32; // Reserved + } leaf; + + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) + return false; + + cpuid_leaf(0x8000001e, &leaf); + + tscan->c->topo.initial_apicid = leaf.ext_apic_id; + + /* + * If leaf 0xb is available, then SMT shift is set already. If not + * take it from ecx.threads_per_core and use topo_update_dom() - + * topology_set_dom() would propagate and overwrite the already + * propagated CORE level. + */ + if (!has_0xb) { + unsigned int nthreads = leaf.core_nthreads + 1; + + topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads); + } + + store_node(tscan, leaf.nnodes_per_socket + 1, leaf.node_id); + + if (tscan->c->x86_vendor == X86_VENDOR_AMD) { + if (tscan->c->x86 == 0x15) + tscan->c->topo.cu_id = leaf.core_id; + + cacheinfo_amd_init_llc_id(tscan->c, leaf.node_id); + } else { + /* + * Package ID is ApicId[6..] on certain Hygon CPUs. See + * commit e0ceeae708ce for explanation. 
The topology info + * is screwed up: The package shift is always 6 and the + * node ID is bit [4:5]. + */ + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && tscan->c->x86_model <= 0x3) { + topology_set_dom(tscan, TOPO_CORE_DOMAIN, 6, + tscan->dom_ncpus[TOPO_CORE_DOMAIN]); + } + cacheinfo_hygon_init_llc_id(tscan->c); + } + return true; +} + +static bool parse_fam10h_node_id(struct topo_scan *tscan) +{ + struct { + union { + u64 node_id : 3, + nodes_per_pkg : 3, + unused : 58; + u64 msr; + }; + } nid; + + if (!boot_cpu_has(X86_FEATURE_NODEID_MSR)) + return false; + + rdmsrl(MSR_FAM10H_NODE_ID, nid.msr); + store_node(tscan, nid.nodes_per_pkg + 1, nid.node_id); + tscan->c->topo.llc_id = nid.node_id; + return true; +} + +static void legacy_set_llc(struct topo_scan *tscan) +{ + unsigned int apicid = tscan->c->topo.initial_apicid; + + /* parse_8000_0008() set everything up except llc_id */ + tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; +} + +static void parse_topology_amd(struct topo_scan *tscan) +{ + bool has_0xb = false; + + /* + * If the extended topology leaf 0x8000_001e is available + * try to get SMT and CORE shift from leaf 0xb first, then + * try to get the CORE shift from leaf 0x8000_0008. + */ + if (cpu_feature_enabled(X86_FEATURE_TOPOEXT)) + has_0xb = cpu_parse_topology_ext(tscan); + + if (!has_0xb && !parse_8000_0008(tscan)) + return; + + /* Prefer leaf 0x8000001e if available */ + if (parse_8000_001e(tscan, has_0xb)) + return; + + /* Try the NODEID MSR */ + if (parse_fam10h_node_id(tscan)) + return; + + legacy_set_llc(tscan); +} + +void cpu_parse_topology_amd(struct topo_scan *tscan) +{ + tscan->amd_nodes_per_pkg = 1; + parse_topology_amd(tscan); + + if (tscan->amd_nodes_per_pkg > 1) + set_cpu_cap(tscan->c, X86_FEATURE_AMD_DCM); +} + +void cpu_topology_fixup_amd(struct topo_scan *tscan) +{ + struct cpuinfo_x86 *c = tscan->c; + + /* + * Adjust the core_id relative to the node when there is more than + * one node. 
+ */ + if (tscan->c->x86 < 0x17 && tscan->amd_nodes_per_pkg > 1) + c->topo.core_id %= tscan->dom_ncpus[TOPO_CORE_DOMAIN] / tscan->amd_nodes_per_pkg; +} diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index ef99499c9bbe9..e1d1a7b7c8a95 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -11,11 +11,13 @@ struct x86_topology_system x86_topo_system __ro_after_init; +unsigned int __amd_nodes_per_pkg __ro_after_init; +EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg); + void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, unsigned int shift, unsigned int ncpus) { - tscan->dom_shifts[dom] = shift; - tscan->dom_ncpus[dom] = ncpus; + topology_update_dom(tscan, dom, shift, ncpus); /* Propagate to the upper levels */ for (dom++; dom < TOPO_MAX_DOMAIN; dom++) { @@ -153,6 +155,13 @@ static void topo_set_ids(struct topo_scan *tscan) /* Package relative core ID */ c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; + + /* Temporary workaround */ + if (tscan->amd_nodes_per_pkg) + c->topo.amd_node_id = c->topo.die_id = tscan->amd_node_id; + + if (c->x86_vendor == X86_VENDOR_AMD) + cpu_topology_fixup_amd(tscan); } static void topo_set_max_cores(struct topo_scan *tscan) @@ -237,4 +246,10 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) */ __max_die_per_package = tscan.dom_ncpus[TOPO_DIE_DOMAIN] / tscan.dom_ncpus[TOPO_DIE_DOMAIN - 1]; + /* + * AMD systems have Nodes per package which cannot be mapped to + * APIC ID. + */ + if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) + __amd_nodes_per_pkg = __max_die_per_package = tscan.amd_nodes_per_pkg; } From e449ffa3f318b23b4707564bc03feda5ee18ae97 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2024 21:45:27 +0200 Subject: [PATCH 027/109] x86/cpu/amd: Make the CPUID 0x80000008 parser correct commit 1b3108f6898ef2e03973d65255182792e94e2240 upstream. CPUID 0x80000008 ECX.cpu_nthreads describes the number of threads in the package. The parser uses this value to initialize the SMT domain level. That's wrong because cpu_nthreads does not describe the number of threads per physical core. So this needs to set the CORE domain level and let the later parsers set the SMT shift if available. Preset the SMT domain level with the assumption of one thread per core, which is correct ifrt here are no other CPUID leafs to parse, and propagate cpu_nthreads and the core level APIC bitwidth into the CORE domain. Fixes: f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser") Reported-by: "kernelci.org bot" Reported-by: Laura Nao Intel-SIG: commit 1b3108f6898e x86/cpu/amd: Make the CPUID 0x80000008 parser correct. 
x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Laura Nao Link: https://lore.kernel.org/r/20240410194311.535206450@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_amd.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 1a8b3ad493afe..79a85a4814ca1 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan) if (!sft) sft = get_count_order(ecx.cpu_nthreads + 1); - topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1); + /* + * cpu_nthreads describes the number of threads in the package + * sft is the number of APIC ID bits per package + * + * As the number of actual threads per core is not described in + * this leaf, just set the CORE domain shift and let the later + * parsers set SMT shift. Assume one thread per core by default + * which is correct if there are no other CPUID leafs to parse. + */ + topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1); return true; } -static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id) +static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id) { /* * Starting with Fam 17h the DIE domain could probably be used to @@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) tscan->c->topo.initial_apicid = leaf.ext_apic_id; /* - * If leaf 0xb is available, then SMT shift is set already. If not - * take it from ecx.threads_per_core and use topo_update_dom() - - * topology_set_dom() would propagate and overwrite the already - * propagated CORE level. + * If leaf 0xb is available, then the domain shifts are set + * already and nothing to do here. */ if (!has_0xb) { + /* + * Leaf 0x80000008 set the CORE domain shift already. + * Update the SMT domain, but do not propagate it. + */ unsigned int nthreads = leaf.core_nthreads + 1; topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads); From 2544fd540e6a038908c027761160ec87cb454d95 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2024 21:45:28 +0200 Subject: [PATCH 028/109] x86/cpu/amd: Make the NODEID_MSR union actually work commit c064b536a8f9ab7c8e204da8f5a22f7420d0b56c upstream. A system with NODEID_MSR was reported to crash during early boot without any output. The reason is that the union which is used for accessing the bitfields in the MSR is written wrongly and the resulting executable code accesses the wrong part of the MSR data. As a consequence a later division by that value results in 0 and that result is used for another division as divisor, which obviously does not work well. The magic world of C, unions and bitfields: union { u64 bita : 3, bitb : 3; u64 all; } x; x.all = foo(); a = x.bita; b = x.bitb; results in the effective executable code of: a = b = x.bita; because bita and bitb are treated as union members and therefore both end up at bit offset 0. Wrapping the bitfield into an anonymous struct: union { struct { u64 bita : 3, bitb : 3; }; u64 all; } x; works like expected. Rework the NODEID_MSR union in exactly that way to cure the problem. 
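[ Editorial illustration, not part of the upstream commit log: a minimal
  user-space sketch of the aliasing described above, assuming GCC/Clang
  (which accept 64-bit typed bit-fields as an extension). With the raw
  value 0x2b, the broken layout reads both fields from bits [2:0] and
  prints 3/3, while the anonymous-struct variant reads bits [2:0] and
  [5:3] and prints 3/5 as intended. ]

	#include <stdint.h>
	#include <stdio.h>

	/* Broken: both bit-fields are direct union members, so each one
	 * starts at bit offset 0 and aliases bits [2:0] of the raw value. */
	union broken {
		uint64_t node_id : 3,
			 nodes_per_pkg : 3;
		uint64_t msr;
	};

	/* Fixed: the anonymous struct makes the bit-fields consecutive,
	 * node_id in bits [2:0] and nodes_per_pkg in bits [5:3]. */
	union fixed {
		struct {
			uint64_t node_id : 3,
				 nodes_per_pkg : 3;
		};
		uint64_t msr;
	};

	int main(void)
	{
		union broken b = { .msr = 0x2b };
		union fixed  f = { .msr = 0x2b };

		printf("broken: node_id=%llu nodes_per_pkg=%llu\n",
		       (unsigned long long)b.node_id,
		       (unsigned long long)b.nodes_per_pkg);
		printf("fixed:  node_id=%llu nodes_per_pkg=%llu\n",
		       (unsigned long long)f.node_id,
		       (unsigned long long)f.nodes_per_pkg);
		return 0;
	}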
Fixes: f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser") Reported-by: "kernelci.org bot" Reported-by: Laura Nao Intel-SIG: commit c064b536a8f9 x86/cpu/amd: Make the NODEID_MSR union actually work. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Laura Nao Link: https://lore.kernel.org/r/20240410194311.596282919@linutronix.de Closes: https://lore.kernel.org/all/20240322175210.124416-1-laura.nao@collabora.com/ [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_amd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 79a85a4814ca1..7f999aef19659 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -121,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) static bool parse_fam10h_node_id(struct topo_scan *tscan) { - struct { - union { + union { + struct { u64 node_id : 3, nodes_per_pkg : 3, unused : 58; - u64 msr; }; + u64 msr; } nid; if (!boot_cpu_has(X86_FEATURE_NODEID_MSR)) From 23d28d26b4b6dbcd914c58ee0db33ee4fd6a38de Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Apr 2024 18:55:38 +0200 Subject: [PATCH 029/109] x86/cpu/amd: Move TOPOEXT enablement into the topology parser commit 7211274fe0ee352332255e41ab5e628b86e83994 upstream. The topology rework missed that early_init_amd() tries to re-enable the Topology Extensions when the BIOS disabled them. The new parser is invoked before early_init_amd() so the re-enable attempt happens too late. Move it into the AMD specific topology parser code where it belongs. Fixes: f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser") Intel-SIG: commit 7211274fe0ee x86/cpu/amd: Move TOPOEXT enablement into the topology parser. 
x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/878r1j260l.ffs@tglx [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/amd.c | 15 --------------- arch/x86/kernel/cpu/topology_amd.c | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3634d877b5080..bf455660f7ebe 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -675,7 +675,6 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) static void early_init_amd(struct cpuinfo_x86 *c) { - u64 value; u32 dummy; early_init_amd_mc(c); @@ -745,20 +744,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_detect_mem_encrypt(c); - /* Re-enable TopologyExtensions if switched off by BIOS */ - if (c->x86 == 0x15 && - (c->x86_model >= 0x10 && c->x86_model <= 0x6f) && - !cpu_has(c, X86_FEATURE_TOPOEXT)) { - - if (msr_set_bit(0xc0011005, 54) > 0) { - rdmsrl(0xc0011005, value); - if (value & BIT_64(54)) { - set_cpu_cap(c, X86_FEATURE_TOPOEXT); - pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); - } - } - } - if (cpu_has(c, X86_FEATURE_TOPOEXT)) smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 7f999aef19659..a7aa6eff4ae5b 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -147,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan) tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; } +static void topoext_fixup(struct topo_scan *tscan) +{ + struct cpuinfo_x86 *c = tscan->c; + u64 msrval; + + /* Try to re-enable TopologyExtensions if switched off by BIOS */ + if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f) + return; + + if (msr_set_bit(0xc0011005, 54) <= 0) + return; + + rdmsrl(0xc0011005, msrval); + if (msrval & BIT_64(54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); + } +} + static void parse_topology_amd(struct topo_scan *tscan) { bool has_0xb = false; @@ -176,6 +196,7 @@ static void parse_topology_amd(struct topo_scan *tscan) void cpu_parse_topology_amd(struct topo_scan *tscan) { tscan->amd_nodes_per_pkg = 1; + topoext_fixup(tscan); parse_topology_amd(tscan); if (tscan->amd_nodes_per_pkg > 1) From 5302eb57ab4b148e4b34b6cb68cee7bdcb081fa2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 May 2024 21:53:47 +0200 Subject: [PATCH 030/109] x86/topology/amd: Ensure that LLC ID is initialized commit 5754ace3c3199c162dcee1f3f87a538c46d1c832 upstream. The original topology evaluation code initialized cpu_data::topo::llc_id with the die ID initialy and then eventually overwrite it with information gathered from a CPUID leaf. The conversion analysis failed to spot that particular detail and omitted this initial assignment under the assumption that each topology evaluation path will set it up. That assumption is mostly correct, but turns out to be wrong in case that the CPUID leaf 0x80000006 does not provide a LLC ID. 
In that case, LLC ID is invalid and as a consequence the setup of the scheduling domain CPU masks is incorrect which subsequently causes the scheduler core to complain about it during CPU hotplug: BUG: arch topology borken the CLS domain not a subset of the MC domain Cure it by reusing legacy_set_llc() and assigning the die ID if the LLC ID is invalid after all possible parsers have been tried. Fixes: f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser") Reported-by: Yuezhang Mo Intel-SIG: commit 5754ace3c319 x86/topology/amd: Ensure that LLC ID is initialized. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Yuezhang Mo Link: https://lore.kernel.org/r/PUZPR04MB63168AC442C12627E827368581292@PUZPR04MB6316.apcprd04.prod.outlook.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_amd.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index a7aa6eff4ae5b..ce2d507c3b076 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -119,7 +119,7 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) return true; } -static bool parse_fam10h_node_id(struct topo_scan *tscan) +static void parse_fam10h_node_id(struct topo_scan *tscan) { union { struct { @@ -131,20 +131,20 @@ static bool parse_fam10h_node_id(struct topo_scan *tscan) } nid; if (!boot_cpu_has(X86_FEATURE_NODEID_MSR)) - return false; + return; rdmsrl(MSR_FAM10H_NODE_ID, nid.msr); store_node(tscan, nid.nodes_per_pkg + 1, nid.node_id); tscan->c->topo.llc_id = nid.node_id; - return true; } static void legacy_set_llc(struct topo_scan *tscan) { unsigned int apicid = tscan->c->topo.initial_apicid; - /* parse_8000_0008() set everything up except llc_id */ - tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; + /* If none of the parsers set LLC ID then use the die ID for it. */ + if (tscan->c->topo.llc_id == BAD_APICID) + tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; } static void topoext_fixup(struct topo_scan *tscan) @@ -187,10 +187,7 @@ static void parse_topology_amd(struct topo_scan *tscan) return; /* Try the NODEID MSR */ - if (parse_fam10h_node_id(tscan)) - return; - - legacy_set_llc(tscan); + parse_fam10h_node_id(tscan); } void cpu_parse_topology_amd(struct topo_scan *tscan) @@ -198,6 +195,7 @@ void cpu_parse_topology_amd(struct topo_scan *tscan) tscan->amd_nodes_per_pkg = 1; topoext_fixup(tscan); parse_topology_amd(tscan); + legacy_set_llc(tscan); if (tscan->amd_nodes_per_pkg > 1) set_cpu_cap(tscan->c, X86_FEATURE_AMD_DCM); From fa1a39dfe08f269f8fc4a01c71437b86aa2f0744 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 May 2024 22:21:31 +0200 Subject: [PATCH 031/109] x86/topology/amd: Evaluate SMT in CPUID leaf 0x8000001e only on family 0x17 and greater commit 34bf6bae3286a58762711cfbce2cf74ecd42e1b5 upstream. The new AMD/HYGON topology parser evaluates the SMT information in CPUID leaf 0x8000001e unconditionally while the original code restricted it to CPUs with family 0x17 and greater. This breaks family 0x15 CPUs which advertise that leaf and have a non-zero value in the SMT section. 
The machine boots, but the scheduler complains loudly about the mismatch of the core IDs: WARNING: CPU: 1 PID: 0 at kernel/sched/core.c:6482 sched_cpu_starting+0x183/0x250 WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2408 build_sched_domains+0x76b/0x12b0 Add the condition back to cure it. [ bp: Make it actually build because grandpa is not concerned with trivial stuff. :-P ] Fixes: f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser") Closes: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/56 Reported-by: Tim Teichmann Reported-by: Christian Heusel Intel-SIG: commit 34bf6bae3286 x86/topology/amd: Evaluate SMT in CPUID leaf 0x8000001e only on family 0x17 and greater. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Tim Teichmann Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/7skhx6mwe4hxiul64v6azhlxnokheorksqsdbp7qw6g2jduf6c@7b5pvomauugk [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index ce2d507c3b076..5ee6373d4d926 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -84,9 +84,9 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) /* * If leaf 0xb is available, then the domain shifts are set - * already and nothing to do here. + * already and nothing to do here. Only valid for family >= 0x17. */ - if (!has_0xb) { + if (!has_0xb && tscan->c->x86 >= 0x17) { /* * Leaf 0x80000008 set the CORE domain shift already. * Update the SMT domain, but do not propagate it. From cbba3cc2283928d9571cc63ab8925aa3f458cd55 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:13 +0100 Subject: [PATCH 032/109] x86/smpboot: Teach it about topo.amd_node_id commit ace278e7eca6be5d36eb6f1efb660c13b66c4d64 upstream. When switching AMD over to the new topology parser then the match functions need to look for AMD systems with the extended topology feature at the new topo.amd_node_id member which is then holding the node id information. Intel-SIG: commit ace278e7eca6 x86/smpboot: Teach it about topo.amd_node_id. 
x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.082979150@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/smpboot.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7d5fb45dd53e0..5177cce3b8bb5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -488,6 +488,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) if (c->topo.pkg_id == o->topo.pkg_id && c->topo.die_id == o->topo.die_id && + c->topo.amd_node_id == o->topo.amd_node_id && per_cpu_llc_id(cpu1) == per_cpu_llc_id(cpu2)) { if (c->topo.core_id == o->topo.core_id) return topology_sane(c, o, "smt"); @@ -509,10 +510,13 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { - if (c->topo.pkg_id == o->topo.pkg_id && - c->topo.die_id == o->topo.die_id) - return true; - return false; + if (c->topo.pkg_id != o->topo.pkg_id || c->topo.die_id != o->topo.die_id) + return false; + + if (cpu_feature_enabled(X86_FEATURE_TOPOEXT) && topology_amd_nodes_per_pkg() > 1) + return c->topo.amd_node_id == o->topo.amd_node_id; + + return true; } static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) From 613383588f22b2b3f9359fdb4999547f05cc5c91 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:14 +0100 Subject: [PATCH 033/109] x86/cpu: Use common topology code for AMD commit c749ce393b8fe9db5ed894411f06eafa88f0e13a upstream. Switch it over to the new topology evaluation mechanism and remove the random bits and pieces which are sprinkled all over the place. No functional change intended. Intel-SIG: commit c749ce393b8f x86/cpu: Use common topology code for AMD. 
x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.145745053@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/processor.h | 2 - arch/x86/kernel/cpu/amd.c | 146 -------------------------- arch/x86/kernel/cpu/mce/inject.c | 3 +- arch/x86/kernel/cpu/topology_common.c | 5 +- drivers/ras/amd/atl/umc.c | 2 +- 5 files changed, 6 insertions(+), 152 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 35542e0faccbe..6714c01fd1e0b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -721,12 +721,10 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu) } #ifdef CONFIG_CPU_SUP_AMD -extern u32 amd_get_nodes_per_socket(void); extern u32 amd_get_highest_perf(void); extern void amd_clear_divider(void); extern void amd_check_microcode(void); #else -static inline u32 amd_get_nodes_per_socket(void) { return 0; } static inline u32 amd_get_highest_perf(void) { return 0; } static inline void amd_clear_divider(void) { } static inline void amd_check_microcode(void) { } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bf455660f7ebe..ea5c85efdcdfd 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -28,13 +28,6 @@ #include "cpu.h" -/* - * nodes_per_socket: Stores the number of nodes per socket. - * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX - * Node Identifiers[10:8] - */ -static u32 nodes_per_socket = 1; - static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) { u32 gprs[8] = { 0 }; @@ -301,97 +294,6 @@ static int nearby_node(int apicid) } #endif -/* - * Fix up topo::core_id for pre-F17h systems to be in the - * [0 .. cores_per_node - 1] range. Not really needed but - * kept so as not to break existing setups. - */ -static void legacy_fixup_core_id(struct cpuinfo_x86 *c) -{ - u32 cus_per_node; - - if (c->x86 >= 0x17) - return; - - cus_per_node = c->x86_max_cores / nodes_per_socket; - c->topo.core_id %= cus_per_node; -} - -/* - * Fixup core topology information for - * (1) AMD multi-node processors - * Assumption: Number of cores in each internal node is the same. - * (2) AMD processors supporting compute units - */ -static void amd_get_topology(struct cpuinfo_x86 *c) -{ - /* get information required for multi-node processors */ - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - int err; - u32 eax, ebx, ecx, edx; - - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - - c->topo.die_id = ecx & 0xff; - - if (c->x86 == 0x15) - c->topo.cu_id = ebx & 0xff; - - if (c->x86 >= 0x17) { - c->topo.core_id = ebx & 0xff; - - if (smp_num_siblings > 1) - c->x86_max_cores /= smp_num_siblings; - } - - /* - * In case leaf B is available, use it to derive - * topology information. - */ - err = detect_extended_topology(c); - if (!err) - c->x86_coreid_bits = get_count_order(c->x86_max_cores); - - cacheinfo_amd_init_llc_id(c, c->topo.die_id); - - } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - c->topo.die_id = value & 7; - c->topo.llc_id = c->topo.die_id; - } else - return; - - if (nodes_per_socket > 1) { - set_cpu_cap(c, X86_FEATURE_AMD_DCM); - legacy_fixup_core_id(c); - } -} - -/* - * On a AMD dual core setup the lower bits of the APIC id distinguish the cores. 
- * Assumes number of cores is a power of two. - */ -static void amd_detect_cmp(struct cpuinfo_x86 *c) -{ - unsigned bits; - - bits = c->x86_coreid_bits; - /* Low order bits define the core id (index of core in socket) */ - c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->topo.pkg_id = c->topo.initial_apicid >> bits; - /* use socket ID also for last level cache */ - c->topo.llc_id = c->topo.die_id = c->topo.pkg_id; -} - -u32 amd_get_nodes_per_socket(void) -{ - return nodes_per_socket; -} -EXPORT_SYMBOL_GPL(amd_get_nodes_per_socket); - static void srat_detect_node(struct cpuinfo_x86 *c) { #ifdef CONFIG_NUMA @@ -443,32 +345,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -static void early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - static void bsp_determine_snp(struct cpuinfo_x86 *c) { #ifdef CONFIG_ARCH_HAS_CC_PLATFORM @@ -523,18 +399,6 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_MWAITX)) use_mwaitx_delay(); - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 ecx; - - ecx = cpuid_ecx(0x8000001e); - __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; - } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; - } - if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && !boot_cpu_has(X86_FEATURE_VIRT_SSBD) && c->x86 >= 0x15 && c->x86 <= 0x17) { @@ -677,8 +541,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) { u32 dummy; - early_init_amd_mc(c); - if (c->x86 >= 0xf) set_cpu_cap(c, X86_FEATURE_K8); @@ -744,9 +606,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_detect_mem_encrypt(c); - if (cpu_has(c, X86_FEATURE_TOPOEXT)) - smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB)) setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); @@ -1093,9 +952,6 @@ static void init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_FSRM)) set_cpu_cap(c, X86_FEATURE_FSRS); - /* get apicid instead of initial apic id from cpuid */ - c->topo.apicid = read_apic_id(); - /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); @@ -1138,8 +994,6 @@ static void init_amd(struct cpuinfo_x86 *c) cpu_detect_cache_sizes(c); - amd_detect_cmp(c); - amd_get_topology(c); srat_detect_node(c); init_amd_cacheinfo(c); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 308c5b5e0bbeb..2b290452e1940 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -433,8 +433,7 @@ static u32 get_nbc_for_node(int node_id) struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket(); - + cores_per_node = (c->x86_max_cores * smp_num_siblings) / topology_amd_nodes_per_pkg(); return cores_per_node * node_id; } diff --git 
a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index e1d1a7b7c8a95..3c69229ba154f 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -72,7 +72,6 @@ bool topo_is_converted(struct cpuinfo_x86 *c) { /* Temporary until everything is converted over. */ switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: case X86_VENDOR_HYGON: return false; default: @@ -133,6 +132,10 @@ static void parse_topology(struct topo_scan *tscan, bool early) tscan->ebx1_nproc_shift = get_count_order(ebx.nproc); switch (c->x86_vendor) { + case X86_VENDOR_AMD: + if (IS_ENABLED(CONFIG_CPU_SUP_AMD)) + cpu_parse_topology_amd(tscan); + break; case X86_VENDOR_CENTAUR: case X86_VENDOR_ZHAOXIN: parse_legacy(tscan); diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 9d51e4954687f..b1664ce1b5a1a 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -18,7 +18,7 @@ static u8 get_die_id(struct atl_err *err) * For CPUs, this is the AMD Node ID modulo the number * of AMD Nodes per socket. */ - return topology_die_id(err->cpu) % amd_get_nodes_per_socket(); + return topology_die_id(err->cpu) % topology_amd_nodes_per_pkg(); } #define UMC_CHANNEL_NUM GENMASK(31, 20) From 70f75725eeffc88dce25d39eea8f602dc51773d9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:15 +0100 Subject: [PATCH 034/109] x86/cpu: Use common topology code for HYGON commit 3279081dd0cb6bc13ffd5ee0e5cb11cfeae2c625 upstream. Switch it over to use the consolidated topology evaluation and remove the temporary safe guards which are not longer needed. No functional change intended. Intel-SIG: commit 3279081dd0cb x86/cpu: Use common topology code for HYGON. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.207750409@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/common.c | 5 - arch/x86/kernel/cpu/cpu.h | 1 - arch/x86/kernel/cpu/hygon.c | 129 -------------------------- arch/x86/kernel/cpu/topology.h | 1 - arch/x86/kernel/cpu/topology_common.c | 22 +---- 5 files changed, 4 insertions(+), 154 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 08626f21918b3..34bde63ccb64b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1887,11 +1887,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Clear/Set all flags overridden by options, after probe */ apply_forced_caps(c); -#ifdef CONFIG_X86_64 - if (!topo_is_converted(c)) - c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); -#endif - /* * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and * Hygon will clear it in ->c_init() below. 
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index f3f0ccc52f03c..1beccefbaff9a 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -78,7 +78,6 @@ extern void init_intel_cacheinfo(struct cpuinfo_x86 *c); extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c); -extern int detect_extended_topology(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index e81464801afb3..321c5c6b6ffd3 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -18,14 +18,6 @@ #include "cpu.h" -#define APICID_SOCKET_ID_BIT 6 - -/* - * nodes_per_socket: Stores the number of nodes per socket. - * Refer to CPUID Fn8000_001E_ECX Node Identifiers[10:8] - */ -static u32 nodes_per_socket = 1; - #ifdef CONFIG_NUMA /* * To workaround broken NUMA config. Read the comment in @@ -49,80 +41,6 @@ static int nearby_node(int apicid) } #endif -static void hygon_get_topology_early(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_TOPOEXT)) - smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; -} - -/* - * Fixup core topology information for - * (1) Hygon multi-node processors - * Assumption: Number of cores in each internal node is the same. - * (2) Hygon processors supporting compute units - */ -static void hygon_get_topology(struct cpuinfo_x86 *c) -{ - /* get information required for multi-node processors */ - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - int err; - u32 eax, ebx, ecx, edx; - - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - - c->topo.die_id = ecx & 0xff; - - c->topo.core_id = ebx & 0xff; - - if (smp_num_siblings > 1) - c->x86_max_cores /= smp_num_siblings; - - /* - * In case leaf B is available, use it to derive - * topology information. - */ - err = detect_extended_topology(c); - if (!err) - c->x86_coreid_bits = get_count_order(c->x86_max_cores); - - /* - * Socket ID is ApicId[6] for the processors with model <= 0x3 - * when running on host. - */ - if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && c->x86_model <= 0x3) - c->topo.pkg_id = c->topo.apicid >> APICID_SOCKET_ID_BIT; - - cacheinfo_hygon_init_llc_id(c); - } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - c->topo.die_id = value & 7; - c->topo.llc_id = c->topo.die_id; - } else - return; - - if (nodes_per_socket > 1) - set_cpu_cap(c, X86_FEATURE_AMD_DCM); -} - -/* - * On Hygon setup the lower bits of the APIC id distinguish the cores. - * Assumes number of cores is a power of two. - */ -static void hygon_detect_cmp(struct cpuinfo_x86 *c) -{ - unsigned int bits; - - bits = c->x86_coreid_bits; - /* Low order bits define the core id (index of core in socket) */ - c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->topo.pkg_id = c->topo.initial_apicid >> bits; - /* Use package ID also for last level cache */ - c->topo.llc_id = c->topo.die_id = c->topo.pkg_id; -} - static void srat_detect_node(struct cpuinfo_x86 *c) { #ifdef CONFIG_NUMA @@ -173,32 +91,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -static void early_init_hygon_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int bits, ecx; - - /* Multi core CPU? 
*/ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - static void bsp_init_hygon(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -212,18 +104,6 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_MWAITX)) use_mwaitx_delay(); - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 ecx; - - ecx = cpuid_ecx(0x8000001e); - __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; - } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; - } - if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && !boot_cpu_has(X86_FEATURE_VIRT_SSBD)) { /* @@ -242,8 +122,6 @@ static void early_init_hygon(struct cpuinfo_x86 *c) { u32 dummy; - early_init_hygon_mc(c); - set_cpu_cap(c, X86_FEATURE_K8); rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); @@ -284,8 +162,6 @@ static void early_init_hygon(struct cpuinfo_x86 *c) * we can set it unconditionally. */ set_cpu_cap(c, X86_FEATURE_VMMCALL); - - hygon_get_topology_early(c); } static void init_hygon(struct cpuinfo_x86 *c) @@ -300,9 +176,6 @@ static void init_hygon(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - /* get apicid instead of initial apic id from cpuid */ - c->topo.apicid = read_apic_id(); - /* * XXX someone from Hygon needs to confirm this DTRT * @@ -314,8 +187,6 @@ static void init_hygon(struct cpuinfo_x86 *c) cpu_detect_cache_sizes(c); - hygon_detect_cmp(c); - hygon_get_topology(c); srat_detect_node(c); init_hygon_cacheinfo(c); diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 7eead546c20e8..2a3c838b6044d 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -15,7 +15,6 @@ struct topo_scan { u16 amd_node_id; }; -bool topo_is_converted(struct cpuinfo_x86 *c); void cpu_init_topology(struct cpuinfo_x86 *c); void cpu_parse_topology(struct cpuinfo_x86 *c); void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 3c69229ba154f..ab944d6f973f9 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -68,18 +68,6 @@ static void parse_legacy(struct topo_scan *tscan) topology_set_dom(tscan, TOPO_CORE_DOMAIN, core_shift, cores); } -bool topo_is_converted(struct cpuinfo_x86 *c) -{ - /* Temporary until everything is converted over. 
*/ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_HYGON: - return false; - default: - /* Let all UP systems use the below */ - return true; - } -} - static bool fake_topology(struct topo_scan *tscan) { /* @@ -144,6 +132,10 @@ static void parse_topology(struct topo_scan *tscan, bool early) if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan)) parse_legacy(tscan); break; + case X86_VENDOR_HYGON: + if (IS_ENABLED(CONFIG_CPU_SUP_HYGON)) + cpu_parse_topology_amd(tscan); + break; } } @@ -187,9 +179,6 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) parse_topology(&tscan, false); - if (!topo_is_converted(c)) - return; - for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) { if (tscan.dom_shifts[dom] == x86_topo_system.dom_shifts[dom]) continue; @@ -218,9 +207,6 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) parse_topology(&tscan, true); - if (!topo_is_converted(c)) - return; - /* Copy the shift values and calculate the unit sizes. */ memcpy(x86_topo_system.dom_shifts, tscan.dom_shifts, sizeof(x86_topo_system.dom_shifts)); From ac8dd1c053bc660446e157eab7179436c94cb1f3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:17 +0100 Subject: [PATCH 035/109] x86/mm/numa: Use core domain size on AMD commit d805a6916037a716e858a0a91d844bad1ca8f48b upstream. cpuinfo::topo::x86_coreid_bits is about to be phased out. Use the core domain size from the topology information. Add a comment why the early MPTABLE parsing is required and decrapify the loop which sets the APIC ID to node map. Intel-SIG: commit d805a6916037 x86/mm/numa: Use core domain size on AMD. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.270320718@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 5 +++++ arch/x86/mm/amdtopology.c | 35 +++++++++++++++------------------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index fadc19ae972ad..abbd86c144ad2 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -121,6 +121,11 @@ struct x86_topology_system { extern struct x86_topology_system x86_topo_system; +static inline unsigned int topology_get_domain_size(enum x86_topology_domains dom) +{ + return x86_topo_system.dom_size[dom]; +} + extern const struct cpumask *cpu_coregroup_mask(int cpu); extern const struct cpumask *cpu_clustergroup_mask(int cpu); diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c index b3ca7d23e4b01..5681b997b3574 100644 --- a/arch/x86/mm/amdtopology.c +++ b/arch/x86/mm/amdtopology.c @@ -54,13 +54,11 @@ static __init int find_northbridge(void) int __init amd_numa_init(void) { - u64 start = PFN_PHYS(0); + unsigned int numnodes, cores, apicid; + u64 prevbase, start = PFN_PHYS(0); u64 end = PFN_PHYS(max_pfn); - unsigned numnodes; - u64 prevbase; - int i, j, nb; u32 nodeid, reg; - unsigned int bits, cores, apicid_base; + int i, j, nb; if (!early_pci_allowed()) return -EINVAL; @@ -158,26 +156,25 @@ int __init amd_numa_init(void) return -ENOENT; /* - * We seem to have valid NUMA configuration. Map apicids to nodes - * using the coreid bits from early_identify_cpu. + * We seem to have valid NUMA configuration. 
Map apicids to nodes + * using the size of the core domain in the APIC space. */ - bits = boot_cpu_data.x86_coreid_bits; - cores = 1 << bits; - apicid_base = 0; + cores = topology_get_domain_size(TOPO_CORE_DOMAIN); /* - * get boot-time SMP configuration: + * Scan MPTABLE to map the local APIC and ensure that the boot CPU + * APIC ID is valid. This is required because on pre ACPI/SRAT + * systems IO-APICs are mapped before the boot CPU. */ early_get_smp_config(); - if (boot_cpu_physical_apicid > 0) { - pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); - apicid_base = boot_cpu_physical_apicid; - } - - for_each_node_mask(i, numa_nodes_parsed) - for (j = apicid_base; j < cores + apicid_base; j++) - set_apicid_to_node((i << bits) + j, i); + apicid = boot_cpu_physical_apicid; + if (apicid > 0) + pr_info("BSP APIC ID: %02x\n", apicid); + for_each_node_mask(i, numa_nodes_parsed) { + for (j = 0; j < cores; j++, apicid++) + set_apicid_to_node(apicid, i); + } return 0; } From 3fce5d9bce8dd687636196f94638866dd5d9c162 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:18 +0100 Subject: [PATCH 036/109] x86/cpu: Make topology_amd_node_id() use the actual node info commit 03fa6bea5a3e13ccbc211af1fa7e75d34239a408 upstream. Now that everything is converted switch it over and remove the intermediate operation. Intel-SIG: commit 03fa6bea5a3e x86/cpu: Make topology_amd_node_id() use the actual node info. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.334185785@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 4 ++-- arch/x86/kernel/cpu/topology_common.c | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index abbd86c144ad2..a9324347a0e7e 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -136,7 +136,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).topo.core_id) #define topology_ppin(cpu) (cpu_data(cpu).ppin) -#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.die_id) +#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.amd_node_id) extern unsigned int __max_die_per_package; @@ -173,7 +173,7 @@ extern unsigned int __amd_nodes_per_pkg; static inline unsigned int topology_amd_nodes_per_pkg(void) { - return __max_die_per_package; + return __amd_nodes_per_pkg; } extern struct cpumask __cpu_primary_thread_mask; diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index ab944d6f973f9..afea34d59598f 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -151,9 +151,7 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; - /* Temporary workaround */ - if (tscan->amd_nodes_per_pkg) - c->topo.amd_node_id = c->topo.die_id = tscan->amd_node_id; + c->topo.amd_node_id = tscan->amd_node_id; if (c->x86_vendor == X86_VENDOR_AMD) cpu_topology_fixup_amd(tscan); @@ -239,6 +237,5 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) * AMD systems have Nodes per package which cannot be mapped to * APIC ID. 
*/ - if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) - __amd_nodes_per_pkg = __max_die_per_package = tscan.amd_nodes_per_pkg; + __amd_nodes_per_pkg = tscan.amd_nodes_per_pkg; } From bed7892749e64ff7dbabe8fad6479d77c5ff9356 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:20 +0100 Subject: [PATCH 037/109] x86/cpu: Remove topology.c commit 6cf70394e7205a0d65780473aa2081839eb93471 upstream. No more users. Stick it into the ugly code museum. Intel-SIG: commit 6cf70394e720 x86/cpu: Remove topology.c. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.395230346@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/topology.c | 142 --------------------------------- 2 files changed, 1 insertion(+), 143 deletions(-) delete mode 100644 arch/x86/kernel/cpu/topology.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3ee24ff62cc42..9e0a1c119a91c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -18,7 +18,7 @@ KMSAN_SANITIZE_common.o := n KCSAN_SANITIZE_common.o := n obj-y := cacheinfo.o scattered.o -obj-y += topology_common.o topology_ext.o topology_amd.o topology.o +obj-y += topology_common.o topology_ext.o topology_amd.o obj-y += common.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c deleted file mode 100644 index 2b68f26a99a44..0000000000000 --- a/arch/x86/kernel/cpu/topology.c +++ /dev/null @@ -1,142 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Check for extended topology enumeration cpuid leaf 0xb and if it - * exists, use it for populating initial_apicid and cpu topology - * detection. - */ - -#include -#include -#include -#include - -#include "cpu.h" - -/* leaf 0xb SMT level */ -#define SMT_LEVEL 0 - -/* extended topology sub-leaf types */ -#define INVALID_TYPE 0 -#define SMT_TYPE 1 -#define CORE_TYPE 2 -#define DIE_TYPE 5 - -#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) -#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) -#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) - -#ifdef CONFIG_SMP -/* - * Check if given CPUID extended topology "leaf" is implemented - */ -static int check_extended_topology_leaf(int leaf) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - - if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) - return -1; - - return 0; -} -/* - * Return best CPUID Extended Topology Leaf supported - */ -static int detect_extended_topology_leaf(struct cpuinfo_x86 *c) -{ - if (c->cpuid_level >= 0x1f) { - if (check_extended_topology_leaf(0x1f) == 0) - return 0x1f; - } - - if (c->cpuid_level >= 0xb) { - if (check_extended_topology_leaf(0xb) == 0) - return 0xb; - } - - return -1; -} -#endif - -/* - * Check for extended topology enumeration cpuid leaf, and if it - * exists, use it for populating initial_apicid and cpu topology - * detection. 
- */ -int detect_extended_topology(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width; - unsigned int core_select_mask, core_level_siblings; - unsigned int die_select_mask, die_level_siblings; - unsigned int pkg_mask_width; - bool die_level_present = false; - int leaf; - - leaf = detect_extended_topology_leaf(c); - if (leaf < 0) - return -1; - - /* - * Populate HT related information from sub-leaf level 0. - */ - cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - c->topo.initial_apicid = edx; - core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); - core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - - sub_index = 1; - while (true) { - cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx); - - /* - * Check for the Core type in the implemented sub leaves. - */ - if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { - core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - die_level_siblings = core_level_siblings; - die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - } - if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) { - die_level_present = true; - die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - } - - if (LEAFB_SUBTYPE(ecx) != INVALID_TYPE) - pkg_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - else - break; - - sub_index++; - } - - core_select_mask = (~(-1 << pkg_mask_width)) >> ht_mask_width; - die_select_mask = (~(-1 << die_plus_mask_width)) >> - core_plus_mask_width; - - c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, - ht_mask_width) & core_select_mask; - - if (die_level_present) { - c->topo.die_id = apic->phys_pkg_id(c->topo.initial_apicid, - core_plus_mask_width) & die_select_mask; - } - - c->topo.pkg_id = apic->phys_pkg_id(c->topo.initial_apicid, pkg_mask_width); - /* - * Reinit the apicid, now that we have extended initial_apicid. - */ - c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); - - c->x86_max_cores = (core_level_siblings / smp_num_siblings); - __max_die_per_package = (die_level_siblings / core_level_siblings); -#endif - return 0; -} From 782bea50b1a87908e3e36ed6f0dab25e0944babd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:21 +0100 Subject: [PATCH 038/109] x86/cpu: Remove x86_coreid_bits commit fab75e790f00dca592d9a934d9f1237b81093b99 upstream. No more users. Intel-SIG: commit fab75e790f00 x86/cpu: Remove x86_coreid_bits. 
x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.455839743@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/cpu/common.c | 1 - 2 files changed, 3 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6714c01fd1e0b..3c21968c8102f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -135,8 +135,6 @@ struct cpuinfo_x86 { #endif __u8 x86_virt_bits; __u8 x86_phys_bits; - /* CPUID returned core id bits: */ - __u8 x86_coreid_bits; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 34bde63ccb64b..060c2216c4627 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1860,7 +1860,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; - c->x86_coreid_bits = 0; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; From c61ea320047f6c61abf310a08581cc417281e996 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:22 +0100 Subject: [PATCH 039/109] x86/apic: Remove unused phys_pkg_id() callback commit 035fc90a9d8fcff39b9bb43b9ff132756d947ea5 upstream. Now that the core code does not use this monstrosity anymore, it's time to put it to rest. The only real purpose was to read the APIC ID on UV and VSMP systems for the actual evaluation. That's what the core code does now. For doing the actual shift operation there is truly no APIC callback required. Intel-SIG: commit 035fc90a9d8f x86/apic: Remove unused phys_pkg_id() callback. 
x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.516536121@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic_flat_64.c | 7 ------- arch/x86/kernel/apic/apic_noop.c | 3 --- arch/x86/kernel/apic/apic_numachip.c | 7 ------- arch/x86/kernel/apic/bigsmp_32.c | 6 ------ arch/x86/kernel/apic/local.h | 1 - arch/x86/kernel/apic/probe_32.c | 6 ------ arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 6 ------ arch/x86/kernel/apic/x2apic_uv_x.c | 11 ----------- arch/x86/kernel/vsmp_64.c | 13 ------------- arch/x86/xen/apic.c | 6 ------ 12 files changed, 68 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b6a47be78375d..7ad2fcb55c67b 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -298,7 +298,6 @@ struct apic { void (*init_apic_ldr)(void); void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); u32 (*cpu_present_to_apicid)(int mps_cpu); - u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); u32 (*get_apic_id)(u32 id); u32 (*set_apic_id)(u32 apicid); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 7139867d69cd2..5c2d476be004b 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -66,11 +66,6 @@ static u32 set_apic_id(u32 id) return (id & 0xFF) << 24; } -static u32 flat_phys_pkg_id(u32 initial_apic_id, int index_msb) -{ - return initial_apic_id >> index_msb; -} - static int flat_probe(void) { return 1; @@ -89,7 +84,6 @@ static struct apic apic_flat __ro_after_init = { .init_apic_ldr = default_init_apic_ldr, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = flat_phys_pkg_id, .max_apic_id = 0xFE, .get_apic_id = flat_get_apic_id, @@ -160,7 +154,6 @@ static struct apic apic_physflat __ro_after_init = { .disable_esr = 0, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = flat_phys_pkg_id, .max_apic_id = 0xFE, .get_apic_id = flat_get_apic_id, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index b00d52ae84fa9..73247856e23f7 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -29,7 +29,6 @@ static void noop_send_IPI_self(int vector) { } static void noop_apic_icr_write(u32 low, u32 id) { } static int noop_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) { return -1; } static u64 noop_apic_icr_read(void) { return 0; } -static u32 noop_phys_pkg_id(u32 cpuid_apic, int index_msb) { return 0; } static u32 noop_get_apic_id(u32 apicid) { return 0; } static void noop_apic_eoi(void) { } @@ -56,8 +55,6 @@ struct apic apic_noop __ro_after_init = { .ioapic_phys_id_map = default_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = noop_phys_pkg_id, - .max_apic_id = 0xFE, .get_apic_id = noop_get_apic_id, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 456a14c44f67a..70bedbc1a5bd1 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -56,11 +56,6 @@ static u32 numachip2_set_apic_id(u32 id) return id << 24; } -static u32 numachip_phys_pkg_id(u32 initial_apic_id, int 
index_msb) -{ - return initial_apic_id >> index_msb; -} - static void numachip1_apic_icr_write(int apicid, unsigned int val) { write_lcsr(CSR_G3_EXT_IRQ_GEN, (apicid << 16) | val); @@ -228,7 +223,6 @@ static const struct apic apic_numachip1 __refconst = { .disable_esr = 0, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = numachip_phys_pkg_id, .max_apic_id = UINT_MAX, .get_apic_id = numachip1_get_apic_id, @@ -265,7 +259,6 @@ static const struct apic apic_numachip2 __refconst = { .disable_esr = 0, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = numachip_phys_pkg_id, .max_apic_id = UINT_MAX, .get_apic_id = numachip2_get_apic_id, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 7ee3c486cb336..f3a51d6dc167d 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -29,11 +29,6 @@ static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *re physids_promote(0xFFL, retmap); } -static u32 bigsmp_phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - static void bigsmp_send_IPI_allbutself(int vector) { default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); @@ -88,7 +83,6 @@ static struct apic apic_bigsmp __ro_after_init = { .check_apicid_used = bigsmp_check_apicid_used, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = bigsmp_phys_pkg_id, .max_apic_id = 0xFE, .get_apic_id = bigsmp_get_apic_id, diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index 9ea6186ea88c2..8fd37c9d1b344 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -17,7 +17,6 @@ void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); u32 x2apic_get_apic_id(u32 id); u32 x2apic_set_apic_id(u32 id); -u32 x2apic_phys_pkg_id(u32 initial_apicid, int index_msb); void x2apic_send_IPI_all(int vector); void x2apic_send_IPI_allbutself(int vector); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 5eb3fbe472da9..1c508681d9d4b 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -18,11 +18,6 @@ #include "local.h" -static u32 default_phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - static u32 default_get_apic_id(u32 x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); @@ -54,7 +49,6 @@ static struct apic apic_default __ro_after_init = { .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = default_phys_pkg_id, .max_apic_id = 0xFE, .get_apic_id = default_get_apic_id, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index a8306089c91bc..ec204edb31e90 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -236,7 +236,6 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = x2apic_phys_pkg_id, .max_apic_id = UINT_MAX, .x2apic_set_max_apicid = true, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 558a4a8824f47..3396540fb46b3 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -134,11 +134,6 @@ u32 
x2apic_set_apic_id(u32 id) return id; } -u32 x2apic_phys_pkg_id(u32 initial_apicid, int index_msb) -{ - return initial_apicid >> index_msb; -} - static struct apic apic_x2apic_phys __ro_after_init = { .name = "physical x2apic", @@ -151,7 +146,6 @@ static struct apic apic_x2apic_phys __ro_after_init = { .disable_esr = 0, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = x2apic_phys_pkg_id, .max_apic_id = UINT_MAX, .x2apic_set_max_apicid = true, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 3ca49dfb653c3..f6b24e72af711 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -784,16 +784,6 @@ static u32 set_apic_id(u32 id) return id; } -static unsigned int uv_read_apic_id(void) -{ - return x2apic_get_apic_id(apic_read(APIC_ID)); -} - -static u32 uv_phys_pkg_id(u32 initial_apicid, int index_msb) -{ - return uv_read_apic_id() >> index_msb; -} - static int uv_probe(void) { return apic == &apic_x2apic_uv_x; @@ -811,7 +801,6 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .disable_esr = 0, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = uv_phys_pkg_id, .max_apic_id = UINT_MAX, .get_apic_id = x2apic_get_apic_id, diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index d3fc017705587..73511332bb670 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -127,25 +127,12 @@ static void __init vsmp_cap_cpus(void) #endif } -static u32 apicid_phys_pkg_id(u32 initial_apic_id, int index_msb) -{ - return read_apic_id() >> index_msb; -} - -static void vsmp_apic_post_init(void) -{ - /* need to update phys_pkg_id */ - apic->phys_pkg_id = apicid_phys_pkg_id; -} - void __init vsmp_init(void) { detect_vsmp_box(); if (!is_vsmp_box()) return; - x86_platform.apic_post_init = vsmp_apic_post_init; - vsmp_cap_cpus(); set_vsmp_ctl(); diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 9dd5490b33189..d4752382b6003 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -110,11 +110,6 @@ static int xen_madt_oem_check(char *oem_id, char *oem_table_id) return xen_pv_domain(); } -static u32 xen_phys_pkg_id(u32 initial_apic_id, int index_msb) -{ - return initial_apic_id >> index_msb; -} - static u32 xen_cpu_present_to_apicid(int cpu) { if (cpu_present(cpu)) @@ -133,7 +128,6 @@ static struct apic xen_pv_apic __ro_after_init = { .disable_esr = 0, .cpu_present_to_apicid = xen_cpu_present_to_apicid, - .phys_pkg_id = xen_phys_pkg_id, /* detect_ht */ .max_apic_id = UINT_MAX, .get_apic_id = xen_get_apic_id, From 2f6c02fce1e28fc99aa121ff70204818d3f44af9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:24 +0100 Subject: [PATCH 040/109] x86/xen/smp_pv: Remove cpudata fiddling commit d5474e4d2c91b3f27864e9898f7f6e49daf26d93 upstream. The new topology CPUID parser installs already fake topology for XEN/PV, which ends up with cpuinfo::max_cores = 1. Intel-SIG: commit d5474e4d2c91 x86/xen/smp_pv: Remove cpudata fiddling. 
x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.576579177@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/smp_pv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index a0f07bbfcd6e3..3ae29c25ff855 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -73,7 +73,6 @@ static void cpu_bringup(void) } cpu = smp_processor_id(); smp_store_cpu_info(cpu); - cpu_data(cpu).x86_max_cores = 1; set_cpu_sibling_map(cpu); speculative_store_bypass_ht_init(); @@ -224,8 +223,6 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus) smp_prepare_cpus_common(); - cpu_data(0).x86_max_cores = 1; - speculative_store_bypass_ht_init(); xen_pmu_init(0); From 348260f5e1291f223fac1c797e8c067182904a1f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:25 +0100 Subject: [PATCH 041/109] x86/apic/uv: Remove the private leaf 0xb parser commit bcccdf8b30736250d5057e0940468a41d633e672 upstream. The package shift has been already evaluated by the early CPU init. Put the mindless copy right next to the original leaf 0xb parser. Intel-SIG: commit bcccdf8b3073 x86/apic/uv: Remove the private leaf 0xb parser. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.637385562@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 5 +++ arch/x86/kernel/apic/x2apic_uv_x.c | 52 ++++++------------------------ 2 files changed, 14 insertions(+), 43 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index a9324347a0e7e..8e2a18a9eb874 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -126,6 +126,11 @@ static inline unsigned int topology_get_domain_size(enum x86_topology_domains do return x86_topo_system.dom_size[dom]; } +static inline unsigned int topology_get_domain_shift(enum x86_topology_domains dom) +{ + return dom == TOPO_SMT_DOMAIN ? 0 : x86_topo_system.dom_shifts[dom - 1]; +} + extern const struct cpumask *cpu_coregroup_mask(int cpu); extern const struct cpumask *cpu_clustergroup_mask(int cpu); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f6b24e72af711..f296e85c75bb1 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -241,54 +241,20 @@ static void __init uv_tsc_check_sync(void) is_uv(UV3) ? 
sname.s3.field : \ undef) -/* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */ - -#define SMT_LEVEL 0 /* Leaf 0xb SMT level */ -#define INVALID_TYPE 0 /* Leaf 0xb sub-leaf types */ -#define SMT_TYPE 1 -#define CORE_TYPE 2 -#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) -#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) - -static void set_x2apic_bits(void) -{ - unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int sid_shift; - - cpuid(0, &eax, &ebx, &ecx, &edx); - if (eax < 0xb) { - pr_info("UV: CPU does not have CPUID.11\n"); - return; - } - - cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) { - pr_info("UV: CPUID.11 not implemented\n"); - return; - } - - sid_shift = BITS_SHIFT_NEXT_LEVEL(eax); - sub_index = 1; - do { - cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); - if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { - sid_shift = BITS_SHIFT_NEXT_LEVEL(eax); - break; - } - sub_index++; - } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); - - uv_cpuid.apicid_shift = 0; - uv_cpuid.apicid_mask = (~(-1 << sid_shift)); - uv_cpuid.socketid_shift = sid_shift; -} - static void __init early_get_apic_socketid_shift(void) { + unsigned int sid_shift = topology_get_domain_shift(TOPO_PKG_DOMAIN); + if (is_uv2_hub() || is_uv3_hub()) uvh_apicid.v = uv_early_read_mmr(UVH_APICID); - set_x2apic_bits(); + if (sid_shift) { + uv_cpuid.apicid_shift = 0; + uv_cpuid.apicid_mask = (~(-1 << sid_shift)); + uv_cpuid.socketid_shift = sid_shift; + } else { + pr_info("UV: CPU does not have valid CPUID.11\n"); + } pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n", uv_cpuid.apicid_shift, uv_cpuid.apicid_mask); pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n", uv_cpuid.socketid_shift, uv_cpuid.pnode_mask); From 8c7498e63b9543fe619581ddcef9322405608bd7 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Mon, 25 Aug 2025 07:57:29 +0000 Subject: [PATCH 042/109] x86/cpu/topology: Use initial APIC ID from XTOPOLOGY leaf on AMD/HYGON commit c2415c407a2cde01290d52ce2a1f81b0616379a3 upstream. Prior to the topology parsing rewrite and the switchover to the new parsing logic for AMD processors in c749ce393b8f ("x86/cpu: Use common topology code for AMD"), the initial_apicid on these platforms was: - First initialized to the LocalApicId from CPUID leaf 0x1 EBX[31:24]. - Then overwritten by the ExtendedLocalApicId in CPUID leaf 0xb EDX[31:0] on processors that supported topoext. With the new parsing flow introduced in f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser"), parse_8000_001e() now unconditionally overwrites the initial_apicid already parsed during cpu_parse_topology_ext(). Although this has not been a problem on baremetal platforms, on virtualized AMD guests that feature more than 255 cores, QEMU zeros out the CPUID leaf 0x8000001e on CPUs with CoreID > 255 to prevent collision of these IDs in EBX[7:0] which can only represent a maximum of 255 cores [1]. This results in the following FW_BUG being logged when booting a guest with more than 255 cores: [Firmware Bug]: CPU 512: APIC ID mismatch. CPUID: 0x0000 APIC: 0x0200 AMD64 Architecture Programmer's Manual Volume 2: System Programming Pub. 24593 Rev. 3.42 [2] Section 16.12 "x2APIC_ID" mentions the Extended Enumeration leaf 0xb (Fn0000_000B_EDX[31:0])(which was later superseded by the extended leaf 0x80000026) provides the full x2APIC ID under all circumstances unlike the one reported by CPUID leaf 0x8000001e EAX which depends on the mode in which APIC is configured. 
Rely on the APIC ID parsed during cpu_parse_topology_ext() from CPUID leaf 0x80000026 or 0xb and only use the APIC ID from leaf 0x8000001e if cpu_parse_topology_ext() failed (has_topoext is false). On platforms that support the 0xb leaf (Zen2 or later, AMD guests on QEMU) or the extended leaf 0x80000026 (Zen4 or later), the initial_apicid is now set to the value parsed from EDX[31:0]. On older AMD/Hygon platforms that do not support the 0xb leaf but support the TOPOEXT extension (families 0x15, 0x16, 0x17[Zen1], and Hygon), retain current behavior where the initial_apicid is set using the 0x8000001e leaf. Issue debugged by Naveen N Rao (AMD) and Sairaj Kodilkar . [ bp: Massage commit message. ] Fixes: c749ce393b8f ("x86/cpu: Use common topology code for AMD") Intel-SIG: commit c2415c407a2c x86/cpu/topology: Use initial APIC ID from XTOPOLOGY leaf on AMD/HYGON. x86/cpu: Rework the topology evaluation - part2 Suggested-by: Thomas Gleixner Signed-off-by: K Prateek Nayak Signed-off-by: Borislav Petkov (AMD) Tested-by: Naveen N Rao (AMD) Cc: stable@vger.kernel.org Link: https://github.com/qemu/qemu/commit/35ac5dfbcaa4b [1] Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 [2] Link: https://lore.kernel.org/20250825075732.10694-2-kprateek.nayak@amd.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_amd.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 5ee6373d4d926..86c1dedb81b04 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -80,20 +80,24 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) cpuid_leaf(0x8000001e, &leaf); - tscan->c->topo.initial_apicid = leaf.ext_apic_id; - /* - * If leaf 0xb is available, then the domain shifts are set - * already and nothing to do here. Only valid for family >= 0x17. + * If leaf 0xb/0x26 is available, then the APIC ID and the domain + * shifts are set already. */ - if (!has_0xb && tscan->c->x86 >= 0x17) { + if (!has_0xb) { + tscan->c->topo.initial_apicid = leaf.ext_apic_id; /* - * Leaf 0x80000008 set the CORE domain shift already. - * Update the SMT domain, but do not propagate it. + * Leaf 0x8000008 sets the CORE domain shift but not the + * SMT domain shift. On CPUs with family >= 0x17, there + * might be hyperthreads. */ - unsigned int nthreads = leaf.core_nthreads + 1; + if (tscan->c->x86 >= 0x17) { + /* Update the SMT domain, but do not propagate it. */ + unsigned int nthreads = leaf.core_nthreads + 1; - topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads); + topology_update_dom(tscan, TOPO_SMT_DOMAIN, + get_count_order(nthreads), nthreads); + } } store_node(tscan, leaf.nnodes_per_socket + 1, leaf.node_id); From 9f450338dc1787e2acc4a36da5ae0cd3b3349fef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:52 +0100 Subject: [PATCH 043/109] x86/cpu/topology: Make the APIC mismatch warnings complete commit 52128a7a21f79d5d0be62f10cb0b73d115ab492e upstream. Detect all possible combinations of mismatch right in the CPUID evaluation code. Intel-SIG: commit 52128a7a21f7 x86/cpu/topology: Make the APIC mismatch warnings complete. 
x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154638.867699078@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 5 ++--- arch/x86/kernel/cpu/common.c | 15 ++------------- arch/x86/kernel/cpu/topology_common.c | 12 ++++++++++++ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 7ad2fcb55c67b..a6bfeed50e14d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -47,16 +47,15 @@ extern void x86_32_probe_apic(void); static inline void x86_32_probe_apic(void) { } #endif -#ifdef CONFIG_X86_LOCAL_APIC +extern u32 cpuid_to_apicid[]; +#ifdef CONFIG_X86_LOCAL_APIC extern int apic_verbosity; extern int local_apic_timer_c2_ok; extern bool apic_is_disabled; extern unsigned int lapic_timer_period; -extern u32 cpuid_to_apicid[]; - extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { APIC_PIC, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 060c2216c4627..097b48c5d08da 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1823,22 +1823,11 @@ static void generic_identify(struct cpuinfo_x86 *c) #endif } -/* - * Validate that ACPI/mptables have the same information about the - * effective APIC id and update the package map. - */ -static void validate_apic_and_package_id(struct cpuinfo_x86 *c) +static void update_package_map(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned int cpu = smp_processor_id(); - u32 apicid; - apicid = apic->cpu_present_to_apicid(cpu); - - if (apicid != c->topo.apicid) { - pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n", - cpu, apicid, c->topo.initial_apicid); - } BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu)); BUG_ON(topology_update_die_map(c->topo.die_id, cpu)); #else @@ -2030,7 +2019,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_32 enable_sep_cpu(); #endif - validate_apic_and_package_id(c); + update_package_map(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); if (boot_cpu_has_bug(X86_BUG_GDS)) diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index afea34d59598f..3876a3342fe96 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -177,6 +177,18 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) parse_topology(&tscan, false); + if (IS_ENABLED(CONFIG_X86_LOCAL_APIC)) { + if (c->topo.initial_apicid != c->topo.apicid) { + pr_err(FW_BUG "CPU%4u: APIC ID mismatch. CPUID: 0x%04x APIC: 0x%04x\n", + cpu, c->topo.initial_apicid, c->topo.apicid); + } + + if (c->topo.apicid != cpuid_to_apicid[cpu]) { + pr_err(FW_BUG "CPU%4u: APIC ID mismatch. Firmware: 0x%04x APIC: 0x%04x\n", + cpu, cpuid_to_apicid[cpu], c->topo.apicid); + } + } + for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) { if (tscan.dom_shifts[dom] == x86_topo_system.dom_shifts[dom]) continue; From 05ebf354e03bc5be2798d67d94f5066dec442059 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 5 Apr 2024 14:15:47 +0200 Subject: [PATCH 044/109] x86/xen: return a sane initial apic id when running as PV guest commit 802600ebdf23371b893a51a4ad046213f112ea3b upstream. With recent sanity checks for topology information added, there are now warnings issued for APs when running as a Xen PV guest: [Firmware Bug]: CPU 1: APIC ID mismatch. 
CPUID: 0x0000 APIC: 0x0001 This is due to the initial APIC ID obtained via CPUID for PV guests is always 0. Avoid the warnings by synthesizing the CPUID data to contain the same initial APIC ID as xen_pv_smp_config() is using for registering the APIC IDs of all CPUs. Fixes: 52128a7a21f7 ("86/cpu/topology: Make the APIC mismatch warnings complete") Intel-SIG: commit 802600ebdf23 x86/xen: return a sane initial apic id when running as PV guest. x86/topology: More cleanups and preparatory work Signed-off-by: Juergen Gross [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/enlighten_pv.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c26402608b575..3d4a76566228d 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -216,13 +216,21 @@ static __read_mostly unsigned int cpuid_leaf5_edx_val; static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { - unsigned maskebx = ~0; + unsigned int maskebx = ~0; + unsigned int or_ebx = 0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. */ switch (*ax) { + case 0x1: + /* Replace initial APIC ID in bits 24-31 of EBX. */ + /* See xen_pv_smp_config() for related topology preparations. */ + maskebx = 0x00ffffff; + or_ebx = smp_processor_id() << 24; + break; + case CPUID_MWAIT_LEAF: /* Synthesize the values.. */ *ax = 0; @@ -245,6 +253,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, : "0" (*ax), "2" (*cx)); *bx &= maskebx; + *bx |= or_ebx; } static bool __init xen_check_mwait(void) From def29e66799010ded7efc5f06549a9c1bd1b522d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:53 +0100 Subject: [PATCH 045/109] x86/platform/ce4100: Dont override x86_init.mpparse.setup_ioapic_ids commit 490cc3c5e724502667a104a4e818dc071faf5e77 upstream. There is no point to do that. The ATOMs have an XAPIC for which this function is a pointless exercise. Intel-SIG: commit 490cc3c5e724 x86/platform/ce4100: Dont override x86_init.mpparse.setup_ioapic_ids. 
x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154638.931617775@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/io_apic.h | 1 - arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/platform/ce4100/ce4100.c | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 51c782600e026..0d806513c4b32 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -140,7 +140,6 @@ extern void mask_ioapic_entries(void); extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); -extern void setup_ioapic_ids_from_mpc_nocheck(void); extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d0c5325d17510..bd15666f6407c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1449,7 +1449,7 @@ void restore_boot_irq_mode(void) * * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 */ -void __init setup_ioapic_ids_from_mpc_nocheck(void) +static void __init setup_ioapic_ids_from_mpc_nocheck(void) { union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index 40745664d92f3..bbe7e91245494 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -139,7 +139,6 @@ void __init x86_ce4100_early_setup(void) x86_init.resources.probe_roms = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_init.mpparse.find_smp_config = x86_init_noop; - x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck; x86_init.pci.init = ce4100_pci_init; x86_init.pci.init_irq = sdv_pci_init; From f07f36ce1076c4be6e40024acb542ca7dc2e67ba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:55 +0100 Subject: [PATCH 046/109] x86/ioapic: Replace some more set bit nonsense commit 2ac9e529d76a8534fa357e723942dd3f076c37da upstream. Yet another set_bit() operation wrapped in oring a mask. Intel-SIG: commit 2ac9e529d76a x86/ioapic: Replace some more set bit nonsense. 
x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154638.995080989@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/mpspec.h | 3 --- arch/x86/kernel/apic/io_apic.c | 6 ++---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 4b0f98a8d338d..9bf1e75c1defe 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -86,9 +86,6 @@ typedef struct physid_mask physid_mask_t; #define physid_set(physid, map) set_bit(physid, (map).mask) #define physid_isset(physid, map) test_bit(physid, (map).mask) -#define physids_or(dst, src1, src2) \ - bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) - #define physids_clear(map) \ bitmap_zero((map).mask, MAX_LOCAL_APIC) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index bd15666f6407c..1eaf2dc9a4709 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2485,9 +2485,8 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) #ifdef CONFIG_X86_32 static int io_apic_get_unique_id(int ioapic, int apic_id) { - union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; - physid_mask_t tmp; + union IO_APIC_reg_00 reg_00; unsigned long flags; int i = 0; @@ -2533,8 +2532,7 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - physid_set_mask_of_physid(apic_id, &tmp); - physids_or(apic_id_map, apic_id_map, tmp); + physid_set(apic_id, apic_id_map); if (reg_00.bits.ID != apic_id) { reg_00.bits.ID = apic_id; From 5eb19ff413d61367d484d826dd2c605eb5df322f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:56 +0100 Subject: [PATCH 047/109] x86/apic: Get rid of get_physical_broadcast() commit 517234446c1ad1d6bb0d9f5b94a71b24f80edaae upstream. There is no point for this function. The only case where this is used is when there is no XAPIC available, which means the broadcast address is 0xF. Intel-SIG: commit 517234446c1a x86/apic: Get rid of get_physical_broadcast(). x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.057209154@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/apic/apic.c | 10 ------- arch/x86/kernel/apic/io_apic.c | 49 +++++++++++++++------------------- 2 files changed, 22 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 883caaf50413b..87c5f1931f4af 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -261,16 +261,6 @@ u64 native_apic_icr_read(void) return icr1 | ((u64)icr2 << 32); } -#ifdef CONFIG_X86_32 -/** - * get_physical_broadcast - Get number of physical broadcast IDs - */ -int get_physical_broadcast(void) -{ - return modern_apic() ? 
0xff : 0xf; -} -#endif - /** * lapic_get_maxlvt - get the maximum number of local vector table entries */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1eaf2dc9a4709..62fbca0160f75 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1451,12 +1451,12 @@ void restore_boot_irq_mode(void) */ static void __init setup_ioapic_ids_from_mpc_nocheck(void) { - union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; - int ioapic_idx; - int i; + const u32 broadcast_id = 0xF; + union IO_APIC_reg_00 reg_00; unsigned char old_id; unsigned long flags; + int ioapic_idx, i; /* * This is broken; anything with a real cpu count has to @@ -1475,11 +1475,10 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) old_id = mpc_ioapic_id(ioapic_idx); - if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - ioapic_idx, mpc_ioapic_id(ioapic_idx)); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - reg_00.bits.ID); + if (mpc_ioapic_id(ioapic_idx) >= broadcast_id) { + pr_err(FW_BUG "IO-APIC#%d ID is %d in the MPC table!...\n", + ioapic_idx, mpc_ioapic_id(ioapic_idx)); + pr_err("... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID; } @@ -1490,15 +1489,14 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) */ if (apic->check_apicid_used(&phys_id_present_map, mpc_ioapic_id(ioapic_idx))) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - ioapic_idx, mpc_ioapic_id(ioapic_idx)); - for (i = 0; i < get_physical_broadcast(); i++) + pr_err(FW_BUG "IO-APIC#%d ID %d is already used!...\n", + ioapic_idx, mpc_ioapic_id(ioapic_idx)); + for (i = 0; i < broadcast_id; i++) if (!physid_isset(i, phys_id_present_map)) break; - if (i >= get_physical_broadcast()) + if (i >= broadcast_id) panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); + pr_err("... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); ioapics[ioapic_idx].mp_config.apicid = i; } else { @@ -2200,7 +2198,7 @@ static inline void __init check_timer(void) * 8259A. 
*/ if (pin1 == -1) { - panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC"); + panic_if_irq_remap(FW_BUG "Timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2486,6 +2484,7 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) static int io_apic_get_unique_id(int ioapic, int apic_id) { static physid_mask_t apic_id_map = PHYSID_MASK_NONE; + const u32 broadcast_id = 0xF; union IO_APIC_reg_00 reg_00; unsigned long flags; int i = 0; @@ -2506,9 +2505,9 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) reg_00.raw = io_apic_read(ioapic, 0); raw_spin_unlock_irqrestore(&ioapic_lock, flags); - if (apic_id >= get_physical_broadcast()) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); + if (apic_id >= broadcast_id) { + pr_warn("IOAPIC[%d]: Invalid apic_id %d, trying %d\n", + ioapic, apic_id, reg_00.bits.ID); apic_id = reg_00.bits.ID; } @@ -2518,17 +2517,15 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) */ if (apic->check_apicid_used(&apic_id_map, apic_id)) { - for (i = 0; i < get_physical_broadcast(); i++) { + for (i = 0; i < broadcast_id; i++) { if (!apic->check_apicid_used(&apic_id_map, i)) break; } - if (i == get_physical_broadcast()) + if (i == broadcast_id) panic("Max apic_id exceeded!\n"); - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - + pr_warn("IOAPIC[%d]: apic_id %d already used, trying %d\n", ioapic, apic_id, i); apic_id = i; } @@ -2558,11 +2555,9 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) static u8 io_apic_unique_id(int idx, u8 id) { - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(boot_cpu_apic_version)) + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && !APIC_XAPIC(boot_cpu_apic_version)) return io_apic_get_unique_id(idx, id); - else - return id; + return id; } #else static u8 io_apic_unique_id(int idx, u8 id) From 5abaab356e2c0cf8c67b7f4fa1b3f3ddab4e54b9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:57 +0100 Subject: [PATCH 048/109] x86/ioapic: Make io_apic_get_unique_id() simpler commit 533535afc079b745ae8a5fd06afd2ba51b3495fe upstream. No need to go through APIC callbacks. It's already established that this is an ancient APIC. So just copy the present mask and use the direct physid* functions all over the place. Intel-SIG: commit 533535afc079 x86/ioapic: Make io_apic_get_unique_id() simpler. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.119261725@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/apic/io_apic.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 62fbca0160f75..bf64cd21777d9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2489,17 +2489,9 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) unsigned long flags; int i = 0; - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. 
- */ - + /* Initialize the ID map */ if (physids_empty(apic_id_map)) - apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); + apic_id_map = phys_cpu_present_map; raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); @@ -2511,14 +2503,10 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) apic_id = reg_00.bits.ID; } - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (apic->check_apicid_used(&apic_id_map, apic_id)) { - + /* Every APIC in a system must have a unique ID */ + if (physid_isset(apic_id, apic_id_map)) { for (i = 0; i < broadcast_id; i++) { - if (!apic->check_apicid_used(&apic_id_map, i)) + if (!physid_isset(i, apic_id_map)) break; } From ac130d916b91e22c30a4c8cf5fe35d5d98a485c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:59 +0100 Subject: [PATCH 049/109] x86/ioapic: Simplify setup_ioapic_ids_from_mpc_nocheck() commit 4b99e735a5c6cb3c8b23fba522cb1d24a1679f94 upstream. No need to go through APIC callbacks. It's already established that this is an ancient APIC. So just copy the present mask and use the direct physid* functions all over the place. Intel-SIG: commit 4b99e735a5c6 x86/ioapic: Simplify setup_ioapic_ids_from_mpc_nocheck(). x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.181901887@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/apic/io_apic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index bf64cd21777d9..c9fdf6e73108b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1462,7 +1462,7 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); + phys_id_present_map = phys_cpu_present_map; /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -1487,8 +1487,7 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(&phys_id_present_map, - mpc_ioapic_id(ioapic_idx))) { + if (physid_isset(mpc_ioapic_id(ioapic_idx), phys_id_present_map)) { pr_err(FW_BUG "IO-APIC#%d ID %d is already used!...\n", ioapic_idx, mpc_ioapic_id(ioapic_idx)); for (i = 0; i < broadcast_id; i++) From 29f7ecc677b200b9a115f19a016db84b35431d92 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:00 +0100 Subject: [PATCH 050/109] x86/apic: Remove check_apicid_used() and ioapic_phys_id_map() commit 3e48d804c8ea99170638b4e14931686bfc093f02 upstream. No more users. Intel-SIG: commit 3e48d804c8ea x86/apic: Remove check_apicid_used() and ioapic_phys_id_map(). 
x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.243307499@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 3 --- arch/x86/include/asm/mpspec.h | 6 ------ arch/x86/kernel/apic/apic_noop.c | 2 -- arch/x86/kernel/apic/bigsmp_32.c | 13 ------------- arch/x86/kernel/apic/probe_32.c | 2 -- arch/x86/kernel/apic/x2apic_cluster.c | 2 -- 6 files changed, 28 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a6bfeed50e14d..4839389605343 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -293,9 +293,7 @@ struct apic { int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); bool (*apic_id_registered)(void); - bool (*check_apicid_used)(physid_mask_t *map, u32 apicid); void (*init_apic_ldr)(void); - void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); u32 (*cpu_present_to_apicid)(int mps_cpu); u32 (*get_apic_id)(u32 id); @@ -528,7 +526,6 @@ extern int default_apic_id_valid(u32 apicid); extern u32 apic_default_calc_apicid(unsigned int cpu); extern u32 apic_flat_calc_apicid(unsigned int cpu); -extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); extern u32 default_cpu_present_to_apicid(int mps_cpu); void apic_send_nmi_to_offline_cpu(unsigned int cpu); diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 9bf1e75c1defe..666dcdbe66b03 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -92,12 +92,6 @@ typedef struct physid_mask physid_mask_t; #define physids_empty(map) \ bitmap_empty((map).mask, MAX_LOCAL_APIC) -static inline void physids_promote(unsigned long physids, physid_mask_t *map) -{ - physids_clear(*map); - map->mask[0] = physids; -} - static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) { physids_clear(*map); diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 73247856e23f7..6558c59dc4510 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -51,8 +51,6 @@ struct apic apic_noop __ro_after_init = { .disable_esr = 0, - .check_apicid_used = default_check_apicid_used, - .ioapic_phys_id_map = default_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, .max_apic_id = 0xFE, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index f3a51d6dc167d..4371abd4b0df7 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -18,17 +18,6 @@ static u32 bigsmp_get_apic_id(u32 x) return (x >> 24) & 0xFF; } -static bool bigsmp_check_apicid_used(physid_mask_t *map, u32 apicid) -{ - return false; -} - -static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - /* For clustered we don't have a good way to do this yet - hack */ - physids_promote(0xFFL, retmap); -} - static void bigsmp_send_IPI_allbutself(int vector) { default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); @@ -80,8 +69,6 @@ static struct apic apic_bigsmp __ro_after_init = { .disable_esr = 1, - .check_apicid_used = bigsmp_check_apicid_used, - .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, .max_apic_id = 0xFE, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 
1c508681d9d4b..25df3528c56b0 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -45,9 +45,7 @@ static struct apic apic_default __ro_after_init = { .disable_esr = 0, - .check_apicid_used = default_check_apicid_used, .init_apic_ldr = default_init_apic_ldr, - .ioapic_phys_id_map = default_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, .max_apic_id = 0xFE, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index ec204edb31e90..506d0d32de692 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -232,9 +232,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .disable_esr = 0, - .check_apicid_used = NULL, .init_apic_ldr = init_x2apic_ldr, - .ioapic_phys_id_map = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .max_apic_id = UINT_MAX, From ead48d15f6c3cd3a49a93703a24ccf185243f35e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:02 +0100 Subject: [PATCH 051/109] x86/mpparse: Rename default_find_smp_config() commit e061c7ae0830ff320d77566849a5cc30decfa602 upstream. MPTABLE is no longer the default SMP configuration mechanism. Rename it to mpparse_find_mptable() because that's what it does. Intel-SIG: commit e061c7ae0830 x86/mpparse: Rename default_find_smp_config(). x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.306287711@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/mpspec.h | 13 ++++--------- arch/x86/include/asm/x86_init.h | 4 ++-- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/setup.c | 6 ++---- arch/x86/kernel/x86_init.c | 2 +- arch/x86/mm/mem_encrypt_amd.c | 2 +- arch/x86/platform/ce4100/ce4100.c | 2 +- arch/x86/platform/intel-mid/intel-mid.c | 2 +- arch/x86/xen/smp_pv.c | 2 +- 9 files changed, 14 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 666dcdbe66b03..c154dd7261e7e 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -56,21 +56,16 @@ static inline void early_get_smp_config(void) x86_init.mpparse.get_smp_config(1); } -static inline void find_smp_config(void) -{ - x86_init.mpparse.find_smp_config(); -} - #ifdef CONFIG_X86_MPPARSE extern void e820__memblock_alloc_reserved_mpc_new(void); extern int enable_update_mptable; -extern void default_find_smp_config(void); +extern void mpparse_find_mptable(void); extern void default_get_smp_config(unsigned int early); #else static inline void e820__memblock_alloc_reserved_mpc_new(void) { } -#define enable_update_mptable 0 -#define default_find_smp_config x86_init_noop -#define default_get_smp_config x86_init_uint_noop +#define enable_update_mptable 0 +#define mpparse_find_mptable x86_init_noop +#define default_get_smp_config x86_init_uint_noop #endif int generic_processor_info(int apicid); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index daf8c55ce7ebb..301d94bb34684 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -15,12 +15,12 @@ struct irq_domain; /** * struct x86_init_mpparse - platform specific mpparse ops * @setup_ioapic_ids: platform specific ioapic id override - * @find_smp_config: find the smp configuration + * @find_mptable: Find MPTABLE early to reserve the memory region * @get_smp_config: get 
the smp configuration */ struct x86_init_mpparse { void (*setup_ioapic_ids)(void); - void (*find_smp_config)(void); + void (*find_mptable)(void); void (*get_smp_config)(unsigned int early); }; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b223922248e9f..86acccdbd6315 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -587,7 +587,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) return ret; } -void __init default_find_smp_config(void) +void __init mpparse_find_mptable(void) { unsigned int address; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ff924aff768d5..8801c142c85d0 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1095,10 +1095,8 @@ void __init setup_arch(char **cmdline_p) high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; #endif - /* - * Find and reserve possible boot-time SMP configuration: - */ - find_smp_config(); + /* Find and reserve MPTABLE area */ + x86_init.mpparse.find_mptable(); early_alloc_pgt_buf(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 5ca3f72a2bd34..330c75afe8171 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -73,7 +73,7 @@ struct x86_init_ops x86_init __initdata = { .mpparse = { .setup_ioapic_ids = x86_init_noop, - .find_smp_config = default_find_smp_config, + .find_mptable = mpparse_find_mptable, .get_smp_config = default_get_smp_config, }, diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 2b7f3efe04ecb..44177d331d6de 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -524,7 +524,7 @@ void __init sme_early_init(void) * as this memory is not pre-validated and would thus cause a crash. 
*/ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { - x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; x86_init.pci.init_irq = x86_init_noop; x86_init.resources.probe_roms = x86_init_noop; diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index bbe7e91245494..de7f1e9ea01d7 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -138,7 +138,7 @@ void __init x86_ce4100_early_setup(void) x86_init.oem.arch_setup = sdv_arch_setup; x86_init.resources.probe_roms = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; - x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; x86_init.pci.init = ce4100_pci_init; x86_init.pci.init_irq = sdv_pci_init; diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index f4592dc7a1c19..595dd4cfc6be6 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -118,7 +118,7 @@ void __init x86_intel_mid_early_setup(void) machine_ops.emergency_restart = intel_mid_reboot; /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; set_bit(MP_BUS_ISA, mp_bus_not_pci); } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 3ae29c25ff855..44c35b12430ff 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -455,6 +455,6 @@ void __init xen_smp_init(void) smp_ops = xen_smp_ops; /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.get_smp_config = _get_smp_config; } From a24a31939956d5ea21b37b29c8f15b89d937c7b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:03 +0100 Subject: [PATCH 052/109] x86/mpparse: Provide separate early/late callbacks commit fc60fd009c830a21c7699c6e36ab9ec51b9dd939 upstream. The early argument of x86_init::mpparse::get_smp_config() is more than confusing. Provide two callbacks, one for each purpose. Intel-SIG: commit fc60fd009c83 x86/mpparse: Provide separate early/late callbacks. 
x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.370491894@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/x86_init.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 301d94bb34684..c7feb006df739 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -17,11 +17,15 @@ struct irq_domain; * @setup_ioapic_ids: platform specific ioapic id override * @find_mptable: Find MPTABLE early to reserve the memory region * @get_smp_config: get the smp configuration + * @early_parse_smp_cfg: Parse the SMP configuration data early before initmem_init() + * @parse_smp_cfg: Parse the SMP configuration data */ struct x86_init_mpparse { void (*setup_ioapic_ids)(void); void (*find_mptable)(void); void (*get_smp_config)(unsigned int early); + void (*early_parse_smp_cfg)(void); + void (*parse_smp_cfg)(void); }; /** From eca3ce378fb4f720fe637d2f4d86426cc2e70e98 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:04 +0100 Subject: [PATCH 053/109] x86/mpparse: Prepare for callback separation commit d0a85126b137598eab969e5ba283e5e70ca9c686 upstream. In preparation of splitting the get_smp_config() callback, rename default_get_smp_config() to mpparse_get_smp_config() and provide an early and late wrapper. Intel-SIG: commit d0a85126b137 x86/mpparse: Prepare for callback separation. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.433811243@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/mpspec.h | 12 ++++++++---- arch/x86/kernel/mpparse.c | 12 +++++++++++- arch/x86/kernel/x86_init.c | 2 +- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index c154dd7261e7e..72700ae4a7708 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -60,12 +60,16 @@ static inline void early_get_smp_config(void) extern void e820__memblock_alloc_reserved_mpc_new(void); extern int enable_update_mptable; extern void mpparse_find_mptable(void); -extern void default_get_smp_config(unsigned int early); +extern void mpparse_parse_early_smp_config(void); +extern void mpparse_parse_smp_config(void); +extern void mpparse_get_smp_config(unsigned int early); #else static inline void e820__memblock_alloc_reserved_mpc_new(void) { } -#define enable_update_mptable 0 -#define mpparse_find_mptable x86_init_noop -#define default_get_smp_config x86_init_uint_noop +#define enable_update_mptable 0 +#define mpparse_find_mptable x86_init_noop +#define mpparse_parse_early_smp_config x86_init_noop +#define mpparse_parse_smp_config x86_init_noop +#define mpparse_get_smp_config x86_init_uint_noop #endif int generic_processor_info(int apicid); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 86acccdbd6315..b22093d2265b8 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -473,7 +473,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) /* * Scan the memory blocks for an SMP configuration block. 
*/ -void __init default_get_smp_config(unsigned int early) +void __init mpparse_get_smp_config(unsigned int early) { struct mpf_intel *mpf; @@ -538,6 +538,16 @@ void __init default_get_smp_config(unsigned int early) early_memunmap(mpf, sizeof(*mpf)); } +void __init mpparse_parse_early_smp_config(void) +{ + mpparse_get_smp_config(true); +} + +void __init mpparse_parse_smp_config(void) +{ + mpparse_get_smp_config(false); +} + static void __init smp_reserve_memory(struct mpf_intel *mpf) { memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr)); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 330c75afe8171..2995d000f0bd9 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -74,7 +74,7 @@ struct x86_init_ops x86_init __initdata = { .mpparse = { .setup_ioapic_ids = x86_init_noop, .find_mptable = mpparse_find_mptable, - .get_smp_config = default_get_smp_config, + .get_smp_config = mpparse_get_smp_config, }, .irqs = { From dbde3271eedd7a2759ba7f9ebd82eea6b539a216 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:06 +0100 Subject: [PATCH 054/109] x86/dtb: Rename x86_dtb_init() commit 5faf8ec77111a699b6a566c4155511fc020f8644 upstream. x86_dtb_init() is a misnomer and it really should be used as an SMP configuration parser which is selected by the platform via x86_init::mpparse::parse_smp_config(). Rename it to x86_dtb_parse_smp_config() in preparation for that. Intel-SIG: commit 5faf8ec77111 x86/dtb: Rename x86_dtb_init(). x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.495992801@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/prom.h | 4 ++-- arch/x86/kernel/devicetree.c | 2 +- arch/x86/kernel/setup.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index b716d291d0d44..b86b91cd3561b 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -23,11 +23,11 @@ extern int of_ioapic; extern u64 initial_dtb; extern void add_dtb(u64 data); void x86_of_pci_init(void); -void x86_dtb_init(void); +void x86_dtb_parse_smp_config(void); #else static inline void add_dtb(u64 data) { } static inline void x86_of_pci_init(void) { } -static inline void x86_dtb_init(void) { } +static inline void x86_dtb_parse_smp_config(void) { } #define of_ioapic 0 #endif diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 37ca25d82bbcd..64126b20a0b58 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -306,7 +306,7 @@ static void __init x86_flattree_get_config(void) static inline void x86_flattree_get_config(void) { } #endif -void __init x86_dtb_init(void) +void __init x86_dtb_parse_smp_config(void) { x86_flattree_get_config(); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8801c142c85d0..bbcf94573ffbe 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1259,7 +1259,7 @@ void __init setup_arch(char **cmdline_p) * Read APIC and some other early information from ACPI tables.
*/ acpi_boot_init(); - x86_dtb_init(); + x86_dtb_parse_smp_config(); /* * get boot-time SMP configuration: From 9ce9566b0590f422d988487a4cf21bf306809a4d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:07 +0100 Subject: [PATCH 055/109] x86/platform/ce4100: Prepare for separate mpparse callbacks commit fe280ffd7eab3dd63fd349d12b449666845e905c upstream. Select x86_dtb_parse_smp_config() as SMP configuration parser in preparation of splitting up the get_smp_config() callback. Intel-SIG: commit fe280ffd7eab x86/platform/ce4100: Prepare for separate mpparse callbacks. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.558085053@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/platform/ce4100/ce4100.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index de7f1e9ea01d7..6378082d88552 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -135,12 +135,14 @@ static void sdv_pci_init(void) */ void __init x86_ce4100_early_setup(void) { - x86_init.oem.arch_setup = sdv_arch_setup; - x86_init.resources.probe_roms = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; - x86_init.mpparse.find_mptable = x86_init_noop; - x86_init.pci.init = ce4100_pci_init; - x86_init.pci.init_irq = sdv_pci_init; + x86_init.oem.arch_setup = sdv_arch_setup; + x86_init.resources.probe_roms = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; + x86_init.pci.init = ce4100_pci_init; + x86_init.pci.init_irq = sdv_pci_init; /* * By default, the reboot method is ACPI which is supported by the From d09db63cf02eb8d12b1efefbb9afa20762b1fd56 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:09 +0100 Subject: [PATCH 056/109] x86/platform/intel-mid: Prepare for separate mpparse callbacks commit a626ded4e3088319e3d108bb328d48768110ae0b upstream. Initialize the split SMP configuration callbacks with NOOPs as MID is strictly ACPI only. Intel-SIG: commit a626ded4e308 x86/platform/intel-mid: Prepare for separate mpparse callbacks. 
x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240212154639.620189339@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/platform/intel-mid/intel-mid.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 595dd4cfc6be6..14ca3675999a5 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -118,7 +118,9 @@ void __init x86_intel_mid_early_setup(void) machine_ops.emergency_restart = intel_mid_reboot; /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_mptable = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; set_bit(MP_BUS_ISA, mp_bus_not_pci); } From 9c6c6a2e5a38bc6fcda333922758ac4ba1270705 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:10 +0100 Subject: [PATCH 057/109] x86/jailhouse: Prepare for separate mpparse callbacks commit 30c928691ce1c861d22ef236ed28bbf0b7a763bc upstream. Provide a wrapper around the existing function and fill the new callbacks in. No functional change as the new callbacks are not yet operational. Intel-SIG: commit 30c928691ce1 x86/jailhouse: Prepare for separate mpparse callbacks. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.683073662@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/jailhouse.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 5481c7c5db301..25e091eb5d043 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -119,6 +119,11 @@ static void __init jailhouse_get_smp_config(unsigned int early) } } +static void __init jailhouse_parse_smp_config(void) +{ + jailhouse_get_smp_config(false); +} + static void jailhouse_no_restart(void) { pr_notice("Jailhouse: Restart not supported, halting\n"); @@ -202,21 +207,24 @@ static void __init jailhouse_init_platform(void) struct setup_data header; void *mapping; - x86_init.irqs.pre_vector_init = x86_init_noop; - x86_init.timers.timer_init = jailhouse_timer_init; - x86_init.mpparse.get_smp_config = jailhouse_get_smp_config; - x86_init.pci.arch_init = jailhouse_pci_arch_init; + x86_init.irqs.pre_vector_init = x86_init_noop; + x86_init.timers.timer_init = jailhouse_timer_init; + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = jailhouse_parse_smp_config; + x86_init.mpparse.get_smp_config = jailhouse_get_smp_config; + x86_init.pci.arch_init = jailhouse_pci_arch_init; - x86_platform.calibrate_cpu = jailhouse_get_tsc; - x86_platform.calibrate_tsc = jailhouse_get_tsc; - x86_platform.get_wallclock = jailhouse_get_wallclock; - x86_platform.legacy.rtc = 0; - x86_platform.legacy.warm_reset = 0; - x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT; + x86_platform.calibrate_cpu = jailhouse_get_tsc; + 
x86_platform.calibrate_tsc = jailhouse_get_tsc; + x86_platform.get_wallclock = jailhouse_get_wallclock; + x86_platform.legacy.rtc = 0; + x86_platform.legacy.warm_reset = 0; + x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT; - legacy_pic = &null_legacy_pic; + legacy_pic = &null_legacy_pic; - machine_ops.emergency_restart = jailhouse_no_restart; + machine_ops.emergency_restart = jailhouse_no_restart; while (pa_data) { mapping = early_memremap(pa_data, sizeof(header)); From 3b7250c11b419226ce673525b6fddbac7a5d22b9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:12 +0100 Subject: [PATCH 058/109] x86/xen/smp_pv: Prepare for separate mpparse callbacks commit 0baf4d485cbe5c1b94433f3f5aed2e6e6cd91b02 upstream. Provide a wrapper around the existing function and fill the new callbacks in. No functional change as the new callbacks are not yet operational. Intel-SIG: commit 0baf4d485cbe x86/xen/smp_pv: Prepare for separate mpparse callbacks. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.745028043@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/smp_pv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 44c35b12430ff..bd939b8b1a063 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -185,6 +185,11 @@ static void __init _get_smp_config(unsigned int early) smp_found_config = 1; } +static void __init xen_pv_smp_config(void) +{ + _get_smp_config(false); +} + static void __init xen_pv_smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != 0); @@ -455,6 +460,8 @@ void __init xen_smp_init(void) smp_ops = xen_smp_ops; /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_mptable = x86_init_noop; - x86_init.mpparse.get_smp_config = _get_smp_config; + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = xen_pv_smp_config; + x86_init.mpparse.get_smp_config = _get_smp_config; } From 97e80a7dd9f21b8bfaa8b76136764a16107dc9be Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:13 +0100 Subject: [PATCH 059/109] x86/hyperv/vtl: Prepare for separate mpparse callbacks commit c22e19cd2c8a8f8ef8cfc0a0aaaa95d8cc064309 upstream. Initialize the new callbacks in preparation for switching the core code. Intel-SIG: commit c22e19cd2c8a x86/hyperv/vtl: Prepare for separate mpparse callbacks. 
x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.808238769@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/hyperv/hv_vtl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 48058bafecc39..b98abd08b2c18 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -33,7 +33,9 @@ void __init hv_vtl_init_platform(void) x86_init.resources.probe_roms = x86_init_noop; /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_platform.get_wallclock = get_rtc_noop; From 2aef4d364bee91d004fd4cc0262ec544edfb8735 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:14 +0100 Subject: [PATCH 060/109] x86/mpparse: Switch to new init callbacks commit dcb7600849ce9b3d9b3d2965f452287f06fc9093 upstream. Now that all platforms have the new split SMP configuration callbacks set up, flip the switch and remove the old callback pointer and mop up the platform code. Intel-SIG: commit dcb7600849ce x86/mpparse: Switch to new init callbacks. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.870883080@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/hyperv/hv_vtl.c | 1 - arch/x86/include/asm/mpspec.h | 9 +-------- arch/x86/include/asm/x86_init.h | 2 -- arch/x86/kernel/jailhouse.c | 8 +------- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/setup.c | 10 +++------- arch/x86/kernel/x86_init.c | 3 ++- arch/x86/platform/ce4100/ce4100.c | 1 - arch/x86/platform/intel-mid/intel-mid.c | 1 - arch/x86/xen/smp_pv.c | 11 +---------- 10 files changed, 9 insertions(+), 39 deletions(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index b98abd08b2c18..64ca4543ceca2 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -36,7 +36,6 @@ void __init hv_vtl_init_platform(void) x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; x86_init.mpparse.parse_smp_cfg = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_platform.get_wallclock = get_rtc_noop; x86_platform.set_wallclock = set_rtc_noop; diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 72700ae4a7708..82480b7d01f02 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -46,14 +46,9 @@ extern int smp_found_config; # define smp_found_config 0 #endif -static inline void get_smp_config(void) -{ - x86_init.mpparse.get_smp_config(0); -} - static inline void early_get_smp_config(void) { - x86_init.mpparse.get_smp_config(1); + x86_init.mpparse.early_parse_smp_cfg(); } #ifdef CONFIG_X86_MPPARSE @@ -62,14 +57,12 @@ extern int enable_update_mptable; extern void mpparse_find_mptable(void); extern void mpparse_parse_early_smp_config(void); extern void mpparse_parse_smp_config(void); -extern void mpparse_get_smp_config(unsigned int early); #else static inline void e820__memblock_alloc_reserved_mpc_new(void) { } #define enable_update_mptable 0 
#define mpparse_find_mptable x86_init_noop #define mpparse_parse_early_smp_config x86_init_noop #define mpparse_parse_smp_config x86_init_noop -#define mpparse_get_smp_config x86_init_uint_noop #endif int generic_processor_info(int apicid); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c7feb006df739..530b0bf8873ef 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -16,14 +16,12 @@ struct irq_domain; * struct x86_init_mpparse - platform specific mpparse ops * @setup_ioapic_ids: platform specific ioapic id override * @find_mptable: Find MPTABLE early to reserve the memory region - * @get_smp_config: get the smp configuration * @early_parse_smp_cfg: Parse the SMP configuration data early before initmem_init() * @parse_smp_cfg: Parse the SMP configuration data */ struct x86_init_mpparse { void (*setup_ioapic_ids)(void); void (*find_mptable)(void); - void (*get_smp_config)(unsigned int early); void (*early_parse_smp_cfg)(void); void (*parse_smp_cfg)(void); }; diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 25e091eb5d043..bf47224c88b15 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -90,7 +90,7 @@ static void __init jailhouse_x2apic_init(void) #endif } -static void __init jailhouse_get_smp_config(unsigned int early) +static void __init jailhouse_parse_smp_config(void) { struct ioapic_domain_cfg ioapic_cfg = { .type = IOAPIC_DOMAIN_STRICT, @@ -119,11 +119,6 @@ static void __init jailhouse_get_smp_config(unsigned int early) } } -static void __init jailhouse_parse_smp_config(void) -{ - jailhouse_get_smp_config(false); -} - static void jailhouse_no_restart(void) { pr_notice("Jailhouse: Restart not supported, halting\n"); @@ -212,7 +207,6 @@ static void __init jailhouse_init_platform(void) x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; x86_init.mpparse.parse_smp_cfg = jailhouse_parse_smp_config; - x86_init.mpparse.get_smp_config = jailhouse_get_smp_config; x86_init.pci.arch_init = jailhouse_pci_arch_init; x86_platform.calibrate_cpu = jailhouse_get_tsc; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b22093d2265b8..9c000c409eb1d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -473,7 +473,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) /* * Scan the memory blocks for an SMP configuration block. */ -void __init mpparse_get_smp_config(unsigned int early) +static __init void mpparse_get_smp_config(unsigned int early) { struct mpf_intel *mpf; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bbcf94573ffbe..7a4b0c6cd669a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1256,15 +1256,11 @@ void __init setup_arch(char **cmdline_p) early_quirks(); /* - * Read APIC and some other early information from ACPI tables. + * Parse SMP configuration. Try ACPI first and then the platform + * specific parser. 
*/ acpi_boot_init(); - x86_dtb_parse_smp_config(); - - /* - * get boot-time SMP configuration: - */ - get_smp_config(); + x86_init.mpparse.parse_smp_cfg(); /* * Systems w/o ACPI and mptables might not have it mapped the local diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 2995d000f0bd9..0a2bbd674a6d9 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -74,7 +74,8 @@ struct x86_init_ops x86_init __initdata = { .mpparse = { .setup_ioapic_ids = x86_init_noop, .find_mptable = mpparse_find_mptable, - .get_smp_config = mpparse_get_smp_config, + .early_parse_smp_cfg = mpparse_parse_early_smp_config, + .parse_smp_cfg = mpparse_parse_smp_config, }, .irqs = { diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index 6378082d88552..f32451bdcfdd5 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -140,7 +140,6 @@ void __init x86_ce4100_early_setup(void) x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_init.pci.init = ce4100_pci_init; x86_init.pci.init_irq = sdv_pci_init; diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 14ca3675999a5..7be71c2cdc83e 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -121,6 +121,5 @@ void __init x86_intel_mid_early_setup(void) x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; x86_init.mpparse.parse_smp_cfg = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; set_bit(MP_BUS_ISA, mp_bus_not_pci); } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index bd939b8b1a063..7f6f34056e13e 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -148,14 +148,11 @@ int xen_smp_intr_init_pv(unsigned int cpu) return rc; } -static void __init _get_smp_config(unsigned int early) +static void __init xen_pv_smp_config(void) { int i, rc; unsigned int subtract = 0; - if (early) - return; - num_processors = 0; disabled_cpus = 0; for (i = 0; i < nr_cpu_ids; i++) { @@ -185,11 +182,6 @@ static void __init _get_smp_config(unsigned int early) smp_found_config = 1; } -static void __init xen_pv_smp_config(void) -{ - _get_smp_config(false); -} - static void __init xen_pv_smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != 0); @@ -463,5 +455,4 @@ void __init xen_smp_init(void) x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; x86_init.mpparse.parse_smp_cfg = xen_pv_smp_config; - x86_init.mpparse.get_smp_config = _get_smp_config; } From 8ee5580f5091932975f99625274b3b1cdc7fe4bc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:16 +0100 Subject: [PATCH 061/109] x86/mm/numa: Move early mptable evaluation into common code commit de6aec241750a4f9d33d0e055d97fb3e0170c31a upstream. There is no reason to have the early mptable evaluation conditionally invoked only from the AMD numa topology code. Make it explicit and invoke it from setup_arch() right after the corresponding ACPI init call. Remove the pointless wrapper and invoke x86_init::mpparse::early_parse_smp_config() directly. Intel-SIG: commit de6aec241750 x86/mm/numa: Move early mptable evaluation into common code. 
x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.931761608@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/mpspec.h | 5 ----- arch/x86/kernel/setup.c | 2 ++ arch/x86/mm/amdtopology.c | 7 ------- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 82480b7d01f02..b423d11e002d1 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -46,11 +46,6 @@ extern int smp_found_config; # define smp_found_config 0 #endif -static inline void early_get_smp_config(void) -{ - x86_init.mpparse.early_parse_smp_cfg(); -} - #ifdef CONFIG_X86_MPPARSE extern void e820__memblock_alloc_reserved_mpc_new(void); extern int enable_update_mptable; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 7a4b0c6cd669a..fb8bfe7966374 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1214,7 +1214,9 @@ void __init setup_arch(char **cmdline_p) early_platform_quirks(); + /* Some platforms need the APIC registered for NUMA configuration */ early_acpi_boot_init(); + x86_init.mpparse.early_parse_smp_cfg(); initmem_init(); dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c index 5681b997b3574..9332b36a10915 100644 --- a/arch/x86/mm/amdtopology.c +++ b/arch/x86/mm/amdtopology.c @@ -161,13 +161,6 @@ int __init amd_numa_init(void) */ cores = topology_get_domain_size(TOPO_CORE_DOMAIN); - /* - * Scan MPTABLE to map the local APIC and ensure that the boot CPU - * APIC ID is valid. This is required because on pre ACPI/SRAT - * systems IO-APICs are mapped before the boot CPU. - */ - early_get_smp_config(); - apicid = boot_cpu_physical_apicid; if (apicid > 0) pr_info("BSP APIC ID: %02x\n", apicid); From 78accae02e3fd96f2d04630ea8554c0ab9361796 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:17 +0100 Subject: [PATCH 062/109] x86/mpparse: Remove the physid_t bitmap wrapper commit 350b5e2730d1e15337a10bd913694ee4527c02f0 upstream. physid_t is a wrapper around bitmap. Just remove the onion layer and use bitmap functionality directly. Intel-SIG: commit 350b5e2730d1 x86/mpparse: Remove the physid_t bitmap wrapper. 
x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.994904510@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/mpspec.h | 32 +++++++++--------------------- arch/x86/kernel/apic/apic.c | 11 +++++----- arch/x86/kernel/apic/apic_common.c | 12 +---------- arch/x86/kernel/apic/io_apic.c | 24 +++++++++++----------- arch/x86/kernel/apic/local.h | 1 - arch/x86/kernel/smpboot.c | 8 +++----- 6 files changed, 30 insertions(+), 58 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index b423d11e002d1..1b79d0ee95dfa 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_MPSPEC_H #define _ASM_X86_MPSPEC_H +#include #include #include @@ -62,32 +63,17 @@ static inline void e820__memblock_alloc_reserved_mpc_new(void) { } int generic_processor_info(int apicid); -#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC) +extern DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC); -struct physid_mask { - unsigned long mask[PHYSID_ARRAY_SIZE]; -}; - -typedef struct physid_mask physid_mask_t; - -#define physid_set(physid, map) set_bit(physid, (map).mask) -#define physid_isset(physid, map) test_bit(physid, (map).mask) - -#define physids_clear(map) \ - bitmap_zero((map).mask, MAX_LOCAL_APIC) - -#define physids_empty(map) \ - bitmap_empty((map).mask, MAX_LOCAL_APIC) - -static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) +static inline void reset_phys_cpu_present_map(u32 apicid) { - physids_clear(*map); - physid_set(physid, *map); + bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC); + set_bit(apicid, phys_cpu_present_map); } -#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } -#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } - -extern physid_mask_t phys_cpu_present_map; +static inline void copy_phys_cpu_present_map(unsigned long *dst) +{ + bitmap_copy(dst, phys_cpu_present_map, MAX_LOCAL_APIC); +} #endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 87c5f1931f4af..0700db748fbcc 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -77,10 +78,8 @@ EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); u8 boot_cpu_apic_version __ro_after_init; -/* - * Bitmask of physically existing CPUs: - */ -physid_mask_t phys_cpu_present_map; +/* Bitmap of physically present CPUs. 
*/ +DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC); /* * Processor to be disabled specified by kernel parameter @@ -2406,7 +2405,7 @@ static void cpu_update_apic(int cpu, u32 apicid) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; #endif set_cpu_possible(cpu, true); - physid_set(apicid, phys_cpu_present_map); + set_bit(apicid, phys_cpu_present_map); set_cpu_present(cpu, true); num_processors++; @@ -2508,7 +2507,7 @@ static void __init apic_bsp_up_setup(void) #ifdef CONFIG_X86_64 apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid)); #endif - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); + reset_phys_cpu_present_map(boot_cpu_physical_apicid); } /** diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index 8a00141073ea8..d4dfa437081a9 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -18,16 +18,6 @@ u32 apic_flat_calc_apicid(unsigned int cpu) return 1U << cpu; } -bool default_check_apicid_used(physid_mask_t *map, u32 apicid) -{ - return physid_isset(apicid, *map); -} - -void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - *retmap = *phys_map; -} - u32 default_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) @@ -39,7 +29,7 @@ EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); bool default_apic_id_registered(void) { - return physid_isset(read_apic_id(), phys_cpu_present_map); + return test_bit(read_apic_id(), phys_cpu_present_map); } /* diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c9fdf6e73108b..8280c75471d32 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1451,7 +1451,7 @@ void restore_boot_irq_mode(void) */ static void __init setup_ioapic_ids_from_mpc_nocheck(void) { - physid_mask_t phys_id_present_map; + DECLARE_BITMAP(phys_id_present_map, MAX_LOCAL_APIC); const u32 broadcast_id = 0xF; union IO_APIC_reg_00 reg_00; unsigned char old_id; @@ -1462,7 +1462,7 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - phys_id_present_map = phys_cpu_present_map; + copy_phys_cpu_present_map(phys_id_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -1487,21 +1487,21 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (physid_isset(mpc_ioapic_id(ioapic_idx), phys_id_present_map)) { + if (test_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map)) { pr_err(FW_BUG "IO-APIC#%d ID %d is already used!...\n", ioapic_idx, mpc_ioapic_id(ioapic_idx)); for (i = 0; i < broadcast_id; i++) - if (!physid_isset(i, phys_id_present_map)) + if (!test_bit(i, phys_id_present_map)) break; if (i >= broadcast_id) panic("Max APIC ID exceeded!\n"); pr_err("... fixing up to %d. 
(tell your hw vendor)\n", i); - physid_set(i, phys_id_present_map); + set_bit(i, phys_id_present_map); ioapics[ioapic_idx].mp_config.apicid = i; } else { apic_printk(APIC_VERBOSE, "Setting %d in the phys_id_present_map\n", mpc_ioapic_id(ioapic_idx)); - physid_set(mpc_ioapic_id(ioapic_idx), phys_id_present_map); + set_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map); } /* @@ -2482,15 +2482,15 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) #ifdef CONFIG_X86_32 static int io_apic_get_unique_id(int ioapic, int apic_id) { - static physid_mask_t apic_id_map = PHYSID_MASK_NONE; + static DECLARE_BITMAP(apic_id_map, MAX_LOCAL_APIC); const u32 broadcast_id = 0xF; union IO_APIC_reg_00 reg_00; unsigned long flags; int i = 0; /* Initialize the ID map */ - if (physids_empty(apic_id_map)) - apic_id_map = phys_cpu_present_map; + if (bitmap_empty(apic_id_map, MAX_LOCAL_APIC)) + copy_phys_cpu_present_map(apic_id_map); raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); @@ -2503,9 +2503,9 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) } /* Every APIC in a system must have a unique ID */ - if (physid_isset(apic_id, apic_id_map)) { + if (test_bit(apic_id, apic_id_map)) { for (i = 0; i < broadcast_id; i++) { - if (!physid_isset(i, apic_id_map)) + if (!test_bit(i, apic_id_map)) break; } @@ -2516,7 +2516,7 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - physid_set(apic_id, apic_id_map); + set_bit(apic_id, apic_id_map); if (reg_00.bits.ID != apic_id) { reg_00.bits.ID = apic_id; diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index 8fd37c9d1b344..a77c23e624593 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -63,7 +63,6 @@ void default_send_IPI_all(int vector); void default_send_IPI_self(int vector); bool default_apic_id_registered(void); -bool default_check_apicid_used(physid_mask_t *map, u32 apicid); #ifdef CONFIG_X86_32 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5177cce3b8bb5..c8322867adb43 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1064,7 +1064,7 @@ int native_kick_ap(unsigned int cpu, struct task_struct *tidle) pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); - if (apicid == BAD_APICID || !physid_isset(apicid, phys_cpu_present_map) || + if (apicid == BAD_APICID || !test_bit(apicid, phys_cpu_present_map) || !apic_id_valid(apicid)) { pr_err("%s: bad cpu %d\n", __func__, cpu); return -EINVAL; @@ -1139,10 +1139,8 @@ static __init void disable_smp(void) init_cpu_present(cpumask_of(0)); init_cpu_possible(cpumask_of(0)); - if (smp_found_config) - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - else - physid_set_mask_of_physid(0, &phys_cpu_present_map); + reset_phys_cpu_present_map(smp_found_config ? boot_cpu_physical_apicid : 0); + cpumask_set_cpu(0, topology_sibling_cpumask(0)); cpumask_set_cpu(0, topology_core_cpumask(0)); cpumask_set_cpu(0, topology_die_cpumask(0)); From 0186496e3b7389237e55d87d8fb6fdd9f12bb6c3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:19 +0100 Subject: [PATCH 063/109] x86/apic: Remove the pointless writeback of boot_cpu_physical_apicid commit 58d16928358f91d48421838a7484321b3149130d upstream. There is absolutely no point to write the APIC ID which was read from the local APIC earlier, back into the local APIC for the 64-bit UP case. 
Remove that along with the apic callback which is solely there for this pointless exercise. Intel-SIG: commit 58d16928358f x86/apic: Remove the pointless writeback of boot_cpu_physical_apicid. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154640.055288922@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic.c | 3 --- arch/x86/kernel/apic/apic_flat_64.c | 7 ------- arch/x86/kernel/apic/apic_numachip.c | 12 ------------ arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/local.h | 1 - arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 6 ------ arch/x86/kernel/apic/x2apic_uv_x.c | 6 ------ arch/x86/xen/apic.c | 7 ------- 10 files changed, 45 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 4839389605343..c3754b5507846 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -297,7 +297,6 @@ struct apic { u32 (*cpu_present_to_apicid)(int mps_cpu); u32 (*get_apic_id)(u32 id); - u32 (*set_apic_id)(u32 apicid); /* wakeup_secondary_cpu */ int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0700db748fbcc..04fad508abfe9 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2504,9 +2504,6 @@ EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid); static void __init apic_bsp_up_setup(void) { -#ifdef CONFIG_X86_64 - apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid)); -#endif reset_phys_cpu_present_map(boot_cpu_physical_apicid); } diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 5c2d476be004b..0efd66194f448 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -61,11 +61,6 @@ static u32 flat_get_apic_id(u32 x) return (x >> 24) & 0xFF; } -static u32 set_apic_id(u32 id) -{ - return (id & 0xFF) << 24; -} - static int flat_probe(void) { return 1; @@ -87,7 +82,6 @@ static struct apic apic_flat __ro_after_init = { .max_apic_id = 0xFE, .get_apic_id = flat_get_apic_id, - .set_apic_id = set_apic_id, .calc_dest_apicid = apic_flat_calc_apicid, @@ -157,7 +151,6 @@ static struct apic apic_physflat __ro_after_init = { .max_apic_id = 0xFE, .get_apic_id = flat_get_apic_id, - .set_apic_id = set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 70bedbc1a5bd1..56dde960f3306 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -38,11 +38,6 @@ static u32 numachip1_get_apic_id(u32 x) return id; } -static u32 numachip1_set_apic_id(u32 id) -{ - return (id & 0xff) << 24; -} - static u32 numachip2_get_apic_id(u32 x) { u64 mcfg; @@ -51,11 +46,6 @@ static u32 numachip2_get_apic_id(u32 x) return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24); } -static u32 numachip2_set_apic_id(u32 id) -{ - return id << 24; -} - static void numachip1_apic_icr_write(int apicid, unsigned int val) { write_lcsr(CSR_G3_EXT_IRQ_GEN, (apicid << 16) | val); @@ -226,7 +216,6 @@ static const struct apic apic_numachip1 __refconst = { .max_apic_id = UINT_MAX, .get_apic_id = numachip1_get_apic_id, - .set_apic_id = numachip1_set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, @@ -262,7 +251,6 @@ static const struct 
apic apic_numachip2 __refconst = { .max_apic_id = UINT_MAX, .get_apic_id = numachip2_get_apic_id, - .set_apic_id = numachip2_set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 4371abd4b0df7..565a70513f1c2 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -73,7 +73,6 @@ static struct apic apic_bigsmp __ro_after_init = { .max_apic_id = 0xFE, .get_apic_id = bigsmp_get_apic_id, - .set_apic_id = NULL, .calc_dest_apicid = apic_default_calc_apicid, diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index a77c23e624593..f8a87f9e72e67 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -16,7 +16,6 @@ /* X2APIC */ void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); u32 x2apic_get_apic_id(u32 id); -u32 x2apic_set_apic_id(u32 id); void x2apic_send_IPI_all(int vector); void x2apic_send_IPI_allbutself(int vector); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 506d0d32de692..025c9ff66729c 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -238,7 +238,6 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .max_apic_id = UINT_MAX, .x2apic_set_max_apicid = true, .get_apic_id = x2apic_get_apic_id, - .set_apic_id = x2apic_set_apic_id, .calc_dest_apicid = x2apic_calc_apicid, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 3396540fb46b3..24ac3b05d7c67 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -129,11 +129,6 @@ u32 x2apic_get_apic_id(u32 id) return id; } -u32 x2apic_set_apic_id(u32 id) -{ - return id; -} - static struct apic apic_x2apic_phys __ro_after_init = { .name = "physical x2apic", @@ -150,7 +145,6 @@ static struct apic apic_x2apic_phys __ro_after_init = { .max_apic_id = UINT_MAX, .x2apic_set_max_apicid = true, .get_apic_id = x2apic_get_apic_id, - .set_apic_id = x2apic_set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f296e85c75bb1..f5d008be80be2 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -745,11 +745,6 @@ static void uv_send_IPI_all(int vector) uv_send_IPI_mask(cpu_online_mask, vector); } -static u32 set_apic_id(u32 id) -{ - return id; -} - static int uv_probe(void) { return apic == &apic_x2apic_uv_x; @@ -770,7 +765,6 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .max_apic_id = UINT_MAX, .get_apic_id = x2apic_get_apic_id, - .set_apic_id = set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index d4752382b6003..8835d1cc961d8 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -33,12 +33,6 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) return 0xfd; } -static u32 xen_set_apic_id(u32 x) -{ - WARN_ON(1); - return x; -} - static u32 xen_get_apic_id(u32 x) { return ((x)>>24) & 0xFFu; @@ -131,7 +125,6 @@ static struct apic xen_pv_apic __ro_after_init = { .max_apic_id = UINT_MAX, .get_apic_id = xen_get_apic_id, - .set_apic_id = xen_set_apic_id, .calc_dest_apicid = apic_flat_calc_apicid, From 06c702f6454ea774e7a52eb5c1bc829834570afa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:20 +0100 Subject: [PATCH 064/109] x86/apic: Remove 
yet another dubious callback commit 4a5f72a4a39f5d5dcf9b9dc1acc57ecbbb8d4caa upstream. Paranoia is not wrong, but having an APIC callback which is in most implementations a complete NOOP and in one actually looking whether the APICID of an upcoming CPU has been registered. The same APICID which was used to bring the CPU out of wait for startup. That's paranoia for the paranoia sake. Remove the voodoo. Intel-SIG: commit 4a5f72a4a39f x86/apic: Remove yet another dubious callback. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154640.116510935@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic.c | 3 --- arch/x86/kernel/apic/apic_common.c | 5 ----- arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/local.h | 2 -- arch/x86/kernel/apic/probe_32.c | 1 - 6 files changed, 14 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index c3754b5507846..2e71e648f045f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -291,7 +291,6 @@ struct apic { /* Probe, setup and smpboot functions */ int (*probe)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - bool (*apic_id_registered)(void); void (*init_apic_ldr)(void); u32 (*cpu_present_to_apicid)(int mps_cpu); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 04fad508abfe9..b09a7c7ebcd9a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1550,9 +1550,6 @@ static void setup_local_APIC(void) apic_write(APIC_ESR, 0); } #endif - /* Validate that the APIC is registered if required */ - BUG_ON(apic->apic_id_registered && !apic->apic_id_registered()); - /* * Intel recommends to set DFR, LDR and TPR before enabling * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index d4dfa437081a9..9ef3be866832c 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -27,11 +27,6 @@ u32 default_cpu_present_to_apicid(int mps_cpu) } EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); -bool default_apic_id_registered(void) -{ - return test_bit(read_apic_id(), phys_cpu_present_map); -} - /* * Set up the logical destination ID when the APIC operates in logical * destination mode. 
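For reference, the removed paranoia check boiled down to the two fragments below, condensed here from the hunks of this patch purely as an illustration of what goes away; the exact removals are shown in the surrounding diffs.

	/* The only non-trivial ->apic_id_registered() implementation: */
	bool default_apic_id_registered(void)
	{
		return test_bit(read_apic_id(), phys_cpu_present_map);
	}

	/* ... backing a single sanity check in setup_local_APIC(): */
	BUG_ON(apic->apic_id_registered && !apic->apic_id_registered());

Since the APIC ID being validated is the same one that was used to wake the CPU in the first place, dropping both the callback and the BUG_ON loses nothing.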
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 0efd66194f448..47396dbc65d14 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -70,7 +70,6 @@ static struct apic apic_flat __ro_after_init = { .name = "flat", .probe = flat_probe, .acpi_madt_oem_check = flat_acpi_madt_oem_check, - .apic_id_registered = default_apic_id_registered, .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = true, @@ -140,7 +139,6 @@ static struct apic apic_physflat __ro_after_init = { .name = "physical flat", .probe = physflat_probe, .acpi_madt_oem_check = physflat_acpi_madt_oem_check, - .apic_id_registered = default_apic_id_registered, .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = false, diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index f8a87f9e72e67..842fe28496bee 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -61,8 +61,6 @@ void default_send_IPI_allbutself(int vector); void default_send_IPI_all(int vector); void default_send_IPI_self(int vector); -bool default_apic_id_registered(void); - #ifdef CONFIG_X86_32 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 25df3528c56b0..80fba7521f0b9 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -38,7 +38,6 @@ static struct apic apic_default __ro_after_init = { .name = "default", .probe = probe_default, - .apic_id_registered = default_apic_id_registered, .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = true, From 97e357db8f57d7b9ccbe2d2a19ff2c1296d8a46c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:21 +0100 Subject: [PATCH 065/109] x86/apic: Use a proper define for invalid ACPI CPU ID commit 1a5d0f62d10d5da44c2b6a97b6600dea8a7519fb upstream. The ACPI ID for CPUs is preset with U32_MAX which is completely non obvious. Use a proper define for it. Intel-SIG: commit 1a5d0f62d10d x86/apic: Use a proper define for invalid ACPI CPU ID. 
x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154640.177504138@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 3 +++ arch/x86/kernel/apic/apic.c | 2 +- arch/x86/xen/enlighten_hvm.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2e71e648f045f..12c023ceba83a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -49,7 +49,10 @@ static inline void x86_32_probe_apic(void) { } extern u32 cpuid_to_apicid[]; +#define CPU_ACPIID_INVALID U32_MAX + #ifdef CONFIG_X86_LOCAL_APIC + extern int apic_verbosity; extern int local_apic_timer_c2_ok; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b09a7c7ebcd9a..66cab7808638b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -111,7 +111,7 @@ static inline bool apic_accessible(void) * Map cpu index to physical APIC ID */ DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 70be57e8f51ca..7410416cd8b6b 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -161,7 +161,7 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) */ xen_uninit_lock_cpu(cpu); - if (cpu_acpi_id(cpu) != U32_MAX) + if (cpu_acpi_id(cpu) != CPU_ACPIID_INVALID) per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); else per_cpu(xen_vcpu_id, cpu) = cpu; From 1207850511a534c634c06809d8276ad5eb9e67e8 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Fri, 25 Aug 2023 00:47:36 -0700 Subject: [PATCH 066/109] x86/of: Move the x86_flattree_get_config() call out of x86_dtb_init() commit 0d294c8c4efa5c0f283a6dfc82dc014a5dbd9308 upstream. Fetching the device tree configuration before initmem_init() is necessary to allow the parsing of NUMA node information. However moving the entire x86_dtb_init() call before initmem_init() is not correct as APIC/IO-APIC enumeration has to be after initmem_init(). Thus, move the x86_flattree_get_config() call out of x86_dtb_init(), into setup_arch(), to call it before initmem_init(), and leave the ACPI/IOAPIC registration sequence as-is. [ mingo: Updated the changelog for clarity. ] Intel-SIG: commit 0d294c8c4efa x86/of: Move the x86_flattree_get_config() call out of x86_dtb_init(). 
x86/topology: More cleanups and preparatory work Signed-off-by: Saurabh Sengar Signed-off-by: Ingo Molnar Cc: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/1692949657-16446-1-git-send-email-ssengar@linux.microsoft.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/prom.h | 5 +++++ arch/x86/kernel/devicetree.c | 6 +----- arch/x86/kernel/setup.c | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index b86b91cd3561b..043758a2e6270 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -31,6 +31,11 @@ static inline void x86_dtb_parse_smp_config(void) { } #define of_ioapic 0 #endif +#ifdef CONFIG_OF_EARLY_FLATTREE +void x86_flattree_get_config(void); +#else +static inline void x86_flattree_get_config(void) { } +#endif extern char cmd_line[COMMAND_LINE_SIZE]; #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 64126b20a0b58..9aa520992bb03 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -278,7 +278,7 @@ static void __init dtb_apic_setup(void) } #ifdef CONFIG_OF_EARLY_FLATTREE -static void __init x86_flattree_get_config(void) +void __init x86_flattree_get_config(void) { u32 size, map_len; void *dt; @@ -302,14 +302,10 @@ static void __init x86_flattree_get_config(void) if (initial_dtb) early_memunmap(dt, map_len); } -#else -static inline void x86_flattree_get_config(void) { } #endif void __init x86_dtb_parse_smp_config(void) { - x86_flattree_get_config(); - if (!of_have_populated_dt()) return; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index fb8bfe7966374..4e092972acf35 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1218,6 +1218,8 @@ void __init setup_arch(char **cmdline_p) early_acpi_boot_init(); x86_init.mpparse.early_parse_smp_cfg(); + x86_flattree_get_config(); + initmem_init(); dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); From cba4aa109e082b35084157a234178e7854182212 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Tue, 2 Apr 2024 07:40:27 -0700 Subject: [PATCH 067/109] x86/hyperv/vtl: Correct x86_init.mpparse.parse_smp_cfg assignment commit fe5e6b599fbc417662c549c04d278a13098eb52a upstream. VTL platform uses DeviceTree for fetching SMP configuration, assign the correct parsing function x86_dtb_parse_smp_config() for it to parse_smp_cfg. Fixes: c22e19cd2c8a ("x86/hyperv/vtl: Prepare for separate mpparse callbacks") Intel-SIG: commit fe5e6b599fbc x86/hyperv/vtl: Correct x86_init.mpparse.parse_smp_cfg assignment. 
x86/topology: More cleanups and preparatory work Signed-off-by: Saurabh Sengar Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/1712068830-4513-2-git-send-email-ssengar@linux.microsoft.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/hyperv/hv_vtl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 64ca4543ceca2..1490e4fb7ce9e 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -35,7 +35,7 @@ void __init hv_vtl_init_platform(void) /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; - x86_init.mpparse.parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; x86_platform.get_wallclock = get_rtc_noop; x86_platform.set_wallclock = set_rtc_noop; From d7b22898b2e8fdf690625db630dd257d6e2993b3 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Tue, 2 Apr 2024 07:40:28 -0700 Subject: [PATCH 068/109] x86/of: Set the parse_smp_cfg for all the DeviceTree platforms by default commit 222408cde4d0ab17e54d4db26751c2b5cab9ac2b upstream. x86_dtb_parse_smp_config() must be set by DeviceTree platform for parsing SMP configuration. Set the parse_smp_cfg pointer to x86_dtb_parse_smp_config() by default so that all the dtb platforms need not to assign it explicitly. Today there are only two platforms using DeviceTree in x86, ce4100 and hv_vtl. Remove the explicit assignment of x86_dtb_parse_smp_config() function from these. Intel-SIG: commit 222408cde4d0 x86/of: Set the parse_smp_cfg for all the DeviceTree platforms by default. x86/topology: More cleanups and preparatory work Signed-off-by: Saurabh Sengar Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/1712068830-4513-3-git-send-email-ssengar@linux.microsoft.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/hyperv/hv_vtl.c | 1 - arch/x86/include/asm/prom.h | 7 ++----- arch/x86/kernel/devicetree.c | 6 ++++-- arch/x86/platform/ce4100/ce4100.c | 1 - 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 1490e4fb7ce9e..d04ccd4b3b4af 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -35,7 +35,6 @@ void __init hv_vtl_init_platform(void) /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; - x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; x86_platform.get_wallclock = get_rtc_noop; x86_platform.set_wallclock = set_rtc_noop; diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index 043758a2e6270..02644e0105141 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -24,18 +24,15 @@ extern u64 initial_dtb; extern void add_dtb(u64 data); void x86_of_pci_init(void); void x86_dtb_parse_smp_config(void); +void x86_flattree_get_config(void); #else static inline void add_dtb(u64 data) { } static inline void x86_of_pci_init(void) { } static inline void x86_dtb_parse_smp_config(void) { } +static inline void x86_flattree_get_config(void) { } #define of_ioapic 0 #endif -#ifdef CONFIG_OF_EARLY_FLATTREE -void x86_flattree_get_config(void); -#else -static inline void x86_flattree_get_config(void) { } -#endif extern char cmd_line[COMMAND_LINE_SIZE]; #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 
9aa520992bb03..3d659474129f7 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -277,9 +277,9 @@ static void __init dtb_apic_setup(void) dtb_ioapic_setup(); } -#ifdef CONFIG_OF_EARLY_FLATTREE void __init x86_flattree_get_config(void) { +#ifdef CONFIG_OF_EARLY_FLATTREE u32 size, map_len; void *dt; @@ -301,8 +301,10 @@ void __init x86_flattree_get_config(void) if (initial_dtb) early_memunmap(dt, map_len); -} #endif + if (of_have_populated_dt()) + x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; +} void __init x86_dtb_parse_smp_config(void) { diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index f32451bdcfdd5..f8126821a94d7 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -139,7 +139,6 @@ void __init x86_ce4100_early_setup(void) x86_init.resources.probe_roms = x86_init_noop; x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; - x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; x86_init.pci.init = ce4100_pci_init; x86_init.pci.init_irq = sdv_pci_init; From 41443179d4879ab04a34369ce1e7ea9ad01340b8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:35 +0100 Subject: [PATCH 069/109] x86/cpu/topology: Move registration out of APIC code commit c0a66c2847908e41c771ca2355fba935a82a9f62 upstream. The APIC/CPU registration sits in the middle of the APIC code. In fact this is a topology evaluation function and has nothing to do with the inner workings of the local APIC. Move it out into a file which reflects what this is about. Intel-SIG: commit c0a66c284790 x86/cpu/topology: Move registration out of APIC code. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.543948812@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 2 + arch/x86/kernel/apic/apic.c | 185 +-------------------------------- arch/x86/kernel/cpu/Makefile | 12 ++- arch/x86/kernel/cpu/topology.c | 184 ++++++++++++++++++++++++++++++++ 4 files changed, 195 insertions(+), 188 deletions(-) create mode 100644 arch/x86/kernel/cpu/topology.c diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 12c023ceba83a..16b4a913f086f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -172,6 +172,8 @@ extern bool apic_needs_pit(void); extern void apic_send_IPI_allbutself(unsigned int vector); +extern void topology_register_boot_apic(u32 apic_id); + #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } #define local_apic_timer_c2_ok 1 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 66cab7808638b..1389fb20b4f17 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -68,26 +68,12 @@ #include "local.h" -unsigned int num_processors; - -unsigned disabled_cpus; - /* Processor that is doing the boot up */ u32 boot_cpu_physical_apicid __ro_after_init = BAD_APICID; EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); u8 boot_cpu_apic_version __ro_after_init; -/* Bitmap of physically present CPUs. */ -DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC); - -/* - * Processor to be disabled specified by kernel parameter - * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to - * avoid undefined behaviour caused by sending INIT from AP to BSP. 
- */ -static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID; - /* * This variable controls which CPUs receive external NMIs. By default, * external NMIs are delivered only to the BSP. @@ -107,14 +93,6 @@ static inline bool apic_accessible(void) return x2apic_mode || apic_mmio_base; } -/* - * Map cpu index to physical APIC ID - */ -DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); - #ifdef CONFIG_X86_32 /* Local APIC was disabled by the BIOS and enabled by the kernel */ static int enabled_via_apicbase __ro_after_init; @@ -1688,8 +1666,6 @@ void apic_ap_setup(void) end_local_APIC_setup(); } -static __init void cpu_set_boot_apic(void); - static __init void apic_read_boot_cpu_id(bool x2apic) { /* @@ -1704,7 +1680,8 @@ static __init void apic_read_boot_cpu_id(bool x2apic) boot_cpu_physical_apicid = read_apic_id(); boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); } - cpu_set_boot_apic(); + topology_register_boot_apic(boot_cpu_physical_apicid); + x86_32_probe_bigsmp_early(); } #ifdef CONFIG_X86_X2APIC @@ -2310,155 +2287,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_LVT1, value); } -/* - * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated - * contiguously, it equals to current allocated max logical CPU ID plus 1. - * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, - * so the maximum of nr_logical_cpuids is nr_cpu_ids. - * - * NOTE: Reserve 0 for BSP. - */ -static int nr_logical_cpuids = 1; - -/* - * Used to store mapping between logical CPU IDs and APIC IDs. - */ -u32 cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = BAD_APICID, }; - -bool arch_match_cpu_phys_id(int cpu, u64 phys_id) -{ - return phys_id == (u64)cpuid_to_apicid[cpu]; -} - -#ifdef CONFIG_SMP -static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) -{ - /* Isolate the SMT bit(s) in the APICID and check for 0 */ - u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1; - - if (smp_num_siblings == 1 || !(apicid & mask)) - cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); -} - -/* - * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid - * during early boot. Initialize the primary thread mask before SMP - * bringup. - */ -static int __init smp_init_primary_thread_mask(void) -{ - unsigned int cpu; - - /* - * XEN/PV provides either none or useless topology information. - * Pretend that all vCPUs are primary threads. - */ - if (xen_pv_domain()) { - cpumask_copy(&__cpu_primary_thread_mask, cpu_possible_mask); - return 0; - } - - for (cpu = 0; cpu < nr_logical_cpuids; cpu++) - cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]); - return 0; -} -early_initcall(smp_init_primary_thread_mask); -#else -static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } -#endif - -/* - * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids - * and cpuid_to_apicid[] synchronized. - */ -static int allocate_logical_cpuid(int apicid) -{ - int i; - - /* - * cpuid <-> apicid mapping is persistent, so when a cpu is up, - * check if the kernel has allocated a cpuid for it. - */ - for (i = 0; i < nr_logical_cpuids; i++) { - if (cpuid_to_apicid[i] == apicid) - return i; - } - - /* Allocate a new cpuid. */ - if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. 
" - "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids, nr_logical_cpuids, apicid); - return -EINVAL; - } - - cpuid_to_apicid[nr_logical_cpuids] = apicid; - return nr_logical_cpuids++; -} - -static void cpu_update_apic(int cpu, u32 apicid) -{ -#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) - early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; -#endif - set_cpu_possible(cpu, true); - set_bit(apicid, phys_cpu_present_map); - set_cpu_present(cpu, true); - num_processors++; - - if (system_state != SYSTEM_BOOTING) - cpu_mark_primary_thread(cpu, apicid); -} - -static __init void cpu_set_boot_apic(void) -{ - cpuid_to_apicid[0] = boot_cpu_physical_apicid; - cpu_update_apic(0, boot_cpu_physical_apicid); - x86_32_probe_bigsmp_early(); -} - -int generic_processor_info(int apicid) -{ - int cpu, max = nr_cpu_ids; - - /* The boot CPU must be set before MADT/MPTABLE parsing happens */ - if (cpuid_to_apicid[0] == BAD_APICID) - panic("Boot CPU APIC not registered yet\n"); - - if (apicid == boot_cpu_physical_apicid) - return 0; - - if (disabled_cpu_apicid == apicid) { - int thiscpu = num_processors + disabled_cpus; - - pr_warn("APIC: Disabling requested cpu. Processor %d/0x%x ignored.\n", - thiscpu, apicid); - - disabled_cpus++; - return -ENODEV; - } - - if (num_processors >= nr_cpu_ids) { - int thiscpu = max + disabled_cpus; - - pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. " - "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - - disabled_cpus++; - return -EINVAL; - } - - cpu = allocate_logical_cpuid(apicid); - if (cpu < 0) { - disabled_cpus++; - return -EINVAL; - } - - cpu_update_apic(cpu, apicid); - return cpu; -} - - void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, bool dmar) { @@ -2847,15 +2675,6 @@ static int __init lapic_insert_resource(void) */ late_initcall(lapic_insert_resource); -static int __init apic_set_disabled_cpu_apicid(char *arg) -{ - if (!arg || !get_option(&arg, &disabled_cpu_apicid)) - return -EINVAL; - - return 0; -} -early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); - static int __init apic_set_extnmi(char *arg) { if (!arg) diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 9e0a1c119a91c..eb4dbcdf41f1d 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -26,14 +26,16 @@ obj-y += bugs.o obj-y += aperfmperf.o obj-y += cpuid-deps.o obj-y += umwait.o +obj-y += capflags.o powerflags.o -obj-$(CONFIG_PROC_FS) += proc.o -obj-y += capflags.o powerflags.o +obj-$(CONFIG_X86_LOCAL_APIC) += topology.o -obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o +obj-$(CONFIG_PROC_FS) += proc.o + +obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o ifdef CONFIG_CPU_SUP_INTEL -obj-y += intel.o intel_pconfig.o tsx.o -obj-$(CONFIG_PM) += intel_epb.o +obj-y += intel.o intel_pconfig.o tsx.o +obj-$(CONFIG_PM) += intel_epb.o endif obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c new file mode 100644 index 0000000000000..b99cd19b7d12f --- /dev/null +++ b/arch/x86/kernel/cpu/topology.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include + +#include + +#include +#include +#include + +/* + * Map cpu index to physical APIC ID + */ +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); + +/* Bitmap of physically 
present CPUs. */ +DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly; + +/* Used for CPU number allocation and parallel CPU bringup */ +u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; + +/* + * Processor to be disabled specified by kernel parameter + * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to + * avoid undefined behaviour caused by sending INIT from AP to BSP. + */ +static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID; + +unsigned int num_processors; +unsigned disabled_cpus; + +/* + * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated + * contiguously, it equals to current allocated max logical CPU ID plus 1. + * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, + * so the maximum of nr_logical_cpuids is nr_cpu_ids. + * + * NOTE: Reserve 0 for BSP. + */ +static int nr_logical_cpuids = 1; + +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == (u64)cpuid_to_apicid[cpu]; +} + +#ifdef CONFIG_SMP +static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) +{ + /* Isolate the SMT bit(s) in the APICID and check for 0 */ + u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1; + + if (smp_num_siblings == 1 || !(apicid & mask)) + cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); +} + +/* + * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid + * during early boot. Initialize the primary thread mask before SMP + * bringup. + */ +static int __init smp_init_primary_thread_mask(void) +{ + unsigned int cpu; + + /* + * XEN/PV provides either none or useless topology information. + * Pretend that all vCPUs are primary threads. + */ + if (xen_pv_domain()) { + cpumask_copy(&__cpu_primary_thread_mask, cpu_possible_mask); + return 0; + } + + for (cpu = 0; cpu < nr_logical_cpuids; cpu++) + cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]); + return 0; +} +early_initcall(smp_init_primary_thread_mask); +#else +static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } +#endif + +/* + * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids + * and cpuid_to_apicid[] synchronized. + */ +static int allocate_logical_cpuid(int apicid) +{ + int i; + + /* + * cpuid <-> apicid mapping is persistent, so when a cpu is up, + * check if the kernel has allocated a cpuid for it. + */ + for (i = 0; i < nr_logical_cpuids; i++) { + if (cpuid_to_apicid[i] == apicid) + return i; + } + + /* Allocate a new cpuid. */ + if (nr_logical_cpuids >= nr_cpu_ids) { + WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. 
" + "Processor %d/0x%x and the rest are ignored.\n", + nr_cpu_ids, nr_logical_cpuids, apicid); + return -EINVAL; + } + + cpuid_to_apicid[nr_logical_cpuids] = apicid; + return nr_logical_cpuids++; +} + +static void cpu_update_apic(int cpu, u32 apicid) +{ +#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) + early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; +#endif + set_cpu_possible(cpu, true); + set_bit(apicid, phys_cpu_present_map); + set_cpu_present(cpu, true); + num_processors++; + + if (system_state != SYSTEM_BOOTING) + cpu_mark_primary_thread(cpu, apicid); +} + +void __init topology_register_boot_apic(u32 apic_id) +{ + cpuid_to_apicid[0] = apic_id; + cpu_update_apic(0, apic_id); +} + +int generic_processor_info(int apicid) +{ + int cpu, max = nr_cpu_ids; + + /* The boot CPU must be set before MADT/MPTABLE parsing happens */ + if (cpuid_to_apicid[0] == BAD_APICID) + panic("Boot CPU APIC not registered yet\n"); + + if (apicid == boot_cpu_physical_apicid) + return 0; + + if (disabled_cpu_apicid == apicid) { + int thiscpu = num_processors + disabled_cpus; + + pr_warn("APIC: Disabling requested cpu. Processor %d/0x%x ignored.\n", + thiscpu, apicid); + + disabled_cpus++; + return -ENODEV; + } + + if (num_processors >= nr_cpu_ids) { + int thiscpu = max + disabled_cpus; + + pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. " + "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); + + disabled_cpus++; + return -EINVAL; + } + + cpu = allocate_logical_cpuid(apicid); + if (cpu < 0) { + disabled_cpus++; + return -EINVAL; + } + + cpu_update_apic(cpu, apicid); + return cpu; +} + +static int __init apic_set_disabled_cpu_apicid(char *arg) +{ + if (!arg || !get_option(&arg, &disabled_cpu_apicid)) + return -EINVAL; + + return 0; +} +early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); From 7cadafbb6950d63e3e10475562a127252d9038e8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:36 +0100 Subject: [PATCH 070/109] x86/cpu/topology: Provide separate APIC registration functions commit 4176b541c2c68bf79d0a05f316713ed8f0c9cdb4 upstream. generic_processor_info() aside of being a complete misnomer is used for both early boot registration and ACPI CPU hotplug. While it's arguable that this can share some code, it results in code which is hard to understand and kept around post init for no real reason. Also the call sites do lots of manual fiddling in topology related variables instead of having proper interfaces for the purpose which handle the topology internals correctly. Provide topology_register_apic(), topology_hotplug_apic() and topology_hotunplug_apic() which have the extra magic of the call sites incorporated and for now are wrappers around generic_processor_info(). Intel-SIG: commit 4176b541c2c6 x86/cpu/topology: Provide separate APIC registration functions. 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.605007456@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 3 + arch/x86/kernel/cpu/topology.c | 113 +++++++++++++++++++++++++++------ 2 files changed, 98 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 16b4a913f086f..e29b5f17e0ba8 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -172,7 +172,10 @@ extern bool apic_needs_pit(void); extern void apic_send_IPI_allbutself(unsigned int vector); +extern void topology_register_apic(u32 apic_id, u32 acpi_id, bool present); extern void topology_register_boot_apic(u32 apic_id); +extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id); +extern void topology_hotunplug_apic(unsigned int cpu); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index b99cd19b7d12f..3dd7e6c08cb58 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -84,32 +84,38 @@ early_initcall(smp_init_primary_thread_mask); static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif -/* - * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids - * and cpuid_to_apicid[] synchronized. - */ -static int allocate_logical_cpuid(int apicid) +static int topo_lookup_cpuid(u32 apic_id) { int i; - /* - * cpuid <-> apicid mapping is persistent, so when a cpu is up, - * check if the kernel has allocated a cpuid for it. - */ + /* CPU# to APICID mapping is persistent once it is established */ for (i = 0; i < nr_logical_cpuids; i++) { - if (cpuid_to_apicid[i] == apicid) + if (cpuid_to_apicid[i] == apic_id) return i; } + return -ENODEV; +} + +/* + * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids + * and cpuid_to_apicid[] synchronized. + */ +static int allocate_logical_cpuid(u32 apic_id) +{ + int cpu = topo_lookup_cpuid(apic_id); + + if (cpu >= 0) + return cpu; /* Allocate a new cpuid. */ if (nr_logical_cpuids >= nr_cpu_ids) { WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. 
" "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids, nr_logical_cpuids, apicid); + nr_cpu_ids, nr_logical_cpuids, apic_id); return -EINVAL; } - cpuid_to_apicid[nr_logical_cpuids] = apicid; + cpuid_to_apicid[nr_logical_cpuids] = apic_id; return nr_logical_cpuids++; } @@ -127,12 +133,6 @@ static void cpu_update_apic(int cpu, u32 apicid) cpu_mark_primary_thread(cpu, apicid); } -void __init topology_register_boot_apic(u32 apic_id) -{ - cpuid_to_apicid[0] = apic_id; - cpu_update_apic(0, apic_id); -} - int generic_processor_info(int apicid) { int cpu, max = nr_cpu_ids; @@ -174,6 +174,83 @@ int generic_processor_info(int apicid) return cpu; } +/** + * topology_register_apic - Register an APIC in early topology maps + * @apic_id: The APIC ID to set up + * @acpi_id: The ACPI ID associated to the APIC + * @present: True if the corresponding CPU is present + */ +void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) +{ + int cpu; + + if (apic_id >= MAX_LOCAL_APIC) { + pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1); + return; + } + + if (!present) { + disabled_cpus++; + return; + } + + cpu = generic_processor_info(apic_id); + if (cpu >= 0) + early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; +} + +/** + * topology_register_boot_apic - Register the boot CPU APIC + * @apic_id: The APIC ID to set up + * + * Separate so CPU #0 can be assigned + */ +void __init topology_register_boot_apic(u32 apic_id) +{ + cpuid_to_apicid[0] = apic_id; + cpu_update_apic(0, apic_id); +} + +#ifdef CONFIG_ACPI_HOTPLUG_CPU +/** + * topology_hotplug_apic - Handle a physical hotplugged APIC after boot + * @apic_id: The APIC ID to set up + * @acpi_id: The ACPI ID associated to the APIC + */ +int topology_hotplug_apic(u32 apic_id, u32 acpi_id) +{ + int cpu; + + if (apic_id >= MAX_LOCAL_APIC) + return -EINVAL; + + cpu = topo_lookup_cpuid(apic_id); + if (cpu < 0) { + cpu = generic_processor_info(apic_id); + if (cpu >= 0) + per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; + } + return cpu; +} + +/** + * topology_hotunplug_apic - Remove a physical hotplugged APIC after boot + * @cpu: The CPU number for which the APIC ID is removed + */ +void topology_hotunplug_apic(unsigned int cpu) +{ + u32 apic_id = cpuid_to_apicid[cpu]; + + if (apic_id == BAD_APICID) + return; + + per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; + clear_bit(apic_id, phys_cpu_present_map); + set_cpu_present(cpu, false); + num_processors--; +} +#endif + static int __init apic_set_disabled_cpu_apicid(char *arg) { if (!arg || !get_option(&arg, &disabled_cpu_apicid)) From 4143ba8a8100e1f8fbdbbef19bf8eee3bd2e699e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:37 +0100 Subject: [PATCH 071/109] x86/acpi: Use new APIC registration functions commit ff37b09c8495ed897ea470014d1461660db6a942 upstream. Use the new topology registration functions and make the early boot code path __init. No functional change intended. Intel-SIG: commit ff37b09c8495 x86/acpi: Use new APIC registration functions. 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.664738831@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/acpi/boot.c | 44 ++++++------------------------------- 1 file changed, 7 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 96699b5323253..f182a37453863 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -156,33 +156,9 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) return 0; } -/** - * acpi_register_lapic - register a local apic and generates a logic cpu number - * @id: local apic id to register - * @acpiid: ACPI id to register - * @enabled: this cpu is enabled or not - * - * Returns the logic cpu number which maps to the local apic - */ -static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) +static __init void acpi_register_lapic(u32 apic_id, u32 acpi_id, bool present) { - int cpu; - - if (id >= MAX_LOCAL_APIC) { - pr_info("skipped apicid that is too big\n"); - return -EINVAL; - } - - if (!enabled) { - ++disabled_cpus; - return -EINVAL; - } - - cpu = generic_processor_info(id); - if (cpu >= 0) - early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; - - return cpu; + topology_register_apic(apic_id, acpi_id, present); } static bool __init acpi_is_processor_usable(u32 lapic_flags) @@ -774,12 +750,10 @@ static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) return 0; } -int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, - int *pcpu) +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu) { - int cpu; + int cpu = topology_hotplug_apic(physid, acpi_id); - cpu = acpi_register_lapic(physid, acpi_id, ACPI_MADT_ENABLED); if (cpu < 0) { pr_info("Unable to map lapic to logical cpu number\n"); return cpu; @@ -798,15 +772,11 @@ int acpi_unmap_cpu(int cpu) #ifdef CONFIG_ACPI_NUMA set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE); #endif - - per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; - set_cpu_present(cpu, false); - num_processors--; - - return (0); + topology_hotunplug_apic(cpu); + return 0; } EXPORT_SYMBOL(acpi_unmap_cpu); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) { From 56b551a58a4792af65a97e3688e1d3abf9b0bb9a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:39 +0100 Subject: [PATCH 072/109] x86/jailhouse: Use new APIC registration function commit 8cd01c8a68b083e2a0af601c5464e2dfa64f1421 upstream. No functional change intended. Intel-SIG: commit 8cd01c8a68b0 x86/jailhouse: Use new APIC registration function. 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.720970412@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/jailhouse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index bf47224c88b15..cd8ed1edbf9ee 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -103,7 +103,7 @@ static void __init jailhouse_parse_smp_config(void) register_lapic_address(0xfee00000); for (cpu = 0; cpu < setup_data.v1.num_cpus; cpu++) - generic_processor_info(setup_data.v1.cpu_ids[cpu]); + topology_register_apic(setup_data.v1.cpu_ids[cpu], CPU_ACPIID_INVALID, true); smp_found_config = 1; From cee66e55c1952f1946ca447fb66e523bdf5635b3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:40 +0100 Subject: [PATCH 073/109] x86/of: Use new APIC registration functions commit 7d319c0fcae68e489fcf806cdea46a795062eaf7 upstream. No functional change intended. Intel-SIG: commit 7d319c0fcae6 x86/of: Use new APIC registration functions. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.776009244@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/devicetree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3d659474129f7..a4ffed7678724 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -136,7 +136,7 @@ static void __init dtb_cpu_setup(void) pr_warn("%pOF: missing local APIC ID\n", dn); continue; } - generic_processor_info(apic_id); + topology_register_apic(apic_id, CPU_ACPIID_INVALID, true); } } From 349598d9418e1385f1dd1e2ae338a0070818ab55 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:42 +0100 Subject: [PATCH 074/109] x86/mpparse: Use new APIC registration function commit 8098428c541212e9835c1771ee90caa968ffef4f upstream. Aside of switching over to the new interface, record the number of registered CPUs locally, which allows to make num_processors and disabled_cpus confined to the topology code. No functional change intended. Intel-SIG: commit 8098428c5412 x86/mpparse: Use new APIC registration function. 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.830955273@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/mpspec.h | 2 -- arch/x86/kernel/cpu/topology.c | 2 +- arch/x86/kernel/mpparse.c | 17 +++++++++-------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 1b79d0ee95dfa..c72c7ff78fcdc 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -61,8 +61,6 @@ static inline void e820__memblock_alloc_reserved_mpc_new(void) { } #define mpparse_parse_smp_config x86_init_noop #endif -int generic_processor_info(int apicid); - extern DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC); static inline void reset_phys_cpu_present_map(u32 apicid) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 3dd7e6c08cb58..669e258b9e0b4 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -133,7 +133,7 @@ static void cpu_update_apic(int cpu, u32 apicid) cpu_mark_primary_thread(cpu, apicid); } -int generic_processor_info(int apicid) +static int generic_processor_info(int apicid) { int cpu, max = nr_cpu_ids; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 9c000c409eb1d..1ccd30c8246fa 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -36,6 +36,8 @@ * Checksum an MP configuration block. */ +static unsigned int num_procs __initdata; + static int __init mpf_checksum(unsigned char *mp, int len) { int sum = 0; @@ -50,16 +52,15 @@ static void __init MP_processor_info(struct mpc_cpu *m) { char *bootup_cpu = ""; - if (!(m->cpuflag & CPU_ENABLED)) { - disabled_cpus++; + topology_register_apic(m->apicid, CPU_ACPIID_INVALID, m->cpuflag & CPU_ENABLED); + if (!(m->cpuflag & CPU_ENABLED)) return; - } if (m->cpuflag & CPU_BOOTPROCESSOR) bootup_cpu = " (Bootup-CPU)"; pr_info("Processor #%d%s\n", m->apicid, bootup_cpu); - generic_processor_info(m->apicid); + num_procs++; } #ifdef CONFIG_X86_IO_APIC @@ -236,9 +237,9 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) } } - if (!num_processors) + if (!num_procs && !acpi_lapic) pr_err("MPTABLE: no processors registered!\n"); - return num_processors; + return num_procs || acpi_lapic; } #ifdef CONFIG_X86_IO_APIC @@ -529,8 +530,8 @@ static __init void mpparse_get_smp_config(unsigned int early) } else BUG(); - if (!early) - pr_info("Processors: %d\n", num_processors); + if (!early && !acpi_lapic) + pr_info("Processors: %d\n", num_procs); /* * Only use the first configuration found. */ From 9b3f2de3e50b3a2fbfa296fe378408bcc3f3ced6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:43 +0100 Subject: [PATCH 075/109] x86/acpi: Dont invoke topology_register_apic() for XEN PV commit cab8e164a49c0ee5c9acb7edec33d76422d831bf upstream. The MADT table for XEN/PV dom0 is not really useful and registering the APICs is momentarily a pointless exercise because XENPV does not use an APIC at all. It overrides the x86_init.mpparse.parse_smp_config() callback, resets num_processors and counts how many of them are provided by the hypervisor. This is in the way of cleaning up the APIC registration. Prevent MADT registration for XEN/PV temporarily until the rework is completed and XEN/PV can use the MADT again. 
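Condensed, the change is nothing more than a xen_pv_domain() guard around the registration helpers; a rough sketch of the resulting ACPI glue (the exact hunks follow below, which also apply the same guard to register_lapic_address()):

	static __init void acpi_register_lapic(u32 apic_id, u32 acpi_id, bool present)
	{
		/* XEN/PV dom0 parses its own SMP config; skip MADT registration for now */
		if (!xen_pv_domain())
			topology_register_apic(apic_id, acpi_id, present);
	}
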
Intel-SIG: commit cab8e164a49c x86/acpi: Dont invoke topology_register_apic() for XEN PV. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.885489468@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/acpi/boot.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f182a37453863..6c26e2116202b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -23,6 +23,8 @@ #include #include +#include + #include #include #include @@ -158,7 +160,8 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) static __init void acpi_register_lapic(u32 apic_id, u32 acpi_id, bool present) { - topology_register_apic(apic_id, acpi_id, present); + if (!xen_pv_domain()) + topology_register_apic(apic_id, acpi_id, present); } static bool __init acpi_is_processor_usable(u32 lapic_flags) @@ -1017,7 +1020,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) return count; } - register_lapic_address(acpi_lapic_addr); + if (!xen_pv_domain()) + register_lapic_address(acpi_lapic_addr); return count; } From a7572dd41bccd8f21e1d81980c0091a43b71fb69 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:44 +0100 Subject: [PATCH 076/109] x86/xen/smp_pv: Register fake APICs commit e7530702346637af46bca1d114e6d63312eb3461 upstream. XENPV does not use the APIC. It's just piggy packing on the infrastructure and fiddles with global variables as it sees fit. These global variables are going away, so let XENPV register pseudo APIC IDs to keep the accounting correct and keep up the illusion that XEN/PV is something sane. Intel-SIG: commit e75307023466 x86/xen/smp_pv: Register fake APICs. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.940043512@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/smp_pv.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 7f6f34056e13e..98849b3f5eac2 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -150,34 +151,16 @@ int xen_smp_intr_init_pv(unsigned int cpu) static void __init xen_pv_smp_config(void) { - int i, rc; - unsigned int subtract = 0; - - num_processors = 0; - disabled_cpus = 0; - for (i = 0; i < nr_cpu_ids; i++) { - rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) { - num_processors++; - set_cpu_possible(i, true); - } else { - set_cpu_possible(i, false); - set_cpu_present(i, false); - subtract++; - } + u32 apicid = 0; + int i; + + topology_register_boot_apic(apicid++); + + for (i = 1; i < nr_cpu_ids; i++) { + if (HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL) < 0) + break; + topology_register_apic(apicid++, CPU_ACPIID_INVALID, true); } -#ifdef CONFIG_HOTPLUG_CPU - /* This is akin to using 'nr_cpus' on the Linux command line. - * Which is OK as when we use 'dom0_max_vcpus=X' we can only - * have up to X, while nr_cpu_ids is greater than X. 
This - * normally is not a problem, except when CPU hotplugging - * is involved and then there might be more than X CPUs - * in the guest - which will not work as there is no - * hypercall to expand the max number of VCPUs an already - * running guest has. So cap it up to X. */ - if (subtract) - set_nr_cpu_ids(nr_cpu_ids - subtract); -#endif /* Pretend to be a proper enumerated system */ smp_found_config = 1; } From 877eb859499f50e901d89077078162dafad8e0fa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:46 +0100 Subject: [PATCH 077/109] x86/cpu/topology: Confine topology information commit 58aa34abe9954cd5dfbf322fc612146c5f45e52b upstream. Now that all external fiddling with num_processors and disabled_cpus is gone, move the last user prefill_possible_map() into the topology code too and remove the global visibility of these variables. Intel-SIG: commit 58aa34abe995 x86/cpu/topology: Confine topology information. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.994756960@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/smp.h | 3 -- arch/x86/kernel/apic/apic.c | 1 - arch/x86/kernel/cpu/topology.c | 76 +++++++++++++++++++++++++++++++++- arch/x86/kernel/smpboot.c | 72 -------------------------------- 4 files changed, 74 insertions(+), 78 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 390d53fd34f94..807aafa63a3ec 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -9,7 +9,6 @@ #include extern int smp_num_siblings; -extern unsigned int num_processors; DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); @@ -175,8 +174,6 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) } #endif /* CONFIG_SMP */ -extern unsigned disabled_cpus; - #ifdef CONFIG_DEBUG_NMI_SELFTEST extern void nmi_selftest(void); #else diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 1389fb20b4f17..ccb858cc18bb0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2072,7 +2072,6 @@ void __init init_apic_mappings(void) pr_info("APIC: disable apic facility\n"); apic_disable(); } - num_processors = 1; } } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 669e258b9e0b4..a6c9314349516 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -29,8 +29,8 @@ u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; */ static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID; -unsigned int num_processors; -unsigned disabled_cpus; +static unsigned int num_processors; +static unsigned int disabled_cpus; /* * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated @@ -174,6 +174,71 @@ static int generic_processor_info(int apicid) return cpu; } +static int __initdata setup_possible_cpus = -1; + +/* + * cpu_possible_mask should be static, it cannot change as cpu's + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and don't expect to + * do this dynamically on cpu arrival/departure. + * cpu_present_mask on the other hand can change dynamically. + * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. 
+ * - Ashok Raj + * + * Three ways to find out the number of additional hotplug CPUs: + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. + * - The user can overwrite it with possible_cpus=NUM + * - Otherwise don't reserve additional CPUs. + * We do this because additional CPUs waste a lot of memory. + * -AK + */ +__init void prefill_possible_map(void) +{ + int i, possible; + + i = setup_max_cpus ?: 1; + if (setup_possible_cpus == -1) { + possible = num_processors; +#ifdef CONFIG_HOTPLUG_CPU + if (setup_max_cpus) + possible += disabled_cpus; +#else + if (possible > i) + possible = i; +#endif + } else + possible = setup_possible_cpus; + + total_cpus = max_t(int, possible, num_processors + disabled_cpus); + + /* nr_cpu_ids could be reduced via nr_cpus= */ + if (possible > nr_cpu_ids) { + pr_warn("%d Processors exceeds NR_CPUS limit of %u\n", + possible, nr_cpu_ids); + possible = nr_cpu_ids; + } + +#ifdef CONFIG_HOTPLUG_CPU + if (!setup_max_cpus) +#endif + if (possible > i) { + pr_warn("%d Processors exceeds max_cpus limit of %u\n", + possible, setup_max_cpus); + possible = i; + } + + set_nr_cpu_ids(possible); + + pr_info("Allowing %d CPUs, %d hotplug CPUs\n", + possible, max_t(int, possible - num_processors, 0)); + + reset_cpu_possible_mask(); + + for (i = 0; i < possible; i++) + set_cpu_possible(i, true); +} + /** * topology_register_apic - Register an APIC in early topology maps * @apic_id: The APIC ID to set up @@ -251,6 +316,13 @@ void topology_hotunplug_apic(unsigned int cpu) } #endif +static int __init _setup_possible_cpus(char *str) +{ + get_option(&str, &setup_possible_cpus); + return 0; +} +early_param("possible_cpus", _setup_possible_cpus); + static int __init apic_set_disabled_cpu_apicid(char *arg) { if (!arg || !get_option(&arg, &disabled_cpu_apicid)) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c8322867adb43..f7299151947df 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1283,78 +1283,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) cache_aps_init(); } -static int __initdata setup_possible_cpus = -1; -static int __init _setup_possible_cpus(char *str) -{ - get_option(&str, &setup_possible_cpus); - return 0; -} -early_param("possible_cpus", _setup_possible_cpus); - - -/* - * cpu_possible_mask should be static, it cannot change as cpu's - * are onlined, or offlined. The reason is per-cpu data-structures - * are allocated by some modules at init time, and don't expect to - * do this dynamically on cpu arrival/departure. - * cpu_present_mask on the other hand can change dynamically. - * In case when cpu_hotplug is not compiled, then we resort to current - * behaviour, which is cpu_possible == cpu_present. - * - Ashok Raj - * - * Three ways to find out the number of additional hotplug CPUs: - * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with possible_cpus=NUM - * - Otherwise don't reserve additional CPUs. - * We do this because additional CPUs waste a lot of memory. 
- * -AK - */ -__init void prefill_possible_map(void) -{ - int i, possible; - - i = setup_max_cpus ?: 1; - if (setup_possible_cpus == -1) { - possible = num_processors; -#ifdef CONFIG_HOTPLUG_CPU - if (setup_max_cpus) - possible += disabled_cpus; -#else - if (possible > i) - possible = i; -#endif - } else - possible = setup_possible_cpus; - - total_cpus = max_t(int, possible, num_processors + disabled_cpus); - - /* nr_cpu_ids could be reduced via nr_cpus= */ - if (possible > nr_cpu_ids) { - pr_warn("%d Processors exceeds NR_CPUS limit of %u\n", - possible, nr_cpu_ids); - possible = nr_cpu_ids; - } - -#ifdef CONFIG_HOTPLUG_CPU - if (!setup_max_cpus) -#endif - if (possible > i) { - pr_warn("%d Processors exceeds max_cpus limit of %u\n", - possible, setup_max_cpus); - possible = i; - } - - set_nr_cpu_ids(possible); - - pr_info("Allowing %d CPUs, %d hotplug CPUs\n", - possible, max_t(int, possible - num_processors, 0)); - - reset_cpu_possible_mask(); - - for (i = 0; i < possible; i++) - set_cpu_possible(i, true); -} - /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { From f8edcc18f6631371a4c32980290fee27d0abb21c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:47 +0100 Subject: [PATCH 078/109] x86/cpu/topology: Simplify APIC registration commit 4c4c6f38704ab0e3f85f660b7479de7aa559d79a upstream. Having the same check whether the number of assigned CPUs has reached the nr_cpu_ids limit twice in the same code path is pointless. Repeating the information that CPUs are ignored over and over is also pointless noise. Remove the redundant check and reduce the noise by using a pr_warn_once(). Intel-SIG: commit 4c4c6f38704a x86/cpu/topology: Simplify APIC registration. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.050264369@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index a6c9314349516..8b42918b7b582 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -107,14 +107,6 @@ static int allocate_logical_cpuid(u32 apic_id) if (cpu >= 0) return cpu; - /* Allocate a new cpuid. */ - if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. " - "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids, nr_logical_cpuids, apic_id); - return -EINVAL; - } - cpuid_to_apicid[nr_logical_cpuids] = apic_id; return nr_logical_cpuids++; } @@ -135,7 +127,7 @@ static void cpu_update_apic(int cpu, u32 apicid) static int generic_processor_info(int apicid) { - int cpu, max = nr_cpu_ids; + int cpu; /* The boot CPU must be set before MADT/MPTABLE parsing happens */ if (cpuid_to_apicid[0] == BAD_APICID) @@ -155,21 +147,12 @@ static int generic_processor_info(int apicid) } if (num_processors >= nr_cpu_ids) { - int thiscpu = max + disabled_cpus; - - pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. " - "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - + pr_warn_once("APIC: CPU limit of %d reached. 
Ignoring further CPUs\n", nr_cpu_ids); disabled_cpus++; - return -EINVAL; + return -ENOSPC; } cpu = allocate_logical_cpuid(apicid); - if (cpu < 0) { - disabled_cpus++; - return -EINVAL; - } - cpu_update_apic(cpu, apicid); return cpu; } From 05cb171236d33e96dc5741350b90e64ddad51b16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:49 +0100 Subject: [PATCH 079/109] x86/cpu/topology: Use a data structure for topology info commit 72530464ed609bcdd49a760e9d0bc3b16717ff2b upstream. Put the processor accounting into a data structure, which will gain more topology related information in the next steps, and sanitize the accounting. Intel-SIG: commit 72530464ed60 x86/cpu/topology: Use a data structure for topology info. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.111451909@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 59 +++++++++++++++++----------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 8b42918b7b582..815e3eeed4159 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -22,6 +22,18 @@ DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly; /* Used for CPU number allocation and parallel CPU bringup */ u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; +/* + * Keep track of assigned, disabled and rejected CPUs. Present assigned + * with 1 as CPU #0 is reserved for the boot CPU. + */ +static struct { + unsigned int nr_assigned_cpus; + unsigned int nr_disabled_cpus; + unsigned int nr_rejected_cpus; +} topo_info __read_mostly = { + .nr_assigned_cpus = 1, +}; + /* * Processor to be disabled specified by kernel parameter * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to @@ -29,19 +41,6 @@ u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; */ static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID; -static unsigned int num_processors; -static unsigned int disabled_cpus; - -/* - * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated - * contiguously, it equals to current allocated max logical CPU ID plus 1. - * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, - * so the maximum of nr_logical_cpuids is nr_cpu_ids. - * - * NOTE: Reserve 0 for BSP. 
- */ -static int nr_logical_cpuids = 1; - bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return phys_id == (u64)cpuid_to_apicid[cpu]; @@ -75,7 +74,7 @@ static int __init smp_init_primary_thread_mask(void) return 0; } - for (cpu = 0; cpu < nr_logical_cpuids; cpu++) + for (cpu = 0; cpu < topo_info.nr_assigned_cpus; cpu++) cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]); return 0; } @@ -89,7 +88,7 @@ static int topo_lookup_cpuid(u32 apic_id) int i; /* CPU# to APICID mapping is persistent once it is established */ - for (i = 0; i < nr_logical_cpuids; i++) { + for (i = 0; i < topo_info.nr_assigned_cpus; i++) { if (cpuid_to_apicid[i] == apic_id) return i; } @@ -107,22 +106,21 @@ static int allocate_logical_cpuid(u32 apic_id) if (cpu >= 0) return cpu; - cpuid_to_apicid[nr_logical_cpuids] = apic_id; - return nr_logical_cpuids++; + return topo_info.nr_assigned_cpus++; } -static void cpu_update_apic(int cpu, u32 apicid) +static void cpu_update_apic(unsigned int cpu, u32 apic_id) { #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) - early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; + early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; #endif + cpuid_to_apicid[cpu] = apic_id; set_cpu_possible(cpu, true); - set_bit(apicid, phys_cpu_present_map); + set_bit(apic_id, phys_cpu_present_map); set_cpu_present(cpu, true); - num_processors++; if (system_state != SYSTEM_BOOTING) - cpu_mark_primary_thread(cpu, apicid); + cpu_mark_primary_thread(cpu, apic_id); } static int generic_processor_info(int apicid) @@ -137,18 +135,18 @@ static int generic_processor_info(int apicid) return 0; if (disabled_cpu_apicid == apicid) { - int thiscpu = num_processors + disabled_cpus; + int thiscpu = topo_info.nr_assigned_cpus + topo_info.nr_disabled_cpus; pr_warn("APIC: Disabling requested cpu. Processor %d/0x%x ignored.\n", thiscpu, apicid); - disabled_cpus++; + topo_info.nr_rejected_cpus++; return -ENODEV; } - if (num_processors >= nr_cpu_ids) { + if (topo_info.nr_assigned_cpus >= nr_cpu_ids) { pr_warn_once("APIC: CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids); - disabled_cpus++; + topo_info.nr_rejected_cpus++; return -ENOSPC; } @@ -178,14 +176,16 @@ static int __initdata setup_possible_cpus = -1; */ __init void prefill_possible_map(void) { + unsigned int num_processors = topo_info.nr_assigned_cpus; + unsigned int disabled_cpus = topo_info.nr_disabled_cpus; int i, possible; i = setup_max_cpus ?: 1; if (setup_possible_cpus == -1) { - possible = num_processors; + possible = topo_info.nr_assigned_cpus; #ifdef CONFIG_HOTPLUG_CPU if (setup_max_cpus) - possible += disabled_cpus; + possible += num_processors; #else if (possible > i) possible = i; @@ -238,7 +238,7 @@ void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) } if (!present) { - disabled_cpus++; + topo_info.nr_disabled_cpus++; return; } @@ -295,7 +295,6 @@ void topology_hotunplug_apic(unsigned int cpu) per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; clear_bit(apic_id, phys_cpu_present_map); set_cpu_present(cpu, false); - num_processors--; } #endif From 3d11222206946a0888c318b8a74780a04cfabcbc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:50 +0100 Subject: [PATCH 080/109] x86/smpboot: Make error message actually useful commit 6055f6cf0d462fa0d9212a8279b6b0d1130552e1 upstream. "smpboot: native_kick_ap: bad cpu 33" is absolutely useless information. Replace it with something meaningful which allows to decode the failure condition. Intel-SIG: commit 6055f6cf0d46 x86/smpboot: Make error message actually useful. 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.170806023@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/smpboot.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f7299151947df..7f04d3490b120 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1064,9 +1064,13 @@ int native_kick_ap(unsigned int cpu, struct task_struct *tidle) pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); - if (apicid == BAD_APICID || !test_bit(apicid, phys_cpu_present_map) || - !apic_id_valid(apicid)) { - pr_err("%s: bad cpu %d\n", __func__, cpu); + if (apicid == BAD_APICID || !apic_id_valid(apicid)) { + pr_err("CPU %u has invalid APIC ID %x. Aborting bringup\n", cpu, apicid); + return -EINVAL; + } + + if (!test_bit(apicid, phys_cpu_present_map)) { + pr_err("CPU %u APIC ID %x is not present. Aborting bringup\n", cpu, apicid); return -EINVAL; } From cb0ff074b97675faf36a0939ce545647e0445c64 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:52 +0100 Subject: [PATCH 081/109] x86/cpu/topology: Sanitize the APIC admission logic commit 0e53e7b656cf5aa67c08eca381cec858478195a7 upstream. Move the actually required content of generic_processor_id() into the call sites and use common helper functions for them. This separates the early boot registration and the ACPI hotplug mechanism completely which allows further cleanups and improvements. Intel-SIG: commit 0e53e7b656cf x86/cpu/topology: Sanitize the APIC admission logic. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.230433953@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 159 ++++++++++++++++----------------- 1 file changed, 77 insertions(+), 82 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 815e3eeed4159..80625f4194de5 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -30,8 +30,10 @@ static struct { unsigned int nr_assigned_cpus; unsigned int nr_disabled_cpus; unsigned int nr_rejected_cpus; + u32 boot_cpu_apic_id; } topo_info __read_mostly = { .nr_assigned_cpus = 1, + .boot_cpu_apic_id = BAD_APICID, }; /* @@ -83,78 +85,6 @@ early_initcall(smp_init_primary_thread_mask); static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif -static int topo_lookup_cpuid(u32 apic_id) -{ - int i; - - /* CPU# to APICID mapping is persistent once it is established */ - for (i = 0; i < topo_info.nr_assigned_cpus; i++) { - if (cpuid_to_apicid[i] == apic_id) - return i; - } - return -ENODEV; -} - -/* - * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids - * and cpuid_to_apicid[] synchronized. 
- */ -static int allocate_logical_cpuid(u32 apic_id) -{ - int cpu = topo_lookup_cpuid(apic_id); - - if (cpu >= 0) - return cpu; - - return topo_info.nr_assigned_cpus++; -} - -static void cpu_update_apic(unsigned int cpu, u32 apic_id) -{ -#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) - early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; -#endif - cpuid_to_apicid[cpu] = apic_id; - set_cpu_possible(cpu, true); - set_bit(apic_id, phys_cpu_present_map); - set_cpu_present(cpu, true); - - if (system_state != SYSTEM_BOOTING) - cpu_mark_primary_thread(cpu, apic_id); -} - -static int generic_processor_info(int apicid) -{ - int cpu; - - /* The boot CPU must be set before MADT/MPTABLE parsing happens */ - if (cpuid_to_apicid[0] == BAD_APICID) - panic("Boot CPU APIC not registered yet\n"); - - if (apicid == boot_cpu_physical_apicid) - return 0; - - if (disabled_cpu_apicid == apicid) { - int thiscpu = topo_info.nr_assigned_cpus + topo_info.nr_disabled_cpus; - - pr_warn("APIC: Disabling requested cpu. Processor %d/0x%x ignored.\n", - thiscpu, apicid); - - topo_info.nr_rejected_cpus++; - return -ENODEV; - } - - if (topo_info.nr_assigned_cpus >= nr_cpu_ids) { - pr_warn_once("APIC: CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids); - topo_info.nr_rejected_cpus++; - return -ENOSPC; - } - - cpu = allocate_logical_cpuid(apicid); - cpu_update_apic(cpu, apicid); - return cpu; -} - static int __initdata setup_possible_cpus = -1; /* @@ -222,6 +152,43 @@ __init void prefill_possible_map(void) set_cpu_possible(i, true); } +static int topo_lookup_cpuid(u32 apic_id) +{ + int i; + + /* CPU# to APICID mapping is persistent once it is established */ + for (i = 0; i < topo_info.nr_assigned_cpus; i++) { + if (cpuid_to_apicid[i] == apic_id) + return i; + } + return -ENODEV; +} + +static int topo_get_cpunr(u32 apic_id) +{ + int cpu = topo_lookup_cpuid(apic_id); + + if (cpu >= 0) + return cpu; + + return topo_info.nr_assigned_cpus++; +} + +static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) +{ +#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) + early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; + early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; +#endif + cpuid_to_apicid[cpu] = apic_id; + + set_cpu_possible(cpu, true); + set_cpu_present(cpu, true); + + if (system_state != SYSTEM_BOOTING) + cpu_mark_primary_thread(cpu, apic_id); +} + /** * topology_register_apic - Register an APIC in early topology maps * @apic_id: The APIC ID to set up @@ -234,17 +201,40 @@ void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) if (apic_id >= MAX_LOCAL_APIC) { pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1); + topo_info.nr_rejected_cpus++; return; } - if (!present) { - topo_info.nr_disabled_cpus++; + if (disabled_cpu_apicid == apic_id) { + pr_info("Disabling CPU as requested via 'disable_cpu_apicid=0x%x'.\n", apic_id); + topo_info.nr_rejected_cpus++; return; } - cpu = generic_processor_info(apic_id); - if (cpu >= 0) - early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; + /* CPU numbers exhausted? */ + if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) { + pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids); + topo_info.nr_rejected_cpus++; + return; + } + + if (present) { + set_bit(apic_id, phys_cpu_present_map); + + /* + * Double registration is valid in case of the boot CPU + * APIC because that is registered before the enumeration + * of the APICs via firmware parsers or VM guest + * mechanisms. 
+ */ + if (apic_id == topo_info.boot_cpu_apic_id) + cpu = 0; + else + cpu = topo_get_cpunr(apic_id); + topo_set_cpuids(cpu, apic_id, acpi_id); + } else { + topo_info.nr_disabled_cpus++; + } } /** @@ -255,8 +245,10 @@ void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) */ void __init topology_register_boot_apic(u32 apic_id) { - cpuid_to_apicid[0] = apic_id; - cpu_update_apic(0, apic_id); + WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID); + + topo_info.boot_cpu_apic_id = apic_id; + topology_register_apic(apic_id, CPU_ACPIID_INVALID, true); } #ifdef CONFIG_ACPI_HOTPLUG_CPU @@ -274,10 +266,13 @@ int topology_hotplug_apic(u32 apic_id, u32 acpi_id) cpu = topo_lookup_cpuid(apic_id); if (cpu < 0) { - cpu = generic_processor_info(apic_id); - if (cpu >= 0) - per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; + if (topo_info.nr_assigned_cpus >= nr_cpu_ids) + return -ENOSPC; + + cpu = topo_assign_cpunr(apic_id); } + set_bit(apic_id, phys_cpu_present_map); + topo_set_cpuids(cpu, apic_id, acpi_id); return cpu; } From afd51213c0181c855476e1354aff02c2b9d5be93 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:53 +0100 Subject: [PATCH 082/109] x86/cpu/topology: Rework possible CPU management commit 7c0edad3643f4493c4dafa6f5dfcfb1a86432156 upstream. Managing possible CPUs is an unreadable and uncomprehensible maze. Aside of that it's backwards because it applies command line limits after registering all APICs. Rewrite it so that it: - Applies the command line limits upfront so that only the allowed amount of APIC IDs can be registered. - Applies eventual late restrictions in an understandable way - Uses simple min_t() calculations which are trivial to follow. - Provides a separate function for resetting to UP mode late in the bringup process. Intel-SIG: commit 7c0edad3643f x86/cpu/topology: Rework possible CPU management. 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.290098853@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 5 + arch/x86/include/asm/cpu.h | 10 +- arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/cpu/topology.c | 176 +++++++++++++++++++------------- arch/x86/kernel/setup.c | 9 +- arch/x86/kernel/smpboot.c | 6 +- 6 files changed, 118 insertions(+), 89 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index e29b5f17e0ba8..5a6ca11bcf4fe 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -176,6 +176,9 @@ extern void topology_register_apic(u32 apic_id, u32 acpi_id, bool present); extern void topology_register_boot_apic(u32 apic_id); extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id); extern void topology_hotunplug_apic(unsigned int cpu); +extern void topology_apply_cmdline_limits_early(void); +extern void topology_init_possible_cpus(void); +extern void topology_reset_possible_cpus_up(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -191,6 +194,8 @@ static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } static inline bool apic_needs_pit(void) { return true; } +static inline void topology_apply_cmdline_limits_early(void) { } +static inline void topology_init_possible_cpus(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index fecc4fe1d68af..e51f5c369ece0 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -9,18 +9,10 @@ #include #include -#ifdef CONFIG_SMP - -extern void prefill_possible_map(void); - -#else /* CONFIG_SMP */ - -static inline void prefill_possible_map(void) {} - +#ifndef CONFIG_SMP #define cpu_physical_id(cpu) boot_cpu_physical_apicid #define cpu_acpi_id(cpu) 0 #define safe_smp_processor_id() 0 - #endif /* CONFIG_SMP */ struct x86_cpu { diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 8e2a18a9eb874..b9254b482c0b0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -192,6 +192,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_primary_thread_mask); } + #else /* CONFIG_SMP */ #define topology_max_packages() (1) #define logical_packages (1) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 80625f4194de5..fc47f5216f9af 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -85,73 +86,6 @@ early_initcall(smp_init_primary_thread_mask); static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif -static int __initdata setup_possible_cpus = -1; - -/* - * cpu_possible_mask should be static, it cannot change as cpu's - * are onlined, or offlined. The reason is per-cpu data-structures - * are allocated by some modules at init time, and don't expect to - * do this dynamically on cpu arrival/departure. - * cpu_present_mask on the other hand can change dynamically. - * In case when cpu_hotplug is not compiled, then we resort to current - * behaviour, which is cpu_possible == cpu_present. 
- * - Ashok Raj - * - * Three ways to find out the number of additional hotplug CPUs: - * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with possible_cpus=NUM - * - Otherwise don't reserve additional CPUs. - * We do this because additional CPUs waste a lot of memory. - * -AK - */ -__init void prefill_possible_map(void) -{ - unsigned int num_processors = topo_info.nr_assigned_cpus; - unsigned int disabled_cpus = topo_info.nr_disabled_cpus; - int i, possible; - - i = setup_max_cpus ?: 1; - if (setup_possible_cpus == -1) { - possible = topo_info.nr_assigned_cpus; -#ifdef CONFIG_HOTPLUG_CPU - if (setup_max_cpus) - possible += num_processors; -#else - if (possible > i) - possible = i; -#endif - } else - possible = setup_possible_cpus; - - total_cpus = max_t(int, possible, num_processors + disabled_cpus); - - /* nr_cpu_ids could be reduced via nr_cpus= */ - if (possible > nr_cpu_ids) { - pr_warn("%d Processors exceeds NR_CPUS limit of %u\n", - possible, nr_cpu_ids); - possible = nr_cpu_ids; - } - -#ifdef CONFIG_HOTPLUG_CPU - if (!setup_max_cpus) -#endif - if (possible > i) { - pr_warn("%d Processors exceeds max_cpus limit of %u\n", - possible, setup_max_cpus); - possible = i; - } - - set_nr_cpu_ids(possible); - - pr_info("Allowing %d CPUs, %d hotplug CPUs\n", - possible, max_t(int, possible - num_processors, 0)); - - reset_cpu_possible_mask(); - - for (i = 0; i < possible; i++) - set_cpu_possible(i, true); -} - static int topo_lookup_cpuid(u32 apic_id) { int i; @@ -293,12 +227,114 @@ void topology_hotunplug_apic(unsigned int cpu) } #endif -static int __init _setup_possible_cpus(char *str) +#ifdef CONFIG_SMP +static unsigned int max_possible_cpus __initdata = NR_CPUS; + +/** + * topology_apply_cmdline_limits_early - Apply topology command line limits early + * + * Ensure that command line limits are in effect before firmware parsing + * takes place. + */ +void __init topology_apply_cmdline_limits_early(void) { - get_option(&str, &setup_possible_cpus); + unsigned int possible = nr_cpu_ids; + + /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */ + if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled) + possible = 1; + + /* 'possible_cpus=N' */ + possible = min_t(unsigned int, max_possible_cpus, possible); + + if (possible < nr_cpu_ids) { + pr_info("Limiting to %u possible CPUs\n", possible); + set_nr_cpu_ids(possible); + } +} + +static __init bool restrict_to_up(void) +{ + if (!smp_found_config || ioapic_is_disabled) + return true; + /* + * XEN PV is special as it does not advertise the local APIC + * properly, but provides a fake topology for it so that the + * infrastructure works. So don't apply the restrictions vs. APIC + * here. 
+ */ + if (xen_pv_domain()) + return false; + + return apic_is_disabled; +} + +void __init topology_init_possible_cpus(void) +{ + unsigned int assigned = topo_info.nr_assigned_cpus; + unsigned int disabled = topo_info.nr_disabled_cpus; + unsigned int total = assigned + disabled; + unsigned int cpu, allowed = 1; + + if (!restrict_to_up()) { + if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { + disabled += assigned - nr_cpu_ids; + assigned = nr_cpu_ids; + } + allowed = min_t(unsigned int, total, nr_cpu_ids); + } + + if (total > allowed) + pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed); + + assigned = min_t(unsigned int, allowed, assigned); + disabled = allowed - assigned; + + topo_info.nr_assigned_cpus = assigned; + topo_info.nr_disabled_cpus = disabled; + + total_cpus = allowed; + set_nr_cpu_ids(allowed); + + pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); + if (topo_info.nr_rejected_cpus) + pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus); + + init_cpu_present(cpumask_of(0)); + init_cpu_possible(cpumask_of(0)); + + for (cpu = 0; cpu < allowed; cpu++) { + u32 apicid = cpuid_to_apicid[cpu]; + + set_cpu_possible(cpu, true); + + if (apicid == BAD_APICID) + continue; + + set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map)); + } +} + +/* + * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed. + */ +void __init topology_reset_possible_cpus_up(void) +{ + init_cpu_present(cpumask_of(0)); + init_cpu_possible(cpumask_of(0)); + + bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC); + if (topo_info.boot_cpu_apic_id != BAD_APICID) + set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map); +} + +static int __init setup_possible_cpus(char *str) +{ + get_option(&str, &max_possible_cpus); return 0; } -early_param("possible_cpus", _setup_possible_cpus); +early_param("possible_cpus", setup_possible_cpus); +#endif static int __init apic_set_disabled_cpu_apicid(char *arg) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4e092972acf35..b4e3f1219afe8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1259,6 +1259,8 @@ void __init setup_arch(char **cmdline_p) early_quirks(); + topology_apply_cmdline_limits_early(); + /* * Parse SMP configuration. Try ACPI first and then the platform * specific parser. @@ -1266,13 +1268,10 @@ void __init setup_arch(char **cmdline_p) acpi_boot_init(); x86_init.mpparse.parse_smp_cfg(); - /* - * Systems w/o ACPI and mptables might not have it mapped the local - * APIC yet, but prefill_possible_map() might need to access it. - */ + /* Last opportunity to detect and map the local APIC */ init_apic_mappings(); - prefill_possible_map(); + topology_init_possible_cpus(); init_cpu_to_node(); init_gi_nodes(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7f04d3490b120..075ebea611f52 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1139,11 +1139,7 @@ static __init void disable_smp(void) pr_info("SMP disabled\n"); disable_ioapic_support(); - - init_cpu_present(cpumask_of(0)); - init_cpu_possible(cpumask_of(0)); - - reset_phys_cpu_present_map(smp_found_config ? 
boot_cpu_physical_apicid : 0); + topology_reset_possible_cpus_up(); cpumask_set_cpu(0, topology_sibling_cpumask(0)); cpumask_set_cpu(0, topology_core_cpumask(0)); From 1f073934947b486e7c77c35c3a334fe97e117be2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:54 +0100 Subject: [PATCH 083/109] x86/cpu: Detect real BSP on crash kernels commit 5c5682b9f87a3b7bd4833884f300ec673685f6a6 upstream. When a kdump kernel is started from a crashing CPU then there is no guarantee that this CPU is the real boot CPU (BSP). If the kdump kernel tries to online the BSP then the INIT sequence will reset the machine. There is a command line option to prevent this, but in case of nested kdump kernels this is wrong. But that command line option is not required at all because the real BSP is enumerated as the first CPU by firmware. Support for the only known system which was different (Voyager) got removed long ago. Detect whether the boot CPU APIC ID is the first APIC ID enumerated by the firmware. If the first APIC ID enumerated is not matching the boot CPU APIC ID then skip registering it. Intel-SIG: commit 5c5682b9f87a x86/cpu: Detect real BSP on crash kernels. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.348542071@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- Documentation/admin-guide/kdump/kdump.rst | 7 +- .../admin-guide/kernel-parameters.txt | 9 -- arch/x86/kernel/cpu/topology.c | 97 +++++++++++-------- 3 files changed, 61 insertions(+), 52 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index a748e7eb4429b..7ab6b46f12440 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -191,9 +191,7 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64) CPU is enough for kdump kernel to dump vmcore on most of systems. However, you can also specify nr_cpus=X to enable multiple processors - in kdump kernel. In this case, "disable_cpu_apicid=" is needed to - tell kdump kernel which cpu is 1st kernel's BSP. Please refer to - admin-guide/kernel-parameters.txt for more details. + in kdump kernel. With CONFIG_SMP=n, the above things are not related. @@ -485,8 +483,7 @@ Notes on loading the dump-capture kernel: to use multi-thread programs with it, such as parallel dump feature of makedumpfile. Otherwise, the multi-thread program may have a great performance degradation. To enable multi-cpu support, you should bring up an - SMP dump-capture kernel and specify maxcpus/nr_cpus, disable_cpu_apicid=[X] - options while loading it. + SMP dump-capture kernel and specify maxcpus/nr_cpus options while loading it. * For s390x there are two kdump modes: If a ELF header is specified with the elfcorehdr= kernel parameter, it is used by the kdump kernel as it diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4e954c52d72ba..00d7fa98ed1af 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1078,15 +1078,6 @@ Disable TLBIE instruction. Currently does not work with KVM, with HASH MMU, or with coherent accelerators. 
- disable_cpu_apicid= [X86,APIC,SMP] - Format: - The number of initial APIC ID for the - corresponding CPU to be disabled at boot, - mostly used for the kdump 2nd kernel to - disable BSP to wake up multiple CPUs without - causing system reset or hang due to sending - INIT from AP to BSP. - disable_ddw [PPC/PSERIES] Disable Dynamic DMA Window support. Use this to workaround buggy firmware. diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index fc47f5216f9af..d55533170ba0f 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -32,18 +32,13 @@ static struct { unsigned int nr_disabled_cpus; unsigned int nr_rejected_cpus; u32 boot_cpu_apic_id; + u32 real_bsp_apic_id; } topo_info __read_mostly = { .nr_assigned_cpus = 1, .boot_cpu_apic_id = BAD_APICID, + .real_bsp_apic_id = BAD_APICID, }; -/* - * Processor to be disabled specified by kernel parameter - * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to - * avoid undefined behaviour caused by sending INIT from AP to BSP. - */ -static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID; - bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return phys_id == (u64)cpuid_to_apicid[cpu]; @@ -123,6 +118,60 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) cpu_mark_primary_thread(cpu, apic_id); } +static __init bool check_for_real_bsp(u32 apic_id) +{ + /* + * There is no real good way to detect whether this a kdump() + * kernel, but except on the Voyager SMP monstrosity which is not + * longer supported, the real BSP APIC ID is the first one which is + * enumerated by firmware. That allows to detect whether the boot + * CPU is the real BSP. If it is not, then do not register the APIC + * because sending INIT to the real BSP would reset the whole + * system. + * + * The first APIC ID which is enumerated by firmware is detectable + * because the boot CPU APIC ID is registered before that without + * invoking this code. + */ + if (topo_info.real_bsp_apic_id != BAD_APICID) + return false; + + if (apic_id == topo_info.boot_cpu_apic_id) { + topo_info.real_bsp_apic_id = apic_id; + return false; + } + + pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n", + topo_info.boot_cpu_apic_id, apic_id); + pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n"); + + topo_info.real_bsp_apic_id = apic_id; + return true; +} + +static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) +{ + int cpu; + + if (present) { + set_bit(apic_id, phys_cpu_present_map); + + /* + * Double registration is valid in case of the boot CPU + * APIC because that is registered before the enumeration + * of the APICs via firmware parsers or VM guest + * mechanisms. 
+ */ + if (apic_id == topo_info.boot_cpu_apic_id) + cpu = 0; + else + cpu = topo_get_cpunr(apic_id); + topo_set_cpuids(cpu, apic_id, acpi_id); + } else { + topo_info.nr_disabled_cpus++; + } +} + /** * topology_register_apic - Register an APIC in early topology maps * @apic_id: The APIC ID to set up @@ -131,16 +180,13 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) */ void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) { - int cpu; - if (apic_id >= MAX_LOCAL_APIC) { pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1); topo_info.nr_rejected_cpus++; return; } - if (disabled_cpu_apicid == apic_id) { - pr_info("Disabling CPU as requested via 'disable_cpu_apicid=0x%x'.\n", apic_id); + if (check_for_real_bsp(apic_id)) { topo_info.nr_rejected_cpus++; return; } @@ -152,23 +198,7 @@ void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) return; } - if (present) { - set_bit(apic_id, phys_cpu_present_map); - - /* - * Double registration is valid in case of the boot CPU - * APIC because that is registered before the enumeration - * of the APICs via firmware parsers or VM guest - * mechanisms. - */ - if (apic_id == topo_info.boot_cpu_apic_id) - cpu = 0; - else - cpu = topo_get_cpunr(apic_id); - topo_set_cpuids(cpu, apic_id, acpi_id); - } else { - topo_info.nr_disabled_cpus++; - } + topo_register_apic(apic_id, acpi_id, present); } /** @@ -182,7 +212,7 @@ void __init topology_register_boot_apic(u32 apic_id) WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID); topo_info.boot_cpu_apic_id = apic_id; - topology_register_apic(apic_id, CPU_ACPIID_INVALID, true); + topo_register_apic(apic_id, CPU_ACPIID_INVALID, true); } #ifdef CONFIG_ACPI_HOTPLUG_CPU @@ -335,12 +365,3 @@ static int __init setup_possible_cpus(char *str) } early_param("possible_cpus", setup_possible_cpus); #endif - -static int __init apic_set_disabled_cpu_apicid(char *arg) -{ - if (!arg || !get_option(&arg, &disabled_cpu_apicid)) - return -EINVAL; - - return 0; -} -early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); From 43ee9b587e69245fe693de6c4bfcee0212845e78 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:56 +0100 Subject: [PATCH 084/109] x86/topology: Add a mechanism to track topology via APIC IDs commit f1f758a80516775b5d12d7c93cbedb2a08cd4c98 upstream. Topology on X86 is determined by the registered APIC IDs and the segmentation information retrieved from CPUID. Depending on the granularity of the provided CPUID information the most fine grained scheme looks like this according to Intel terminology: [PKG][DIEGRP][DIE][TILE][MODULE][CORE][THREAD] Not enumerated domain levels consume 0 bits in the APIC ID. This allows to provide a consistent view at the topology and determine other information precisely like the number of cores in a package on hybrid systems, where the existing assumption that number or cores == number of threads / threads per core does not hold. Provide per domain level bitmaps which record the APIC ID split into the domain levels to make later evaluation of domain level specific information simple. This allows to calculate e.g. the logical IDs without any further extra logic. Contrary to the existing registration mechanism this records disabled CPUs, which are subject to later hotplug as well. That's useful for boot time sizing of package or die dependent allocations without using heuristics. 
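As an illustration of the decomposition described above, here is a minimal, standalone userspace sketch of the masking idea behind the topo_apicid() helper and the per-domain bitmap registration that this patch introduces. It is not kernel code: the domain shift values and the example APIC ID are made-up assumptions chosen only to show how levels that consume zero bits collapse onto the same ID.

/*
 * Standalone userspace sketch (not kernel code). The shift values and the
 * example APIC ID are illustrative assumptions, not real enumeration data.
 */
#include <stdio.h>
#include <limits.h>

enum topo_domain { DOM_SMT, DOM_CORE, DOM_MODULE, DOM_TILE, DOM_DIE, DOM_DIEGRP, DOM_PKG, DOM_MAX };

/* dom_shifts[d]: APIC ID bits consumed by all levels up to and including d */
static const unsigned int dom_shifts[DOM_MAX] = {
	[DOM_SMT] = 1, [DOM_CORE] = 4, [DOM_MODULE] = 4,
	[DOM_TILE] = 4, [DOM_DIE] = 5, [DOM_DIEGRP] = 5, [DOM_PKG] = 5,
};

/* Mask out the APIC ID bits below domain @dom; the SMT level keeps the full ID */
static unsigned int domain_apicid(unsigned int apicid, enum topo_domain dom)
{
	return dom == DOM_SMT ? apicid : apicid & (UINT_MAX << dom_shifts[dom - 1]);
}

int main(void)
{
	/* Assumed layout: bit 0 = thread, bits 1-3 = core, bit 4 = die, bits 5+ = package */
	unsigned int apicid = 0x2b;

	for (int dom = DOM_SMT; dom < DOM_MAX; dom++)
		printf("domain %d id: 0x%02x\n", dom, domain_apicid(apicid, dom));
	return 0;
}

Registering every APIC ID, masked per level like this, in one bitmap per domain is what later allows the sizing code to derive package, die and SMT counts directly from bitmap weights instead of heuristics.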
Intel-SIG: commit f1f758a80516 x86/topology: Add a mechanism to track topology via APIC IDs. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.406985021@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 48 ++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index d55533170ba0f..c67120657e556 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -1,5 +1,27 @@ // SPDX-License-Identifier: GPL-2.0-only - +/* + * CPU/APIC topology + * + * The APIC IDs describe the system topology in multiple domain levels. + * The CPUID topology parser provides the information which part of the + * APIC ID is associated to the individual levels: + * + * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD] + * + * The root space contains the package (socket) IDs. + * + * Not enumerated levels consume 0 bits space, but conceptually they are + * always represented. If e.g. only CORE and THREAD levels are enumerated + * then the DIE, MODULE and TILE have the same physical ID as the PACKAGE. + * + * If SMT is not supported, then the THREAD domain is still used. It then + * has the same physical ID as the CORE domain and is the only child of + * the core domain. + * + * This allows a unified view on the system independent of the enumerated + * domain levels without requiring any conditionals in the code. + */ +#define pr_fmt(fmt) "CPU topo: " fmt #include #include @@ -9,6 +31,8 @@ #include #include +#include "cpu.h" + /* * Map cpu index to physical APIC ID */ @@ -23,6 +47,9 @@ DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly; /* Used for CPU number allocation and parallel CPU bringup */ u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; +/* Bitmaps to mark registered APICs at each topology domain */ +static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init; + /* * Keep track of assigned, disabled and rejected CPUs. Present assigned * with 1 as CPU #0 is reserved for the boot CPU. @@ -39,6 +66,8 @@ static struct { .real_bsp_apic_id = BAD_APICID, }; +#define domain_weight(_dom) bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC) + bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return phys_id == (u64)cpuid_to_apicid[cpu]; @@ -81,6 +110,17 @@ early_initcall(smp_init_primary_thread_mask); static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif +/* + * Convert the APIC ID to a domain level ID by masking out the low bits + * below the domain level @dom. 
+ */ +static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom) +{ + if (dom == TOPO_SMT_DOMAIN) + return apicid; + return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]); +} + static int topo_lookup_cpuid(u32 apic_id) { int i; @@ -151,7 +191,7 @@ static __init bool check_for_real_bsp(u32 apic_id) static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) { - int cpu; + int cpu, dom; if (present) { set_bit(apic_id, phys_cpu_present_map); @@ -170,6 +210,10 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) } else { topo_info.nr_disabled_cpus++; } + + /* Register present and possible CPUs in the domain maps */ + for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) + set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map); } /** From 864e2a0ef8c10be6fe97584161dbffb5334dec11 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:57 +0100 Subject: [PATCH 085/109] x86/cpu/topology: Reject unknown APIC IDs on ACPI hotplug commit 7cdcdab1a660bbe9f98bf1591c048ce7ccee59e0 upstream. The topology bitmaps track all possible APIC IDs which have been registered during enumeration. As sizing and further topology information is going to be derived from these bitmaps, reject attempts to hotplug an APIC ID which was not registered during enumeration. Intel-SIG: commit 7cdcdab1a660 x86/cpu/topology: Reject unknown APIC IDs on ACPI hotplug. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.462231229@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index c67120657e556..a6d045b15ebdc 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -272,6 +272,10 @@ int topology_hotplug_apic(u32 apic_id, u32 acpi_id) if (apic_id >= MAX_LOCAL_APIC) return -EINVAL; + /* Reject if the APIC ID was not registered during enumeration. */ + if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map)) + return -ENODEV; + cpu = topo_lookup_cpuid(apic_id); if (cpu < 0) { if (topo_info.nr_assigned_cpus >= nr_cpu_ids) From 31ad02b0b37b9a63361308f8c7f7947394b2a109 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:59 +0100 Subject: [PATCH 086/109] x86/cpu/topology: Assign hotpluggable CPUIDs during init commit ea2dd8a5d4361ef0b000196043fa407f05b16f1d upstream. There is no point in assigning the CPU numbers during ACPI physical hotplug. The number of possible hotplug CPUs is known when the possible map is initialized, so the CPU numbers can be associated to the registered non-present APIC IDs right there. This allows to put more code into the __init section and makes the related data __ro_after_init. Intel-SIG: commit ea2dd8a5d436 x86/cpu/topology: Assign hotpluggable CPUIDs during init. 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.517339971@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index a6d045b15ebdc..07124da9df717 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -45,7 +45,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly; /* Used for CPU number allocation and parallel CPU bringup */ -u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; +u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, }; /* Bitmaps to mark registered APICs at each topology domain */ static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init; @@ -60,7 +60,7 @@ static struct { unsigned int nr_rejected_cpus; u32 boot_cpu_apic_id; u32 real_bsp_apic_id; -} topo_info __read_mostly = { +} topo_info __ro_after_init = { .nr_assigned_cpus = 1, .boot_cpu_apic_id = BAD_APICID, .real_bsp_apic_id = BAD_APICID, @@ -133,7 +133,7 @@ static int topo_lookup_cpuid(u32 apic_id) return -ENODEV; } -static int topo_get_cpunr(u32 apic_id) +static __init int topo_get_cpunr(u32 apic_id) { int cpu = topo_lookup_cpuid(apic_id); @@ -149,8 +149,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; #endif - cpuid_to_apicid[cpu] = apic_id; - set_cpu_possible(cpu, true); set_cpu_present(cpu, true); @@ -206,6 +204,8 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) cpu = 0; else cpu = topo_get_cpunr(apic_id); + + cpuid_to_apicid[cpu] = apic_id; topo_set_cpuids(cpu, apic_id, acpi_id); } else { topo_info.nr_disabled_cpus++; @@ -277,12 +277,9 @@ int topology_hotplug_apic(u32 apic_id, u32 acpi_id) return -ENODEV; cpu = topo_lookup_cpuid(apic_id); - if (cpu < 0) { - if (topo_info.nr_assigned_cpus >= nr_cpu_ids) - return -ENOSPC; + if (cpu < 0) + return -ENOSPC; - cpu = topo_assign_cpunr(apic_id); - } set_bit(apic_id, phys_cpu_present_map); topo_set_cpuids(cpu, apic_id, acpi_id); return cpu; @@ -353,6 +350,7 @@ void __init topology_init_possible_cpus(void) unsigned int disabled = topo_info.nr_disabled_cpus; unsigned int total = assigned + disabled; unsigned int cpu, allowed = 1; + u32 apicid; if (!restrict_to_up()) { if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { @@ -381,8 +379,17 @@ void __init topology_init_possible_cpus(void) init_cpu_present(cpumask_of(0)); init_cpu_possible(cpumask_of(0)); + /* Assign CPU numbers to non-present CPUs */ + for (apicid = 0; disabled; disabled--, apicid++) { + apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map, + MAX_LOCAL_APIC, apicid); + if (apicid >= MAX_LOCAL_APIC) + break; + cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid; + } + for (cpu = 0; cpu < allowed; cpu++) { - u32 apicid = cpuid_to_apicid[cpu]; + apicid = cpuid_to_apicid[cpu]; set_cpu_possible(cpu, true); From 14347039f3dd0a856180587a74d5641fb625caed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:00 +0100 Subject: [PATCH 087/109] x86/xen/smp_pv: Count number of vCPUs early commit 
c8f808231f1fb63553f90d4b3796cb6804d1e693 upstream. XEN/PV has a completely broken vCPU enumeration scheme, which just works by chance and provides zero topology information. Each vCPU ends up being a single core package. Dom0 provides MADT which can be used for topology information, but that table is the unmodified host table, which means that there can be more CPUs registered than the number of vCPUs XEN provides for the dom0 guest. DomU does not have ACPI and both rely on counting the possible vCPUs via an hypercall. To prepare for using CPUID topology information either via MADT or via fake APIC IDs count the number of possible CPUs during early boot and adjust nr_cpu_ids() accordingly. Intel-SIG: commit c8f808231f1f x86/xen/smp_pv: Count number of vCPUs early. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.571795063@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/enlighten_pv.c | 3 +++ arch/x86/xen/smp.h | 2 ++ arch/x86/xen/smp_pv.c | 14 ++++++++++++++ 3 files changed, 19 insertions(+) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 3d4a76566228d..68f619076ed6c 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -200,6 +200,9 @@ static void __init xen_pv_init_platform(void) xen_set_mtrr_data(); else mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); + + /* Adjust nr_cpu_ids before "enumeration" happens */ + xen_smp_count_cpus(); } static void __init xen_pv_guest_late_init(void) diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index c20cbb14c82ba..b8efdbc693f7a 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -19,6 +19,7 @@ extern void xen_smp_intr_free(unsigned int cpu); int xen_smp_intr_init_pv(unsigned int cpu); void xen_smp_intr_free_pv(unsigned int cpu); +void xen_smp_count_cpus(void); void xen_smp_cpus_done(unsigned int max_cpus); void xen_smp_send_reschedule(int cpu); @@ -44,6 +45,7 @@ static inline int xen_smp_intr_init_pv(unsigned int cpu) return 0; } static inline void xen_smp_intr_free_pv(unsigned int cpu) {} +static inline void xen_smp_count_cpus(void) { } #endif /* CONFIG_SMP */ #endif diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 98849b3f5eac2..44706f01bb9bc 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -411,6 +411,20 @@ static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +void __init xen_smp_count_cpus(void) +{ + unsigned int cpus; + + for (cpus = 0; cpus < nr_cpu_ids; cpus++) { + if (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpus, NULL) < 0) + break; + } + + pr_info("Xen PV: Detected %u vCPUS\n", cpus); + if (cpus < nr_cpu_ids) + set_nr_cpu_ids(cpus); +} + static const struct smp_ops xen_smp_ops __initconst = { .smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_pv_smp_prepare_cpus, From 58b294dcb1f31dcde8101ffc00ef11e69424c10b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:02 +0100 Subject: [PATCH 088/109] x86/cpu/topology: Let XEN/PV use topology from CPUID/MADT commit 354da4cf57af5d8b5302251204d6077600b6d3d6 upstream. It turns out that XEN/PV Dom0 has halfways usable CPUID/MADT enumeration except that it cannot deal with CPUs which are enumerated as disabled in MADT. DomU has no MADT and provides at least rudimentary topology information in CPUID leaves 1 and 4. 
For both it's important that there are not more possible Linux CPUs than vCPUs provided by the hypervisor. As this is ensured by counting the vCPUs before enumeration happens: - lift the restrictions in the CPUID evaluation and the MADT parser - Utilize MADT registration for Dom0 - Keep the fake APIC ID registration for DomU - Fix the XEN APIC fake so the readout of the local APIC ID works for Dom0 via the hypercall and for DomU by returning the registered fake APIC IDs. With that the XEN/PV fake approximates usefulness. Intel-SIG: commit 354da4cf57af x86/cpu/topology: Let XEN/PV use topology from CPUID/MADT. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.626195405@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/acpi/boot.c | 25 ++++++++----------------- arch/x86/kernel/cpu/topology_common.c | 2 +- arch/x86/xen/apic.c | 14 +++++++------- arch/x86/xen/smp_pv.c | 13 ++++++++----- 4 files changed, 24 insertions(+), 30 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6c26e2116202b..8bea0db2a7e6a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -23,8 +23,6 @@ #include #include -#include - #include #include #include @@ -158,12 +156,6 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) return 0; } -static __init void acpi_register_lapic(u32 apic_id, u32 acpi_id, bool present) -{ - if (!xen_pv_domain()) - topology_register_apic(apic_id, acpi_id, present); -} - static bool __init acpi_is_processor_usable(u32 lapic_flags) { if (lapic_flags & ACPI_MADT_ENABLED) @@ -217,7 +209,7 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) return 0; } - acpi_register_lapic(apic_id, processor->uid, enabled); + topology_register_apic(apic_id, processor->uid, enabled); #else pr_warn("x2apic entry ignored\n"); #endif @@ -252,9 +244,9 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) * to not preallocating memory for all NR_CPUS * when we use CPU hotplug. 
*/ - acpi_register_lapic(processor->id, /* APIC ID */ - processor->processor_id, /* ACPI ID */ - processor->lapic_flags & ACPI_MADT_ENABLED); + topology_register_apic(processor->id, /* APIC ID */ + processor->processor_id, /* ACPI ID */ + processor->lapic_flags & ACPI_MADT_ENABLED); return 0; } @@ -271,9 +263,9 @@ acpi_parse_sapic(union acpi_subtable_headers *header, const unsigned long end) acpi_table_print_madt_entry(&header->common); - acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ - processor->processor_id, /* ACPI ID */ - processor->lapic_flags & ACPI_MADT_ENABLED); + topology_register_apic((processor->id << 8) | processor->eid,/* APIC ID */ + processor->processor_id, /* ACPI ID */ + processor->lapic_flags & ACPI_MADT_ENABLED); return 0; } @@ -1020,8 +1012,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) return count; } - if (!xen_pv_domain()) - register_lapic_address(acpi_lapic_addr); + register_lapic_address(acpi_lapic_addr); return count; } diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 3876a3342fe96..b0b68c867aaff 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -77,7 +77,7 @@ static bool fake_topology(struct topo_scan *tscan) topology_set_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); topology_set_dom(tscan, TOPO_CORE_DOMAIN, 0, 1); - return tscan->c->cpuid_level < 1 || xen_pv_domain(); + return tscan->c->cpuid_level < 1; } static void parse_topology(struct topo_scan *tscan, bool early) diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 8835d1cc961d8..8b045dd25196b 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -43,20 +43,20 @@ static u32 xen_apic_read(u32 reg) struct xen_platform_op op = { .cmd = XENPF_get_cpuinfo, .interface_version = XENPF_INTERFACE_VERSION, - .u.pcpu_info.xen_cpuid = 0, }; - int ret; - - /* Shouldn't need this as APIC is turned off for PV, and we only - * get called on the bootup processor. But just in case. */ - if (!xen_initial_domain() || smp_processor_id()) - return 0; + int ret, cpu; if (reg == APIC_LVR) return 0x14; if (reg != APIC_ID) return 0; + cpu = smp_processor_id(); + if (!xen_initial_domain()) + return cpu ? 
cpuid_to_apicid[cpu] << 24 : 0; + + op.u.pcpu_info.xen_cpuid = cpu; + ret = HYPERVISOR_platform_op(&op); if (ret) op.u.pcpu_info.apic_id = BAD_APICID; diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 44706f01bb9bc..27d1a5b7f571a 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -156,11 +156,9 @@ static void __init xen_pv_smp_config(void) topology_register_boot_apic(apicid++); - for (i = 1; i < nr_cpu_ids; i++) { - if (HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL) < 0) - break; + for (i = 1; i < nr_cpu_ids; i++) topology_register_apic(apicid++, CPU_ACPIID_INVALID, true); - } + /* Pretend to be a proper enumerated system */ smp_found_config = 1; } @@ -451,5 +449,10 @@ void __init xen_smp_init(void) /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; - x86_init.mpparse.parse_smp_cfg = xen_pv_smp_config; + + /* XEN/PV Dom0 has halfways sane topology information via CPUID/MADT */ + if (xen_initial_domain()) + x86_init.mpparse.parse_smp_cfg = x86_init_noop; + else + x86_init.mpparse.parse_smp_cfg = xen_pv_smp_config; } From 36333138a1cd3d16f54b5f6bff1d44aaa8c19d7f Mon Sep 17 00:00:00 2001 From: Quanxian Wang Date: Tue, 6 Jan 2026 04:06:08 -0500 Subject: [PATCH 089/109] Revert "bytedance: x86/tsc: Enhance watchdog check exemption for 4 sockets platform" This reverts commit af350abd90bab471b8d8b312c8c48f9b9a164f4f. The upstream commit b4bac279319d ("x86/tsc: Use topology_max_packages() to get package number") implements this functionality. Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 2 -- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tsc.c | 32 -------------------------------- 3 files changed, 1 insertion(+), 35 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index b9254b482c0b0..2e205203a31d4 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -154,7 +154,6 @@ extern unsigned int __max_die_per_package; extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) -extern unsigned int logical_packages; static inline int topology_max_die_per_package(void) { @@ -195,7 +194,6 @@ static inline bool topology_is_primary_thread(unsigned int cpu) #else /* CONFIG_SMP */ #define topology_max_packages() (1) -#define logical_packages (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 075ebea611f52..2e6c45d4be314 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -141,7 +141,7 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct logical_maps, logical_maps) = { unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); -unsigned int logical_packages __read_mostly; +static unsigned int logical_packages __read_mostly; static unsigned int logical_die __read_mostly; /* Maximum number of SMT threads on any online core */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c22494a702be5..81e9b436c3b68 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1475,38 +1475,6 @@ static int __init init_tsc_clocksource(void) if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; - /* - * Disable the clocksource watchdog when the system has: - * - TSC running at constant frequency - * - TSC which does not stop in C-States - *
- the TSC_ADJUST register which allows to detect even minimal - * modifications - * - not more than four sockets. During tsc_init() time, - * logical_packages is not available yet so nr_online_nodes - * is used instead but due to reasons like SNC, fake nodes etc - * that node number can be > 4 even in a 2 sockets system. Detect - * this condition here when logical_packages is available now. - * - * This helps for systems which incorrectly detect TSC as unreliable - * during runtime because of reasons like SMI, delayed watchdog timer - * handling etc. This kind of achieved upstream commit b4bac279319d(" - * x86/tsc: Use topology_max_packages() to get package number"). - */ - if (nr_online_nodes > 4 && logical_packages <= 4) { - if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && - boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && - boot_cpu_has(X86_FEATURE_TSC_ADJUST)) - /* - * tsc_disable_clocksource_watchdog() will clear - * MUST_VERIFY for both tsc_early and tsc but since - * tsc_early is already registered with MUST_VERIFY - * set and thus is already on that watchdog list, keep - * its MUST_VERIFY flag so when it's unregistered - * below, it will be removed from watchdog_list. - */ - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; - } - /* * When TSC frequency is known (retrieved via MSR or CPUID), we skip * the refined calibration and directly register it as a clocksource. From fd74fe02743a9ebb1cc4ea481d0bc7e1da755d4a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:03 +0100 Subject: [PATCH 090/109] x86/cpu/topology: Use topology bitmaps for sizing commit 090610ba704a66d7a58919be3bad195f24499ecb upstream. Now that all possible APIC IDs are tracked in the topology bitmaps, its trivial to retrieve the real information from there. This gets rid of the guesstimates for the maximal packages and dies per package as the actual numbers can be determined before a single AP has been brought up. The number of SMT threads can now be determined correctly from the bitmaps in all situations. Up to now a system which has SMT disabled in the BIOS will still claim that it is SMT capable, because the lowest APIC ID bit is reserved for that and CPUID leaf 0xb/0x1f still enumerates the SMT domain accordingly. By calculating the bitmap weights of the SMT and the CORE domain and setting them into relation the SMT disabled in BIOS situation reports correctly that the system is not SMT capable. It also handles the situation correctly when a hybrid systems boot CPU does not have SMT as it takes the SMT capability of the APs fully into account. Intel-SIG: commit 090610ba704a x86/cpu/topology: Use topology bitmaps for sizing. 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.681709880@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/smp.h | 3 +-- arch/x86/include/asm/topology.h | 23 ++++++++++++----------- arch/x86/kernel/cpu/common.c | 9 ++++++--- arch/x86/kernel/cpu/debugfs.c | 2 +- arch/x86/kernel/cpu/topology.c | 20 +++++++++++++++++++- arch/x86/kernel/cpu/topology_common.c | 24 ------------------------ arch/x86/kernel/smpboot.c | 16 ---------------- arch/x86/xen/smp.c | 2 -- 8 files changed, 39 insertions(+), 60 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 807aafa63a3ec..9de16e982ea01 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -8,7 +8,7 @@ #include #include -extern int smp_num_siblings; +extern unsigned int smp_num_siblings; DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); @@ -110,7 +110,6 @@ void cpu_disable_common(void); void native_smp_prepare_boot_cpu(void); void smp_prepare_cpus_common(void); void native_smp_prepare_cpus(unsigned int max_cpus); -void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_kick_ap(unsigned int cpu, struct task_struct *tidle); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 2e205203a31d4..9b5d9adc1bbe5 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -143,7 +143,18 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_amd_node_id(cpu) (cpu_data(cpu).topo.amd_node_id) -extern unsigned int __max_die_per_package; +extern unsigned int __max_dies_per_package; +extern unsigned int __max_logical_packages; + +static inline unsigned int topology_max_packages(void) +{ + return __max_logical_packages; +} + +static inline unsigned int topology_max_die_per_package(void) +{ + return __max_dies_per_package; +} #ifdef CONFIG_SMP #define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id) @@ -152,14 +163,6 @@ extern unsigned int __max_die_per_package; #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -extern unsigned int __max_logical_packages; -#define topology_max_packages() (__max_logical_packages) - -static inline int topology_max_die_per_package(void) -{ - return __max_die_per_package; -} - extern int __max_smt_threads; static inline int topology_max_smt_threads(void) @@ -193,13 +196,11 @@ static inline bool topology_is_primary_thread(unsigned int cpu) } #else /* CONFIG_SMP */ -#define topology_max_packages() (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } -static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 097b48c5d08da..de769ec1ca252 100644 --- 
a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -72,11 +72,14 @@ u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ -int smp_num_siblings = 1; +unsigned int smp_num_siblings __ro_after_init = 1; EXPORT_SYMBOL(smp_num_siblings); -unsigned int __max_die_per_package __read_mostly = 1; -EXPORT_SYMBOL(__max_die_per_package); +unsigned int __max_dies_per_package __ro_after_init = 1; +EXPORT_SYMBOL(__max_dies_per_package); + +unsigned int __max_logical_packages __ro_after_init = 1; +EXPORT_SYMBOL(__max_logical_packages); static struct ppin_info { int feature; diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index 86de544cdb14c..543efc487206a 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -29,7 +29,7 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id); seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); seq_printf(m, "max_cores: %u\n", c->x86_max_cores); - seq_printf(m, "max_die_per_pkg: %u\n", __max_die_per_package); + seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package); seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings); return 0; } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 07124da9df717..630ebe503ae38 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -348,8 +348,8 @@ void __init topology_init_possible_cpus(void) { unsigned int assigned = topo_info.nr_assigned_cpus; unsigned int disabled = topo_info.nr_disabled_cpus; + unsigned int cnta, cntb, cpu, allowed = 1; unsigned int total = assigned + disabled; - unsigned int cpu, allowed = 1; u32 apicid; if (!restrict_to_up()) { @@ -372,6 +372,24 @@ void __init topology_init_possible_cpus(void) total_cpus = allowed; set_nr_cpu_ids(allowed); + cnta = domain_weight(TOPO_PKG_DOMAIN); + cntb = domain_weight(TOPO_DIE_DOMAIN); + __max_logical_packages = cnta; + __max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta)); + + pr_info("Max. logical packages: %3u\n", cnta); + pr_info("Max. logical dies: %3u\n", cntb); + pr_info("Max. dies per package: %3u\n", __max_dies_per_package); + + cnta = domain_weight(TOPO_CORE_DOMAIN); + cntb = domain_weight(TOPO_SMT_DOMAIN); + /* + * Can't use order delta here as order(cnta) can be equal + * order(cntb) even if cnta != cntb. + */ + smp_num_siblings = DIV_ROUND_UP(cntb, cnta); + pr_info("Max. threads per core: %3u\n", smp_num_siblings); + pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); if (topo_info.nr_rejected_cpus) pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus); diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index b0b68c867aaff..0276978bc272d 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -196,16 +196,6 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]); } - /* Bug compatible with the existing parsers */ - if (tscan.dom_ncpus[TOPO_SMT_DOMAIN] > smp_num_siblings) { - if (system_state == SYSTEM_BOOTING) { - pr_warn_once("CPU%d: SMT detected and enabled late\n", cpu); - smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; - } else { - pr_warn_once("CPU%d: SMT detected after init. 
Too late!\n", cpu); - } - } - topo_set_ids(&tscan); topo_set_max_cores(&tscan); } @@ -231,20 +221,6 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) topo_set_ids(&tscan); topo_set_max_cores(&tscan); - /* - * Bug compatible with the existing code. If the boot CPU does not - * have SMT this ends up with one sibling. This needs way deeper - * changes further down the road to get it right during early boot. - */ - smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; - - /* - * Neither it's clear whether there are as many dies as the APIC - * space indicating die level is. But assume that the actual number - * of CPUs gives a proper indication for now to stay bug compatible. - */ - __max_die_per_package = tscan.dom_ncpus[TOPO_DIE_DOMAIN] / - tscan.dom_ncpus[TOPO_DIE_DOMAIN - 1]; /* * AMD systems have Nodes per package which cannot be mapped to * APIC ID. diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2e6c45d4be314..c6a7a47bf07e7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -139,8 +139,6 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct logical_maps, logical_maps) = { .phys_die_id = U32_MAX, }; -unsigned int __max_logical_packages __read_mostly; -EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; static unsigned int logical_die __read_mostly; @@ -1259,24 +1257,10 @@ void __init native_smp_prepare_boot_cpu(void) native_pv_lock_init(); } -void __init calculate_max_logical_packages(void) -{ - int ncpus; - - /* - * Today neither Intel nor AMD support heterogeneous systems so - * extrapolate the boot cpu's data to all packages. - */ - ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); - __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); - pr_info("Max logical packages: %u\n", __max_logical_packages); -} - void __init native_smp_cpus_done(unsigned int max_cpus) { pr_debug("Boot done\n"); - calculate_max_logical_packages(); build_sched_topology(); nmi_selftest(); impress_friends(); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 1fb9a1644d944..935771726f9cb 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -135,8 +135,6 @@ void __init xen_smp_cpus_done(unsigned int max_cpus) { if (xen_hvm_domain()) native_smp_cpus_done(max_cpus); - else - calculate_max_logical_packages(); } void xen_smp_send_reschedule(int cpu) From f119b648e317c40065e3a3b32af5b0edbc9f2bf1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:05 +0100 Subject: [PATCH 091/109] x86/cpu/topology: Mop up primary thread mask handling commit 882e0cff9ef340e7a47659a9aab9da64f4b9b847 upstream. The early initcall to initialize the primary thread mask is not longer required because topology_init_possible_cpus() can mark primary threads correctly when initializing the possible and present map as the number of SMT threads is already determined correctly. The XENPV workaround is not longer required because XENPV now registers fake APIC IDs which will just work like any other enumeration. Intel-SIG: commit 882e0cff9ef3 x86/cpu/topology: Mop up primary thread mask handling. 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.736104257@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 630ebe503ae38..f3397e233ffbd 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -82,30 +82,6 @@ static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) if (smp_num_siblings == 1 || !(apicid & mask)) cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); } - -/* - * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid - * during early boot. Initialize the primary thread mask before SMP - * bringup. - */ -static int __init smp_init_primary_thread_mask(void) -{ - unsigned int cpu; - - /* - * XEN/PV provides either none or useless topology information. - * Pretend that all vCPUs are primary threads. - */ - if (xen_pv_domain()) { - cpumask_copy(&__cpu_primary_thread_mask, cpu_possible_mask); - return 0; - } - - for (cpu = 0; cpu < topo_info.nr_assigned_cpus; cpu++) - cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]); - return 0; -} -early_initcall(smp_init_primary_thread_mask); #else static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif @@ -151,9 +127,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) #endif set_cpu_possible(cpu, true); set_cpu_present(cpu, true); - - if (system_state != SYSTEM_BOOTING) - cpu_mark_primary_thread(cpu, apic_id); } static __init bool check_for_real_bsp(u32 apic_id) @@ -282,6 +255,7 @@ int topology_hotplug_apic(u32 apic_id, u32 acpi_id) set_bit(apic_id, phys_cpu_present_map); topo_set_cpuids(cpu, apic_id, acpi_id); + cpu_mark_primary_thread(cpu, apic_id); return cpu; } @@ -414,6 +388,7 @@ void __init topology_init_possible_cpus(void) if (apicid == BAD_APICID) continue; + cpu_mark_primary_thread(cpu, apicid); set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map)); } } From d60f4f39d6de17db6be1f42ad6bcda0b370062f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:06 +0100 Subject: [PATCH 092/109] x86/cpu/topology: Simplify cpu_mark_primary_thread() commit 5e40fb2d4a4c7503cab4f923b7d985dbcf583581 upstream. No point in creating a mask via fls(). smp_num_siblings is guaranteed to be a power of 2. So just using (smp_num_siblings - 1) has the same effect. Intel-SIG: commit 5e40fb2d4a4c x86/cpu/topology: Simplify cpu_mark_primary_thread(). 
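For illustration only (not part of the patch), a small stand-alone program can check that both mask constructions are identical for the power-of-2 sibling counts the topology code guarantees; fls() is open-coded here because this runs in user space:

    #include <assert.h>

    /* Open-coded variant of the kernel's fls(): position of the most significant set bit */
    static int fls(unsigned int x)
    {
        return x ? 32 - __builtin_clz(x) : 0;
    }

    int main(void)
    {
        for (unsigned int siblings = 1; siblings <= 16; siblings <<= 1) {
            unsigned int old_mask = (1U << (fls(siblings) - 1)) - 1;
            unsigned int new_mask = siblings - 1;

            assert(old_mask == new_mask);
        }
        return 0;
    }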
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.791176581@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index f3397e233ffbd..aea408de3be44 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -76,10 +76,7 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) #ifdef CONFIG_SMP static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { - /* Isolate the SMT bit(s) in the APICID and check for 0 */ - u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1; - - if (smp_num_siblings == 1 || !(apicid & mask)) + if (!(apicid & (smp_num_siblings - 1))) cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); } #else From 33da8da74ac4a090eee9b016e386c08db8f27cde Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:07 +0100 Subject: [PATCH 093/109] x86/cpu/topology: Provide logical pkg/die mapping commit b7065f4f844c7876ed071b67e2ba57838152bd63 upstream. With the topology bitmaps in place the logical package and die IDs can trivially be retrieved by determining the bitmap weight of the relevant topology domain level up to and including the physical ID in question. Provide a function to that effect. Intel-SIG: commit b7065f4f844c x86/cpu/topology: Provide logical pkg/die mapping. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.846136196@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 9 +++++++++ arch/x86/kernel/cpu/topology.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 9b5d9adc1bbe5..8810c16885a04 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -156,6 +156,15 @@ static inline unsigned int topology_max_die_per_package(void) return __max_dies_per_package; } +#ifdef CONFIG_X86_LOCAL_APIC +int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level); +#else +static inline int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level) +{ + return 0; +} +#endif + #ifdef CONFIG_SMP #define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id) #define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index aea408de3be44..29759b4f72136 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -229,6 +229,34 @@ void __init topology_register_boot_apic(u32 apic_id) topo_register_apic(apic_id, CPU_ACPIID_INVALID, true); } +/** + * topology_get_logical_id - Retrieve the logical ID at a given topology domain level + * @apicid: The APIC ID for which to lookup the logical ID + * @at_level: The topology domain level to use + * + * @apicid must be a full APIC ID, not the normalized variant. It's valid to have + * all bits below the domain level specified by @at_level to be clear. So both + * real APIC IDs and backshifted normalized APIC IDs work correctly. 
+ * + * Returns: + * - >= 0: The requested logical ID + * - -ERANGE: @apicid is out of range + * - -ENODEV: @apicid is not registered + */ +int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level) +{ + /* Remove the bits below @at_level to get the proper level ID of @apicid */ + unsigned int lvlid = topo_apicid(apicid, at_level); + + if (lvlid >= MAX_LOCAL_APIC) + return -ERANGE; + if (!test_bit(lvlid, apic_maps[at_level].map)) + return -ENODEV; + /* Get the number of set bits before @lvlid. */ + return bitmap_weight(apic_maps[at_level].map, lvlid); +} +EXPORT_SYMBOL_GPL(topology_get_logical_id); + #ifdef CONFIG_ACPI_HOTPLUG_CPU /** * topology_hotplug_apic - Handle a physical hotplugged APIC after boot From d383abb7a8de640ec1d744d32fe9545e0719fa92 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:09 +0100 Subject: [PATCH 094/109] x86/cpu/topology: Use topology logical mapping mechanism commit 380414be78bf8dbe1c3ed98feb75e2579c4a1bae upstream. Replace the logical package and die management functionality and retrieve the logical IDs from the topology bitmaps. Intel-SIG: commit 380414be78bf x86/cpu/topology: Use topology logical mapping mechanism. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.901865302@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 15 ++-- arch/x86/kernel/cpu/common.c | 13 --- arch/x86/kernel/cpu/topology_common.c | 4 + arch/x86/kernel/smpboot.c | 111 +------------------------- 4 files changed, 12 insertions(+), 131 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 8810c16885a04..b2b923ce2547e 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -172,6 +172,13 @@ static inline int topology_get_logical_id(u32 apicid, enum x86_topology_domains #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) + +static inline int topology_phys_to_logical_pkg(unsigned int pkg) +{ + return topology_get_logical_id(pkg << x86_topo_system.dom_shifts[TOPO_PKG_DOMAIN], + TOPO_PKG_DOMAIN); +} + extern int __max_smt_threads; static inline int topology_max_smt_threads(void) @@ -181,10 +188,6 @@ static inline int topology_max_smt_threads(void) #include -int topology_update_package_map(unsigned int apicid, unsigned int cpu); -int topology_update_die_map(unsigned int dieid, unsigned int cpu); -int topology_phys_to_logical_pkg(unsigned int pkg); - extern unsigned int __amd_nodes_per_pkg; static inline unsigned int topology_amd_nodes_per_pkg(void) @@ -205,10 +208,6 @@ static inline bool topology_is_primary_thread(unsigned int cpu) } #else /* CONFIG_SMP */ -static inline int -topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } -static inline int -topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index de769ec1ca252..eb455c3387476 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1826,18 +1826,6 @@ static void generic_identify(struct cpuinfo_x86 
*c) #endif } -static void update_package_map(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int cpu = smp_processor_id(); - - BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu)); - BUG_ON(topology_update_die_map(c->topo.die_id, cpu)); -#else - c->topo.logical_pkg_id = 0; -#endif -} - /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -2022,7 +2010,6 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_32 enable_sep_cpu(); #endif - update_package_map(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); if (boot_cpu_has_bug(X86_BUG_GDS)) diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 0276978bc272d..c21a3871a8709 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -10,6 +10,7 @@ #include "cpu.h" struct x86_topology_system x86_topo_system __ro_after_init; +EXPORT_SYMBOL_GPL(x86_topo_system); unsigned int __amd_nodes_per_pkg __ro_after_init; EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg); @@ -147,6 +148,9 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN); c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN); + c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); + c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + /* Package relative core ID */ c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c6a7a47bf07e7..84e90c5bb7292 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -125,23 +125,6 @@ struct mwait_cpu_dead { */ static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead); -/* Logical package management. 
*/ -struct logical_maps { - u32 phys_pkg_id; - u32 phys_die_id; - u32 logical_pkg_id; - u32 logical_die_id; -}; - -/* Temporary workaround until the full topology mechanics is in place */ -static DEFINE_PER_CPU_READ_MOSTLY(struct logical_maps, logical_maps) = { - .phys_pkg_id = U32_MAX, - .phys_die_id = U32_MAX, -}; - -static unsigned int logical_packages __read_mostly; -static unsigned int logical_die __read_mostly; - /* Maximum number of SMT threads on any online core */ int __read_mostly __max_smt_threads = 1; @@ -334,103 +317,11 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -/** - * topology_phys_to_logical_pkg - Map a physical package id to a logical - * @phys_pkg: The physical package id to map - * - * Returns logical package id or -1 if not found - */ -int topology_phys_to_logical_pkg(unsigned int phys_pkg) -{ - int cpu; - - for_each_possible_cpu(cpu) { - if (per_cpu(logical_maps.phys_pkg_id, cpu) == phys_pkg) - return per_cpu(logical_maps.logical_pkg_id, cpu); - } - return -1; -} -EXPORT_SYMBOL(topology_phys_to_logical_pkg); - -/** - * topology_phys_to_logical_die - Map a physical die id to logical - * @die_id: The physical die id to map - * @cur_cpu: The CPU for which the mapping is done - * - * Returns logical die id or -1 if not found - */ -static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) -{ - int cpu, proc_id = cpu_data(cur_cpu).topo.pkg_id; - - for_each_possible_cpu(cpu) { - if (per_cpu(logical_maps.phys_pkg_id, cpu) == proc_id && - per_cpu(logical_maps.phys_die_id, cpu) == die_id) - return per_cpu(logical_maps.logical_die_id, cpu); - } - return -1; -} - -/** - * topology_update_package_map - Update the physical to logical package map - * @pkg: The physical package id as retrieved via CPUID - * @cpu: The cpu for which this is updated - */ -int topology_update_package_map(unsigned int pkg, unsigned int cpu) -{ - int new; - - /* Already available somewhere? */ - new = topology_phys_to_logical_pkg(pkg); - if (new >= 0) - goto found; - - new = logical_packages++; - if (new != pkg) { - pr_info("CPU %u Converting physical %u to logical package %u\n", - cpu, pkg, new); - } -found: - per_cpu(logical_maps.phys_pkg_id, cpu) = pkg; - per_cpu(logical_maps.logical_pkg_id, cpu) = new; - cpu_data(cpu).topo.logical_pkg_id = new; - return 0; -} -/** - * topology_update_die_map - Update the physical to logical die map - * @die: The die id as retrieved via CPUID - * @cpu: The cpu for which this is updated - */ -int topology_update_die_map(unsigned int die, unsigned int cpu) -{ - int new; - - /* Already available somewhere? 
*/ - new = topology_phys_to_logical_die(die, cpu); - if (new >= 0) - goto found; - - new = logical_die++; - if (new != die) { - pr_info("CPU %u Converting physical %u to logical die %u\n", - cpu, die, new); - } -found: - per_cpu(logical_maps.phys_die_id, cpu) = die; - per_cpu(logical_maps.logical_die_id, cpu) = new; - cpu_data(cpu).topo.logical_die_id = new; - return 0; -} - static void __init smp_store_boot_cpu_info(void) { - int id = 0; /* CPU 0 */ - struct cpuinfo_x86 *c = &cpu_data(id); + struct cpuinfo_x86 *c = &cpu_data(0); *c = boot_cpu_data; - c->cpu_index = id; - topology_update_package_map(c->topo.pkg_id, id); - topology_update_die_map(c->topo.die_id, id); c->initialized = true; } From 26442c6938d617a8928e2e4ccfc8ccc98e713e72 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:10 +0100 Subject: [PATCH 095/109] x86/cpu/topology: Retrieve cores per package from topology bitmaps commit 3205c9833d69b97e8694efe3e193312dea4c571f upstream. Similar to other sizing information the number of cores per package can be established from the topology bitmap. Provide a function for retrieving that information and replace the buggy hack in the CPUID evaluation with it. Intel-SIG: commit 3205c9833d69 x86/cpu/topology: Retrieve cores per package from topology bitmaps. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.956858282@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 43 +++++++++++++++++++++++++++ arch/x86/kernel/cpu/topology.h | 11 +++++++ arch/x86/kernel/cpu/topology_common.c | 18 ++--------- 3 files changed, 57 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 29759b4f72136..7db9df50ada8d 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -257,6 +257,49 @@ int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level) } EXPORT_SYMBOL_GPL(topology_get_logical_id); +/** + * topology_unit_count - Retrieve the count of specified units at a given topology domain level + * @apicid: The APIC ID which specifies the search range + * @which_units: The domain level specifying the units to count + * @at_level: The domain level at which @which_units have to be counted + * + * This returns the number of possible units according to the enumerated + * information. + * + * E.g. topology_count_units(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN) + * counts the number of possible cores in the package to which @apicid + * belongs. + * + * @at_level must obviously be greater than @which_level to produce useful + * results. If @at_level is equal to @which_units the result is + * unsurprisingly 1. If @at_level is less than @which_units the results + * is by definition undefined and the function returns 0. 
+ */ +unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units, + enum x86_topology_domains at_level) +{ + /* Remove the bits below @at_level to get the proper level ID of @apicid */ + unsigned int lvlid = topo_apicid(apicid, at_level); + unsigned int id, end, cnt = 0; + + if (lvlid >= MAX_LOCAL_APIC) + return 0; + if (!test_bit(lvlid, apic_maps[at_level].map)) + return 0; + if (which_units > at_level) + return 0; + if (which_units == at_level) + return 1; + + /* Calculate the exclusive end */ + end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]); + /* Unfortunately there is no bitmap_weight_range() */ + for (id = find_next_bit(apic_maps[which_units].map, end, lvlid); + id < end; id = find_next_bit(apic_maps[which_units].map, end, ++id)) + cnt++; + return cnt; +} + #ifdef CONFIG_ACPI_HOTPLUG_CPU /** * topology_hotplug_apic - Handle a physical hotplugged APIC after boot diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 2a3c838b6044d..37326297f80c8 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -53,4 +53,15 @@ static inline void topology_update_dom(struct topo_scan *tscan, enum x86_topolog tscan->dom_ncpus[dom] = ncpus; } +#ifdef CONFIG_X86_LOCAL_APIC +unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units, + enum x86_topology_domains at_level); +#else +static inline unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units, + enum x86_topology_domains at_level) +{ + return 1; +} +#endif + #endif /* ARCH_X86_TOPOLOGY_H */ diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index c21a3871a8709..a2c3f8f5886d7 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -155,25 +155,15 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; + /* Maximum number of cores on this package */ + c->x86_max_cores = topology_unit_count(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); + c->topo.amd_node_id = tscan->amd_node_id; if (c->x86_vendor == X86_VENDOR_AMD) cpu_topology_fixup_amd(tscan); } -static void topo_set_max_cores(struct topo_scan *tscan) -{ - /* - * Bug compatible for now. This is broken on hybrid systems: - * 8 cores SMT + 8 cores w/o SMT - * tscan.dom_ncpus[TOPO_DIEGRP_DOMAIN] = 24; 24 / 2 = 12 !! - * - * Cannot be fixed without further topology enumeration changes. - */ - tscan->c->x86_max_cores = tscan->dom_ncpus[TOPO_DIEGRP_DOMAIN] >> - x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; -} - void cpu_parse_topology(struct cpuinfo_x86 *c) { unsigned int dom, cpu = smp_processor_id(); @@ -201,7 +191,6 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) } topo_set_ids(&tscan); - topo_set_max_cores(&tscan); } void __init cpu_init_topology(struct cpuinfo_x86 *c) @@ -223,7 +212,6 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) } topo_set_ids(&tscan); - topo_set_max_cores(&tscan); /* * AMD systems have Nodes per package which cannot be mapped to From 20dfc179756d04ac728ddec55fd111d86b6e3bf4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:12 +0100 Subject: [PATCH 096/109] x86/cpu/topology: Rename smp_num_siblings commit 8078f4d6102f9370b3b7436d25717735d21f5c09 upstream. It's really a non-intuitive name. Rename it to __max_threads_per_core which is obvious. Intel-SIG: commit 8078f4d6102f x86/cpu/topology: Rename smp_num_siblings. 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.011307973@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/perf_event_p4.h | 4 ++-- arch/x86/include/asm/smp.h | 2 -- arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/cpu/common.c | 6 +++--- arch/x86/kernel/cpu/debugfs.c | 2 +- arch/x86/kernel/cpu/mce/inject.c | 2 +- arch/x86/kernel/cpu/topology.c | 6 +++--- arch/x86/kernel/process.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 9 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 94de1a05aebaa..d65e338b6a5fc 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -181,7 +181,7 @@ static inline u64 p4_clear_ht_bit(u64 config) static inline int p4_ht_active(void) { #ifdef CONFIG_SMP - return smp_num_siblings > 1; + return __max_threads_per_core > 1; #endif return 0; } @@ -189,7 +189,7 @@ static inline int p4_ht_active(void) static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP - if (smp_num_siblings == 2) + if (__max_threads_per_core == 2) return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 9de16e982ea01..2a5fbe0d2d680 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -8,8 +8,6 @@ #include #include -extern unsigned int smp_num_siblings; - DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index b2b923ce2547e..39f69e1f08ccb 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -145,6 +145,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); extern unsigned int __max_dies_per_package; extern unsigned int __max_logical_packages; +extern unsigned int __max_threads_per_core; static inline unsigned int topology_max_packages(void) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index eb455c3387476..f4f4d2cb2d51f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -72,8 +72,8 @@ u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ -unsigned int smp_num_siblings __ro_after_init = 1; -EXPORT_SYMBOL(smp_num_siblings); +unsigned int __max_threads_per_core __ro_after_init = 1; +EXPORT_SYMBOL(__max_threads_per_core); unsigned int __max_dies_per_package __ro_after_init = 1; EXPORT_SYMBOL(__max_dies_per_package); @@ -2360,7 +2360,7 @@ void __init arch_cpu_finalize_init(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. 
*/ - cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings); + cpu_smt_set_num_threads(__max_threads_per_core, __max_threads_per_core); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index 543efc487206a..f40f3eecebc60 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -30,7 +30,7 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); seq_printf(m, "max_cores: %u\n", c->x86_max_cores); seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package); - seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings); + seq_printf(m, "max_threads_per_core:%u\n", __max_threads_per_core); return 0; } diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 2b290452e1940..1e327881073f5 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -433,7 +433,7 @@ static u32 get_nbc_for_node(int node_id) struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * smp_num_siblings) / topology_amd_nodes_per_pkg(); + cores_per_node = (c->x86_max_cores * __max_threads_per_core) / topology_amd_nodes_per_pkg(); return cores_per_node * node_id; } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 7db9df50ada8d..b078facce8657 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -76,7 +76,7 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) #ifdef CONFIG_SMP static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { - if (!(apicid & (smp_num_siblings - 1))) + if (!(apicid & (__max_threads_per_core - 1))) cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); } #else @@ -429,8 +429,8 @@ void __init topology_init_possible_cpus(void) * Can't use order delta here as order(cnta) can be equal * order(cntb) even if cnta != cntb. */ - smp_num_siblings = DIV_ROUND_UP(cntb, cnta); - pr_info("Max. threads per core: %3u\n", smp_num_siblings); + __max_threads_per_core = DIV_ROUND_UP(cntb, cnta); + pr_info("Max. 
threads per core: %3u\n", __max_threads_per_core); pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); if (topo_info.nr_rejected_cpus) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9eebb35756839..f71f13ad95709 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -951,7 +951,7 @@ static __cpuidle void mwait_idle(void) void select_idle_routine(const struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) + if (boot_option_idle_override == IDLE_POLL && __max_threads_per_core > 1) pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); #endif if (x86_idle_set() || boot_option_idle_override == IDLE_POLL) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 84e90c5bb7292..af34fabdbb5e3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -555,7 +555,7 @@ static void __init build_sched_topology(void) void set_cpu_sibling_map(int cpu) { - bool has_smt = smp_num_siblings > 1; + bool has_smt = __max_threads_per_core > 1; bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; From fa557209cca0f7b7c2974e31091eff806db50c9b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:13 +0100 Subject: [PATCH 097/109] x86/cpu/topology: Rename topology_max_die_per_package() commit bd745d1c41e7fa56242889eb5dc6df2d7dd5df32 upstream. The plural of die is dies. Intel-SIG: commit bd745d1c41e7 x86/cpu/topology: Rename topology_max_die_per_package(). x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.065874205@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/events/intel/cstate.c | 2 +- arch/x86/events/intel/uncore.c | 2 +- arch/x86/events/intel/uncore_snbep.c | 2 +- arch/x86/events/rapl.c | 2 +- arch/x86/include/asm/topology.h | 2 +- drivers/hwmon/coretemp.c | 2 +- drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c | 2 +- drivers/powercap/intel_rapl_common.c | 2 +- drivers/thermal/intel/intel_hfi.c | 2 +- drivers/thermal/intel/intel_powerclamp.c | 2 +- drivers/thermal/intel/x86_pkg_temp_thermal.c | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index d7ce7eec0cd2f..b61d367b89e24 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -828,7 +828,7 @@ static int __init cstate_init(void) } if (has_cstate_pkg) { - if (topology_max_die_per_package() > 1) { + if (topology_max_dies_per_package() > 1) { err = perf_pmu_register(&cstate_pkg_pmu, "cstate_die", -1); } else { diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 14c4875e5867d..0c724b804e7b1 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1914,7 +1914,7 @@ static int __init intel_uncore_init(void) return -ENODEV; __uncore_max_dies = - topology_max_packages() * topology_max_die_per_package(); + topology_max_packages() * topology_max_dies_per_package(); id = x86_match_cpu(intel_uncore_match); if (!id) { diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index abb0325b1eb1e..82b37edccd535 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1443,7 +1443,7 @@ static 
int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool */ for (i = 0; i < 8; i++) { if (nodeid == GIDNIDMAP(config, i)) { - if (topology_max_die_per_package() > 1) + if (topology_max_dies_per_package() > 1) die_id = i; else die_id = topology_phys_to_logical_pkg(i); diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 1579429846ccf..ce645e0b60d21 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -678,7 +678,7 @@ static const struct attribute_group *rapl_attr_update[] = { static int __init init_rapl_pmus(void) { - int maxdie = topology_max_packages() * topology_max_die_per_package(); + int maxdie = topology_max_packages() * topology_max_dies_per_package(); size_t size; size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 39f69e1f08ccb..0e06b21e82130 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -152,7 +152,7 @@ static inline unsigned int topology_max_packages(void) return __max_logical_packages; } -static inline unsigned int topology_max_die_per_package(void) +static inline unsigned int topology_max_dies_per_package(void) { return __max_dies_per_package; } diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index b8fc8d1ef20df..b0991dde2e59d 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -782,7 +782,7 @@ static int __init coretemp_init(void) if (!x86_match_cpu(coretemp_ids)) return -ENODEV; - max_zones = topology_max_packages() * topology_max_die_per_package(); + max_zones = topology_max_packages() * topology_max_dies_per_package(); zone_devices = kcalloc(max_zones, sizeof(struct platform_device *), GFP_KERNEL); if (!zone_devices) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index a82e58750110c..8a70650cb8ed9 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -259,7 +259,7 @@ static int __init intel_uncore_init(void) return -ENODEV; uncore_max_entries = topology_max_packages() * - topology_max_die_per_package(); + topology_max_dies_per_package(); uncore_instances = kcalloc(uncore_max_entries, sizeof(*uncore_instances), GFP_KERNEL); if (!uncore_instances) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index f1de4111e98d9..bdfbdbd13a652 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1586,7 +1586,7 @@ struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *pr if (id_is_cpu) { rp->id = topology_logical_die_id(id); rp->lead_cpu = id; - if (topology_max_die_per_package() > 1) + if (topology_max_dies_per_package() > 1) snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d", topology_physical_package_id(id), topology_die_id(id)); else diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index 1c5a429b2e3e9..3d8d97dc28d98 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -604,7 +604,7 @@ void __init intel_hfi_init(void) /* There is one HFI instance per die/package. */ max_hfi_instances = topology_max_packages() * - topology_max_die_per_package(); + topology_max_dies_per_package(); /* * This allocation may fail. 
CPU hotplug callbacks must check diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 5ac5cb60bae67..e84678bfa84e4 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -616,7 +616,7 @@ static int powerclamp_idle_injection_register(void) poll_pkg_cstate_enable = false; if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) { ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update); - if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) + if (topology_max_packages() == 1 && topology_max_dies_per_package() == 1) poll_pkg_cstate_enable = true; } else { ii_dev = idle_inject_register(idle_injection_cpu_mask); diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c index 2e06b26be4ef6..68a1002abfe11 100644 --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c @@ -495,7 +495,7 @@ static int __init pkg_temp_thermal_init(void) if (!x86_match_cpu(pkg_temp_thermal_ids)) return -ENODEV; - max_id = topology_max_packages() * topology_max_die_per_package(); + max_id = topology_max_packages() * topology_max_dies_per_package(); zones = kcalloc(max_id, sizeof(struct zone_device *), GFP_KERNEL); if (!zones) From 180611c622edf6b726585e3bb9348731661f511d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:14 +0100 Subject: [PATCH 098/109] x86/cpu/topology: Provide __num_[cores|threads]_per_package commit fd43b8ae76e903c76f14d06eb939449bcc3f614f upstream. Expose properly accounted information and accessors so the fiddling with other topology variables can be replaced. Intel-SIG: commit fd43b8ae76e9 x86/cpu/topology: Provide __num_[cores|threads]_per_package. 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.120958987@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 12 ++++++++++++ arch/x86/kernel/cpu/common.c | 6 ++++++ arch/x86/kernel/cpu/topology.c | 8 +++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 0e06b21e82130..abe3a8f22cbd9 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -146,6 +146,8 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); extern unsigned int __max_dies_per_package; extern unsigned int __max_logical_packages; extern unsigned int __max_threads_per_core; +extern unsigned int __num_threads_per_package; +extern unsigned int __num_cores_per_package; static inline unsigned int topology_max_packages(void) { @@ -157,6 +159,16 @@ static inline unsigned int topology_max_dies_per_package(void) return __max_dies_per_package; } +static inline unsigned int topology_num_cores_per_package(void) +{ + return __num_cores_per_package; +} + +static inline unsigned int topology_num_threads_per_package(void) +{ + return __num_threads_per_package; +} + #ifdef CONFIG_X86_LOCAL_APIC int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level); #else diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f4f4d2cb2d51f..831a834b0cfa7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -81,6 +81,12 @@ EXPORT_SYMBOL(__max_dies_per_package); unsigned int __max_logical_packages __ro_after_init = 1; EXPORT_SYMBOL(__max_logical_packages); +unsigned int __num_cores_per_package __ro_after_init = 1; +EXPORT_SYMBOL(__num_cores_per_package); + +unsigned int __num_threads_per_package __ro_after_init = 1; +EXPORT_SYMBOL(__num_threads_per_package); + static struct ppin_info { int feature; int msr_ppin_ctl; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index b078facce8657..41dd8e0eb5117 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -392,7 +392,7 @@ void __init topology_init_possible_cpus(void) unsigned int disabled = topo_info.nr_disabled_cpus; unsigned int cnta, cntb, cpu, allowed = 1; unsigned int total = assigned + disabled; - u32 apicid; + u32 apicid, firstid; if (!restrict_to_up()) { if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { @@ -432,6 +432,12 @@ void __init topology_init_possible_cpus(void) __max_threads_per_core = DIV_ROUND_UP(cntb, cnta); pr_info("Max. threads per core: %3u\n", __max_threads_per_core); + firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC); + __num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); + pr_info("Num. cores per package: %3u\n", __num_cores_per_package); + __num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN); + pr_info("Num. 
threads per package: %3u\n", __num_threads_per_package); + pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); if (topo_info.nr_rejected_cpus) pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus); From 6f790aa9f743170a1b74bafddd21b1c0113c3ff9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:16 +0100 Subject: [PATCH 099/109] x86/cpu/topology: Get rid of cpuinfo::x86_max_cores commit 89b0f15f408f7c4ee98c1ec4c3224852fcbc3274 upstream. Now that __num_cores_per_package and __num_threads_per_package are available, cpuinfo::x86_max_cores and the related math all over the place can be replaced with the ready to consume data. Intel-SIG: commit 89b0f15f408f x86/cpu/topology: Get rid of cpuinfo::x86_max_cores. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.176147806@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- Documentation/arch/x86/topology.rst | 24 +++++++------------ arch/x86/events/intel/uncore_nhmex.c | 4 ++-- arch/x86/events/intel/uncore_snb.c | 8 +++---- arch/x86/events/intel/uncore_snbep.c | 16 ++++++------- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/cpu/cacheinfo.c | 2 +- arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/cpu/debugfs.c | 3 ++- arch/x86/kernel/cpu/mce/inject.c | 3 +-- arch/x86/kernel/cpu/microcode/intel.c | 2 +- arch/x86/kernel/cpu/topology_common.c | 3 --- arch/x86/kernel/smpboot.c | 2 +- .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +- drivers/hwmon/fam15h_power.c | 2 +- 14 files changed, 31 insertions(+), 43 deletions(-) diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index 08ebf9edbfc1e..7352ab89a55ae 100644 --- a/Documentation/arch/x86/topology.rst +++ b/Documentation/arch/x86/topology.rst @@ -47,17 +47,21 @@ AMD nomenclature for package is 'Node'. Package-related topology information in the kernel: - - cpuinfo_x86.x86_max_cores: + - topology_num_threads_per_package() - The number of cores in a package. This information is retrieved via CPUID. + The number of threads in a package. - - cpuinfo_x86.x86_max_dies: + - topology_num_cores_per_package() - The number of dies in a package. This information is retrieved via CPUID. + The number of cores in a package. + + - topology_max_dies_per_package() + + The maximum number of dies in a package. - cpuinfo_x86.topo.die_id: - The physical ID of the die. This information is retrieved via CPUID. + The physical ID of the die. - cpuinfo_x86.topo.pkg_id: @@ -96,16 +100,6 @@ are SMT- or CMT-type threads. AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses "core". -Core-related topology information in the kernel: - - - smp_num_siblings: - - The number of threads in a core. The number of threads in a package can be - calculated by:: - - threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings - - Threads ======= A thread is a single scheduling unit. 
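For illustration only (not part of the patch), the typical replacement pattern is arithmetic-neutral; for instance the per-node base core number computed in the MCE injection code gives the same result either way, shown here with hypothetical values for an SMT enabled two node package:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical package: 8 cores, 2 threads per core, 2 AMD nodes */
        unsigned int x86_max_cores = 8, threads_per_core = 2, nodes_per_pkg = 2;
        unsigned int threads_per_pkg = 16;  /* topology_num_threads_per_package() */

        unsigned int old_nbc = (x86_max_cores * threads_per_core) / nodes_per_pkg;
        unsigned int new_nbc = threads_per_pkg / nodes_per_pkg;

        printf("old %u new %u\n", old_nbc, new_nbc);  /* both print 8 */
        return 0;
    }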
It's the equivalent to a logical Linux diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 56eea2c66cfb8..92da8aaa59660 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1221,8 +1221,8 @@ void nhmex_uncore_cpu_init(void) uncore_nhmex = true; else nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; - if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (nhmex_uncore_cbox.num_boxes > topology_num_cores_per_package()) + nhmex_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = nhmex_msr_uncores; } /* end of Nehalem-EX uncore support */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 7fd4334e12a17..9462fd9f3b7ab 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -364,8 +364,8 @@ static struct intel_uncore_type *snb_msr_uncores[] = { void snb_uncore_cpu_init(void) { uncore_msr_uncores = snb_msr_uncores; - if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (snb_uncore_cbox.num_boxes > topology_num_cores_per_package()) + snb_uncore_cbox.num_boxes = topology_num_cores_per_package(); } static void skl_uncore_msr_init_box(struct intel_uncore_box *box) @@ -428,8 +428,8 @@ static struct intel_uncore_type *skl_msr_uncores[] = { void skl_uncore_cpu_init(void) { uncore_msr_uncores = skl_msr_uncores; - if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (skl_uncore_cbox.num_boxes > topology_num_cores_per_package()) + skl_uncore_cbox.num_boxes = topology_num_cores_per_package(); snb_uncore_arb.ops = &skl_uncore_msr_ops; } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 82b37edccd535..9c624342336bf 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1174,8 +1174,8 @@ static struct intel_uncore_type *snbep_msr_uncores[] = { void snbep_uncore_cpu_init(void) { - if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (snbep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + snbep_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = snbep_msr_uncores; } @@ -1839,8 +1839,8 @@ static struct intel_uncore_type *ivbep_msr_uncores[] = { void ivbep_uncore_cpu_init(void) { - if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (ivbep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + ivbep_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = ivbep_msr_uncores; } @@ -2911,8 +2911,8 @@ static bool hswep_has_limit_sbox(unsigned int device) void hswep_uncore_cpu_init(void) { - if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (hswep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + hswep_uncore_cbox.num_boxes = topology_num_cores_per_package(); /* Detect 6-8 core systems with only two SBOXes */ if (hswep_has_limit_sbox(HSWEP_PCU_DID)) @@ -3274,8 +3274,8 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { - if (bdx_uncore_cbox.num_boxes > 
boot_cpu_data.x86_max_cores) - bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (bdx_uncore_cbox.num_boxes > topology_num_cores_per_package()) + bdx_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = bdx_msr_uncores; /* Detect systems with no SBOXes */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3c21968c8102f..f4f914dc47b59 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -162,8 +162,6 @@ struct cpuinfo_x86 { unsigned long loops_per_jiffy; /* protected processor identification number */ u64 ppin; - /* cpuid returned max cores value: */ - u16 x86_max_cores; u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 522204ee195b1..7a054387a2db2 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -301,7 +301,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, eax->split.type = types[leaf]; eax->split.level = levels[leaf]; eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1; + eax->split.num_cores_on_die = topology_num_cores_per_package(); if (assoc == 0xffff) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 831a834b0cfa7..fbe697366f6dd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1845,7 +1845,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_stepping = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index f40f3eecebc60..3baf3e4358347 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -28,7 +28,8 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id); seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id); seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); - seq_printf(m, "max_cores: %u\n", c->x86_max_cores); + seq_printf(m, "num_threads: %u\n", __num_threads_per_package); + seq_printf(m, "num_cores: %u\n", __num_cores_per_package); seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package); seq_printf(m, "max_threads_per_core:%u\n", __max_threads_per_core); return 0; diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 1e327881073f5..94953d749475d 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -430,10 +430,9 @@ static void trigger_thr_int(void *info) static u32 get_nbc_for_node(int node_id) { - struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * __max_threads_per_core) / topology_amd_nodes_per_pkg(); + cores_per_node = topology_num_threads_per_package() / topology_amd_nodes_per_pkg(); return cores_per_node * node_id; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 973784e337a44..53707b14fdbf2 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -641,7 +641,7 @@ static __init void calc_llc_size_per_core(struct cpuinfo_x86 *c) { u64 llc_size = c->x86_cache_size * 1024ULL; - do_div(llc_size, c->x86_max_cores); + do_div(llc_size, 
topology_num_cores_per_package()); llc_size_per_core = (unsigned int)llc_size; } diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index a2c3f8f5886d7..a50ae8d63d1c8 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -155,9 +155,6 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; - /* Maximum number of cores on this package */ - c->x86_max_cores = topology_unit_count(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); - c->topo.amd_node_id = tscan->amd_node_id; if (c->x86_vendor == X86_VENDOR_AMD) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index af34fabdbb5e3..c348a821e951a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -556,7 +556,7 @@ static void __init build_sched_topology(void) void set_cpu_sibling_map(int cpu) { bool has_smt = __max_threads_per_core > 1; - bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; + bool has_mp = has_smt || topology_num_cores_per_package() > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; int i, threads; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 454216bd6f1dd..5d60fd6672c6c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -475,7 +475,7 @@ static int vangogh_init_smc_tables(struct smu_context *smu) #ifdef CONFIG_X86 /* AMD x86 APU only */ - smu->cpu_core_num = boot_cpu_data.x86_max_cores; + smu->cpu_core_num = topology_num_cores_per_package(); #else smu->cpu_core_num = 4; #endif diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index 6307112c2c0c6..9ed2c4b6734ed 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -209,7 +209,7 @@ static ssize_t power1_average_show(struct device *dev, * With the new x86 topology modelling, x86_max_cores is the * compute unit number. */ - cu_num = boot_cpu_data.x86_max_cores; + cu_num = topology_num_cores_per_package(); ret = read_registers(data); if (ret) From 190fb9712314679e4b3967c098a044d31844fe8c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Mar 2024 12:17:02 +0100 Subject: [PATCH 100/109] x86/topology: Ignore non-present APIC IDs in a present package commit f0551af021308a2a1163dc63d1f1bba3594208bd upstream. Borislav reported that one of his systems has a broken MADT table which advertises eight present APICs and 24 non-present APICs in the same package. The non-present ones are considered hot-pluggable by the topology evaluation code, which is obviously bogus as there is no way to hot-plug within the same package. As the topology evaluation code accounts for hot-pluggable CPUs in a package, the maximum number of cores per package is computed wrong, which in turn causes the uncore performance counter driver to access non-existing MSRs. It will probably confuse other entities which rely on the maximum number of cores and threads per package too. Cure this by ignoring hot-pluggable APIC IDs within a present package. In theory it would be reasonable to just do this unconditionally, but then there is this thing called reality^Wvirtualization which ruins everything. Virtualization is the only existing user of "physical" hotplug and the virtualization tools allow the above scenario. Whether that is actually in use or not is unknown. 
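To make the bare-metal rule concrete before the virtualization exception discussed next, here is a minimal standalone C sketch (illustration only, not the kernel implementation; MAX_APIC_ID, PKG_SHIFT and the helper names are invented, and the real code derives the package range from the CPUID topology shifts and the phys_cpu_present_map bitmap):

/*
 * Standalone illustration only: a "hot-pluggable" (non-present) APIC ID
 * is rejected when its package already contains a present APIC ID.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_APIC_ID  64           /* toy limit for the example */
#define PKG_SHIFT    4            /* assume 16 APIC IDs per package */

static bool present_map[MAX_APIC_ID];

/* Count already-present APIC IDs in the package containing @apic_id. */
static unsigned int present_in_package(unsigned int apic_id)
{
	unsigned int base = apic_id & ~((1U << PKG_SHIFT) - 1);
	unsigned int cnt = 0;

	for (unsigned int id = base; id < base + (1U << PKG_SHIFT); id++)
		cnt += present_map[id];
	return cnt;
}

static bool accept_apic(unsigned int apic_id, bool present, bool bare_metal)
{
	if (!present && bare_metal && present_in_package(apic_id))
		return false;            /* bogus hot-plug entry in a present package */
	if (present)
		present_map[apic_id] = true;
	return true;
}

int main(void)
{
	accept_apic(0, true, true);                    /* boot CPU, package 0 */
	printf("%d\n", accept_apic(1, false, true));   /* 0: rejected, package 0 populated */
	printf("%d\n", accept_apic(16, false, true));  /* 1: accepted, package 1 is empty */
	return 0;
}

With a rule of this shape the bogus hot-pluggable entries from the reported MADT are dropped and the per-package core count stays correct.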
As it can be argued that the virtualization case is not affected by the issues which exposed the reported problem, allow the bogosity if the kernel determined that it is running in a VM for now. Fixes: 89b0f15f408f ("x86/cpu/topology: Get rid of cpuinfo::x86_max_cores") Reported-by: Borislav Petkov (AMD) Intel-SIG: commit f0551af02130 x86/topology: Ignore non-present APIC IDs in a present package. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/87a5nbvccx.ffs@tglx [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 39 ++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 41dd8e0eb5117..d7296f501faf0 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -157,6 +158,20 @@ static __init bool check_for_real_bsp(u32 apic_id) return true; } +static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level, + unsigned long *map) +{ + unsigned int id, end, cnt = 0; + + /* Calculate the exclusive end */ + end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]); + + /* Unfortunately there is no bitmap_weight_range() */ + for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id)) + cnt++; + return cnt; +} + static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) { int cpu, dom; @@ -178,6 +193,20 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) cpuid_to_apicid[cpu] = apic_id; topo_set_cpuids(cpu, apic_id, acpi_id); } else { + u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN); + + /* + * Check for present APICs in the same package when running + * on bare metal. Allow the bogosity in a guest. + */ + if (hypervisor_is_type(X86_HYPER_NATIVE) && + topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) { + pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n", + apic_id); + topo_info.nr_rejected_cpus++; + return; + } + topo_info.nr_disabled_cpus++; } @@ -280,7 +309,6 @@ unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_uni { /* Remove the bits below @at_level to get the proper level ID of @apicid */ unsigned int lvlid = topo_apicid(apicid, at_level); - unsigned int id, end, cnt = 0; if (lvlid >= MAX_LOCAL_APIC) return 0; @@ -290,14 +318,7 @@ unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_uni return 0; if (which_units == at_level) return 1; - - /* Calculate the exclusive end */ - end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]); - /* Unfortunately there is no bitmap_weight_range() */ - for (id = find_next_bit(apic_maps[which_units].map, end, lvlid); - id < end; id = find_next_bit(apic_maps[which_units].map, end, ++id)) - cnt++; - return cnt; + return topo_unit_count(lvlid, at_level, apic_maps[which_units].map); } #ifdef CONFIG_ACPI_HOTPLUG_CPU From 2b7d56ee6eeb2a2ec4c0c85932b3f6fcebed23b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Mar 2024 19:56:36 +0100 Subject: [PATCH 101/109] x86/topology: Don't evaluate logical IDs during early boot commit 7af541cee1e0eb48c6eb439bc6309175339fa96f upstream. The local APICs have not yet been enumerated so the logical ID evaluation from the topology bitmaps does not work and would return an error code. 
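As a reference for why the early lookup must fail, a standalone sketch (names invented, not the kernel code) of logical-ID assignment from a physical-ID bitmap: the logical ID is simply the number of registered physical IDs below the CPU's own ID, so an empty bitmap can only yield an error.

#include <stdbool.h>
#include <stdio.h>

#define MAX_PKG_ID 32

static bool pkg_map[MAX_PKG_ID];        /* filled during APIC enumeration */

static int get_logical_pkg_id(unsigned int phys_id)
{
	int logical = 0;

	if (phys_id >= MAX_PKG_ID || !pkg_map[phys_id])
		return -1;              /* not enumerated yet -> error */

	for (unsigned int id = 0; id < phys_id; id++)
		logical += pkg_map[id];
	return logical;
}

int main(void)
{
	printf("%d\n", get_logical_pkg_id(3));   /* -1: empty bitmap (early boot) */
	pkg_map[0] = pkg_map[3] = true;          /* after enumeration */
	printf("%d\n", get_logical_pkg_id(3));   /* 1 */
	return 0;
}

Until enumeration has set the CPU's own bit there is nothing to count, which is why the patch defers the evaluation to the final run.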
Skip the evaluation during the early boot CPUID evaluation and only apply it on the final run. Fixes: 380414be78bf ("x86/cpu/topology: Use topology logical mapping mechanism") Intel-SIG: commit 7af541cee1e0 x86/topology: Don't evaluate logical IDs during early boot. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20240322185305.186943142@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_common.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index a50ae8d63d1c8..9a6069e7133c9 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -140,7 +140,7 @@ static void parse_topology(struct topo_scan *tscan, bool early) } } -static void topo_set_ids(struct topo_scan *tscan) +static void topo_set_ids(struct topo_scan *tscan, bool early) { struct cpuinfo_x86 *c = tscan->c; u32 apicid = c->topo.apicid; @@ -148,8 +148,10 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN); c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN); - c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); - c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + if (!early) { + c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); + c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + } /* Package relative core ID */ c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> @@ -187,7 +189,7 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]); } - topo_set_ids(&tscan); + topo_set_ids(&tscan, false); } void __init cpu_init_topology(struct cpuinfo_x86 *c) @@ -208,7 +210,7 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) x86_topo_system.dom_size[dom] = 1U << sft; } - topo_set_ids(&tscan); + topo_set_ids(&tscan, true); /* * AMD systems have Nodes per package which cannot be mapped to From 043bf61631f991ea32bb95db3b6bfc7b5b59579c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 8 Apr 2024 15:22:01 +0200 Subject: [PATCH 102/109] x86/topology: Don't update cpu_possible_map in topo_set_cpuids() commit a9025cd1c673a8d6eefc79d911075b8b452eba8f upstream. topo_set_cpuids() updates cpu_present_map and cpu_possible map. It is invoked during enumeration and "physical hotplug" operations. In the latter case this results in a kernel crash because cpu_possible_map is marked read only after init completes. There is no reason to update cpu_possible_map in that function. During enumeration cpu_possible_map is not relevant and gets fully initialized after enumeration completed. On "physical hotplug" the bit is already set because the kernel allows only CPUs to be plugged which have been enumerated and associated to a CPU number during early boot. Remove the bogus update of cpu_possible_map. Fixes: 0e53e7b656cf ("x86/cpu/topology: Sanitize the APIC admission logic") Reported-by: Jonathan Cameron Intel-SIG: commit a9025cd1c673 x86/topology: Don't update cpu_possible_map in topo_set_cpuids(). 
x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87ttkc6kwx.ffs@tglx [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index d7296f501faf0..eabafe1dae38a 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; #endif - set_cpu_possible(cpu, true); set_cpu_present(cpu, true); } @@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) topo_info.nr_disabled_cpus++; } - /* Register present and possible CPUs in the domain maps */ + /* + * Register present and possible CPUs in the domain + * maps. cpu_possible_map will be updated in + * topology_init_possible_cpus() after enumeration is done. + */ for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map); }
From 3204f406df9fcb1fc57aa48857b45aa7bbe42b0f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 2 May 2024 16:39:47 +0200 Subject: [PATCH 103/109] x86/xen/smp_pv: Register the boot CPU APIC properly commit 8a95db3bf8a32186fe448b1a934afbd5f1da0263 upstream. The topology core expects the boot APIC to be registered from early APIC detection first and then again when the firmware tables are evaluated. This is used for detecting the real BSP CPU on a kexec kernel. The recent conversion of XEN/PV to register fake APIC IDs failed to register the boot CPU APIC correctly as it only registers it once. This causes the BSP detection mechanism to trigger wrongly: CPU topo: Boot CPU APIC ID not the first enumerated APIC ID: 0 > 1 Additionally this results in one CPU being ignored. Register the boot CPU APIC twice so that the XEN/PV fake enumeration behaves like real firmware. Reported-by: Juergen Gross Fixes: e75307023466 ("x86/xen/smp_pv: Register fake APICs") Intel-SIG: commit 8a95db3bf8a3 x86/xen/smp_pv: Register the boot CPU APIC properly. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/87a5l8s2fg.ffs@tglx Signed-off-by: Juergen Gross [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/smp_pv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 27d1a5b7f571a..ac41d83b38d3c 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -154,9 +154,9 @@ static void __init xen_pv_smp_config(void) u32 apicid = 0; int i; - topology_register_boot_apic(apicid++); + topology_register_boot_apic(apicid); - for (i = 1; i < nr_cpu_ids; i++) + for (i = 0; i < nr_cpu_ids; i++) topology_register_apic(apicid++, CPU_ACPIID_INVALID, true); /* Pretend to be a proper enumerated system */
From e953f77ad5b4d50faadf4403c86f9084ac76eab1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 17 May 2024 16:40:36 +0200 Subject: [PATCH 104/109] x86/topology: Handle bogus ACPI tables correctly commit 9d22c96316ac59ed38e80920c698fed38717b91b upstream.
The ACPI specification clearly states how the processors should be enumerated in the MADT: "To ensure that the boot processor is supported post initialization, two guidelines should be followed. The first is that OSPM should initialize processors in the order that they appear in the MADT. The second is that platform firmware should list the boot processor as the first processor entry in the MADT. ... Failure of OSPM implementations and platform firmware to abide by these guidelines can result in both unpredictable and non optimal platform operation." The kernel relies on that ordering to detect the real BSP on crash kernels which is important to avoid sending a INIT IPI to it as that would cause a full machine reset. On a Dell XPS 16 9640 the BIOS ignores this rule and enumerates the CPUs in the wrong order. As a consequence the kernel falsely detects a crash kernel and disables the corresponding CPU. Prevent this by checking the IA32_APICBASE MSR for the BSP bit on the boot CPU. If that bit is set, then the MADT based BSP detection can be safely ignored. If the kernel detects a mismatch between the BSP bit and the first enumerated MADT entry then emit a firmware bug message. This obviously also has to be taken into account when the boot APIC ID and the first enumerated APIC ID match. If the boot CPU does not have the BSP bit set in the APICBASE MSR then there is no way for the boot CPU to determine which of the CPUs is the real BSP. Sending an INIT to the real BSP would reset the machine so the only sane way to deal with that is to limit the number of CPUs to one and emit a corresponding warning message. Fixes: 5c5682b9f87a ("x86/cpu: Detect real BSP on crash kernels") Reported-by: Carsten Tolkmit Intel-SIG: commit 9d22c96316ac x86/topology: Handle bogus ACPI tables correctly. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Carsten Tolkmit Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87le48jycb.ffs@tglx Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218837 [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 53 ++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index eabafe1dae38a..d1edea0a0999b 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -128,6 +128,9 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) static __init bool check_for_real_bsp(u32 apic_id) { + bool is_bsp = false, has_apic_base = boot_cpu_data.x86 >= 6; + u64 msr; + /* * There is no real good way to detect whether this a kdump() * kernel, but except on the Voyager SMP monstrosity which is not @@ -144,17 +147,61 @@ static __init bool check_for_real_bsp(u32 apic_id) if (topo_info.real_bsp_apic_id != BAD_APICID) return false; + /* + * Check whether the enumeration order is broken by evaluating the + * BSP bit in the APICBASE MSR. If the CPU does not have the + * APICBASE MSR then the BSP detection is not possible and the + * kernel must rely on the firmware enumeration order. + */ + if (has_apic_base) { + rdmsrl(MSR_IA32_APICBASE, msr); + is_bsp = !!(msr & MSR_IA32_APICBASE_BSP); + } + if (apic_id == topo_info.boot_cpu_apic_id) { - topo_info.real_bsp_apic_id = apic_id; - return false; + /* + * If the boot CPU has the APIC BSP bit set then the + * firmware enumeration is agreeing. 
If the CPU does not + * have the APICBASE MSR then the only choice is to trust + * the enumeration order. + */ + if (is_bsp || !has_apic_base) { + topo_info.real_bsp_apic_id = apic_id; + return false; + } + /* + * If the boot APIC is enumerated first, but the APICBASE + * MSR does not have the BSP bit set, then there is no way + * to discover the real BSP here. Assume a crash kernel and + * limit the number of CPUs to 1 as an INIT to the real BSP + * would reset the machine. + */ + pr_warn("Enumerated BSP APIC %x is not marked in APICBASE MSR\n", apic_id); + pr_warn("Assuming crash kernel. Limiting to one CPU to prevent machine INIT\n"); + set_nr_cpu_ids(1); + goto fwbug; } - pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n", + pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n", topo_info.boot_cpu_apic_id, apic_id); + + if (is_bsp) { + /* + * The boot CPU has the APIC BSP bit set. Use it and complain + * about the broken firmware enumeration. + */ + topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id; + goto fwbug; + } + pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n"); topo_info.real_bsp_apic_id = apic_id; return true; + +fwbug: + pr_warn(FW_BUG "APIC enumeration order not specification compliant\n"); + return false; } static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
From 161e54a2d8951c0282bfe4b08d8213e74f105a0f Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 2 Dec 2024 14:58:45 +0000 Subject: [PATCH 105/109] x86/cpu/topology: Remove limit of CPUs due to disabled IO/APIC commit 73da582a476ea6e3512f89f8ed57dfed945829a2 upstream. The rework of possible CPUs management erroneously disabled SMP when the IO/APIC is disabled either by the 'noapic' command line parameter or during IO/APIC setup. SMP is possible without IO/APIC. Remove the ioapic_is_disabled conditions from the relevant possible CPU management code paths to restore the original behaviour. Fixes: 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management") Intel-SIG: commit 73da582a476e x86/cpu/topology: Remove limit of CPUs due to disabled IO/APIC.
x86/apic: Rework APIC registration Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241202145905.1482-1-ffmancera@riseup.net [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index d1edea0a0999b..f50533080cf0e 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -428,8 +428,8 @@ void __init topology_apply_cmdline_limits_early(void) { unsigned int possible = nr_cpu_ids; - /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */ - if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled) + /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' */ + if (!setup_max_cpus || apic_is_disabled) possible = 1; /* 'possible_cpus=N' */ @@ -443,7 +443,7 @@ void __init topology_apply_cmdline_limits_early(void) static __init bool restrict_to_up(void) { - if (!smp_found_config || ioapic_is_disabled) + if (!smp_found_config) return true; /* * XEN PV is special as it does not advertise the local APIC From 36040f891b7c7f995e0226bacec50109a4518704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Mon, 7 Apr 2025 15:24:27 +0200 Subject: [PATCH 106/109] x86/acpi: Don't limit CPUs to 1 for Xen PV guests due to disabled ACPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8b37357a78d7fa13d88ea822b35b40137da1c85e upstream. Xen disables ACPI for PV guests in DomU, which causes acpi_mps_check() to return 1 when CONFIG_X86_MPPARSE is not set. As a result, the local APIC is disabled and the guest is later limited to a single vCPU, despite being configured with more. This regression was introduced in version 6.9 in commit 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management"), which added an early check that limits CPUs to 1 if apic_is_disabled. Update the acpi_mps_check() logic to return 0 early when running as a Xen PV guest in DomU, preventing APIC from being disabled in this specific case and restoring correct multi-vCPU behaviour. Fixes: 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management") Intel-SIG: commit 8b37357a78d7 x86/acpi: Don't limit CPUs to 1 for Xen PV guests due to disabled ACPI. x86/apic: Rework APIC registration Signed-off-by: Petr Vaněk Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250407132445.6732-2-arkamar@atlas.cz [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/acpi/boot.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8bea0db2a7e6a..377cf62cac1db 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -23,6 +23,8 @@ #include #include +#include + #include #include #include @@ -1695,6 +1697,15 @@ int __init acpi_mps_check(void) { #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE) /* mptable code is not built-in*/ + + /* + * Xen disables ACPI in PV DomU guests but it still emulates APIC and + * supports SMP. Returning early here ensures that APIC is not disabled + * unnecessarily and the guest is not limited to a single vCPU. 
+ */ + if (xen_pv_domain() && !xen_initial_domain()) + return 0; + if (acpi_disabled || acpi_noirq) { pr_warn("MPS support code is not built-in, using acpi=off or acpi=noirq or pci=noacpi may have problem\n"); return 1; From 753e6d6110d9efb7bdd3a2012752c0604e2d2160 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 4 Oct 2024 12:22:12 +0200 Subject: [PATCH 107/109] x86/xen: mark boot CPU of PV guest in MSR_IA32_APICBASE commit bf56c410162dbf2e27906acbdcd904cbbfdba302 upstream. Recent topology checks of the x86 boot code uncovered the need for PV guests to have the boot cpu marked in the APICBASE MSR. Fixes: 9d22c96316ac ("x86/topology: Handle bogus ACPI tables correctly") Reported-by: Niels Dettenbach Intel-SIG: commit bf56c410162d x86/xen: mark boot CPU of PV guest in MSR_IA32_APICBASE. x86/apic: Rework APIC registration Signed-off-by: Juergen Gross Reviewed-by: Thomas Gleixner Signed-off-by: Juergen Gross [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/enlighten_pv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 68f619076ed6c..0a631ba766d1d 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1036,6 +1036,10 @@ static u64 xen_do_read_msr(unsigned int msr, int *err) switch (msr) { case MSR_IA32_APICBASE: val &= ~X2APIC_ENABLE; + if (smp_processor_id() == 0) + val |= MSR_IA32_APICBASE_BSP; + else + val &= ~MSR_IA32_APICBASE_BSP; break; } return val; From 6906af94e671de7d88e7d5a185e081db26f9028f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Mar 2024 19:56:38 +0100 Subject: [PATCH 108/109] x86/topology: Handle the !APIC case gracefully commit 5e25eb25dae9fa0700bbe42aff0e2f105fcd096a upstream. If there is no local APIC enumerated and registered then the topology bitmaps are empty. Therefore, topology_init_possible_cpus() will die with a division by zero exception. Prevent this by registering a fake APIC id to populate the topology bitmap. This also allows to use all topology query interfaces unconditionally. It does not affect the actual APIC code because either the local APIC address was not registered or no local APIC could be detected. Fixes: f1f758a80516 ("x86/topology: Add a mechanism to track topology via APIC IDs") Reported-by: Guenter Roeck Reported-by: Linus Torvalds Intel-SIG: commit 5e25eb25dae9 x86/topology: Handle the !APIC case gracefully. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20240322185305.242709302@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index f50533080cf0e..a8d1cc6d223df 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -465,6 +465,17 @@ void __init topology_init_possible_cpus(void) unsigned int total = assigned + disabled; u32 apicid, firstid; + /* + * If there was no APIC registered, then fake one so that the + * topology bitmap is populated. That ensures that the code below + * is valid and the various query interfaces can be used + * unconditionally. This does not affect the actual APIC code in + * any way because either the local APIC address has not been + * registered or the local APIC was disabled on the command line. 
+ */ + if (topo_info.boot_cpu_apic_id == BAD_APICID) + topology_register_boot_apic(0); + if (!restrict_to_up()) { if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { disabled += assigned - nr_cpu_ids; From 3d5dcf56ab8149d0382f0c745ca92e0f3e7efd41 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Mon, 29 Jul 2024 10:12:02 +0800 Subject: [PATCH 109/109] x86/tsc: Use topology_max_packages() to get package number commit b4bac279319d3082eb42f074799c7b18ba528c71 upstream. Commit b50db7095fe0 ("x86/tsc: Disable clocksource watchdog for TSC on qualified platorms") was introduced to solve problem that sometimes TSC clocksource is wrongly judged as unstable by watchdog like 'jiffies', HPET, etc. In it, the hardware package number is a key factor for judging whether to disable the watchdog for TSC, and 'nr_online_nodes' was chosen due to, at that time (kernel v5.1x), it is available in early boot phase before registering 'tsc-early' clocksource, where all non-boot CPUs are not brought up yet. Dave and Rui pointed out there are many cases in which 'nr_online_nodes' is cheated and not accurate, like: * SNC (sub-numa cluster) mode enabled * numa emulation (numa=fake=8 etc.) * numa=off * platforms with CPU-less HBM nodes, CPU-less Optane memory nodes. * 'maxcpus=' cmdline setup, where chopped CPUs could be onlined later * 'nr_cpus=', 'possible_cpus=' cmdline setup, where chopped CPUs can not be onlined after boot The SNC case is the most user-visible case, as many CSP (Cloud Service Provider) enable this feature in their server fleets. When SNC3 enabled, a 2 socket machine will appear to have 6 NUMA nodes, and get impacted by the issue in reality. Thomas' recent patchset of refactoring x86 topology code improves topology_max_packages() greatly, by making it more accurate and available in early boot phase, which works well in most of the above cases. The only exceptions are 'nr_cpus=' and 'possible_cpus=' setup, which may under-estimate the package number. As during topology setup, the boot CPU iterates through all enumerated APIC IDs and either accepts or rejects the APIC ID. For accepted IDs, it figures out which bits of the ID map to the package number. It tracks which package numbers have been seen in a bitmap. topology_max_packages() just returns the number of bits set in that bitmap. 'nr_cpus=' and 'possible_cpus=' can cause more APIC IDs to be rejected and can artificially lower the number of bits in the package bitmap and thus topology_max_packages(). This means that, for example, a system with 8 physical packages might reject all the CPUs on 6 of those packages and be left with only 2 packages and 2 bits set in the package bitmap. It needs the TSC watchdog, but would disable it anyway. This isn't ideal, but it only happens for debug-oriented options. This is fixable by tracking the package numbers for rejected CPUs. But it's not worth the trouble for debugging. So use topology_max_packages() to replace nr_online_nodes(). Reported-by: Dave Hansen Intel-SIG: commit b4bac279319d x86/tsc: Use topology_max_packages() to get package number. 
x86/apic: Rework APIC registration Signed-off-by: Feng Tang Signed-off-by: Thomas Gleixner Reviewed-by: Waiman Long Link: https://lore.kernel.org/all/20240729021202.180955-1-feng.tang@intel.com Closes: https://lore.kernel.org/lkml/a4860054-0f16-6513-f121-501048431086@intel.com/ [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/tsc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 81e9b436c3b68..d4324901a4a97 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -28,6 +28,7 @@ #include #include #include +#include #include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ @@ -1250,15 +1251,12 @@ static void __init check_system_tsc_reliable(void) * - TSC which does not stop in C-States * - the TSC_ADJUST register which allows to detect even minimal * modifications - * - not more than two sockets. As the number of sockets cannot be - * evaluated at the early boot stage where this has to be - * invoked, check the number of online memory nodes as a - * fallback solution which is an reasonable estimate. + * - not more than four packages */ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && boot_cpu_has(X86_FEATURE_TSC_ADJUST) && - nr_online_nodes <= 4) + topology_max_packages() <= 4) tsc_disable_clocksource_watchdog(); }
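For illustration (not part of the patch), a small userspace sketch that approximates the package-count side of this check by counting distinct physical_package_id values under sysfs; the stop-at-first-missing-CPU-directory behaviour and the fixed 256-package bound are simplifying assumptions, and the real check additionally requires the CONSTANT_TSC, NONSTOP_TSC and TSC_ADJUST CPU features:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	bool seen[256] = { false };
	unsigned int packages = 0;

	for (int cpu = 0; ; cpu++) {
		char path[128];
		int pkg;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
		FILE *f = fopen(path, "r");
		if (!f)
			break;                  /* assume no further online CPUs */
		if (fscanf(f, "%d", &pkg) == 1 && pkg >= 0 && pkg < 256 && !seen[pkg]) {
			seen[pkg] = true;
			packages++;
		}
		fclose(f);
	}
	printf("packages: %u -> TSC watchdog %s\n", packages,
	       packages <= 4 ? "can be disabled" : "stays enabled");
	return 0;
}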