Patrick Williams | 2194f50 | 2022-10-16 14:26:09 -0500 | [diff] [blame^] | 1 | From a52e1e964aa4f2592ebffb30df048cc85fa5a02c Mon Sep 17 00:00:00 2001 |
Brad Bishop | bec4ebc | 2022-08-03 09:55:16 -0400 | [diff] [blame] | 2 | From: Robin Murphy <robin.murphy@arm.com> |
| 3 | Date: Fri, 3 Dec 2021 11:44:51 +0000 |
Patrick Williams | 2194f50 | 2022-10-16 14:26:09 -0500 | [diff] [blame^] | 4 | Subject: [PATCH 16/40] perf/arm-cmn: Account for NUMA affinity |
Brad Bishop | bec4ebc | 2022-08-03 09:55:16 -0400 | [diff] [blame] | 5 | |
| 6 | On a system with multiple CMN meshes, ideally we'd want to access each |
| 7 | PMU from within its own mesh, rather than with a long CML round-trip, |
| 8 | wherever feasible. Since such a system is likely to be presented as |
| 9 | multiple NUMA nodes, let's also hope a proximity domain is specified |
| 10 | for each CMN programming interface, and use that to guide our choice |
| 11 | of IRQ affinity to favour a node-local CPU where possible. |
| 12 | |
| 13 | Signed-off-by: Robin Murphy <robin.murphy@arm.com> |
| 14 | Link: https://lore.kernel.org/r/32438b0d016e0649d882d47d30ac2000484287b9.1638530442.git.robin.murphy@arm.com |
| 15 | Signed-off-by: Will Deacon <will@kernel.org> |
| 16 | |
| 17 | Upstream-Status: Backport [https://lore.kernel.org/r/32438b0d016e0649d882d47d30ac2000484287b9.1638530442.git.robin.murphy@arm.com] |
| 18 | Signed-off-by: Rupinderjit Singh <rupinderjit.singh@arm.com> |
| 19 | --- |
| 20 | drivers/perf/arm-cmn.c | 51 +++++++++++++++++++++++++++++++----------- |
| 21 | 1 file changed, 38 insertions(+), 13 deletions(-) |
| 22 | |
| 23 | diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c |
Patrick Williams | 2194f50 | 2022-10-16 14:26:09 -0500 | [diff] [blame^] | 24 | index 400eb7f579dc..02b898dbba91 100644 |
Brad Bishop | bec4ebc | 2022-08-03 09:55:16 -0400 | [diff] [blame] | 25 | --- a/drivers/perf/arm-cmn.c |
| 26 | +++ b/drivers/perf/arm-cmn.c |
| 27 | @@ -1147,23 +1147,47 @@ static int arm_cmn_commit_txn(struct pmu *pmu) |
| 28 | return 0; |
| 29 | } |
| 30 | |
| 31 | -static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) |
| 32 | +static void arm_cmn_migrate(struct arm_cmn *cmn, unsigned int cpu) |
| 33 | +{ |
| 34 | + unsigned int i; |
| 35 | + |
| 36 | + perf_pmu_migrate_context(&cmn->pmu, cmn->cpu, cpu); |
| 37 | + for (i = 0; i < cmn->num_dtcs; i++) |
| 38 | + irq_set_affinity(cmn->dtc[i].irq, cpumask_of(cpu)); |
| 39 | + cmn->cpu = cpu; |
| 40 | +} |
| 41 | + |
| 42 | +static int arm_cmn_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) |
| 43 | { |
| 44 | struct arm_cmn *cmn; |
| 45 | - unsigned int i, target; |
| 46 | + int node; |
| 47 | |
| 48 | - cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node); |
| 49 | - if (cpu != cmn->cpu) |
| 50 | - return 0; |
| 51 | + cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node); |
| 52 | + node = dev_to_node(cmn->dev); |
| 53 | + if (node != NUMA_NO_NODE && cpu_to_node(cmn->cpu) != node && cpu_to_node(cpu) == node) |
| 54 | + arm_cmn_migrate(cmn, cpu); |
| 55 | + return 0; |
| 56 | +} |
| 57 | + |
| 58 | +static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) |
| 59 | +{ |
| 60 | + struct arm_cmn *cmn; |
| 61 | + unsigned int target; |
| 62 | + int node; |
| 63 | + cpumask_t mask; |
| 64 | |
| 65 | - target = cpumask_any_but(cpu_online_mask, cpu); |
| 66 | - if (target >= nr_cpu_ids) |
| 67 | + cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node); |
| 68 | + if (cpu != cmn->cpu) |
| 69 | return 0; |
| 70 | |
| 71 | - perf_pmu_migrate_context(&cmn->pmu, cpu, target); |
| 72 | - for (i = 0; i < cmn->num_dtcs; i++) |
| 73 | - irq_set_affinity(cmn->dtc[i].irq, cpumask_of(target)); |
| 74 | - cmn->cpu = target; |
| 75 | + node = dev_to_node(cmn->dev); |
| 76 | + if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) && |
| 77 | + cpumask_andnot(&mask, &mask, cpumask_of(cpu))) |
| 78 | + target = cpumask_any(&mask); |
| 79 | + else |
| 80 | + target = cpumask_any_but(cpu_online_mask, cpu); |
| 81 | + if (target < nr_cpu_ids) |
| 82 | + arm_cmn_migrate(cmn, target); |
| 83 | return 0; |
| 84 | } |
| 85 | |
| 86 | @@ -1532,7 +1556,7 @@ static int arm_cmn_probe(struct platform_device *pdev) |
| 87 | if (err) |
| 88 | return err; |
| 89 | |
| 90 | - cmn->cpu = raw_smp_processor_id(); |
| 91 | + cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev)); |
| 92 | cmn->pmu = (struct pmu) { |
| 93 | .module = THIS_MODULE, |
| 94 | .attr_groups = arm_cmn_attr_groups, |
| 95 | @@ -1608,7 +1632,8 @@ static int __init arm_cmn_init(void) |
| 96 | int ret; |
| 97 | |
| 98 | ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, |
| 99 | - "perf/arm/cmn:online", NULL, |
| 100 | + "perf/arm/cmn:online", |
| 101 | + arm_cmn_pmu_online_cpu, |
| 102 | arm_cmn_pmu_offline_cpu); |
| 103 | if (ret < 0) |
| 104 | return ret; |
| 105 | -- |
Patrick Williams | 2194f50 | 2022-10-16 14:26:09 -0500 | [diff] [blame^] | 106 | 2.34.1 |
Brad Bishop | bec4ebc | 2022-08-03 09:55:16 -0400 | [diff] [blame] | 107 | |