| From 8e52fd71e693619f7a58de2692e59f0c826e9988 Mon Sep 17 00:00:00 2001 |
| From: Michael Jeanson <mjeanson@efficios.com> |
| Date: Mon, 4 Apr 2022 13:52:57 -0400 |
| Subject: [PATCH 03/10] fix: sched/tracing: Don't re-read p->state when |
| emitting sched_switch event (v5.18) |
| |
| See upstream commit : |
| |
| commit fa2c3254d7cfff5f7a916ab928a562d1165f17bb |
| Author: Valentin Schneider <valentin.schneider@arm.com> |
| Date: Thu Jan 20 16:25:19 2022 +0000 |
| |
| sched/tracing: Don't re-read p->state when emitting sched_switch event |
| |
| As of commit |
| |
| c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") |
| |
| the following sequence becomes possible: |
| |
| p->__state = TASK_INTERRUPTIBLE; |
| __schedule() |
| deactivate_task(p); |
| ttwu() |
| READ !p->on_rq |
| p->__state=TASK_WAKING |
| trace_sched_switch() |
| __trace_sched_switch_state() |
| task_state_index() |
| return 0; |
| |
| TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in |
| the trace event. |
| |
| Prevent this by pushing the value read from __schedule() down the trace |
| event. |
| |
| Upstream-Status: Backport |
| |
| Change-Id: I46743cd006be4b4d573cae2d77df7d6d16744d04 |
| Signed-off-by: Michael Jeanson <mjeanson@efficios.com> |
| Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> |
| --- |
| include/instrumentation/events/sched.h | 88 +++++++++++++++++++++++--- |
| 1 file changed, 78 insertions(+), 10 deletions(-) |
| |
| diff --git a/include/instrumentation/events/sched.h b/include/instrumentation/events/sched.h |
| index 91953a6f..339bec94 100644 |
| --- a/include/instrumentation/events/sched.h |
| +++ b/include/instrumentation/events/sched.h |
| @@ -20,7 +20,37 @@ |
| #ifndef _TRACE_SCHED_DEF_ |
| #define _TRACE_SCHED_DEF_ |
| |
| -#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0)) |
| +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0)) |
| + |
| +static inline long __trace_sched_switch_state(bool preempt, |
| + unsigned int prev_state, |
| + struct task_struct *p) |
| +{ |
| + unsigned int state; |
| + |
| +#ifdef CONFIG_SCHED_DEBUG |
| + BUG_ON(p != current); |
| +#endif /* CONFIG_SCHED_DEBUG */ |
| + |
| + /* |
| + * Preemption ignores task state, therefore preempted tasks are always |
| + * RUNNING (we will not have dequeued if state != RUNNING). |
| + */ |
| + if (preempt) |
| + return TASK_REPORT_MAX; |
| + |
| + /* |
| + * task_state_index() uses fls() and returns a value from 0-8 range. |
| + * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using |
| + * it for left shift operation to get the correct task->state |
| + * mapping. |
| + */ |
| + state = __task_state_index(prev_state, p->exit_state); |
| + |
| + return state ? (1 << (state - 1)) : state; |
| +} |
| + |
| +#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0)) |
| |
| static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) |
| { |
| @@ -321,43 +351,81 @@ LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new, |
| /* |
| * Tracepoint for task switches, performed by the scheduler: |
| */ |
| + |
| +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0)) |
| LTTNG_TRACEPOINT_EVENT(sched_switch, |
| |
| -#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0)) |
| TP_PROTO(bool preempt, |
| - struct task_struct *prev, |
| - struct task_struct *next), |
| + unsigned int prev_state, |
| + struct task_struct *prev, |
| + struct task_struct *next), |
| |
| - TP_ARGS(preempt, prev, next), |
| + TP_ARGS(preempt, prev_state, prev, next), |
| + |
| + TP_FIELDS( |
| + ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN) |
| + ctf_integer(pid_t, prev_tid, prev->pid) |
| + ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO) |
| +#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM |
| + ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev)) |
| #else |
| - TP_PROTO(struct task_struct *prev, |
| + ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev)) |
| +#endif |
| + ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN) |
| + ctf_integer(pid_t, next_tid, next->pid) |
| + ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO) |
| + ) |
| +) |
| + |
| +#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0)) |
| + |
| +LTTNG_TRACEPOINT_EVENT(sched_switch, |
| + |
| + TP_PROTO(bool preempt, |
| + struct task_struct *prev, |
| struct task_struct *next), |
| |
| - TP_ARGS(prev, next), |
| -#endif /* #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0)) */ |
| + TP_ARGS(preempt, prev, next), |
| |
| TP_FIELDS( |
| ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN) |
| ctf_integer(pid_t, prev_tid, prev->pid) |
| ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO) |
| -#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0)) |
| #ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM |
| ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev)) |
| #else |
| ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev)) |
| #endif |
| + ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN) |
| + ctf_integer(pid_t, next_tid, next->pid) |
| + ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO) |
| + ) |
| +) |
| + |
| #else |
| + |
| +LTTNG_TRACEPOINT_EVENT(sched_switch, |
| + |
| + TP_PROTO(struct task_struct *prev, |
| + struct task_struct *next), |
| + |
| + TP_ARGS(prev, next), |
| + |
| + TP_FIELDS( |
| + ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN) |
| + ctf_integer(pid_t, prev_tid, prev->pid) |
| + ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO) |
| #ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM |
| ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(prev)) |
| #else |
| ctf_integer(long, prev_state, __trace_sched_switch_state(prev)) |
| -#endif |
| #endif |
| ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN) |
| ctf_integer(pid_t, next_tid, next->pid) |
| ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO) |
| ) |
| ) |
| +#endif |
| |
| /* |
| * Tracepoint for a task being migrated: |
| -- |
| 2.19.1 |
| |