blob: afe514de82c140270216b9d2e9e27da52d132080 [file] [log] [blame]
From 8e52fd71e693619f7a58de2692e59f0c826e9988 Mon Sep 17 00:00:00 2001
From: Michael Jeanson <mjeanson@efficios.com>
Date: Mon, 4 Apr 2022 13:52:57 -0400
Subject: [PATCH 03/10] fix: sched/tracing: Don't re-read p->state when
emitting sched_switch event (v5.18)
See upstream commit:
commit fa2c3254d7cfff5f7a916ab928a562d1165f17bb
Author: Valentin Schneider <valentin.schneider@arm.com>
Date: Thu Jan 20 16:25:19 2022 +0000
sched/tracing: Don't re-read p->state when emitting sched_switch event
As of commit
c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu")
the following sequence becomes possible:
                      p->__state = TASK_INTERRUPTIBLE;
                      __schedule()
                        deactivate_task(p);
  ttwu()
    READ !p->on_rq
    p->__state=TASK_WAKING
                        trace_sched_switch()
                          __trace_sched_switch_state()
                            task_state_index()
                              return 0;
TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in
the trace event.
Prevent this by pushing the value read from __schedule() down the trace
event.
Upstream-Status: Backport
Change-Id: I46743cd006be4b4d573cae2d77df7d6d16744d04
Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
include/instrumentation/events/sched.h | 88 +++++++++++++++++++++++---
1 file changed, 78 insertions(+), 10 deletions(-)
diff --git a/include/instrumentation/events/sched.h b/include/instrumentation/events/sched.h
index 91953a6f..339bec94 100644
--- a/include/instrumentation/events/sched.h
+++ b/include/instrumentation/events/sched.h
@@ -20,7 +20,37 @@
#ifndef _TRACE_SCHED_DEF_
#define _TRACE_SCHED_DEF_
-#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0))
+#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0))
+
+static inline long __trace_sched_switch_state(bool preempt,
+ unsigned int prev_state,
+ struct task_struct *p)
+{
+ unsigned int state;
+
+#ifdef CONFIG_SCHED_DEBUG
+ BUG_ON(p != current);
+#endif /* CONFIG_SCHED_DEBUG */
+
+ /*
+ * Preemption ignores task state, therefore preempted tasks are always
+ * RUNNING (we will not have dequeued if state != RUNNING).
+ */
+ if (preempt)
+ return TASK_REPORT_MAX;
+
+ /*
+ * task_state_index() uses fls() and returns a value from 0-8 range.
+ * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
+ * it for left shift operation to get the correct task->state
+ * mapping.
+ */
+ state = __task_state_index(prev_state, p->exit_state);
+
+ return state ? (1 << (state - 1)) : state;
+}
+
+#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0))
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
@@ -321,43 +351,81 @@ LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
/*
* Tracepoint for task switches, performed by the scheduler:
*/
+
+#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0))
LTTNG_TRACEPOINT_EVENT(sched_switch,
-#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0))
TP_PROTO(bool preempt,
- struct task_struct *prev,
- struct task_struct *next),
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next),
- TP_ARGS(preempt, prev, next),
+ TP_ARGS(preempt, prev_state, prev, next),
+
+ TP_FIELDS(
+ ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
+ ctf_integer(pid_t, prev_tid, prev->pid)
+ ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
+#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
+ ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev))
#else
- TP_PROTO(struct task_struct *prev,
+ ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev))
+#endif
+ ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
+ ctf_integer(pid_t, next_tid, next->pid)
+ ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
+ )
+)
+
+#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0))
+
+LTTNG_TRACEPOINT_EVENT(sched_switch,
+
+ TP_PROTO(bool preempt,
+ struct task_struct *prev,
struct task_struct *next),
- TP_ARGS(prev, next),
-#endif /* #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0)) */
+ TP_ARGS(preempt, prev, next),
TP_FIELDS(
ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
ctf_integer(pid_t, prev_tid, prev->pid)
ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
-#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0))
#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev))
#else
ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev))
#endif
+ ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
+ ctf_integer(pid_t, next_tid, next->pid)
+ ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
+ )
+)
+
#else
+
+LTTNG_TRACEPOINT_EVENT(sched_switch,
+
+ TP_PROTO(struct task_struct *prev,
+ struct task_struct *next),
+
+ TP_ARGS(prev, next),
+
+ TP_FIELDS(
+ ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
+ ctf_integer(pid_t, prev_tid, prev->pid)
+ ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(prev))
#else
ctf_integer(long, prev_state, __trace_sched_switch_state(prev))
-#endif
#endif
ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
ctf_integer(pid_t, next_tid, next->pid)
ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
)
)
+#endif
/*
* Tracepoint for a task being migrated:
--
2.19.1