blob: afe514de82c140270216b9d2e9e27da52d132080 [file] [log] [blame]
Patrick Williams 03907ee 2022-05-01 06:28:52 -0500 1From 8e52fd71e693619f7a58de2692e59f0c826e9988 Mon Sep 17 00:00:00 2001
2From: Michael Jeanson <mjeanson@efficios.com>
3Date: Mon, 4 Apr 2022 13:52:57 -0400
4Subject: [PATCH 03/10] fix: sched/tracing: Don't re-read p->state when
5 emitting sched_switch event (v5.18)
6
7See upstream commit:
8
9 commit fa2c3254d7cfff5f7a916ab928a562d1165f17bb
10 Author: Valentin Schneider <valentin.schneider@arm.com>
11 Date: Thu Jan 20 16:25:19 2022 +0000
12
13 sched/tracing: Don't re-read p->state when emitting sched_switch event
14
15 As of commit
16
17 c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu")
18
19 the following sequence becomes possible:
20
21 p->__state = TASK_INTERRUPTIBLE;
22 __schedule()
23 deactivate_task(p);
24 ttwu()
25 READ !p->on_rq
26 p->__state=TASK_WAKING
27 trace_sched_switch()
28 __trace_sched_switch_state()
29 task_state_index()
30 return 0;
31
32 TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in
33 the trace event.
34
35 Prevent this by pushing the value read from __schedule() down the trace
36 event.
37
38Upstream-Status: Backport
39
40Change-Id: I46743cd006be4b4d573cae2d77df7d6d16744d04
41Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
42Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
43---
44 include/instrumentation/events/sched.h | 88 +++++++++++++++++++++++---
45 1 file changed, 78 insertions(+), 10 deletions(-)
46
47diff --git a/include/instrumentation/events/sched.h b/include/instrumentation/events/sched.h
48index 91953a6f..339bec94 100644
49--- a/include/instrumentation/events/sched.h
50+++ b/include/instrumentation/events/sched.h
51@@ -20,7 +20,37 @@
52 #ifndef _TRACE_SCHED_DEF_
53 #define _TRACE_SCHED_DEF_
54
55-#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0))
56+#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0))
57+
58+static inline long __trace_sched_switch_state(bool preempt,
59+ unsigned int prev_state,
60+ struct task_struct *p)
61+{
62+ unsigned int state;
63+
64+#ifdef CONFIG_SCHED_DEBUG
65+ BUG_ON(p != current);
66+#endif /* CONFIG_SCHED_DEBUG */
67+
68+ /*
69+ * Preemption ignores task state, therefore preempted tasks are always
70+ * RUNNING (we will not have dequeued if state != RUNNING).
71+ */
72+ if (preempt)
73+ return TASK_REPORT_MAX;
74+
75+ /*
76+ * __task_state_index() uses fls() and returns a value in the 0-8 range.
77+ * Decrement it by 1 (except TASK_RUNNING state, i.e. 0) before using
78+ * it for left shift operation to get the correct task->state
79+ * mapping.
80+ */
81+ state = __task_state_index(prev_state, p->exit_state);
82+
83+ return state ? (1 << (state - 1)) : state;
84+}
85+
86+#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0))
87
88 static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
89 {
90@@ -321,43 +351,81 @@ LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
91 /*
92 * Tracepoint for task switches, performed by the scheduler:
93 */
94+
95+#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0))
96 LTTNG_TRACEPOINT_EVENT(sched_switch,
97
98-#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0))
99 TP_PROTO(bool preempt,
100- struct task_struct *prev,
101- struct task_struct *next),
102+ unsigned int prev_state,
103+ struct task_struct *prev,
104+ struct task_struct *next),
105
106- TP_ARGS(preempt, prev, next),
107+ TP_ARGS(preempt, prev_state, prev, next),
108+
109+ TP_FIELDS(
110+ ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
111+ ctf_integer(pid_t, prev_tid, prev->pid)
112+ ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
113+#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
114+ ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev))
115 #else
116- TP_PROTO(struct task_struct *prev,
117+ ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev))
118+#endif
119+ ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
120+ ctf_integer(pid_t, next_tid, next->pid)
121+ ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
122+ )
123+)
124+
125+#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0))
126+
127+LTTNG_TRACEPOINT_EVENT(sched_switch,
128+
129+ TP_PROTO(bool preempt,
130+ struct task_struct *prev,
131 struct task_struct *next),
132
133- TP_ARGS(prev, next),
134-#endif /* #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0)) */
135+ TP_ARGS(preempt, prev, next),
136
137 TP_FIELDS(
138 ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
139 ctf_integer(pid_t, prev_tid, prev->pid)
140 ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
141-#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0))
142 #ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
143 ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev))
144 #else
145 ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev))
146 #endif
147+ ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
148+ ctf_integer(pid_t, next_tid, next->pid)
149+ ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
150+ )
151+)
152+
153 #else
154+
155+LTTNG_TRACEPOINT_EVENT(sched_switch,
156+
157+ TP_PROTO(struct task_struct *prev,
158+ struct task_struct *next),
159+
160+ TP_ARGS(prev, next),
161+
162+ TP_FIELDS(
163+ ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
164+ ctf_integer(pid_t, prev_tid, prev->pid)
165+ ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
166 #ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
167 ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(prev))
168 #else
169 ctf_integer(long, prev_state, __trace_sched_switch_state(prev))
170-#endif
171 #endif
172 ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
173 ctf_integer(pid_t, next_tid, next->pid)
174 ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
175 )
176 )
177+#endif
178
179 /*
180 * Tracepoint for a task being migrated:
181--
1822.19.1
183