1 #include <linux/export.h>
2 #include <linux/sched.h>
3 #include <linux/tsacct_kern.h>
4 #include <linux/kernel_stat.h>
5 #include <linux/static_key.h>
9 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in other CPU reading this CPU's irq time and can
 * race with irq/vtime_account on this CPU. We would either get old
 * or new value with a side effect of accounting a slice of irq time to wrong
 * task when irq is in progress while we read rq->clock. That is a worthy
 * compromise in place of having locks on each irq in account_system_time.
 */
22 DEFINE_PER_CPU(u64
, cpu_hardirq_time
);
23 DEFINE_PER_CPU(u64
, cpu_softirq_time
);
25 static DEFINE_PER_CPU(u64
, irq_start_time
);
26 static int sched_clock_irqtime
;
28 void enable_sched_clock_irqtime(void)
30 sched_clock_irqtime
= 1;
33 void disable_sched_clock_irqtime(void)
35 sched_clock_irqtime
= 0;
39 DEFINE_PER_CPU(seqcount_t
, irq_time_seq
);
40 #endif /* CONFIG_64BIT */
43 * Called before incrementing preempt_count on {soft,}irq_enter
44 * and before decrementing preempt_count on {soft,}irq_exit.
46 void vtime_account(struct task_struct
*curr
)
52 if (!sched_clock_irqtime
)
55 local_irq_save(flags
);
57 cpu
= smp_processor_id();
58 delta
= sched_clock_cpu(cpu
) - __this_cpu_read(irq_start_time
);
59 __this_cpu_add(irq_start_time
, delta
);
61 irq_time_write_begin();
63 * We do not account for softirq time from ksoftirqd here.
64 * We want to continue accounting softirq time to ksoftirqd thread
65 * in that case, so as not to confuse scheduler with a special task
66 * that do not consume any time, but still wants to run.
69 __this_cpu_add(cpu_hardirq_time
, delta
);
70 else if (in_serving_softirq() && curr
!= this_cpu_ksoftirqd())
71 __this_cpu_add(cpu_softirq_time
, delta
);
74 local_irq_restore(flags
);
76 EXPORT_SYMBOL_GPL(vtime_account
);
78 static int irqtime_account_hi_update(void)
80 u64
*cpustat
= kcpustat_this_cpu
->cpustat
;
85 local_irq_save(flags
);
86 latest_ns
= this_cpu_read(cpu_hardirq_time
);
87 if (nsecs_to_cputime64(latest_ns
) > cpustat
[CPUTIME_IRQ
])
89 local_irq_restore(flags
);
93 static int irqtime_account_si_update(void)
95 u64
*cpustat
= kcpustat_this_cpu
->cpustat
;
100 local_irq_save(flags
);
101 latest_ns
= this_cpu_read(cpu_softirq_time
);
102 if (nsecs_to_cputime64(latest_ns
) > cpustat
[CPUTIME_SOFTIRQ
])
104 local_irq_restore(flags
);
108 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
110 #define sched_clock_irqtime (0)
112 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
114 static inline void task_group_account_field(struct task_struct
*p
, int index
,
117 #ifdef CONFIG_CGROUP_CPUACCT
118 struct kernel_cpustat
*kcpustat
;
122 * Since all updates are sure to touch the root cgroup, we
123 * get ourselves ahead and touch it first. If the root cgroup
124 * is the only cgroup, then nothing else should be necessary.
127 __get_cpu_var(kernel_cpustat
).cpustat
[index
] += tmp
;
129 #ifdef CONFIG_CGROUP_CPUACCT
130 if (unlikely(!cpuacct_subsys
.active
))
135 while (ca
&& (ca
!= &root_cpuacct
)) {
136 kcpustat
= this_cpu_ptr(ca
->cpustat
);
137 kcpustat
->cpustat
[index
] += tmp
;
145 * Account user cpu time to a process.
146 * @p: the process that the cpu time gets accounted to
147 * @cputime: the cpu time spent in user space since the last update
148 * @cputime_scaled: cputime scaled by cpu frequency
150 void account_user_time(struct task_struct
*p
, cputime_t cputime
,
151 cputime_t cputime_scaled
)
155 /* Add user time to process. */
157 p
->utimescaled
+= cputime_scaled
;
158 account_group_user_time(p
, cputime
);
160 index
= (TASK_NICE(p
) > 0) ? CPUTIME_NICE
: CPUTIME_USER
;
162 /* Add user time to cpustat. */
163 task_group_account_field(p
, index
, (__force u64
) cputime
);
165 /* Account for user time used */
166 acct_update_integrals(p
);
170 * Account guest cpu time to a process.
171 * @p: the process that the cpu time gets accounted to
172 * @cputime: the cpu time spent in virtual machine since the last update
173 * @cputime_scaled: cputime scaled by cpu frequency
175 static void account_guest_time(struct task_struct
*p
, cputime_t cputime
,
176 cputime_t cputime_scaled
)
178 u64
*cpustat
= kcpustat_this_cpu
->cpustat
;
180 /* Add guest time to process. */
182 p
->utimescaled
+= cputime_scaled
;
183 account_group_user_time(p
, cputime
);
186 /* Add guest time to cpustat. */
187 if (TASK_NICE(p
) > 0) {
188 cpustat
[CPUTIME_NICE
] += (__force u64
) cputime
;
189 cpustat
[CPUTIME_GUEST_NICE
] += (__force u64
) cputime
;
191 cpustat
[CPUTIME_USER
] += (__force u64
) cputime
;
192 cpustat
[CPUTIME_GUEST
] += (__force u64
) cputime
;
197 * Account system cpu time to a process and desired cpustat field
198 * @p: the process that the cpu time gets accounted to
199 * @cputime: the cpu time spent in kernel space since the last update
200 * @cputime_scaled: cputime scaled by cpu frequency
201 * @target_cputime64: pointer to cpustat field that has to be updated
204 void __account_system_time(struct task_struct
*p
, cputime_t cputime
,
205 cputime_t cputime_scaled
, int index
)
207 /* Add system time to process. */
209 p
->stimescaled
+= cputime_scaled
;
210 account_group_system_time(p
, cputime
);
212 /* Add system time to cpustat. */
213 task_group_account_field(p
, index
, (__force u64
) cputime
);
215 /* Account for system time used */
216 acct_update_integrals(p
);
220 * Account system cpu time to a process.
221 * @p: the process that the cpu time gets accounted to
222 * @hardirq_offset: the offset to subtract from hardirq_count()
223 * @cputime: the cpu time spent in kernel space since the last update
224 * @cputime_scaled: cputime scaled by cpu frequency
226 void account_system_time(struct task_struct
*p
, int hardirq_offset
,
227 cputime_t cputime
, cputime_t cputime_scaled
)
231 if ((p
->flags
& PF_VCPU
) && (irq_count() - hardirq_offset
== 0)) {
232 account_guest_time(p
, cputime
, cputime_scaled
);
236 if (hardirq_count() - hardirq_offset
)
238 else if (in_serving_softirq())
239 index
= CPUTIME_SOFTIRQ
;
241 index
= CPUTIME_SYSTEM
;
243 __account_system_time(p
, cputime
, cputime_scaled
, index
);
247 * Account for involuntary wait time.
248 * @cputime: the cpu time spent in involuntary wait
250 void account_steal_time(cputime_t cputime
)
252 u64
*cpustat
= kcpustat_this_cpu
->cpustat
;
254 cpustat
[CPUTIME_STEAL
] += (__force u64
) cputime
;
258 * Account for idle time.
259 * @cputime: the cpu time spent in idle wait
261 void account_idle_time(cputime_t cputime
)
263 u64
*cpustat
= kcpustat_this_cpu
->cpustat
;
264 struct rq
*rq
= this_rq();
266 if (atomic_read(&rq
->nr_iowait
) > 0)
267 cpustat
[CPUTIME_IOWAIT
] += (__force u64
) cputime
;
269 cpustat
[CPUTIME_IDLE
] += (__force u64
) cputime
;
272 static __always_inline
bool steal_account_process_tick(void)
274 #ifdef CONFIG_PARAVIRT
275 if (static_key_false(¶virt_steal_enabled
)) {
278 steal
= paravirt_steal_clock(smp_processor_id());
279 steal
-= this_rq()->prev_steal_time
;
281 st
= steal_ticks(steal
);
282 this_rq()->prev_steal_time
+= st
* TICK_NSEC
;
284 account_steal_time(st
);
291 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
293 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
295 * Account a tick to a process and cpustat
296 * @p: the process that the cpu time gets accounted to
297 * @user_tick: is the tick from userspace
298 * @rq: the pointer to rq
300 * Tick demultiplexing follows the order
301 * - pending hardirq update
302 * - pending softirq update
306 * - check for guest_time
307 * - else account as system_time
309 * Check for hardirq is done both for system and user time as there is
310 * no timer going off while we are on hardirq and hence we may never get an
311 * opportunity to update it solely in system time.
312 * p->stime and friends are only updated on system time and not on irq
313 * softirq as those do not count in task exec_runtime any more.
315 static void irqtime_account_process_tick(struct task_struct
*p
, int user_tick
,
318 cputime_t one_jiffy_scaled
= cputime_to_scaled(cputime_one_jiffy
);
319 u64
*cpustat
= kcpustat_this_cpu
->cpustat
;
321 if (steal_account_process_tick())
324 if (irqtime_account_hi_update()) {
325 cpustat
[CPUTIME_IRQ
] += (__force u64
) cputime_one_jiffy
;
326 } else if (irqtime_account_si_update()) {
327 cpustat
[CPUTIME_SOFTIRQ
] += (__force u64
) cputime_one_jiffy
;
328 } else if (this_cpu_ksoftirqd() == p
) {
330 * ksoftirqd time do not get accounted in cpu_softirq_time.
331 * So, we have to handle it separately here.
332 * Also, p->stime needs to be updated for ksoftirqd.
334 __account_system_time(p
, cputime_one_jiffy
, one_jiffy_scaled
,
336 } else if (user_tick
) {
337 account_user_time(p
, cputime_one_jiffy
, one_jiffy_scaled
);
338 } else if (p
== rq
->idle
) {
339 account_idle_time(cputime_one_jiffy
);
340 } else if (p
->flags
& PF_VCPU
) { /* System time or guest time */
341 account_guest_time(p
, cputime_one_jiffy
, one_jiffy_scaled
);
343 __account_system_time(p
, cputime_one_jiffy
, one_jiffy_scaled
,
348 static void irqtime_account_idle_ticks(int ticks
)
351 struct rq
*rq
= this_rq();
353 for (i
= 0; i
< ticks
; i
++)
354 irqtime_account_process_tick(current
, 0, rq
);
356 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
357 static void irqtime_account_idle_ticks(int ticks
) {}
358 static void irqtime_account_process_tick(struct task_struct
*p
, int user_tick
,
360 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
363 * Account a single tick of cpu time.
364 * @p: the process that the cpu time gets accounted to
365 * @user_tick: indicates if the tick is a user or a system tick
367 void account_process_tick(struct task_struct
*p
, int user_tick
)
369 cputime_t one_jiffy_scaled
= cputime_to_scaled(cputime_one_jiffy
);
370 struct rq
*rq
= this_rq();
372 if (sched_clock_irqtime
) {
373 irqtime_account_process_tick(p
, user_tick
, rq
);
377 if (steal_account_process_tick())
381 account_user_time(p
, cputime_one_jiffy
, one_jiffy_scaled
);
382 else if ((p
!= rq
->idle
) || (irq_count() != HARDIRQ_OFFSET
))
383 account_system_time(p
, HARDIRQ_OFFSET
, cputime_one_jiffy
,
386 account_idle_time(cputime_one_jiffy
);
390 * Account multiple ticks of steal time.
391 * @p: the process from which the cpu time has been stolen
392 * @ticks: number of stolen ticks
394 void account_steal_ticks(unsigned long ticks
)
396 account_steal_time(jiffies_to_cputime(ticks
));
400 * Account multiple ticks of idle time.
401 * @ticks: number of stolen ticks
403 void account_idle_ticks(unsigned long ticks
)
406 if (sched_clock_irqtime
) {
407 irqtime_account_idle_ticks(ticks
);
411 account_idle_time(jiffies_to_cputime(ticks
));
/*
 * Use precise platform statistics if available:
 */
419 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
420 void task_times(struct task_struct
*p
, cputime_t
*ut
, cputime_t
*st
)
426 void thread_group_times(struct task_struct
*p
, cputime_t
*ut
, cputime_t
*st
)
428 struct task_cputime cputime
;
430 thread_group_cputime(p
, &cputime
);
/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * have other meaning of the idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account().
 */
444 #ifndef __ARCH_HAS_VTIME_ACCOUNT
445 void vtime_account(struct task_struct
*tsk
)
449 local_irq_save(flags
);
451 if (in_interrupt() || !is_idle_task(tsk
))
452 vtime_account_system(tsk
);
454 vtime_account_idle(tsk
);
456 local_irq_restore(flags
);
458 EXPORT_SYMBOL_GPL(vtime_account
);
459 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
463 #ifndef nsecs_to_cputime
464 # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
467 static cputime_t
scale_utime(cputime_t utime
, cputime_t rtime
, cputime_t total
)
469 u64 temp
= (__force u64
) rtime
;
471 temp
*= (__force u64
) utime
;
473 if (sizeof(cputime_t
) == 4)
474 temp
= div_u64(temp
, (__force u32
) total
);
476 temp
= div64_u64(temp
, (__force u64
) total
);
478 return (__force cputime_t
) temp
;
481 void task_times(struct task_struct
*p
, cputime_t
*ut
, cputime_t
*st
)
483 cputime_t rtime
, utime
= p
->utime
, total
= utime
+ p
->stime
;
486 * Use CFS's precise accounting:
488 rtime
= nsecs_to_cputime(p
->se
.sum_exec_runtime
);
491 utime
= scale_utime(utime
, rtime
, total
);
496 * Compare with previous values, to keep monotonicity:
498 p
->prev_utime
= max(p
->prev_utime
, utime
);
499 p
->prev_stime
= max(p
->prev_stime
, rtime
- p
->prev_utime
);
506 * Must be called with siglock held.
508 void thread_group_times(struct task_struct
*p
, cputime_t
*ut
, cputime_t
*st
)
510 struct signal_struct
*sig
= p
->signal
;
511 struct task_cputime cputime
;
512 cputime_t rtime
, utime
, total
;
514 thread_group_cputime(p
, &cputime
);
516 total
= cputime
.utime
+ cputime
.stime
;
517 rtime
= nsecs_to_cputime(cputime
.sum_exec_runtime
);
520 utime
= scale_utime(cputime
.utime
, rtime
, total
);
524 sig
->prev_utime
= max(sig
->prev_utime
, utime
);
525 sig
->prev_stime
= max(sig
->prev_stime
, rtime
- sig
->prev_utime
);
527 *ut
= sig
->prev_utime
;
528 *st
= sig
->prev_stime
;