Commit | Line | Data |
---|---|---|
73fbec60 FW |
1 | #include <linux/export.h> |
2 | #include <linux/sched.h> | |
3 | #include <linux/tsacct_kern.h> | |
4 | #include <linux/kernel_stat.h> | |
5 | #include <linux/static_key.h> | |
abf917cd | 6 | #include <linux/context_tracking.h> |
73fbec60 FW |
7 | #include "sched.h" |
8 | ||
9 | ||
10 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | |
11 | ||
12 | /* | |
13 | * There are no locks covering percpu hardirq/softirq time. | |
bf9fae9f | 14 | * They are only modified in vtime_account, on the corresponding CPU
73fbec60 FW |
15 | * with interrupts disabled. So, writes are safe. |
16 | * They are read and saved off onto struct rq in update_rq_clock(). | |
17 | * This may result in another CPU reading this CPU's irq time and |
bf9fae9f | 18 | * racing with irq/vtime_account on this CPU. We would either get the old
73fbec60 FW |
19 | * or the new value, with a side effect of accounting a slice of irq time to the
20 | * wrong task when an irq is in progress while we read rq->clock. That is a worthy |
21 | * compromise in place of having locks on each irq in account_system_time. | |
22 | */ | |
23 | DEFINE_PER_CPU(u64, cpu_hardirq_time); | |
24 | DEFINE_PER_CPU(u64, cpu_softirq_time); | |
25 | ||
26 | static DEFINE_PER_CPU(u64, irq_start_time); | |
27 | static int sched_clock_irqtime; | |
28 | ||
29 | void enable_sched_clock_irqtime(void) | |
30 | { | |
31 | sched_clock_irqtime = 1; | |
32 | } | |
33 | ||
34 | void disable_sched_clock_irqtime(void) | |
35 | { | |
36 | sched_clock_irqtime = 0; | |
37 | } | |
38 | ||
39 | #ifndef CONFIG_64BIT | |
40 | DEFINE_PER_CPU(seqcount_t, irq_time_seq); | |
41 | #endif /* CONFIG_64BIT */ | |
42 | ||
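On 64-bit the two u64 counters above can be read with a single load, so the interrupt disabling around the writer is enough. On 32-bit a remote reader could observe a torn 64-bit value, which is what irq_time_seq guards against. The actual helpers (irq_time_write_begin()/irq_time_read()) live in sched.h; the following is only a sketch of the assumed reader side, retrying whenever a write raced with it:

```c
/* Sketch of a 32-bit reader of the per-cpu irq time (not the sched.h code). */
static inline u64 irq_time_read_sketch(int cpu)
{
	u64 irq_time;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
		irq_time = per_cpu(cpu_softirq_time, cpu) +
			   per_cpu(cpu_hardirq_time, cpu);
	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));

	return irq_time;
}
```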
43 | /* | |
44 | * Called before incrementing preempt_count on {soft,}irq_enter | |
45 | * and before decrementing preempt_count on {soft,}irq_exit. | |
46 | */ | |
3e1df4f5 | 47 | void irqtime_account_irq(struct task_struct *curr) |
73fbec60 FW |
48 | { |
49 | unsigned long flags; | |
50 | s64 delta; | |
51 | int cpu; | |
52 | ||
53 | if (!sched_clock_irqtime) | |
54 | return; | |
55 | ||
56 | local_irq_save(flags); | |
57 | ||
58 | cpu = smp_processor_id(); | |
59 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); | |
60 | __this_cpu_add(irq_start_time, delta); | |
61 | ||
62 | irq_time_write_begin(); | |
63 | /* | |
64 | * We do not account for softirq time from ksoftirqd here. | |
65 | * We want to continue accounting softirq time to the ksoftirqd thread |
66 | * in that case, so as not to confuse the scheduler with a special task |
67 | * that does not consume any time, but still wants to run. |
68 | */ | |
69 | if (hardirq_count()) | |
70 | __this_cpu_add(cpu_hardirq_time, delta); | |
71 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) | |
72 | __this_cpu_add(cpu_softirq_time, delta); | |
73 | ||
74 | irq_time_write_end(); | |
75 | local_irq_restore(flags); | |
76 | } | |
3e1df4f5 | 77 | EXPORT_SYMBOL_GPL(irqtime_account_irq); |
73fbec60 FW |
78 | |
79 | static int irqtime_account_hi_update(void) | |
80 | { | |
81 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
82 | unsigned long flags; | |
83 | u64 latest_ns; | |
84 | int ret = 0; | |
85 | ||
86 | local_irq_save(flags); | |
87 | latest_ns = this_cpu_read(cpu_hardirq_time); | |
88 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) | |
89 | ret = 1; | |
90 | local_irq_restore(flags); | |
91 | return ret; | |
92 | } | |
93 | ||
94 | static int irqtime_account_si_update(void) | |
95 | { | |
96 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
97 | unsigned long flags; | |
98 | u64 latest_ns; | |
99 | int ret = 0; | |
100 | ||
101 | local_irq_save(flags); | |
102 | latest_ns = this_cpu_read(cpu_softirq_time); | |
103 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) | |
104 | ret = 1; | |
105 | local_irq_restore(flags); | |
106 | return ret; | |
107 | } | |
108 | ||
109 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | |
110 | ||
111 | #define sched_clock_irqtime (0) | |
112 | ||
113 | #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ | |
114 | ||
115 | static inline void task_group_account_field(struct task_struct *p, int index, | |
116 | u64 tmp) | |
117 | { | |
73fbec60 FW |
118 | /* |
119 | * Since all updates are sure to touch the root cgroup, we | |
120 | * get ourselves ahead and touch it first. If the root cgroup | |
121 | * is the only cgroup, then nothing else should be necessary. | |
122 | * | |
123 | */ | |
a4f61cc0 | 124 | __this_cpu_add(kernel_cpustat.cpustat[index], tmp); |
73fbec60 | 125 | |
1966aaf7 | 126 | cpuacct_account_field(p, index, tmp); |
73fbec60 FW |
127 | } |
128 | ||
129 | /* | |
130 | * Account user cpu time to a process. | |
131 | * @p: the process that the cpu time gets accounted to | |
132 | * @cputime: the cpu time spent in user space since the last update | |
133 | * @cputime_scaled: cputime scaled by cpu frequency | |
134 | */ | |
135 | void account_user_time(struct task_struct *p, cputime_t cputime, | |
136 | cputime_t cputime_scaled) | |
137 | { | |
138 | int index; | |
139 | ||
140 | /* Add user time to process. */ | |
141 | p->utime += cputime; | |
142 | p->utimescaled += cputime_scaled; | |
143 | account_group_user_time(p, cputime); | |
144 | ||
d0ea0268 | 145 | index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; |
73fbec60 FW |
146 | |
147 | /* Add user time to cpustat. */ | |
148 | task_group_account_field(p, index, (__force u64) cputime); | |
149 | ||
150 | /* Account for user time used */ | |
6fac4829 | 151 | acct_account_cputime(p); |
73fbec60 FW |
152 | } |
153 | ||
154 | /* | |
155 | * Account guest cpu time to a process. | |
156 | * @p: the process that the cpu time gets accounted to | |
157 | * @cputime: the cpu time spent in virtual machine since the last update | |
158 | * @cputime_scaled: cputime scaled by cpu frequency | |
159 | */ | |
160 | static void account_guest_time(struct task_struct *p, cputime_t cputime, | |
161 | cputime_t cputime_scaled) | |
162 | { | |
163 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
164 | ||
165 | /* Add guest time to process. */ | |
166 | p->utime += cputime; | |
167 | p->utimescaled += cputime_scaled; | |
168 | account_group_user_time(p, cputime); | |
169 | p->gtime += cputime; | |
170 | ||
171 | /* Add guest time to cpustat. */ | |
d0ea0268 | 172 | if (task_nice(p) > 0) { |
73fbec60 FW |
173 | cpustat[CPUTIME_NICE] += (__force u64) cputime; |
174 | cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; | |
175 | } else { | |
176 | cpustat[CPUTIME_USER] += (__force u64) cputime; | |
177 | cpustat[CPUTIME_GUEST] += (__force u64) cputime; | |
178 | } | |
179 | } | |
180 | ||
181 | /* | |
182 | * Account system cpu time to a process and desired cpustat field | |
183 | * @p: the process that the cpu time gets accounted to | |
184 | * @cputime: the cpu time spent in kernel space since the last update | |
185 | * @cputime_scaled: cputime scaled by cpu frequency | |
186 | * @index: index of the cpustat field that has to be updated |
187 | */ | |
188 | static inline | |
189 | void __account_system_time(struct task_struct *p, cputime_t cputime, | |
190 | cputime_t cputime_scaled, int index) | |
191 | { | |
192 | /* Add system time to process. */ | |
193 | p->stime += cputime; | |
194 | p->stimescaled += cputime_scaled; | |
195 | account_group_system_time(p, cputime); | |
196 | ||
197 | /* Add system time to cpustat. */ | |
198 | task_group_account_field(p, index, (__force u64) cputime); | |
199 | ||
200 | /* Account for system time used */ | |
6fac4829 | 201 | acct_account_cputime(p); |
73fbec60 FW |
202 | } |
203 | ||
204 | /* | |
205 | * Account system cpu time to a process. | |
206 | * @p: the process that the cpu time gets accounted to | |
207 | * @hardirq_offset: the offset to subtract from hardirq_count() | |
208 | * @cputime: the cpu time spent in kernel space since the last update | |
209 | * @cputime_scaled: cputime scaled by cpu frequency | |
210 | */ | |
211 | void account_system_time(struct task_struct *p, int hardirq_offset, | |
212 | cputime_t cputime, cputime_t cputime_scaled) | |
213 | { | |
214 | int index; | |
215 | ||
216 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { | |
217 | account_guest_time(p, cputime, cputime_scaled); | |
218 | return; | |
219 | } | |
220 | ||
221 | if (hardirq_count() - hardirq_offset) | |
222 | index = CPUTIME_IRQ; | |
223 | else if (in_serving_softirq()) | |
224 | index = CPUTIME_SOFTIRQ; | |
225 | else | |
226 | index = CPUTIME_SYSTEM; | |
227 | ||
228 | __account_system_time(p, cputime, cputime_scaled, index); | |
229 | } | |
230 | ||
231 | /* | |
232 | * Account for involuntary wait time. | |
233 | * @cputime: the cpu time spent in involuntary wait | |
234 | */ | |
235 | void account_steal_time(cputime_t cputime) | |
236 | { | |
237 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
238 | ||
239 | cpustat[CPUTIME_STEAL] += (__force u64) cputime; | |
240 | } | |
241 | ||
242 | /* | |
243 | * Account for idle time. | |
244 | * @cputime: the cpu time spent in idle wait | |
245 | */ | |
246 | void account_idle_time(cputime_t cputime) | |
247 | { | |
248 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
249 | struct rq *rq = this_rq(); | |
250 | ||
251 | if (atomic_read(&rq->nr_iowait) > 0) | |
252 | cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; | |
253 | else | |
254 | cpustat[CPUTIME_IDLE] += (__force u64) cputime; | |
255 | } | |
256 | ||
257 | static __always_inline bool steal_account_process_tick(void) | |
258 | { | |
259 | #ifdef CONFIG_PARAVIRT | |
260 | if (static_key_false(¶virt_steal_enabled)) { | |
dee08a72 FW |
261 | u64 steal; |
262 | cputime_t steal_ct; | |
73fbec60 FW |
263 | |
264 | steal = paravirt_steal_clock(smp_processor_id()); | |
265 | steal -= this_rq()->prev_steal_time; | |
266 | ||
dee08a72 FW |
267 | /* |
268 | * cputime_t may be less precise than nsecs (e.g. if it's |
269 | * based on jiffies). Let's cast the result to cputime |
270 | * granularity and account the rest on the next rounds. | |
271 | */ | |
272 | steal_ct = nsecs_to_cputime(steal); | |
273 | this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct); | |
73fbec60 | 274 | |
dee08a72 FW |
275 | account_steal_time(steal_ct); |
276 | return steal_ct; | |
73fbec60 FW |
277 | } |
278 | #endif | |
279 | return false; | |
280 | } | |
281 | ||
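The dee08a72 comment above hinges on one arithmetic detail: the paravirt steal clock ticks in nanoseconds, but accounting happens at cputime granularity, so the sub-unit remainder must be carried into the next round via prev_steal_time. A standalone, user-space sketch of the same idea with hypothetical HZ=100 numbers (a demo of the technique, not kernel code):

```c
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_JIFFY	(1000000000ULL / 100)	/* hypothetical HZ = 100 */

static uint64_t prev_steal_time;	/* nsecs already folded into cpustat */

/* Returns how many jiffies of steal time get accounted this round. */
static uint64_t account_steal_demo(uint64_t steal_clock_ns)
{
	uint64_t steal = steal_clock_ns - prev_steal_time;
	uint64_t steal_jiffies = steal / NSEC_PER_JIFFY;

	/* Only advance by what was accounted; the remainder carries over. */
	prev_steal_time += steal_jiffies * NSEC_PER_JIFFY;
	return steal_jiffies;
}

int main(void)
{
	/* 20ms of hypervisor steal reported in three chunks: 0 + 1 + 1 = 2 jiffies. */
	printf("%llu\n", (unsigned long long)account_steal_demo(7000000));
	printf("%llu\n", (unsigned long long)account_steal_demo(15000000));
	printf("%llu\n", (unsigned long long)account_steal_demo(20000000));
	return 0;
}
```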
a634f933 FW |
282 | /* |
283 | * Accumulate raw cputime values of dead tasks (sig->[us]time) and live | |
284 | * tasks (sum on group iteration) belonging to @tsk's group. | |
285 | */ | |
286 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) | |
287 | { | |
288 | struct signal_struct *sig = tsk->signal; | |
6fac4829 | 289 | cputime_t utime, stime; |
a634f933 FW |
290 | struct task_struct *t; |
291 | ||
292 | times->utime = sig->utime; | |
293 | times->stime = sig->stime; | |
294 | times->sum_exec_runtime = sig->sum_sched_runtime; | |
295 | ||
296 | rcu_read_lock(); | |
297 | /* make sure we can trust tsk->thread_group list */ | |
298 | if (!likely(pid_alive(tsk))) | |
299 | goto out; | |
300 | ||
301 | t = tsk; | |
302 | do { | |
e614b333 | 303 | task_cputime(t, &utime, &stime); |
6fac4829 FW |
304 | times->utime += utime; |
305 | times->stime += stime; | |
a634f933 FW |
306 | times->sum_exec_runtime += task_sched_runtime(t); |
307 | } while_each_thread(tsk, t); | |
308 | out: | |
309 | rcu_read_unlock(); | |
310 | } | |
311 | ||
73fbec60 FW |
312 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
313 | /* | |
314 | * Account a tick to a process and cpustat | |
315 | * @p: the process that the cpu time gets accounted to | |
316 | * @user_tick: is the tick from userspace | |
317 | * @rq: the pointer to rq | |
318 | * | |
319 | * Tick demultiplexing follows the order | |
320 | * - pending hardirq update | |
321 | * - pending softirq update | |
322 | * - user_time | |
323 | * - idle_time | |
324 | * - system time | |
325 | * - check for guest_time | |
326 | * - else account as system_time | |
327 | * | |
328 | * The check for hardirq is done both for system and user time as there is |
329 | * no timer going off while we are on hardirq and hence we may never get an | |
330 | * opportunity to update it solely in system time. | |
331 | * p->stime and friends are only updated on system time and not on irq or |
332 | * softirq as those do not count in task exec_runtime any more. | |
333 | */ | |
334 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | |
2d513868 | 335 | struct rq *rq, int ticks) |
73fbec60 | 336 | { |
2d513868 TG |
337 | cputime_t scaled = cputime_to_scaled(cputime_one_jiffy); |
338 | u64 cputime = (__force u64) cputime_one_jiffy; | |
73fbec60 FW |
339 | u64 *cpustat = kcpustat_this_cpu->cpustat; |
340 | ||
341 | if (steal_account_process_tick()) | |
342 | return; | |
343 | ||
2d513868 TG |
344 | cputime *= ticks; |
345 | scaled *= ticks; | |
346 | ||
73fbec60 | 347 | if (irqtime_account_hi_update()) { |
2d513868 | 348 | cpustat[CPUTIME_IRQ] += cputime; |
73fbec60 | 349 | } else if (irqtime_account_si_update()) { |
2d513868 | 350 | cpustat[CPUTIME_SOFTIRQ] += cputime; |
73fbec60 FW |
351 | } else if (this_cpu_ksoftirqd() == p) { |
352 | /* | |
353 | * ksoftirqd time does not get accounted in cpu_softirq_time. |
354 | * So, we have to handle it separately here. | |
355 | * Also, p->stime needs to be updated for ksoftirqd. | |
356 | */ | |
2d513868 | 357 | __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ); |
73fbec60 | 358 | } else if (user_tick) { |
2d513868 | 359 | account_user_time(p, cputime, scaled); |
73fbec60 | 360 | } else if (p == rq->idle) { |
2d513868 | 361 | account_idle_time(cputime); |
73fbec60 | 362 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ |
2d513868 | 363 | account_guest_time(p, cputime, scaled); |
73fbec60 | 364 | } else { |
2d513868 | 365 | __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM); |
73fbec60 FW |
366 | } |
367 | } | |
368 | ||
369 | static void irqtime_account_idle_ticks(int ticks) | |
370 | { | |
73fbec60 FW |
371 | struct rq *rq = this_rq(); |
372 | ||
2d513868 | 373 | irqtime_account_process_tick(current, 0, rq, ticks); |
73fbec60 FW |
374 | } |
375 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | |
3f4724ea FW |
376 | static inline void irqtime_account_idle_ticks(int ticks) {} |
377 | static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, | |
2d513868 | 378 | struct rq *rq, int nr_ticks) {} |
73fbec60 FW |
379 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
380 | ||
73fbec60 FW |
381 | /* |
382 | * Use precise platform statistics if available: | |
383 | */ | |
384 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | |
a7e1a9e3 | 385 | |
e3942ba0 | 386 | #ifndef __ARCH_HAS_VTIME_TASK_SWITCH |
b0493406 | 387 | void vtime_common_task_switch(struct task_struct *prev) |
e3942ba0 FW |
388 | { |
389 | if (is_idle_task(prev)) | |
390 | vtime_account_idle(prev); | |
391 | else | |
392 | vtime_account_system(prev); | |
393 | ||
abf917cd | 394 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE |
e3942ba0 | 395 | vtime_account_user(prev); |
abf917cd | 396 | #endif |
e3942ba0 FW |
397 | arch_vtime_task_switch(prev); |
398 | } | |
399 | #endif | |
11113334 | 400 | |
a7e1a9e3 FW |
401 | /* |
402 | * Archs that account the whole time spent in the idle task | |
403 | * (outside irq) as idle time can rely on this and just implement | |
fd25b4c2 | 404 | * vtime_account_system() and vtime_account_idle(). Archs that |
a7e1a9e3 FW |
405 | * have another meaning of idle time (s390 only includes the |
406 | * time spent by the CPU when it's in low power mode) must override | |
407 | * vtime_account(). | |
408 | */ | |
409 | #ifndef __ARCH_HAS_VTIME_ACCOUNT | |
b0493406 | 410 | void vtime_common_account_irq_enter(struct task_struct *tsk) |
a7e1a9e3 | 411 | { |
abf917cd FW |
412 | if (!in_interrupt()) { |
413 | /* | |
414 | * If we interrupted user, context_tracking_in_user() | |
415 | * is 1 because the context tracking don't hook | |
416 | * on irq entry/exit. This way we know if | |
417 | * we need to flush user time on kernel entry. | |
418 | */ | |
419 | if (context_tracking_in_user()) { | |
420 | vtime_account_user(tsk); | |
421 | return; | |
422 | } | |
423 | ||
424 | if (is_idle_task(tsk)) { | |
425 | vtime_account_idle(tsk); | |
426 | return; | |
427 | } | |
428 | } | |
429 | vtime_account_system(tsk); | |
a7e1a9e3 | 430 | } |
b0493406 | 431 | EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter); |
a7e1a9e3 | 432 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ |
9fbc42ea FW |
433 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ |
434 | ||
435 | ||
436 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE | |
437 | void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) | |
438 | { | |
439 | *ut = p->utime; | |
440 | *st = p->stime; | |
441 | } | |
a7e1a9e3 | 442 | |
9fbc42ea FW |
443 | void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) |
444 | { | |
445 | struct task_cputime cputime; | |
73fbec60 | 446 | |
9fbc42ea FW |
447 | thread_group_cputime(p, &cputime); |
448 | ||
449 | *ut = cputime.utime; | |
450 | *st = cputime.stime; | |
451 | } | |
452 | #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ | |
453 | /* | |
454 | * Account a single tick of cpu time. | |
455 | * @p: the process that the cpu time gets accounted to | |
456 | * @user_tick: indicates if the tick is a user or a system tick | |
457 | */ | |
458 | void account_process_tick(struct task_struct *p, int user_tick) | |
73fbec60 | 459 | { |
9fbc42ea FW |
460 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); |
461 | struct rq *rq = this_rq(); | |
73fbec60 | 462 | |
9fbc42ea FW |
463 | if (vtime_accounting_enabled()) |
464 | return; | |
465 | ||
466 | if (sched_clock_irqtime) { | |
2d513868 | 467 | irqtime_account_process_tick(p, user_tick, rq, 1); |
9fbc42ea FW |
468 | return; |
469 | } | |
470 | ||
471 | if (steal_account_process_tick()) | |
472 | return; | |
73fbec60 | 473 | |
9fbc42ea FW |
474 | if (user_tick) |
475 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | |
476 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | |
477 | account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, | |
478 | one_jiffy_scaled); | |
73fbec60 | 479 | else |
9fbc42ea FW |
480 | account_idle_time(cputime_one_jiffy); |
481 | } | |
73fbec60 | 482 | |
9fbc42ea FW |
483 | /* |
484 | * Account multiple ticks of steal time. | |
485 | * @p: the process from which the cpu time has been stolen | |
486 | * @ticks: number of stolen ticks | |
487 | */ | |
488 | void account_steal_ticks(unsigned long ticks) | |
489 | { | |
490 | account_steal_time(jiffies_to_cputime(ticks)); | |
491 | } | |
492 | ||
493 | /* | |
494 | * Account multiple ticks of idle time. | |
495 | * @ticks: number of idle ticks |
496 | */ | |
497 | void account_idle_ticks(unsigned long ticks) | |
498 | { | |
499 | ||
500 | if (sched_clock_irqtime) { | |
501 | irqtime_account_idle_ticks(ticks); | |
502 | return; | |
503 | } | |
504 | ||
505 | account_idle_time(jiffies_to_cputime(ticks)); | |
506 | } | |
73fbec60 | 507 | |
d9a3c982 | 508 | /* |
55eaa7c1 SG |
509 | * Perform (stime * rtime) / total, but avoid multiplication overflow by |
510 | * losing precision when the numbers are big. |
d9a3c982 FW |
511 | */ |
512 | static cputime_t scale_stime(u64 stime, u64 rtime, u64 total) | |
73fbec60 | 513 | { |
55eaa7c1 | 514 | u64 scaled; |
73fbec60 | 515 | |
55eaa7c1 SG |
516 | for (;;) { |
517 | /* Make sure "rtime" is the bigger of stime/rtime */ | |
84f9f3a1 SG |
518 | if (stime > rtime) |
519 | swap(rtime, stime); | |
55eaa7c1 SG |
520 | |
521 | /* Make sure 'total' fits in 32 bits */ | |
522 | if (total >> 32) | |
523 | goto drop_precision; | |
524 | ||
525 | /* Does rtime (and thus stime) fit in 32 bits? */ | |
526 | if (!(rtime >> 32)) | |
527 | break; | |
528 | ||
529 | /* Can we just balance rtime/stime rather than dropping bits? */ | |
530 | if (stime >> 31) | |
531 | goto drop_precision; | |
532 | ||
533 | /* We can grow stime and shrink rtime and try to make them both fit */ | |
534 | stime <<= 1; | |
535 | rtime >>= 1; | |
536 | continue; | |
537 | ||
538 | drop_precision: | |
539 | /* We drop from rtime, it has more bits than stime */ | |
540 | rtime >>= 1; | |
541 | total >>= 1; | |
d9a3c982 | 542 | } |
73fbec60 | 543 | |
55eaa7c1 SG |
544 | /* |
545 | * Make sure gcc understands that this is a 32x32->64 multiply, | |
546 | * followed by a 64/32->64 divide. | |
547 | */ | |
548 | scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total); | |
d9a3c982 | 549 | return (__force cputime_t) scaled; |
73fbec60 FW |
550 | } |
551 | ||
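To see why the loop above is needed: multiplying a few years of runtime in nanoseconds by a few years' worth of ticks overflows 64 bits long before the divide brings it back down, so bits are rebalanced or dropped until a 32x32->64 multiply and a 64/32 divide are safe. A user-space sketch that mirrors the loop (a demo of the technique, not the kernel implementation):

```c
#include <stdint.h>
#include <stdio.h>

static uint64_t scale_stime_demo(uint64_t stime, uint64_t rtime, uint64_t total)
{
	for (;;) {
		/* Make sure "rtime" is the bigger of stime/rtime. */
		if (stime > rtime) {
			uint64_t tmp = stime;
			stime = rtime;
			rtime = tmp;
		}
		/* 'total' must fit in 32 bits for the final divide. */
		if (total >> 32)
			goto drop_precision;
		/* If rtime (and thus stime) fits in 32 bits, we can multiply safely. */
		if (!(rtime >> 32))
			break;
		/* Try to rebalance rtime/stime before dropping bits. */
		if (stime >> 31)
			goto drop_precision;
		stime <<= 1;
		rtime >>= 1;
		continue;
drop_precision:
		/* Drop from rtime and total; rtime has more bits than stime. */
		rtime >>= 1;
		total >>= 1;
	}
	/* 32x32->64 multiply followed by a 64/32 divide: cannot overflow. */
	return (uint64_t)(uint32_t)stime * (uint32_t)rtime / (uint32_t)total;
}

int main(void)
{
	/* ~4 years of runtime in ns, stime being 3/4 of the tick total:
	 * the naive (stime * rtime) would overflow 64 bits. */
	uint64_t total = 4ULL * 365 * 24 * 3600 * 100;	/* ticks, hypothetical HZ=100 */
	uint64_t stime = 3 * total / 4;
	uint64_t rtime = 4ULL * 365 * 24 * 3600 * 1000000000ULL;

	printf("scaled stime = %llu ns (expect ~3/4 of rtime)\n",
	       (unsigned long long)scale_stime_demo(stime, rtime, total));
	return 0;
}
```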
fa092057 FW |
552 | /* |
553 | * Adjust tick based cputime random precision against scheduler | |
554 | * runtime accounting. | |
555 | */ | |
d37f761d FW |
556 | static void cputime_adjust(struct task_cputime *curr, |
557 | struct cputime *prev, | |
558 | cputime_t *ut, cputime_t *st) | |
73fbec60 | 559 | { |
5a8e01f8 | 560 | cputime_t rtime, stime, utime; |
fa092057 | 561 | |
73fbec60 | 562 | /* |
fa092057 FW |
563 | * Tick based cputime accounting depends on whether the random scheduling |
564 | * timeslices of a task happen to be interrupted by the timer or not. |
565 | * Depending on these circumstances, the number of these interrupts | |
566 | * may be over- or under-estimated, matching the real user and system |
567 | * cputime with a variable precision. | |
568 | * | |
569 | * Fix this by scaling these tick based values against the total | |
570 | * runtime accounted by the CFS scheduler. | |
73fbec60 | 571 | */ |
d37f761d | 572 | rtime = nsecs_to_cputime(curr->sum_exec_runtime); |
73fbec60 | 573 | |
772c808a SG |
574 | /* |
575 | * Update userspace visible utime/stime values only if actual execution | |
576 | * time is bigger than already exported. Note that it can happen that we |
577 | * previously provided bigger values due to scaling inaccuracy on big numbers. |
578 | */ | |
579 | if (prev->stime + prev->utime >= rtime) | |
580 | goto out; | |
581 | ||
5a8e01f8 SG |
582 | stime = curr->stime; |
583 | utime = curr->utime; | |
584 | ||
585 | if (utime == 0) { | |
586 | stime = rtime; | |
587 | } else if (stime == 0) { | |
588 | utime = rtime; | |
589 | } else { | |
590 | cputime_t total = stime + utime; | |
591 | ||
d9a3c982 FW |
592 | stime = scale_stime((__force u64)stime, |
593 | (__force u64)rtime, (__force u64)total); | |
68aa8efc | 594 | utime = rtime - stime; |
d9a3c982 | 595 | } |
73fbec60 FW |
596 | |
597 | /* | |
fa092057 FW |
598 | * If the tick based count grows faster than the scheduler one, |
599 | * the result of the scaling may go backward. | |
600 | * Let's enforce monotonicity. | |
73fbec60 | 601 | */ |
62188451 | 602 | prev->stime = max(prev->stime, stime); |
68aa8efc | 603 | prev->utime = max(prev->utime, utime); |
d37f761d | 604 | |
772c808a | 605 | out: |
d37f761d FW |
606 | *ut = prev->utime; |
607 | *st = prev->stime; | |
608 | } | |
73fbec60 | 609 | |
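A small worked example (hypothetical numbers, user-space only) of why cputime_adjust() clamps with max(): between two reads the tick counters can grow faster than sum_exec_runtime, so the rescaled stime may shrink, and prev_cputime is what keeps the values userspace sees monotonic:

```c
#include <stdint.h>
#include <stdio.h>

struct prev_cputime_demo { uint64_t utime, stime; };

static void cputime_adjust_demo(uint64_t utime, uint64_t stime, uint64_t rtime,
				struct prev_cputime_demo *prev,
				uint64_t *ut, uint64_t *st)
{
	if (prev->stime + prev->utime < rtime) {
		uint64_t total = stime + utime;
		/* Same idea as scale_stime(): split rtime in the stime:utime ratio. */
		uint64_t new_stime = total ? stime * rtime / total : rtime;

		/* Enforce monotonicity: never report values going backwards. */
		if (new_stime > prev->stime)
			prev->stime = new_stime;
		if (rtime - new_stime > prev->utime)
			prev->utime = rtime - new_stime;
	}
	*ut = prev->utime;
	*st = prev->stime;
}

int main(void)
{
	struct prev_cputime_demo prev = { 0, 0 };
	uint64_t ut, st;

	/* First read: 1 user tick, 3 system ticks, 8 units of runtime. */
	cputime_adjust_demo(1, 3, 8, &prev, &ut, &st);
	printf("ut=%llu st=%llu\n", (unsigned long long)ut, (unsigned long long)st);

	/*
	 * Second read: two more ticks landed on user time but runtime only
	 * grew to 9.  The rescaled stime (3*9/6 = 4) would go backwards, so
	 * the clamp keeps the previously exported 6.
	 */
	cputime_adjust_demo(3, 3, 9, &prev, &ut, &st);
	printf("ut=%llu st=%llu\n", (unsigned long long)ut, (unsigned long long)st);
	return 0;
}
```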
d37f761d FW |
610 | void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) |
611 | { | |
612 | struct task_cputime cputime = { | |
d37f761d FW |
613 | .sum_exec_runtime = p->se.sum_exec_runtime, |
614 | }; | |
615 | ||
6fac4829 | 616 | task_cputime(p, &cputime.utime, &cputime.stime); |
d37f761d | 617 | cputime_adjust(&cputime, &p->prev_cputime, ut, st); |
73fbec60 FW |
618 | } |
619 | ||
620 | /* | |
621 | * Must be called with siglock held. | |
622 | */ | |
e80d0a1a | 623 | void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) |
73fbec60 | 624 | { |
73fbec60 | 625 | struct task_cputime cputime; |
73fbec60 FW |
626 | |
627 | thread_group_cputime(p, &cputime); | |
d37f761d | 628 | cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); |
73fbec60 | 629 | } |
9fbc42ea | 630 | #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ |
abf917cd FW |
631 | |
632 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | |
6a61671b FW |
633 | static unsigned long long vtime_delta(struct task_struct *tsk) |
634 | { | |
635 | unsigned long long clock; | |
636 | ||
7f6575f1 | 637 | clock = local_clock(); |
6a61671b FW |
638 | if (clock < tsk->vtime_snap) |
639 | return 0; | |
abf917cd | 640 | |
6a61671b FW |
641 | return clock - tsk->vtime_snap; |
642 | } | |
643 | ||
644 | static cputime_t get_vtime_delta(struct task_struct *tsk) | |
abf917cd | 645 | { |
6a61671b | 646 | unsigned long long delta = vtime_delta(tsk); |
abf917cd | 647 | |
6a61671b FW |
648 | WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING); |
649 | tsk->vtime_snap += delta; | |
abf917cd FW |
650 | |
651 | /* CHECKME: always safe to convert nsecs to cputime? */ | |
652 | return nsecs_to_cputime(delta); | |
653 | } | |
654 | ||
6a61671b FW |
655 | static void __vtime_account_system(struct task_struct *tsk) |
656 | { | |
657 | cputime_t delta_cpu = get_vtime_delta(tsk); | |
658 | ||
659 | account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); | |
660 | } | |
661 | ||
abf917cd FW |
662 | void vtime_account_system(struct task_struct *tsk) |
663 | { | |
6a61671b FW |
664 | write_seqlock(&tsk->vtime_seqlock); |
665 | __vtime_account_system(tsk); | |
666 | write_sequnlock(&tsk->vtime_seqlock); | |
667 | } | |
3f4724ea | 668 | |
b0493406 | 669 | void vtime_gen_account_irq_exit(struct task_struct *tsk) |
6a61671b | 670 | { |
6a61671b | 671 | write_seqlock(&tsk->vtime_seqlock); |
af2350bd | 672 | __vtime_account_system(tsk); |
6a61671b FW |
673 | if (context_tracking_in_user()) |
674 | tsk->vtime_snap_whence = VTIME_USER; | |
6a61671b | 675 | write_sequnlock(&tsk->vtime_seqlock); |
abf917cd FW |
676 | } |
677 | ||
678 | void vtime_account_user(struct task_struct *tsk) | |
679 | { | |
3f4724ea FW |
680 | cputime_t delta_cpu; |
681 | ||
6a61671b | 682 | write_seqlock(&tsk->vtime_seqlock); |
54461562 | 683 | delta_cpu = get_vtime_delta(tsk); |
6a61671b | 684 | tsk->vtime_snap_whence = VTIME_SYS; |
abf917cd | 685 | account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); |
6a61671b FW |
686 | write_sequnlock(&tsk->vtime_seqlock); |
687 | } | |
688 | ||
689 | void vtime_user_enter(struct task_struct *tsk) | |
690 | { | |
6a61671b | 691 | write_seqlock(&tsk->vtime_seqlock); |
6a61671b | 692 | __vtime_account_system(tsk); |
af2350bd | 693 | tsk->vtime_snap_whence = VTIME_USER; |
6a61671b FW |
694 | write_sequnlock(&tsk->vtime_seqlock); |
695 | } | |
696 | ||
697 | void vtime_guest_enter(struct task_struct *tsk) | |
698 | { | |
5b206d48 FW |
699 | /* |
700 | * The flags must be updated under the lock with | |
701 | * the vtime_snap flush and update. | |
702 | * That enforces the right ordering and update sequence |
703 | * synchronization against the reader (task_gtime()) | |
704 | * that can thus safely catch up with a tickless delta. | |
705 | */ | |
6a61671b FW |
706 | write_seqlock(&tsk->vtime_seqlock); |
707 | __vtime_account_system(tsk); | |
708 | current->flags |= PF_VCPU; | |
709 | write_sequnlock(&tsk->vtime_seqlock); | |
710 | } | |
48d6a816 | 711 | EXPORT_SYMBOL_GPL(vtime_guest_enter); |
6a61671b FW |
712 | |
713 | void vtime_guest_exit(struct task_struct *tsk) | |
714 | { | |
715 | write_seqlock(&tsk->vtime_seqlock); | |
716 | __vtime_account_system(tsk); | |
717 | current->flags &= ~PF_VCPU; | |
718 | write_sequnlock(&tsk->vtime_seqlock); | |
abf917cd | 719 | } |
48d6a816 | 720 | EXPORT_SYMBOL_GPL(vtime_guest_exit); |
abf917cd FW |
721 | |
722 | void vtime_account_idle(struct task_struct *tsk) | |
723 | { | |
6a61671b | 724 | cputime_t delta_cpu = get_vtime_delta(tsk); |
abf917cd FW |
725 | |
726 | account_idle_time(delta_cpu); | |
727 | } | |
3f4724ea | 728 | |
6a61671b FW |
729 | void arch_vtime_task_switch(struct task_struct *prev) |
730 | { | |
731 | write_seqlock(&prev->vtime_seqlock); | |
732 | prev->vtime_snap_whence = VTIME_SLEEPING; | |
733 | write_sequnlock(&prev->vtime_seqlock); | |
734 | ||
735 | write_seqlock(¤t->vtime_seqlock); | |
736 | current->vtime_snap_whence = VTIME_SYS; | |
45eacc69 | 737 | current->vtime_snap = sched_clock_cpu(smp_processor_id()); |
6a61671b FW |
738 | write_sequnlock(¤t->vtime_seqlock); |
739 | } | |
740 | ||
45eacc69 | 741 | void vtime_init_idle(struct task_struct *t, int cpu) |
6a61671b FW |
742 | { |
743 | unsigned long flags; | |
744 | ||
745 | write_seqlock_irqsave(&t->vtime_seqlock, flags); | |
746 | t->vtime_snap_whence = VTIME_SYS; | |
45eacc69 | 747 | t->vtime_snap = sched_clock_cpu(cpu); |
6a61671b FW |
748 | write_sequnlock_irqrestore(&t->vtime_seqlock, flags); |
749 | } | |
750 | ||
751 | cputime_t task_gtime(struct task_struct *t) | |
752 | { | |
6a61671b FW |
753 | unsigned int seq; |
754 | cputime_t gtime; | |
755 | ||
756 | do { | |
cdc4e86b | 757 | seq = read_seqbegin(&t->vtime_seqlock); |
6a61671b FW |
758 | |
759 | gtime = t->gtime; | |
760 | if (t->flags & PF_VCPU) | |
761 | gtime += vtime_delta(t); | |
762 | ||
cdc4e86b | 763 | } while (read_seqretry(&t->vtime_seqlock, seq)); |
6a61671b FW |
764 | |
765 | return gtime; | |
766 | } | |
767 | ||
768 | /* | |
769 | * Fetch cputime raw values from fields of task_struct and | |
770 | * add up the pending nohz execution time since the last | |
771 | * cputime snapshot. | |
772 | */ | |
773 | static void | |
774 | fetch_task_cputime(struct task_struct *t, | |
775 | cputime_t *u_dst, cputime_t *s_dst, | |
776 | cputime_t *u_src, cputime_t *s_src, | |
777 | cputime_t *udelta, cputime_t *sdelta) | |
778 | { | |
6a61671b FW |
779 | unsigned int seq; |
780 | unsigned long long delta; | |
781 | ||
782 | do { | |
783 | *udelta = 0; | |
784 | *sdelta = 0; | |
785 | ||
cdc4e86b | 786 | seq = read_seqbegin(&t->vtime_seqlock); |
6a61671b FW |
787 | |
788 | if (u_dst) | |
789 | *u_dst = *u_src; | |
790 | if (s_dst) | |
791 | *s_dst = *s_src; | |
792 | ||
793 | /* Task is sleeping, nothing to add */ | |
794 | if (t->vtime_snap_whence == VTIME_SLEEPING || | |
795 | is_idle_task(t)) | |
796 | continue; | |
797 | ||
798 | delta = vtime_delta(t); | |
799 | ||
800 | /* | |
801 | * Task runs either in user or kernel space, add pending nohz time to | |
802 | * the right place. | |
803 | */ | |
804 | if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) { | |
805 | *udelta = delta; | |
806 | } else { | |
807 | if (t->vtime_snap_whence == VTIME_SYS) | |
808 | *sdelta = delta; | |
809 | } | |
cdc4e86b | 810 | } while (read_seqretry(&t->vtime_seqlock, seq)); |
6a61671b FW |
811 | } |
812 | ||
813 | ||
814 | void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) | |
815 | { | |
816 | cputime_t udelta, sdelta; | |
817 | ||
818 | fetch_task_cputime(t, utime, stime, &t->utime, | |
819 | &t->stime, &udelta, &sdelta); | |
820 | if (utime) | |
821 | *utime += udelta; | |
822 | if (stime) | |
823 | *stime += sdelta; | |
824 | } | |
825 | ||
826 | void task_cputime_scaled(struct task_struct *t, | |
827 | cputime_t *utimescaled, cputime_t *stimescaled) | |
828 | { | |
829 | cputime_t udelta, sdelta; | |
830 | ||
831 | fetch_task_cputime(t, utimescaled, stimescaled, | |
832 | &t->utimescaled, &t->stimescaled, &udelta, &sdelta); | |
833 | if (utimescaled) | |
834 | *utimescaled += cputime_to_scaled(udelta); | |
835 | if (stimescaled) | |
836 | *stimescaled += cputime_to_scaled(sdelta); | |
837 | } | |
abf917cd | 838 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ |