xen: rename dom0_op to platform_op
[deliverable/linux.git] / arch / x86 / xen / time.c
CommitLineData
15c84731
JF
1/*
2 * Xen time implementation.
3 *
4 * This is implemented in terms of a clocksource driver which uses
5 * the hypervisor clock as a nanosecond timebase, and a clockevent
6 * driver which uses the hypervisor's timer mechanism.
7 *
8 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
9 */
10#include <linux/kernel.h>
11#include <linux/interrupt.h>
12#include <linux/clocksource.h>
13#include <linux/clockchips.h>
f91a8b44 14#include <linux/kernel_stat.h>
f595ec96 15#include <linux/math64.h>
5a0e3ad6 16#include <linux/gfp.h>
c9d76a24 17#include <linux/slab.h>
5584880e 18#include <linux/pvclock_gtod.h>
15c84731 19
1c7b67f7 20#include <asm/pvclock.h>
15c84731
JF
21#include <asm/xen/hypervisor.h>
22#include <asm/xen/hypercall.h>
23
24#include <xen/events.h>
409771d2 25#include <xen/features.h>
15c84731
JF
26#include <xen/interface/xen.h>
27#include <xen/interface/vcpu.h>
28
29#include "xen-ops.h"
30
15c84731
JF
31/* Xen may fire a timer up to this many ns early */
32#define TIMER_SLOP 100000
f91a8b44 33#define NS_PER_TICK (1000000000LL / HZ)
15c84731 34
f91a8b44 35/* snapshots of runstate info */
c6e22f9e 36static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
f91a8b44 37
0b0c002c 38/* unused ns of stolen time */
c6e22f9e 39static DEFINE_PER_CPU(u64, xen_residual_stolen);
f91a8b44 40
f91a8b44
JF
/*
 * Account stolen time for this vcpu.
 *
 * Compares the current runstate against the per-cpu snapshot taken on
 * the previous call: any nanoseconds spent "runnable" (ready but not
 * scheduled by Xen) or "offline" count as stolen.  Whole ticks are fed
 * to account_steal_ticks(); the sub-tick remainder is carried over in
 * xen_residual_stolen for the next call.
 *
 * Called from the timer interrupt, so per-cpu accesses need no extra
 * protection.
 */
static void do_stolen_accounting(void)
{
	struct vcpu_runstate_info state;
	struct vcpu_runstate_info *snap;
	s64 runnable, offline, stolen;
	cputime_t ticks;

	xen_get_runstate_snapshot(&state);

	WARN_ON(state.state != RUNSTATE_running);

	snap = this_cpu_ptr(&xen_runstate_snapshot);

	/* work out how much time the VCPU has not been runn*ing* */
	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];

	*snap = state;

	/* Add the appropriate number of ticks of stolen time,
	   including any left-overs from last time. */
	stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);

	/* Clamp: the snapshot deltas can transiently go negative. */
	if (stolen < 0)
		stolen = 0;

	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
	__this_cpu_write(xen_residual_stolen, stolen);
	account_steal_ticks(ticks);
}
71
e93ef949 72/* Get the TSC speed from Xen */
409771d2 73static unsigned long xen_tsc_khz(void)
15c84731 74{
3807f345 75 struct pvclock_vcpu_time_info *info =
15c84731
JF
76 &HYPERVISOR_shared_info->vcpu_info[0].time;
77
3807f345 78 return pvclock_tsc_khz(info);
15c84731
JF
79}
80
/*
 * Read the current time from this vcpu's pvclock time info.
 *
 * Also used as sched_clock (see xen_time_ops), hence the notrace
 * preemption bracketing: it keeps us on one vcpu's time info for the
 * duration of the read without generating trace events.
 */
cycle_t xen_clocksource_read(void)
{
	struct pvclock_vcpu_time_info *src;
	cycle_t ret;

	preempt_disable_notrace();
	src = &__this_cpu_read(xen_vcpu)->time;
	ret = pvclock_clocksource_read(src);
	preempt_enable_notrace();
	return ret;
}
92
8e19608e
MD
/* Clocksource .read callback; 'cs' is unused (only one Xen clock). */
static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
{
	return xen_clocksource_read();
}
97
15c84731
JF
/*
 * Read the wall-clock time published by Xen in the shared-info page,
 * combined with this vcpu's pvclock system time, into *ts.
 *
 * get_cpu_var/put_cpu_var pin us to the current cpu while the per-cpu
 * xen_vcpu time info is in use.
 */
static void xen_read_wallclock(struct timespec *ts)
{
	struct shared_info *s = HYPERVISOR_shared_info;
	struct pvclock_wall_clock *wall_clock = &(s->wc);
	struct pvclock_vcpu_time_info *vcpu_time;

	vcpu_time = &get_cpu_var(xen_vcpu)->time;
	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
	put_cpu_var(xen_vcpu);
}
108
/* x86_platform.get_wallclock hook: delegate to the Xen wallclock. */
static void xen_get_wallclock(struct timespec *now)
{
	xen_read_wallclock(now);
}
15c84731 113
/*
 * x86_platform.set_wallclock hook: always fails.  A guest cannot set
 * the host RTC; dom0 keeps the native hook instead (see
 * xen_init_time_ops), and dom0's Xen wallclock updates go through the
 * pvclock_gtod notifier below.
 */
static int xen_set_wallclock(const struct timespec *now)
{
	return -1;
}
118
47433b8c
DV
/*
 * pvclock_gtod notifier: push the kernel's current time to Xen via the
 * XENPF_settime platform op.  To avoid a hypercall on every timekeeping
 * update, this is rate-limited to once per 11 minutes unless the clock
 * was explicitly set (was_set), mirroring the sync_cmos_clock() cadence
 * used for a hardware RTC.  Registered for dom0 only (xen_time_init).
 */
static int xen_pvclock_gtod_notify(struct notifier_block *nb,
				   unsigned long was_set, void *priv)
{
	/* Protected by the calling core code serialization */
	static struct timespec next_sync;

	struct xen_platform_op op;
	struct timespec now;

	now = __current_kernel_time();

	/*
	 * We only take the expensive HV call when the clock was set
	 * or when the 11 minutes RTC synchronization time elapsed.
	 */
	if (!was_set && timespec_compare(&now, &next_sync) < 0)
		return NOTIFY_OK;

	op.cmd = XENPF_settime;
	op.u.settime.secs = now.tv_sec;
	op.u.settime.nsecs = now.tv_nsec;
	op.u.settime.system_time = xen_clocksource_read();

	/* Best effort: failure to set the Xen wallclock is not fatal. */
	(void)HYPERVISOR_platform_op(&op);

	/*
	 * Move the next drift compensation time 11 minutes
	 * ahead. That's emulating the sync_cmos_clock() update for
	 * the hardware RTC.
	 */
	next_sync = now;
	next_sync.tv_sec += 11 * 60;

	return NOTIFY_OK;
}
154
5584880e
DV
/* Registered for the initial domain in xen_time_init(). */
static struct notifier_block xen_pvclock_gtod_notifier = {
	.notifier_call = xen_pvclock_gtod_notify,
};
158
15c84731
JF
/*
 * The Xen pvclock clocksource.  Rating 400 prefers it over the TSC;
 * xen_time_init() lowers it to 275 for dom0, where the raw TSC is safe.
 */
static struct clocksource xen_clocksource __read_mostly = {
	.name = "xen",
	.rating = 400,
	.read = xen_clocksource_get_cycles,
	.mask = ~0,
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
166
167/*
168 Xen clockevent implementation
169
170 Xen has two clockevent implementations:
171
172 The old timer_op one works with all released versions of Xen prior
173 to version 3.0.4. This version of the hypervisor provides a
174 single-shot timer with nanosecond resolution. However, sharing the
175 same event channel is a 100Hz tick which is delivered while the
176 vcpu is running. We don't care about or use this tick, but it will
177 cause the core time code to think the timer fired too soon, and
178 will end up resetting it each time. It could be filtered, but
179 doing so has complications when the ktime clocksource is not yet
180 the xen clocksource (ie, at boot time).
181
182 The new vcpu_op-based timer interface allows the tick timer period
183 to be changed or turned off. The tick timer is not useful as a
184 periodic timer because events are only delivered to running vcpus.
185 The one-shot timer can report when a timeout is in the past, so
186 set_next_event is capable of returning -ETIME when appropriate.
187 This interface is used when available.
188*/
189
190
191/*
192 Get a hypervisor absolute time. In theory we could maintain an
193 offset between the kernel's time and the hypervisor's time, and
194 apply that to a kernel's absolute timeout. Unfortunately the
195 hypervisor and kernel times can drift even if the kernel is using
196 the Xen clocksource, because ntp can warp the kernel's clocksource.
197*/
198static s64 get_abs_timeout(unsigned long delta)
199{
200 return xen_clocksource_read() + delta;
201}
202
/* Shut down the timer_op clockevent: a timeout of 0 cancels the timer. */
static int xen_timerop_shutdown(struct clock_event_device *evt)
{
	/* cancel timeout */
	HYPERVISOR_set_timer_op(0);

	return 0;
}
210
/* Program the legacy timer_op one-shot timer for 'delta' ns from now. */
static int xen_timerop_set_next_event(unsigned long delta,
				      struct clock_event_device *evt)
{
	WARN_ON(!clockevent_state_oneshot(evt));

	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
		BUG();

	/* We may have missed the deadline, but there's no real way of
	   knowing for sure.  If the event was in the past, then we'll
	   get an immediate interrupt. */

	return 0;
}
225
/*
 * Clockevent backed by the old timer_op hypercall (Xen < 3.0.4, see
 * the comment block above).  mult = 1 / shift = 0 means delta values
 * are passed straight through as nanoseconds.
 */
static const struct clock_event_device xen_timerop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_state_shutdown = xen_timerop_shutdown,
	.set_next_event = xen_timerop_set_next_event,
};
240
955381dd
VK
241static int xen_vcpuop_shutdown(struct clock_event_device *evt)
242{
243 int cpu = smp_processor_id();
15c84731 244
955381dd
VK
245 if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
246 HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
247 BUG();
15c84731 248
955381dd
VK
249 return 0;
250}
251
252static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
15c84731
JF
253{
254 int cpu = smp_processor_id();
255
955381dd
VK
256 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
257 BUG();
258
259 return 0;
15c84731
JF
260}
261
/*
 * Program the vcpu_op single-shot timer.  With VCPU_SSHOTTMR_future,
 * Xen rejects a timeout already in the past with -ETIME, which is a
 * valid return for a one-shot set_next_event (see the comment block
 * above); any other failure is fatal.
 */
static int xen_vcpuop_set_next_event(unsigned long delta,
				     struct clock_event_device *evt)
{
	int cpu = smp_processor_id();
	struct vcpu_set_singleshot_timer single;
	int ret;

	WARN_ON(!clockevent_state_oneshot(evt));

	single.timeout_abs_ns = get_abs_timeout(delta);
	single.flags = VCPU_SSHOTTMR_future;

	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);

	BUG_ON(ret != 0 && ret != -ETIME);

	return ret;
}
280
/*
 * Clockevent backed by the newer vcpu_op timer interface (preferred
 * when available, see xen_time_init).  mult = 1 / shift = 0: deltas
 * are plain nanoseconds.
 */
static const struct clock_event_device xen_vcpuop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_state_shutdown = xen_vcpuop_shutdown,
	.set_state_oneshot = xen_vcpuop_set_oneshot,
	.set_next_event = xen_vcpuop_set_next_event,
};
296
/*
 * Active clockevent template; defaults to timer_op and is switched to
 * the vcpu_op variant in xen_time_init() when the hypervisor supports
 * it.  Copied into each cpu's device by xen_setup_timer().
 */
static const struct clock_event_device *xen_clockevent =
	&xen_timerop_clockevent;

struct xen_clock_event_device {
	struct clock_event_device evt;
	char name[16];	/* per-cpu irq name, "timer%d" (xen_setup_timer) */
};
/* .evt.irq == -1 marks "no irq bound yet". */
static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
15c84731
JF
305
306static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
307{
89cbc767 308 struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);
15c84731
JF
309 irqreturn_t ret;
310
311 ret = IRQ_NONE;
312 if (evt->event_handler) {
313 evt->event_handler(evt);
314 ret = IRQ_HANDLED;
315 }
316
f91a8b44
JF
317 do_stolen_accounting();
318
15c84731
JF
319 return ret;
320}
321
09e99da7
KRW
/*
 * Unbind a cpu's timer irq (e.g. on cpu unplug).  Tearing down the
 * boot cpu's timer is a bug.
 */
void xen_teardown_timer(int cpu)
{
	struct clock_event_device *evt;
	BUG_ON(cpu == 0);
	evt = &per_cpu(xen_clock_events, cpu).evt;

	/* irq == -1 means "not bound" (see the DEFINE_PER_CPU initializer). */
	if (evt->irq >= 0) {
		unbind_from_irqhandler(evt->irq, NULL);
		evt->irq = -1;
	}
}
333
/*
 * Bind VIRQ_TIMER for @cpu and initialise its per-cpu clockevent from
 * the current xen_clockevent template.  A stale binding from a prior
 * hotplug cycle is torn down first.
 *
 * NOTE(review): the bind_virq_to_irqhandler() return value is stored
 * without a < 0 check — confirm failure cannot happen here.
 */
void xen_setup_timer(int cpu)
{
	struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
	struct clock_event_device *evt = &xevt->evt;
	int irq;

	WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
	if (evt->irq >= 0)
		xen_teardown_timer(cpu);

	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

	/* The name must live in the per-cpu struct: the irq keeps a pointer. */
	snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);

	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
				      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
				      IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
				      xevt->name, NULL);
	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);

	memcpy(evt, xen_clockevent, sizeof(*evt));

	evt->cpumask = cpumask_of(cpu);
	evt->irq = irq;
}
359
d68d82af 360
f87e4cac
JF
/* Register this cpu's clockevent device (prepared by xen_setup_timer). */
void xen_setup_cpu_clockevents(void)
{
	clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
}
365
d07af1f0
JF
366void xen_timer_resume(void)
367{
368 int cpu;
369
e7a3481c
JF
370 pvclock_resume();
371
d07af1f0
JF
372 if (xen_clockevent != &xen_vcpuop_clockevent)
373 return;
374
375 for_each_online_cpu(cpu) {
376 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
377 BUG();
378 }
379}
380
/* pv_ops time hooks: the Xen pvclock doubles as sched_clock. */
static const struct pv_time_ops xen_time_ops __initconst = {
	.sched_clock = xen_clocksource_read,
};
384
/*
 * Boot-time time initialisation: register the Xen clocksource, probe
 * for the vcpu_op timer interface, seed system time from the Xen
 * wallclock, and set up the boot cpu's timer.  Dom0 additionally
 * registers the pvclock_gtod notifier to keep the Xen wallclock synced.
 */
static void __init xen_time_init(void)
{
	int cpu = smp_processor_id();
	struct timespec tp;

	/* As Dom0 is never moved, no penalty on using TSC there */
	if (xen_initial_domain())
		xen_clocksource.rating = 275;

	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
		/* Successfully turned off 100Hz tick, so we have the
		   vcpuop-based timer interface */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Set initial system time with full resolution */
	xen_read_wallclock(&tp);
	do_settimeofday(&tp);

	/* The pvclock is TSC-based, so the TSC is always usable. */
	setup_force_cpu_cap(X86_FEATURE_TSC);

	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();

	if (xen_initial_domain())
		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}
409771d2 416
/*
 * Install the Xen time hooks into pv_ops / x86_init / x86_platform for
 * a PV guest.  Per-cpu clockevent setup is a no-op here because
 * xen_time_init() handles the boot cpu and hotplug does the rest.
 */
void __init xen_init_time_ops(void)
{
	pv_time_ops = xen_time_ops;

	x86_init.timers.timer_init = xen_time_init;
	x86_init.timers.setup_percpu_clockev = x86_init_noop;
	x86_cpuinit.setup_percpu_clockev = x86_init_noop;

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
	/* Dom0 uses the native method to set the hardware RTC. */
	if (!xen_initial_domain())
		x86_platform.set_wallclock = xen_set_wallclock;
}
431
ca65f9fc 432#ifdef CONFIG_XEN_PVHVM
409771d2
SS
/* Per-cpu clockevent setup for PVHVM guests (runstate + registration). */
static void xen_hvm_setup_cpu_clockevents(void)
{
	int cpu = smp_processor_id();
	xen_setup_runstate_info(cpu);
	/*
	 * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence
	 * doing it xen_hvm_cpu_notify (which gets called by smp_init during
	 * early bootup and also during CPU hotplug events).
	 */
	xen_setup_cpu_clockevents();
}
444
fb6ce5de 445void __init xen_hvm_init_time_ops(void)
409771d2
SS
446{
447 /* vector callback is needed otherwise we cannot receive interrupts
31e7e931
SS
448 * on cpu > 0 and at this point we don't know how many cpus are
449 * available */
450 if (!xen_have_vector_callback)
409771d2
SS
451 return;
452 if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
453 printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
454 "disable pv timer\n");
455 return;
456 }
457
458 pv_time_ops = xen_time_ops;
459 x86_init.timers.setup_percpu_clockev = xen_time_init;
460 x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
461
462 x86_platform.calibrate_tsc = xen_tsc_khz;
463 x86_platform.get_wallclock = xen_get_wallclock;
464 x86_platform.set_wallclock = xen_set_wallclock;
465}
ca65f9fc 466#endif
This page took 0.513125 seconds and 5 git commands to generate.