x86_64: make /proc/interrupts work with dyn irq_desc
[deliverable/linux.git] / arch / x86 / kernel / vsyscall_64.c
CommitLineData
1da177e4 1/*
1da177e4
LT
2 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
3 * Copyright 2003 Andi Kleen, SuSE Labs.
4 *
5 * Thanks to hpa@transmeta.com for some useful hint.
6 * Special thanks to Ingo Molnar for his early experience with
7 * a different vsyscall implementation for Linux/IA32 and for the name.
8 *
9 * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
10 * at virtual address -10Mbyte+1024bytes etc... There are at max 4
11 * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
12 * jumping out of line if necessary. We cannot add more with this
13 * mechanism because older kernels won't return -ENOSYS.
14 * If we want more than four we need a vDSO.
15 *
16 * Note: the concept clashes with user mode linux. If you use UML and
17 * want per guest time just set the kernel.vsyscall64 sysctl to 0.
18 */
19
20#include <linux/time.h>
21#include <linux/init.h>
22#include <linux/kernel.h>
23#include <linux/timer.h>
24#include <linux/seqlock.h>
25#include <linux/jiffies.h>
26#include <linux/sysctl.h>
7460ed28 27#include <linux/clocksource.h>
c08c8205 28#include <linux/getcpu.h>
8c131af1
AK
29#include <linux/cpu.h>
30#include <linux/smp.h>
31#include <linux/notifier.h>
1da177e4
LT
32
33#include <asm/vsyscall.h>
34#include <asm/pgtable.h>
35#include <asm/page.h>
7460ed28 36#include <asm/unistd.h>
1da177e4
LT
37#include <asm/fixmap.h>
38#include <asm/errno.h>
39#include <asm/io.h>
c08c8205
VP
40#include <asm/segment.h>
41#include <asm/desc.h>
42#include <asm/topology.h>
2aae950b 43#include <asm/vgtod.h>
1da177e4 44
23adec55
SR
/*
 * Place a function in its own .vsyscall_<nr> section; the linker script
 * maps each such section at the fixed user-visible address for slot <nr>.
 * "unused" silences warnings (the symbols are only reached through the
 * vsyscall page), and notrace keeps tracer instrumentation out of code
 * that executes in user context.
 */
#define __vsyscall(nr) \
	__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
/* Registers clobbered by the "syscall" instruction (rcx, r11) plus memory. */
#define __syscall_clobber "r11","cx","memory"
1da177e4 48
c8118c6c
ED
/*
 * vsyscall_gtod_data contains data that is :
 * - readonly from vsyscalls
 * - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64)
 * Try to keep this structure as small as possible to avoid cache line ping pongs
 */
/* Selects how vgetcpu() reads the cpu/node word (VGETCPU_RDTSCP or the
   GDT fallback); lives in a vsyscall-visible section. */
int __vgetcpu_mode __section_vgetcpu_mode;

struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
{
	.lock = SEQLOCK_UNLOCKED,
	.sysctl_enabled = 1,	/* vsyscall fast paths on by default */
};
1da177e4 62
2c622148
TB
63void update_vsyscall_tz(void)
64{
65 unsigned long flags;
66
67 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
68 /* sys_tz has changed */
69 vsyscall_gtod_data.sys_tz = sys_tz;
70 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
71}
72
7460ed28 73void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
1da177e4 74{
7460ed28 75 unsigned long flags;
1da177e4 76
7460ed28 77 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
78 /* copy vsyscall data */
c8118c6c
ED
79 vsyscall_gtod_data.clock.vread = clock->vread;
80 vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
81 vsyscall_gtod_data.clock.mask = clock->mask;
82 vsyscall_gtod_data.clock.mult = clock->mult;
83 vsyscall_gtod_data.clock.shift = clock->shift;
84 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
85 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
2aae950b 86 vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
7460ed28 87 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
1da177e4
LT
88}
89
/* RED-PEN may want to re-add seq locking, but then the variable should be
 * write-once.
 */
/*
 * Copy the timezone out of the vsyscall data page.  Must be
 * __always_inline: this runs in user context from the vsyscall page
 * and cannot make ordinary calls into the kernel image.
 */
static __always_inline void do_get_tz(struct timezone * tz)
{
	*tz = __vsyscall_gtod_data.sys_tz;
}
97
/*
 * Slow path: issue the real gettimeofday system call.  Inlined into
 * the vsyscall page code, hence the raw "syscall" instruction rather
 * than a call through the kernel image.
 */
static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
{
	int ret;
	/* rax = __NR_gettimeofday, rdi = tv, rsi = tz; result in rax */
	asm volatile("syscall"
		: "=a" (ret)
		: "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
		: __syscall_clobber );
	return ret;
}
107
/*
 * Slow path: issue the real time(2) system call.  Same constraints as
 * gettimeofday() above — must be inlined into the vsyscall page.
 */
static __always_inline long time_syscall(long *t)
{
	long secs;
	asm volatile("syscall"
		: "=a" (secs)
		: "0" (__NR_time),"D" (t) : __syscall_clobber);
	return secs;
}
116
/*
 * Userspace gettimeofday fast path.  Takes a consistent snapshot of
 * the gtod data under the seqlock read side, then converts the
 * clocksource delta since cycle_last into nanoseconds and folds it
 * into the wall time.  Falls back to the real syscall when vsyscalls
 * are disabled via sysctl or the clocksource has no userspace read
 * function (vread).
 */
static __always_inline void do_vgettimeofday(struct timeval * tv)
{
	cycle_t now, base, mask, cycle_delta;
	unsigned seq;
	unsigned long mult, shift, nsec;
	cycle_t (*vread)(void);
	do {
		seq = read_seqbegin(&__vsyscall_gtod_data.lock);

		vread = __vsyscall_gtod_data.clock.vread;
		/* disabled or no vread: delegate to the kernel and bail out */
		if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
			gettimeofday(tv,NULL);
			return;
		}
		now = vread();
		base = __vsyscall_gtod_data.clock.cycle_last;
		mask = __vsyscall_gtod_data.clock.mask;
		mult = __vsyscall_gtod_data.clock.mult;
		shift = __vsyscall_gtod_data.clock.shift;

		tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
		nsec = __vsyscall_gtod_data.wall_time_nsec;
	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));

	/* calculate interval: */
	cycle_delta = (now - base) & mask;
	/* convert to nsecs: */
	nsec += (cycle_delta * mult) >> shift;

	/* normalize nsec into tv_sec/tv_usec */
	while (nsec >= NSEC_PER_SEC) {
		tv->tv_sec += 1;
		nsec -= NSEC_PER_SEC;
	}
	tv->tv_usec = nsec / NSEC_PER_USEC;
}
152
/*
 * vsyscall slot 0: gettimeofday().  Either argument may be NULL.
 * Always returns 0.
 */
int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
{
	if (tv)
		do_vgettimeofday(tv);
	if (tz)
		do_get_tz(tz);
	return 0;
}
161
/* This will break when the xtime seconds get inaccurate, but that is
 * unlikely */
/*
 * vsyscall slot 1: time().  Derives the result from vgettimeofday()
 * so it stays coherent with the gettimeofday fast path; falls back to
 * the real time syscall when vsyscalls are disabled.  t may be NULL.
 */
time_t __vsyscall(1) vtime(time_t *t)
{
	struct timeval tv;
	time_t result;
	if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
		return time_syscall(t);

	vgettimeofday(&tv, NULL);
	result = tv.tv_sec;
	if (t)
		*t = result;
	return result;
}
177
/* Fast way to get current CPU and node.
   This helps to do per node and per CPU caches in user space.
   The result is not guaranteed without CPU affinity, but usually
   works out because the scheduler tries to keep a thread on the same
   CPU.

   tcache must point to a two element sized long array.
   All arguments can be NULL. */
long __vsyscall(2)
vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
{
	unsigned int p;		/* packed (node << 12) | cpu word */
	unsigned long j = 0;

	/* Fast cache - only recompute value once per jiffies and avoid
	   relatively costly rdtscp/cpuid otherwise.
	   This works because the scheduler usually keeps the process
	   on the same CPU and this syscall doesn't guarantee its
	   results anyways.
	   We do this here because otherwise user space would do it on
	   its own in a likely inferior way (no access to jiffies).
	   If you don't like it pass NULL. */
	if (tcache && tcache->blob[0] == (j = __jiffies)) {
		p = tcache->blob[1];
	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
		/* Load per CPU data from RDTSCP */
		native_read_tscp(&p);
	} else {
		/* Load per CPU data from GDT (limit of the per-CPU segment,
		   written by vsyscall_set_cpu) */
		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
	}
	if (tcache) {
		/* refresh the jiffies-stamped cache for the next call */
		tcache->blob[0] = j;
		tcache->blob[1] = p;
	}
	/* low 12 bits: cpu number; remaining high bits: node */
	if (cpu)
		*cpu = p & 0xfff;
	if (node)
		*node = p >> 12;
	return 0;
}
219
/* vsyscall slot 3: unused placeholder — always fails with -ENOSYS. */
static long __vsyscall(3) venosys_1(void)
{
	return -ENOSYS;
}
224
#ifdef CONFIG_SYSCTL

/*
 * Handler for /proc/sys/kernel/vsyscall64.  A plain proc_dointvec
 * pass-through: the vsyscall code reads sysctl_enabled directly, so
 * no extra action is needed when the value changes.
 */
static int
vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
		       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
}

/* kernel.vsyscall64: runtime on/off switch for the vsyscall fast paths. */
static ctl_table kernel_table2[] = {
	{ .procname = "vsyscall64",
	  .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
	  .mode = 0644,
	  .proc_handler = vsyscall_sysctl_change },
	{}
};

/* Root table hooking kernel_table2 under the "kernel" directory. */
static ctl_table kernel_root_table2[] = {
	{ .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
	  .child = kernel_table2 },
	{}
};
#endif
248
/* Assume __initcall executes before all user space. Hopefully kmod
   doesn't violate that. We'll find out if it does. */
/*
 * Record this CPU's number and NUMA node where userspace vgetcpu()
 * can read them: in the TSC auxiliary MSR when RDTSCP is available,
 * and always in the limit field of a per-CPU GDT descriptor (fetched
 * with the "lsl" instruction in vgetcpu).
 */
static void __cpuinit vsyscall_set_cpu(int cpu)
{
	unsigned long d;
	unsigned long node = 0;
#ifdef CONFIG_NUMA
	node = cpu_to_node(cpu);
#endif
	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
		write_rdtscp_aux((node << 12) | cpu);

	/* Store cpu number in limit so that it can be loaded quickly
	   in user space in vgetcpu.
	   12 bits for the CPU and 8 bits for the node. */
	d = 0x0f40000000000ULL;		/* descriptor type/flag bits */
	d |= cpu;			/* limit bits 0-11: cpu number */
	d |= (node & 0xf) << 12;	/* limit bits 12-15: node low nibble */
	d |= (node >> 4) << 48;		/* limit bits 16-19: node high nibble */
	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
270
/*
 * Per-CPU callback (run via on_each_cpu/smp_call_function_single):
 * set up the current CPU's vgetcpu data.  arg is unused.
 */
static void __cpuinit cpu_vsyscall_init(void *arg)
{
	/* preemption should be already off */
	vsyscall_set_cpu(raw_smp_processor_id());
}
276
277static int __cpuinit
278cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
279{
280 long cpu = (long)arg;
8bb78442 281 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
8691e5a8 282 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
8c131af1
AK
283 return NOTIFY_DONE;
284}
285
/*
 * Map the vsyscall page (containing the __vsyscall_0 code/data) at
 * its fixed, user-visible virtual address.  Called once at boot.
 */
void __init map_vsyscall(void)
{
	extern char __vsyscall_0;	/* start of the vsyscall section, from the linker script */
	unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);

	/* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
	__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
}
294
/*
 * Boot-time setup: verify the linker placed each vsyscall entry point
 * at its ABI-fixed address, register the vsyscall64 sysctl, initialize
 * vgetcpu data on every online CPU, and keep doing so for hotplugged
 * CPUs via the notifier.
 */
static int __init vsyscall_init(void)
{
	BUG_ON(((unsigned long) &vgettimeofday !=
		VSYSCALL_ADDR(__NR_vgettimeofday)));
	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
	BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
#ifdef CONFIG_SYSCTL
	register_sysctl_table(kernel_root_table2);
#endif
	on_each_cpu(cpu_vsyscall_init, NULL, 1);
	hotcpu_notifier(cpu_vsyscall_notifier, 0);
	return 0;
}

__initcall(vsyscall_init);
This page took 0.41621 seconds and 5 git commands to generate.