mm: use vm_unmapped_area() in hugetlbfs on sparc64 architecture
[deliverable/linux.git] / arch / sparc / kernel / sys_sparc_64.c
1 /* linux/arch/sparc64/kernel/sys_sparc.c
2 *
3 * This file contains various random system calls that
4 * have a non-standard calling sequence on the Linux/sparc
5 * platform.
6 */
7
8 #include <linux/errno.h>
9 #include <linux/types.h>
10 #include <linux/sched.h>
11 #include <linux/fs.h>
12 #include <linux/file.h>
13 #include <linux/mm.h>
14 #include <linux/sem.h>
15 #include <linux/msg.h>
16 #include <linux/shm.h>
17 #include <linux/stat.h>
18 #include <linux/mman.h>
19 #include <linux/utsname.h>
20 #include <linux/smp.h>
21 #include <linux/slab.h>
22 #include <linux/syscalls.h>
23 #include <linux/ipc.h>
24 #include <linux/personality.h>
25 #include <linux/random.h>
26 #include <linux/export.h>
27
28 #include <asm/uaccess.h>
29 #include <asm/utrap.h>
30 #include <asm/unistd.h>
31
32 #include "entry.h"
33 #include "systbls.h"
34
35 /* #define DEBUG_UNIMP_SYSCALL */
36
37 asmlinkage unsigned long sys_getpagesize(void)
38 {
39 return PAGE_SIZE;
40 }
41
/* Bounds of the excluded region: each constant is offset by 4GB
 * (1UL << 32) so that a guard band on either side is excluded too.
 */
#define VA_EXCLUDE_START	(0x0000080000000000UL - (1UL << 32UL))
#define VA_EXCLUDE_END		(0xfffff80000000000UL + (1UL << 32UL))

/* Return 1 when the range addr --> addr+len is unusable for a 64-bit
 * task, 0 otherwise.  A range is unusable when its length alone reaches
 * VA_EXCLUDE_START, when addr+len wraps past the end of the 64-bit
 * address space, or when either its start or its end lies inside
 * [VA_EXCLUDE_START, VA_EXCLUDE_END).
 */
static inline int invalid_64bit_range(unsigned long addr, unsigned long len)
{
	const unsigned long end = addr + len;

	if (unlikely(len >= VA_EXCLUDE_START))
		return 1;

	/* Unsigned wrap-around: the sum ended up below its starting point. */
	if (unlikely(end < addr))
		return 1;

	if (unlikely(addr >= VA_EXCLUDE_START && addr < VA_EXCLUDE_END))
		return 1;

	if (unlikely(end >= VA_EXCLUDE_START && end < VA_EXCLUDE_END))
		return 1;

	return 0;
}
68
69 /* These functions differ from the default implementations in
70 * mm/mmap.c in two ways:
71 *
72 * 1) For file backed MAP_SHARED mmap()'s we D-cache color align,
73 * for fixed such mappings we just validate what the user gave us.
74 * 2) For 64-bit tasks we avoid mapping anything within 4GB of
75 * the spitfire/niagara VA-hole.
76 */
77
78 static inline unsigned long COLOUR_ALIGN(unsigned long addr,
79 unsigned long pgoff)
80 {
81 unsigned long base = (addr+SHMLBA-1)&~(SHMLBA-1);
82 unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1);
83
84 return base + off;
85 }
86
87 unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
88 {
89 struct mm_struct *mm = current->mm;
90 struct vm_area_struct * vma;
91 unsigned long task_size = TASK_SIZE;
92 int do_color_align;
93 struct vm_unmapped_area_info info;
94
95 if (flags & MAP_FIXED) {
96 /* We do not accept a shared mapping if it would violate
97 * cache aliasing constraints.
98 */
99 if ((flags & MAP_SHARED) &&
100 ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)))
101 return -EINVAL;
102 return addr;
103 }
104
105 if (test_thread_flag(TIF_32BIT))
106 task_size = STACK_TOP32;
107 if (unlikely(len > task_size || len >= VA_EXCLUDE_START))
108 return -ENOMEM;
109
110 do_color_align = 0;
111 if (filp || (flags & MAP_SHARED))
112 do_color_align = 1;
113
114 if (addr) {
115 if (do_color_align)
116 addr = COLOUR_ALIGN(addr, pgoff);
117 else
118 addr = PAGE_ALIGN(addr);
119
120 vma = find_vma(mm, addr);
121 if (task_size - len >= addr &&
122 (!vma || addr + len <= vma->vm_start))
123 return addr;
124 }
125
126 info.flags = 0;
127 info.length = len;
128 info.low_limit = TASK_UNMAPPED_BASE;
129 info.high_limit = min(task_size, VA_EXCLUDE_START);
130 info.align_mask = do_color_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
131 info.align_offset = pgoff << PAGE_SHIFT;
132 addr = vm_unmapped_area(&info);
133
134 if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
135 VM_BUG_ON(addr != -ENOMEM);
136 info.low_limit = VA_EXCLUDE_END;
137 info.high_limit = task_size;
138 addr = vm_unmapped_area(&info);
139 }
140
141 return addr;
142 }
143
144 unsigned long
145 arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
146 const unsigned long len, const unsigned long pgoff,
147 const unsigned long flags)
148 {
149 struct vm_area_struct *vma;
150 struct mm_struct *mm = current->mm;
151 unsigned long task_size = STACK_TOP32;
152 unsigned long addr = addr0;
153 int do_color_align;
154 struct vm_unmapped_area_info info;
155
156 /* This should only ever run for 32-bit processes. */
157 BUG_ON(!test_thread_flag(TIF_32BIT));
158
159 if (flags & MAP_FIXED) {
160 /* We do not accept a shared mapping if it would violate
161 * cache aliasing constraints.
162 */
163 if ((flags & MAP_SHARED) &&
164 ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)))
165 return -EINVAL;
166 return addr;
167 }
168
169 if (unlikely(len > task_size))
170 return -ENOMEM;
171
172 do_color_align = 0;
173 if (filp || (flags & MAP_SHARED))
174 do_color_align = 1;
175
176 /* requesting a specific address */
177 if (addr) {
178 if (do_color_align)
179 addr = COLOUR_ALIGN(addr, pgoff);
180 else
181 addr = PAGE_ALIGN(addr);
182
183 vma = find_vma(mm, addr);
184 if (task_size - len >= addr &&
185 (!vma || addr + len <= vma->vm_start))
186 return addr;
187 }
188
189 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
190 info.length = len;
191 info.low_limit = PAGE_SIZE;
192 info.high_limit = mm->mmap_base;
193 info.align_mask = do_color_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
194 info.align_offset = pgoff << PAGE_SHIFT;
195 addr = vm_unmapped_area(&info);
196
197 /*
198 * A failed mmap() very likely causes application failure,
199 * so fall back to the bottom-up function here. This scenario
200 * can happen with large stack limits and large mmap()
201 * allocations.
202 */
203 if (addr & ~PAGE_MASK) {
204 VM_BUG_ON(addr != -ENOMEM);
205 info.flags = 0;
206 info.low_limit = TASK_UNMAPPED_BASE;
207 info.high_limit = STACK_TOP32;
208 addr = vm_unmapped_area(&info);
209 }
210
211 return addr;
212 }
213
214 /* Try to align mapping such that we align it as much as possible. */
215 unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
216 {
217 unsigned long align_goal, addr = -ENOMEM;
218 unsigned long (*get_area)(struct file *, unsigned long,
219 unsigned long, unsigned long, unsigned long);
220
221 get_area = current->mm->get_unmapped_area;
222
223 if (flags & MAP_FIXED) {
224 /* Ok, don't mess with it. */
225 return get_area(NULL, orig_addr, len, pgoff, flags);
226 }
227 flags &= ~MAP_SHARED;
228
229 align_goal = PAGE_SIZE;
230 if (len >= (4UL * 1024 * 1024))
231 align_goal = (4UL * 1024 * 1024);
232 else if (len >= (512UL * 1024))
233 align_goal = (512UL * 1024);
234 else if (len >= (64UL * 1024))
235 align_goal = (64UL * 1024);
236
237 do {
238 addr = get_area(NULL, orig_addr, len + (align_goal - PAGE_SIZE), pgoff, flags);
239 if (!(addr & ~PAGE_MASK)) {
240 addr = (addr + (align_goal - 1UL)) & ~(align_goal - 1UL);
241 break;
242 }
243
244 if (align_goal == (4UL * 1024 * 1024))
245 align_goal = (512UL * 1024);
246 else if (align_goal == (512UL * 1024))
247 align_goal = (64UL * 1024);
248 else
249 align_goal = PAGE_SIZE;
250 } while ((addr & ~PAGE_MASK) && align_goal > PAGE_SIZE);
251
252 /* Mapping is smaller than 64K or larger areas could not
253 * be obtained.
254 */
255 if (addr & ~PAGE_MASK)
256 addr = get_area(NULL, orig_addr, len, pgoff, flags);
257
258 return addr;
259 }
260 EXPORT_SYMBOL(get_fb_unmapped_area);
261
262 /* Essentially the same as PowerPC. */
263 static unsigned long mmap_rnd(void)
264 {
265 unsigned long rnd = 0UL;
266
267 if (current->flags & PF_RANDOMIZE) {
268 unsigned long val = get_random_int();
269 if (test_thread_flag(TIF_32BIT))
270 rnd = (val % (1UL << (23UL-PAGE_SHIFT)));
271 else
272 rnd = (val % (1UL << (30UL-PAGE_SHIFT)));
273 }
274 return rnd << PAGE_SHIFT;
275 }
276
277 void arch_pick_mmap_layout(struct mm_struct *mm)
278 {
279 unsigned long random_factor = mmap_rnd();
280 unsigned long gap;
281
282 /*
283 * Fall back to the standard layout if the personality
284 * bit is set, or if the expected stack growth is unlimited:
285 */
286 gap = rlimit(RLIMIT_STACK);
287 if (!test_thread_flag(TIF_32BIT) ||
288 (current->personality & ADDR_COMPAT_LAYOUT) ||
289 gap == RLIM_INFINITY ||
290 sysctl_legacy_va_layout) {
291 mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
292 mm->get_unmapped_area = arch_get_unmapped_area;
293 mm->unmap_area = arch_unmap_area;
294 } else {
295 /* We know it's 32-bit */
296 unsigned long task_size = STACK_TOP32;
297
298 if (gap < 128 * 1024 * 1024)
299 gap = 128 * 1024 * 1024;
300 if (gap > (task_size / 6 * 5))
301 gap = (task_size / 6 * 5);
302
303 mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor);
304 mm->get_unmapped_area = arch_get_unmapped_area_topdown;
305 mm->unmap_area = arch_unmap_area_topdown;
306 }
307 }
308
309 /*
310 * sys_pipe() is the normal C calling standard for creating
311 * a pipe. It's not the way unix traditionally does this, though.
312 */
313 SYSCALL_DEFINE1(sparc_pipe_real, struct pt_regs *, regs)
314 {
315 int fd[2];
316 int error;
317
318 error = do_pipe_flags(fd, 0);
319 if (error)
320 goto out;
321 regs->u_regs[UREG_I1] = fd[1];
322 error = fd[0];
323 out:
324 return error;
325 }
326
327 /*
328 * sys_ipc() is the de-multiplexer for the SysV IPC calls..
329 *
330 * This is really horribly ugly.
331 */
332
333 SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second,
334 unsigned long, third, void __user *, ptr, long, fifth)
335 {
336 long err;
337
338 /* No need for backward compatibility. We can start fresh... */
339 if (call <= SEMCTL) {
340 switch (call) {
341 case SEMOP:
342 err = sys_semtimedop(first, ptr,
343 (unsigned)second, NULL);
344 goto out;
345 case SEMTIMEDOP:
346 err = sys_semtimedop(first, ptr, (unsigned)second,
347 (const struct timespec __user *)
348 (unsigned long) fifth);
349 goto out;
350 case SEMGET:
351 err = sys_semget(first, (int)second, (int)third);
352 goto out;
353 case SEMCTL: {
354 err = sys_semctl(first, second,
355 (int)third | IPC_64,
356 (union semun) ptr);
357 goto out;
358 }
359 default:
360 err = -ENOSYS;
361 goto out;
362 }
363 }
364 if (call <= MSGCTL) {
365 switch (call) {
366 case MSGSND:
367 err = sys_msgsnd(first, ptr, (size_t)second,
368 (int)third);
369 goto out;
370 case MSGRCV:
371 err = sys_msgrcv(first, ptr, (size_t)second, fifth,
372 (int)third);
373 goto out;
374 case MSGGET:
375 err = sys_msgget((key_t)first, (int)second);
376 goto out;
377 case MSGCTL:
378 err = sys_msgctl(first, (int)second | IPC_64, ptr);
379 goto out;
380 default:
381 err = -ENOSYS;
382 goto out;
383 }
384 }
385 if (call <= SHMCTL) {
386 switch (call) {
387 case SHMAT: {
388 ulong raddr;
389 err = do_shmat(first, ptr, (int)second, &raddr, SHMLBA);
390 if (!err) {
391 if (put_user(raddr,
392 (ulong __user *) third))
393 err = -EFAULT;
394 }
395 goto out;
396 }
397 case SHMDT:
398 err = sys_shmdt(ptr);
399 goto out;
400 case SHMGET:
401 err = sys_shmget(first, (size_t)second, (int)third);
402 goto out;
403 case SHMCTL:
404 err = sys_shmctl(first, (int)second | IPC_64, ptr);
405 goto out;
406 default:
407 err = -ENOSYS;
408 goto out;
409 }
410 } else {
411 err = -ENOSYS;
412 }
413 out:
414 return err;
415 }
416
417 SYSCALL_DEFINE1(sparc64_personality, unsigned long, personality)
418 {
419 int ret;
420
421 if (personality(current->personality) == PER_LINUX32 &&
422 personality(personality) == PER_LINUX)
423 personality |= PER_LINUX32;
424 ret = sys_personality(personality);
425 if (personality(ret) == PER_LINUX32)
426 ret &= ~PER_LINUX32;
427
428 return ret;
429 }
430
431 int sparc_mmap_check(unsigned long addr, unsigned long len)
432 {
433 if (test_thread_flag(TIF_32BIT)) {
434 if (len >= STACK_TOP32)
435 return -EINVAL;
436
437 if (addr > STACK_TOP32 - len)
438 return -EINVAL;
439 } else {
440 if (len >= VA_EXCLUDE_START)
441 return -EINVAL;
442
443 if (invalid_64bit_range(addr, len))
444 return -EINVAL;
445 }
446
447 return 0;
448 }
449
450 /* Linux version of mmap */
451 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
452 unsigned long, prot, unsigned long, flags, unsigned long, fd,
453 unsigned long, off)
454 {
455 unsigned long retval = -EINVAL;
456
457 if ((off + PAGE_ALIGN(len)) < off)
458 goto out;
459 if (off & ~PAGE_MASK)
460 goto out;
461 retval = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
462 out:
463 return retval;
464 }
465
466 SYSCALL_DEFINE2(64_munmap, unsigned long, addr, size_t, len)
467 {
468 if (invalid_64bit_range(addr, len))
469 return -EINVAL;
470
471 return vm_munmap(addr, len);
472 }
473
474 extern unsigned long do_mremap(unsigned long addr,
475 unsigned long old_len, unsigned long new_len,
476 unsigned long flags, unsigned long new_addr);
477
478 SYSCALL_DEFINE5(64_mremap, unsigned long, addr, unsigned long, old_len,
479 unsigned long, new_len, unsigned long, flags,
480 unsigned long, new_addr)
481 {
482 if (test_thread_flag(TIF_32BIT))
483 return -EINVAL;
484 return sys_mremap(addr, old_len, new_len, flags, new_addr);
485 }
486
487 /* we come to here via sys_nis_syscall so it can setup the regs argument */
488 asmlinkage unsigned long c_sys_nis_syscall(struct pt_regs *regs)
489 {
490 static int count;
491
492 /* Don't make the system unusable, if someone goes stuck */
493 if (count++ > 5)
494 return -ENOSYS;
495
496 printk ("Unimplemented SPARC system call %ld\n",regs->u_regs[1]);
497 #ifdef DEBUG_UNIMP_SYSCALL
498 show_regs (regs);
499 #endif
500
501 return -ENOSYS;
502 }
503
504 /* #define DEBUG_SPARC_BREAKPOINT */
505
506 asmlinkage void sparc_breakpoint(struct pt_regs *regs)
507 {
508 siginfo_t info;
509
510 if (test_thread_flag(TIF_32BIT)) {
511 regs->tpc &= 0xffffffff;
512 regs->tnpc &= 0xffffffff;
513 }
514 #ifdef DEBUG_SPARC_BREAKPOINT
515 printk ("TRAP: Entering kernel PC=%lx, nPC=%lx\n", regs->tpc, regs->tnpc);
516 #endif
517 info.si_signo = SIGTRAP;
518 info.si_errno = 0;
519 info.si_code = TRAP_BRKPT;
520 info.si_addr = (void __user *)regs->tpc;
521 info.si_trapno = 0;
522 force_sig_info(SIGTRAP, &info, current);
523 #ifdef DEBUG_SPARC_BREAKPOINT
524 printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc);
525 #endif
526 }
527
528 extern void check_pending(int signum);
529
530 SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len)
531 {
532 int nlen, err;
533
534 if (len < 0)
535 return -EINVAL;
536
537 down_read(&uts_sem);
538
539 nlen = strlen(utsname()->domainname) + 1;
540 err = -EINVAL;
541 if (nlen > len)
542 goto out;
543
544 err = -EFAULT;
545 if (!copy_to_user(name, utsname()->domainname, nlen))
546 err = 0;
547
548 out:
549 up_read(&uts_sem);
550 return err;
551 }
552
553 SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
554 utrap_handler_t, new_p, utrap_handler_t, new_d,
555 utrap_handler_t __user *, old_p,
556 utrap_handler_t __user *, old_d)
557 {
558 if (type < UT_INSTRUCTION_EXCEPTION || type > UT_TRAP_INSTRUCTION_31)
559 return -EINVAL;
560 if (new_p == (utrap_handler_t)(long)UTH_NOCHANGE) {
561 if (old_p) {
562 if (!current_thread_info()->utraps) {
563 if (put_user(NULL, old_p))
564 return -EFAULT;
565 } else {
566 if (put_user((utrap_handler_t)(current_thread_info()->utraps[type]), old_p))
567 return -EFAULT;
568 }
569 }
570 if (old_d) {
571 if (put_user(NULL, old_d))
572 return -EFAULT;
573 }
574 return 0;
575 }
576 if (!current_thread_info()->utraps) {
577 current_thread_info()->utraps =
578 kzalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long), GFP_KERNEL);
579 if (!current_thread_info()->utraps)
580 return -ENOMEM;
581 current_thread_info()->utraps[0] = 1;
582 } else {
583 if ((utrap_handler_t)current_thread_info()->utraps[type] != new_p &&
584 current_thread_info()->utraps[0] > 1) {
585 unsigned long *p = current_thread_info()->utraps;
586
587 current_thread_info()->utraps =
588 kmalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long),
589 GFP_KERNEL);
590 if (!current_thread_info()->utraps) {
591 current_thread_info()->utraps = p;
592 return -ENOMEM;
593 }
594 p[0]--;
595 current_thread_info()->utraps[0] = 1;
596 memcpy(current_thread_info()->utraps+1, p+1,
597 UT_TRAP_INSTRUCTION_31*sizeof(long));
598 }
599 }
600 if (old_p) {
601 if (put_user((utrap_handler_t)(current_thread_info()->utraps[type]), old_p))
602 return -EFAULT;
603 }
604 if (old_d) {
605 if (put_user(NULL, old_d))
606 return -EFAULT;
607 }
608 current_thread_info()->utraps[type] = (long)new_p;
609
610 return 0;
611 }
612
613 asmlinkage long sparc_memory_ordering(unsigned long model,
614 struct pt_regs *regs)
615 {
616 if (model >= 3)
617 return -EINVAL;
618 regs->tstate = (regs->tstate & ~TSTATE_MM) | (model << 14);
619 return 0;
620 }
621
622 SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act,
623 struct sigaction __user *, oact, void __user *, restorer,
624 size_t, sigsetsize)
625 {
626 struct k_sigaction new_ka, old_ka;
627 int ret;
628
629 /* XXX: Don't preclude handling different sized sigset_t's. */
630 if (sigsetsize != sizeof(sigset_t))
631 return -EINVAL;
632
633 if (act) {
634 new_ka.ka_restorer = restorer;
635 if (copy_from_user(&new_ka.sa, act, sizeof(*act)))
636 return -EFAULT;
637 }
638
639 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
640
641 if (!ret && oact) {
642 if (copy_to_user(oact, &old_ka.sa, sizeof(*oact)))
643 return -EFAULT;
644 }
645
646 return ret;
647 }
648
649 /*
650 * Do a system call from kernel instead of calling sys_execve so we
651 * end up with proper pt_regs.
652 */
653 int kernel_execve(const char *filename,
654 const char *const argv[],
655 const char *const envp[])
656 {
657 long __res;
658 register long __g1 __asm__ ("g1") = __NR_execve;
659 register long __o0 __asm__ ("o0") = (long)(filename);
660 register long __o1 __asm__ ("o1") = (long)(argv);
661 register long __o2 __asm__ ("o2") = (long)(envp);
662 asm volatile ("t 0x6d\n\t"
663 "sub %%g0, %%o0, %0\n\t"
664 "movcc %%xcc, %%o0, %0\n\t"
665 : "=r" (__res), "=&r" (__o0)
666 : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1)
667 : "cc");
668 return __res;
669 }
670
671 asmlinkage long sys_kern_features(void)
672 {
673 return KERN_FEATURE_MIXED_MODE_STACK;
674 }
This page took 0.046263 seconds and 6 git commands to generate.