net: Add .gitignore to networking selftests directory.
[deliverable/linux.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1fd7317d 88#include <linux/magic.h>
5a0e3ad6 89#include <linux/slab.h>
600e1779 90#include <linux/xattr.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
107
1da177e4 108static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
110 unsigned long nr_segs, loff_t pos);
111static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
112 unsigned long nr_segs, loff_t pos);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
116static unsigned int sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
138 .aio_read = sock_aio_read,
139 .aio_write = sock_aio_write,
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4
LT
145 .mmap = sock_mmap,
146 .open = sock_no_open, /* special open code to disallow open via /proc */
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
190683a9 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4
LT
161/*
162 * Statistics counters of the socket lists
163 */
164
c6d409cf 165static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
166
167/*
89bddce5
SH
168 * Support routines.
169 * Move socket addresses back and forth across the kernel/user
170 * divide and look after the messy bits.
1da177e4
LT
171 */
172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
43db362d 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 185{
230b1839 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5 211
43db362d 212static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 213 void __user *uaddr, int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
230b1839 223 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
eaefd110 243 struct socket_wq *wq;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
eaefd110
ED
248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
249 if (!wq) {
43815482
ED
250 kmem_cache_free(sock_inode_cachep, ei);
251 return NULL;
252 }
eaefd110
ED
253 init_waitqueue_head(&wq->wait);
254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482 268 struct socket_alloc *ei;
eaefd110 269 struct socket_wq *wq;
43815482
ED
270
271 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 273 kfree_rcu(wq, rcu);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1da177e4
LT
284static int init_inodecache(void)
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
291 SLAB_MEM_SPREAD),
20c2df83 292 init_once);
1da177e4
LT
293 if (sock_inode_cachep == NULL)
294 return -ENOMEM;
295 return 0;
296}
297
b87221de 298static const struct super_operations sockfs_ops = {
c6d409cf
ED
299 .alloc_inode = sock_alloc_inode,
300 .destroy_inode = sock_destroy_inode,
301 .statfs = simple_statfs,
1da177e4
LT
302};
303
c23fbb6b
ED
304/*
305 * sockfs_dname() is called from d_path().
306 */
307static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
308{
309 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
310 dentry->d_inode->i_ino);
311}
312
3ba13d17 313static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 314 .d_dname = sockfs_dname,
1da177e4
LT
315};
316
c74a1cbb
AV
317static struct dentry *sockfs_mount(struct file_system_type *fs_type,
318 int flags, const char *dev_name, void *data)
319{
320 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
321 &sockfs_dentry_operations, SOCKFS_MAGIC);
322}
323
324static struct vfsmount *sock_mnt __read_mostly;
325
326static struct file_system_type sock_fs_type = {
327 .name = "sockfs",
328 .mount = sockfs_mount,
329 .kill_sb = kill_anon_super,
330};
331
1da177e4
LT
332/*
333 * Obtains the first available file descriptor and sets it up for use.
334 *
39d8c1b6
DM
335 * These functions create file structures and maps them to fd space
336 * of the current process. On success it returns file descriptor
1da177e4
LT
337 * and file struct implicitly stored in sock->file.
338 * Note that another thread may close file descriptor before we return
339 * from this function. We use the fact that now we do not refer
340 * to socket after mapping. If one day we will need it, this
341 * function will increment ref. count on file by 1.
342 *
343 * In any case returned fd MAY BE not valid!
344 * This race condition is unavoidable
345 * with shared fd spaces, we cannot solve it inside kernel,
346 * but we take care of internal coherence yet.
347 */
348
aab174f0 349struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 350{
7cbe66b6 351 struct qstr name = { .name = "" };
2c48b9c4 352 struct path path;
7cbe66b6 353 struct file *file;
1da177e4 354
600e1779
MY
355 if (dname) {
356 name.name = dname;
357 name.len = strlen(name.name);
358 } else if (sock->sk) {
359 name.name = sock->sk->sk_prot_creator->name;
360 name.len = strlen(name.name);
361 }
4b936885 362 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
363 if (unlikely(!path.dentry))
364 return ERR_PTR(-ENOMEM);
2c48b9c4 365 path.mnt = mntget(sock_mnt);
39d8c1b6 366
2c48b9c4 367 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 368 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 369
2c48b9c4 370 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 371 &socket_file_ops);
39b65252 372 if (unlikely(IS_ERR(file))) {
cc3808f8 373 /* drop dentry, keep inode */
7de9c6ee 374 ihold(path.dentry->d_inode);
2c48b9c4 375 path_put(&path);
39b65252 376 return file;
cc3808f8
AV
377 }
378
379 sock->file = file;
77d27200 380 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 381 file->private_data = sock;
28407630 382 return file;
39d8c1b6 383}
56b31d1c 384EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 385
56b31d1c 386static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
387{
388 struct file *newfile;
28407630
AV
389 int fd = get_unused_fd_flags(flags);
390 if (unlikely(fd < 0))
391 return fd;
39d8c1b6 392
aab174f0 393 newfile = sock_alloc_file(sock, flags, NULL);
28407630 394 if (likely(!IS_ERR(newfile))) {
39d8c1b6 395 fd_install(fd, newfile);
28407630
AV
396 return fd;
397 }
7cbe66b6 398
28407630
AV
399 put_unused_fd(fd);
400 return PTR_ERR(newfile);
1da177e4
LT
401}
402
406a3c63 403struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 404{
6cb153ca
BL
405 if (file->f_op == &socket_file_ops)
406 return file->private_data; /* set in sock_map_fd */
407
23bb80d2
ED
408 *err = -ENOTSOCK;
409 return NULL;
6cb153ca 410}
406a3c63 411EXPORT_SYMBOL(sock_from_file);
6cb153ca 412
1da177e4 413/**
c6d409cf 414 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
415 * @fd: file handle
416 * @err: pointer to an error code return
417 *
418 * The file handle passed in is locked and the socket it is bound
419 * too is returned. If an error occurs the err pointer is overwritten
420 * with a negative errno code and NULL is returned. The function checks
421 * for both invalid handles and passing a handle which is not a socket.
422 *
423 * On a success the socket object pointer is returned.
424 */
425
426struct socket *sockfd_lookup(int fd, int *err)
427{
428 struct file *file;
1da177e4
LT
429 struct socket *sock;
430
89bddce5
SH
431 file = fget(fd);
432 if (!file) {
1da177e4
LT
433 *err = -EBADF;
434 return NULL;
435 }
89bddce5 436
6cb153ca
BL
437 sock = sock_from_file(file, err);
438 if (!sock)
1da177e4 439 fput(file);
6cb153ca
BL
440 return sock;
441}
c6d409cf 442EXPORT_SYMBOL(sockfd_lookup);
1da177e4 443
6cb153ca
BL
444static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
445{
446 struct file *file;
447 struct socket *sock;
448
3672558c 449 *err = -EBADF;
6cb153ca
BL
450 file = fget_light(fd, fput_needed);
451 if (file) {
452 sock = sock_from_file(file, err);
453 if (sock)
454 return sock;
455 fput_light(file, *fput_needed);
1da177e4 456 }
6cb153ca 457 return NULL;
1da177e4
LT
458}
459
600e1779
MY
460#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
461#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
462#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
463static ssize_t sockfs_getxattr(struct dentry *dentry,
464 const char *name, void *value, size_t size)
465{
466 const char *proto_name;
467 size_t proto_size;
468 int error;
469
470 error = -ENODATA;
471 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
472 proto_name = dentry->d_name.name;
473 proto_size = strlen(proto_name);
474
475 if (value) {
476 error = -ERANGE;
477 if (proto_size + 1 > size)
478 goto out;
479
480 strncpy(value, proto_name, proto_size + 1);
481 }
482 error = proto_size + 1;
483 }
484
485out:
486 return error;
487}
488
489static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
490 size_t size)
491{
492 ssize_t len;
493 ssize_t used = 0;
494
495 len = security_inode_listsecurity(dentry->d_inode, buffer, size);
496 if (len < 0)
497 return len;
498 used += len;
499 if (buffer) {
500 if (size < used)
501 return -ERANGE;
502 buffer += len;
503 }
504
505 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
506 used += len;
507 if (buffer) {
508 if (size < used)
509 return -ERANGE;
510 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
511 buffer += len;
512 }
513
514 return used;
515}
516
517static const struct inode_operations sockfs_inode_ops = {
518 .getxattr = sockfs_getxattr,
519 .listxattr = sockfs_listxattr,
520};
521
1da177e4
LT
522/**
523 * sock_alloc - allocate a socket
89bddce5 524 *
1da177e4
LT
525 * Allocate a new inode and socket object. The two are bound together
526 * and initialised. The socket is then returned. If we are out of inodes
527 * NULL is returned.
528 */
529
530static struct socket *sock_alloc(void)
531{
89bddce5
SH
532 struct inode *inode;
533 struct socket *sock;
1da177e4 534
a209dfc7 535 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
536 if (!inode)
537 return NULL;
538
539 sock = SOCKET_I(inode);
540
29a020d3 541 kmemcheck_annotate_bitfield(sock, type);
85fe4025 542 inode->i_ino = get_next_ino();
89bddce5 543 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
544 inode->i_uid = current_fsuid();
545 inode->i_gid = current_fsgid();
600e1779 546 inode->i_op = &sockfs_inode_ops;
1da177e4 547
19e8d69c 548 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
549 return sock;
550}
551
552/*
553 * In theory you can't get an open on this inode, but /proc provides
554 * a back door. Remember to keep it shut otherwise you'll let the
555 * creepy crawlies in.
556 */
89bddce5 557
1da177e4
LT
558static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
559{
560 return -ENXIO;
561}
562
4b6f5d20 563const struct file_operations bad_sock_fops = {
1da177e4
LT
564 .owner = THIS_MODULE,
565 .open = sock_no_open,
6038f373 566 .llseek = noop_llseek,
1da177e4
LT
567};
568
569/**
570 * sock_release - close a socket
571 * @sock: socket to close
572 *
573 * The socket is released from the protocol stack if it has a release
574 * callback, and the inode is then released if the socket is bound to
89bddce5 575 * an inode not a file.
1da177e4 576 */
89bddce5 577
1da177e4
LT
578void sock_release(struct socket *sock)
579{
580 if (sock->ops) {
581 struct module *owner = sock->ops->owner;
582
583 sock->ops->release(sock);
584 sock->ops = NULL;
585 module_put(owner);
586 }
587
eaefd110 588 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
589 printk(KERN_ERR "sock_release: fasync list not empty!\n");
590
b09e786b
MP
591 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
592 return;
593
19e8d69c 594 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
595 if (!sock->file) {
596 iput(SOCK_INODE(sock));
597 return;
598 }
89bddce5 599 sock->file = NULL;
1da177e4 600}
c6d409cf 601EXPORT_SYMBOL(sock_release);
1da177e4 602
bf84a010 603void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 604{
2244d07b 605 *tx_flags = 0;
20d49473 606 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 607 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 608 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 609 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
610 if (sock_flag(sk, SOCK_WIFI_STATUS))
611 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
612}
613EXPORT_SYMBOL(sock_tx_timestamp);
614
228e548e
AB
615static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
616 struct msghdr *msg, size_t size)
1da177e4
LT
617{
618 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4
LT
619
620 si->sock = sock;
621 si->scm = NULL;
622 si->msg = msg;
623 si->size = size;
624
1da177e4
LT
625 return sock->ops->sendmsg(iocb, sock, msg, size);
626}
627
228e548e
AB
628static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
629 struct msghdr *msg, size_t size)
630{
631 int err = security_socket_sendmsg(sock, msg, size);
632
633 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
634}
635
1da177e4
LT
636int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
637{
638 struct kiocb iocb;
639 struct sock_iocb siocb;
640 int ret;
641
642 init_sync_kiocb(&iocb, NULL);
643 iocb.private = &siocb;
644 ret = __sock_sendmsg(&iocb, sock, msg, size);
645 if (-EIOCBQUEUED == ret)
646 ret = wait_on_sync_kiocb(&iocb);
647 return ret;
648}
c6d409cf 649EXPORT_SYMBOL(sock_sendmsg);
1da177e4 650
894dc24c 651static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
652{
653 struct kiocb iocb;
654 struct sock_iocb siocb;
655 int ret;
656
657 init_sync_kiocb(&iocb, NULL);
658 iocb.private = &siocb;
659 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
660 if (-EIOCBQUEUED == ret)
661 ret = wait_on_sync_kiocb(&iocb);
662 return ret;
663}
664
1da177e4
LT
665int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
666 struct kvec *vec, size_t num, size_t size)
667{
668 mm_segment_t oldfs = get_fs();
669 int result;
670
671 set_fs(KERNEL_DS);
672 /*
673 * the following is safe, since for compiler definitions of kvec and
674 * iovec are identical, yielding the same in-core layout and alignment
675 */
89bddce5 676 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
677 msg->msg_iovlen = num;
678 result = sock_sendmsg(sock, msg, size);
679 set_fs(oldfs);
680 return result;
681}
c6d409cf 682EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 683
20d49473
PO
684static int ktime2ts(ktime_t kt, struct timespec *ts)
685{
686 if (kt.tv64) {
687 *ts = ktime_to_timespec(kt);
688 return 1;
689 } else {
690 return 0;
691 }
692}
693
92f37fd2
ED
694/*
695 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
696 */
697void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
698 struct sk_buff *skb)
699{
20d49473
PO
700 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
701 struct timespec ts[3];
702 int empty = 1;
703 struct skb_shared_hwtstamps *shhwtstamps =
704 skb_hwtstamps(skb);
705
706 /* Race occurred between timestamp enabling and packet
707 receiving. Fill in the current time for now. */
708 if (need_software_tstamp && skb->tstamp.tv64 == 0)
709 __net_timestamp(skb);
710
711 if (need_software_tstamp) {
712 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
713 struct timeval tv;
714 skb_get_timestamp(skb, &tv);
715 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
716 sizeof(tv), &tv);
717 } else {
842509b8 718 skb_get_timestampns(skb, &ts[0]);
20d49473 719 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 720 sizeof(ts[0]), &ts[0]);
20d49473
PO
721 }
722 }
723
724
725 memset(ts, 0, sizeof(ts));
726 if (skb->tstamp.tv64 &&
727 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
728 skb_get_timestampns(skb, ts + 0);
729 empty = 0;
730 }
731 if (shhwtstamps) {
732 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
733 ktime2ts(shhwtstamps->syststamp, ts + 1))
734 empty = 0;
735 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
736 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
737 empty = 0;
92f37fd2 738 }
20d49473
PO
739 if (!empty)
740 put_cmsg(msg, SOL_SOCKET,
741 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 742}
7c81fd8b
ACM
743EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
744
6e3e939f
JB
745void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
746 struct sk_buff *skb)
747{
748 int ack;
749
750 if (!sock_flag(sk, SOCK_WIFI_STATUS))
751 return;
752 if (!skb->wifi_acked_valid)
753 return;
754
755 ack = skb->wifi_acked;
756
757 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
758}
759EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
760
11165f14 761static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
762 struct sk_buff *skb)
3b885787
NH
763{
764 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
765 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
766 sizeof(__u32), &skb->dropcount);
767}
768
/*
 * Attach receive-side ancillary data to @msg: first the timestamp
 * information, then the drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 776
a2e27255
ACM
777static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
778 struct msghdr *msg, size_t size, int flags)
1da177e4 779{
1da177e4
LT
780 struct sock_iocb *si = kiocb_to_siocb(iocb);
781
782 si->sock = sock;
783 si->scm = NULL;
784 si->msg = msg;
785 si->size = size;
786 si->flags = flags;
787
1da177e4
LT
788 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
789}
790
a2e27255
ACM
791static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
792 struct msghdr *msg, size_t size, int flags)
793{
794 int err = security_socket_recvmsg(sock, msg, size, flags);
795
796 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
797}
798
89bddce5 799int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
800 size_t size, int flags)
801{
802 struct kiocb iocb;
803 struct sock_iocb siocb;
804 int ret;
805
89bddce5 806 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
807 iocb.private = &siocb;
808 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
809 if (-EIOCBQUEUED == ret)
810 ret = wait_on_sync_kiocb(&iocb);
811 return ret;
812}
c6d409cf 813EXPORT_SYMBOL(sock_recvmsg);
1da177e4 814
a2e27255
ACM
815static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
816 size_t size, int flags)
817{
818 struct kiocb iocb;
819 struct sock_iocb siocb;
820 int ret;
821
822 init_sync_kiocb(&iocb, NULL);
823 iocb.private = &siocb;
824 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
825 if (-EIOCBQUEUED == ret)
826 ret = wait_on_sync_kiocb(&iocb);
827 return ret;
828}
829
c1249c0a
ML
830/**
831 * kernel_recvmsg - Receive a message from a socket (kernel space)
832 * @sock: The socket to receive the message from
833 * @msg: Received message
834 * @vec: Input s/g array for message data
835 * @num: Size of input s/g array
836 * @size: Number of bytes to read
837 * @flags: Message flags (MSG_DONTWAIT, etc...)
838 *
839 * On return the msg structure contains the scatter/gather array passed in the
840 * vec argument. The array is modified so that it consists of the unfilled
841 * portion of the original array.
842 *
843 * The returned value is the total number of bytes received, or an error.
844 */
89bddce5
SH
845int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
846 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
847{
848 mm_segment_t oldfs = get_fs();
849 int result;
850
851 set_fs(KERNEL_DS);
852 /*
853 * the following is safe, since for compiler definitions of kvec and
854 * iovec are identical, yielding the same in-core layout and alignment
855 */
89bddce5 856 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
857 result = sock_recvmsg(sock, msg, size, flags);
858 set_fs(oldfs);
859 return result;
860}
c6d409cf 861EXPORT_SYMBOL(kernel_recvmsg);
1da177e4
LT
862
863static void sock_aio_dtor(struct kiocb *iocb)
864{
865 kfree(iocb->private);
866}
867
ce1d4d3e
CH
868static ssize_t sock_sendpage(struct file *file, struct page *page,
869 int offset, size_t size, loff_t *ppos, int more)
1da177e4 870{
1da177e4
LT
871 struct socket *sock;
872 int flags;
873
ce1d4d3e
CH
874 sock = file->private_data;
875
35f9c09f
ED
876 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
877 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
878 flags |= more;
ce1d4d3e 879
e6949583 880 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 881}
1da177e4 882
9c55e01c 883static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 884 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
885 unsigned int flags)
886{
887 struct socket *sock = file->private_data;
888
997b37da
RDC
889 if (unlikely(!sock->ops->splice_read))
890 return -EINVAL;
891
9c55e01c
JA
892 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
893}
894
ce1d4d3e 895static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 896 struct sock_iocb *siocb)
ce1d4d3e
CH
897{
898 if (!is_sync_kiocb(iocb)) {
899 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
900 if (!siocb)
901 return NULL;
1da177e4
LT
902 iocb->ki_dtor = sock_aio_dtor;
903 }
1da177e4 904
ce1d4d3e 905 siocb->kiocb = iocb;
ce1d4d3e
CH
906 iocb->private = siocb;
907 return siocb;
1da177e4
LT
908}
909
ce1d4d3e 910static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
911 struct file *file, const struct iovec *iov,
912 unsigned long nr_segs)
ce1d4d3e
CH
913{
914 struct socket *sock = file->private_data;
915 size_t size = 0;
916 int i;
1da177e4 917
89bddce5
SH
918 for (i = 0; i < nr_segs; i++)
919 size += iov[i].iov_len;
1da177e4 920
ce1d4d3e
CH
921 msg->msg_name = NULL;
922 msg->msg_namelen = 0;
923 msg->msg_control = NULL;
924 msg->msg_controllen = 0;
89bddce5 925 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
926 msg->msg_iovlen = nr_segs;
927 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
928
929 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
930}
931
027445c3
BP
932static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
933 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
934{
935 struct sock_iocb siocb, *x;
936
1da177e4
LT
937 if (pos != 0)
938 return -ESPIPE;
027445c3
BP
939
940 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
941 return 0;
942
027445c3
BP
943
944 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
945 if (!x)
946 return -ENOMEM;
027445c3 947 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
948}
949
ce1d4d3e 950static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
951 struct file *file, const struct iovec *iov,
952 unsigned long nr_segs)
1da177e4 953{
ce1d4d3e
CH
954 struct socket *sock = file->private_data;
955 size_t size = 0;
956 int i;
1da177e4 957
89bddce5
SH
958 for (i = 0; i < nr_segs; i++)
959 size += iov[i].iov_len;
1da177e4 960
ce1d4d3e
CH
961 msg->msg_name = NULL;
962 msg->msg_namelen = 0;
963 msg->msg_control = NULL;
964 msg->msg_controllen = 0;
89bddce5 965 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
966 msg->msg_iovlen = nr_segs;
967 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
968 if (sock->type == SOCK_SEQPACKET)
969 msg->msg_flags |= MSG_EOR;
1da177e4 970
ce1d4d3e 971 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
972}
973
027445c3
BP
974static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
975 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
976{
977 struct sock_iocb siocb, *x;
1da177e4 978
ce1d4d3e
CH
979 if (pos != 0)
980 return -ESPIPE;
027445c3 981
027445c3 982 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
983 if (!x)
984 return -ENOMEM;
1da177e4 985
027445c3 986 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
987}
988
1da177e4
LT
989/*
990 * Atomic setting of ioctl hooks to avoid race
991 * with module unload.
992 */
993
4a3e2f71 994static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 995static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 996
881d966b 997void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 998{
4a3e2f71 999 mutex_lock(&br_ioctl_mutex);
1da177e4 1000 br_ioctl_hook = hook;
4a3e2f71 1001 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1002}
1003EXPORT_SYMBOL(brioctl_set);
1004
4a3e2f71 1005static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1006static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1007
881d966b 1008void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1009{
4a3e2f71 1010 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1011 vlan_ioctl_hook = hook;
4a3e2f71 1012 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1013}
1014EXPORT_SYMBOL(vlan_ioctl_set);
1015
4a3e2f71 1016static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1017static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1018
89bddce5 1019void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1020{
4a3e2f71 1021 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1022 dlci_ioctl_hook = hook;
4a3e2f71 1023 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1024}
1025EXPORT_SYMBOL(dlci_ioctl_set);
1026
6b96018b
AB
1027static long sock_do_ioctl(struct net *net, struct socket *sock,
1028 unsigned int cmd, unsigned long arg)
1029{
1030 int err;
1031 void __user *argp = (void __user *)arg;
1032
1033 err = sock->ops->ioctl(sock, cmd, arg);
1034
1035 /*
1036 * If this ioctl is unknown try to hand it down
1037 * to the NIC driver.
1038 */
1039 if (err == -ENOIOCTLCMD)
1040 err = dev_ioctl(net, cmd, argp);
1041
1042 return err;
1043}
1044
1da177e4
LT
1045/*
1046 * With an ioctl, arg may well be a user mode pointer, but we don't know
1047 * what to do with it - that's up to the protocol still.
1048 */
1049
1050static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1051{
1052 struct socket *sock;
881d966b 1053 struct sock *sk;
1da177e4
LT
1054 void __user *argp = (void __user *)arg;
1055 int pid, err;
881d966b 1056 struct net *net;
1da177e4 1057
b69aee04 1058 sock = file->private_data;
881d966b 1059 sk = sock->sk;
3b1e0a65 1060 net = sock_net(sk);
1da177e4 1061 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1062 err = dev_ioctl(net, cmd, argp);
1da177e4 1063 } else
3d23e349 1064#ifdef CONFIG_WEXT_CORE
1da177e4 1065 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1066 err = dev_ioctl(net, cmd, argp);
1da177e4 1067 } else
3d23e349 1068#endif
89bddce5 1069 switch (cmd) {
1da177e4
LT
1070 case FIOSETOWN:
1071 case SIOCSPGRP:
1072 err = -EFAULT;
1073 if (get_user(pid, (int __user *)argp))
1074 break;
1075 err = f_setown(sock->file, pid, 1);
1076 break;
1077 case FIOGETOWN:
1078 case SIOCGPGRP:
609d7fa9 1079 err = put_user(f_getown(sock->file),
89bddce5 1080 (int __user *)argp);
1da177e4
LT
1081 break;
1082 case SIOCGIFBR:
1083 case SIOCSIFBR:
1084 case SIOCBRADDBR:
1085 case SIOCBRDELBR:
1086 err = -ENOPKG;
1087 if (!br_ioctl_hook)
1088 request_module("bridge");
1089
4a3e2f71 1090 mutex_lock(&br_ioctl_mutex);
89bddce5 1091 if (br_ioctl_hook)
881d966b 1092 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1093 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1094 break;
1095 case SIOCGIFVLAN:
1096 case SIOCSIFVLAN:
1097 err = -ENOPKG;
1098 if (!vlan_ioctl_hook)
1099 request_module("8021q");
1100
4a3e2f71 1101 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1102 if (vlan_ioctl_hook)
881d966b 1103 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1104 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1105 break;
1da177e4
LT
1106 case SIOCADDDLCI:
1107 case SIOCDELDLCI:
1108 err = -ENOPKG;
1109 if (!dlci_ioctl_hook)
1110 request_module("dlci");
1111
7512cbf6
PE
1112 mutex_lock(&dlci_ioctl_mutex);
1113 if (dlci_ioctl_hook)
1da177e4 1114 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1115 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1116 break;
1117 default:
6b96018b 1118 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1119 break;
89bddce5 1120 }
1da177e4
LT
1121 return err;
1122}
1123
1124int sock_create_lite(int family, int type, int protocol, struct socket **res)
1125{
1126 int err;
1127 struct socket *sock = NULL;
89bddce5 1128
1da177e4
LT
1129 err = security_socket_create(family, type, protocol, 1);
1130 if (err)
1131 goto out;
1132
1133 sock = sock_alloc();
1134 if (!sock) {
1135 err = -ENOMEM;
1136 goto out;
1137 }
1138
1da177e4 1139 sock->type = type;
7420ed23
VY
1140 err = security_socket_post_create(sock, family, type, protocol, 1);
1141 if (err)
1142 goto out_release;
1143
1da177e4
LT
1144out:
1145 *res = sock;
1146 return err;
7420ed23
VY
1147out_release:
1148 sock_release(sock);
1149 sock = NULL;
1150 goto out;
1da177e4 1151}
c6d409cf 1152EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1153
1154/* No kernel lock held - perfect */
89bddce5 1155static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1156{
1157 struct socket *sock;
1158
1159 /*
89bddce5 1160 * We can't return errors to poll, so it's either yes or no.
1da177e4 1161 */
b69aee04 1162 sock = file->private_data;
1da177e4
LT
1163 return sock->ops->poll(file, sock, wait);
1164}
1165
89bddce5 1166static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1167{
b69aee04 1168 struct socket *sock = file->private_data;
1da177e4
LT
1169
1170 return sock->ops->mmap(file, sock, vma);
1171}
1172
20380731 1173static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1174{
1175 /*
89bddce5
SH
1176 * It was possible the inode is NULL we were
1177 * closing an unfinished socket.
1da177e4
LT
1178 */
1179
89bddce5 1180 if (!inode) {
1da177e4
LT
1181 printk(KERN_DEBUG "sock_close: NULL inode\n");
1182 return 0;
1183 }
1da177e4
LT
1184 sock_release(SOCKET_I(inode));
1185 return 0;
1186}
1187
1188/*
1189 * Update the socket async list
1190 *
1191 * Fasync_list locking strategy.
1192 *
1193 * 1. fasync_list is modified only under process context socket lock
1194 * i.e. under semaphore.
1195 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1196 * or under socket lock
1da177e4
LT
1197 */
1198
1199static int sock_fasync(int fd, struct file *filp, int on)
1200{
989a2979
ED
1201 struct socket *sock = filp->private_data;
1202 struct sock *sk = sock->sk;
eaefd110 1203 struct socket_wq *wq;
1da177e4 1204
989a2979 1205 if (sk == NULL)
1da177e4 1206 return -EINVAL;
1da177e4
LT
1207
1208 lock_sock(sk);
eaefd110
ED
1209 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1210 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1211
eaefd110 1212 if (!wq->fasync_list)
989a2979
ED
1213 sock_reset_flag(sk, SOCK_FASYNC);
1214 else
bcdce719 1215 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1216
989a2979 1217 release_sock(sk);
1da177e4
LT
1218 return 0;
1219}
1220
43815482 1221/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1222
1223int sock_wake_async(struct socket *sock, int how, int band)
1224{
43815482
ED
1225 struct socket_wq *wq;
1226
1227 if (!sock)
1228 return -1;
1229 rcu_read_lock();
1230 wq = rcu_dereference(sock->wq);
1231 if (!wq || !wq->fasync_list) {
1232 rcu_read_unlock();
1da177e4 1233 return -1;
43815482 1234 }
89bddce5 1235 switch (how) {
8d8ad9d7 1236 case SOCK_WAKE_WAITD:
1da177e4
LT
1237 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1238 break;
1239 goto call_kill;
8d8ad9d7 1240 case SOCK_WAKE_SPACE:
1da177e4
LT
1241 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1242 break;
1243 /* fall through */
8d8ad9d7 1244 case SOCK_WAKE_IO:
89bddce5 1245call_kill:
43815482 1246 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1247 break;
8d8ad9d7 1248 case SOCK_WAKE_URG:
43815482 1249 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1250 }
43815482 1251 rcu_read_unlock();
1da177e4
LT
1252 return 0;
1253}
c6d409cf 1254EXPORT_SYMBOL(sock_wake_async);
1da177e4 1255
721db93a 1256int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1257 struct socket **res, int kern)
1da177e4
LT
1258{
1259 int err;
1260 struct socket *sock;
55737fda 1261 const struct net_proto_family *pf;
1da177e4
LT
1262
1263 /*
89bddce5 1264 * Check protocol is in range
1da177e4
LT
1265 */
1266 if (family < 0 || family >= NPROTO)
1267 return -EAFNOSUPPORT;
1268 if (type < 0 || type >= SOCK_MAX)
1269 return -EINVAL;
1270
1271 /* Compatibility.
1272
1273 This uglymoron is moved from INET layer to here to avoid
1274 deadlock in module load.
1275 */
1276 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1277 static int warned;
1da177e4
LT
1278 if (!warned) {
1279 warned = 1;
89bddce5
SH
1280 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1281 current->comm);
1da177e4
LT
1282 }
1283 family = PF_PACKET;
1284 }
1285
1286 err = security_socket_create(family, type, protocol, kern);
1287 if (err)
1288 return err;
89bddce5 1289
55737fda
SH
1290 /*
1291 * Allocate the socket and allow the family to set things up. if
1292 * the protocol is 0, the family is instructed to select an appropriate
1293 * default.
1294 */
1295 sock = sock_alloc();
1296 if (!sock) {
e87cc472 1297 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1298 return -ENFILE; /* Not exactly a match, but its the
1299 closest posix thing */
1300 }
1301
1302 sock->type = type;
1303
95a5afca 1304#ifdef CONFIG_MODULES
89bddce5
SH
1305 /* Attempt to load a protocol module if the find failed.
1306 *
1307 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1308 * requested real, full-featured networking support upon configuration.
1309 * Otherwise module support will break!
1310 */
190683a9 1311 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1312 request_module("net-pf-%d", family);
1da177e4
LT
1313#endif
1314
55737fda
SH
1315 rcu_read_lock();
1316 pf = rcu_dereference(net_families[family]);
1317 err = -EAFNOSUPPORT;
1318 if (!pf)
1319 goto out_release;
1da177e4
LT
1320
1321 /*
1322 * We will call the ->create function, that possibly is in a loadable
1323 * module, so we have to bump that loadable module refcnt first.
1324 */
55737fda 1325 if (!try_module_get(pf->owner))
1da177e4
LT
1326 goto out_release;
1327
55737fda
SH
1328 /* Now protected by module ref count */
1329 rcu_read_unlock();
1330
3f378b68 1331 err = pf->create(net, sock, protocol, kern);
55737fda 1332 if (err < 0)
1da177e4 1333 goto out_module_put;
a79af59e 1334
1da177e4
LT
1335 /*
1336 * Now to bump the refcnt of the [loadable] module that owns this
1337 * socket at sock_release time we decrement its refcnt.
1338 */
55737fda
SH
1339 if (!try_module_get(sock->ops->owner))
1340 goto out_module_busy;
1341
1da177e4
LT
1342 /*
1343 * Now that we're done with the ->create function, the [loadable]
1344 * module can have its refcnt decremented
1345 */
55737fda 1346 module_put(pf->owner);
7420ed23
VY
1347 err = security_socket_post_create(sock, family, type, protocol, kern);
1348 if (err)
3b185525 1349 goto out_sock_release;
55737fda 1350 *res = sock;
1da177e4 1351
55737fda
SH
1352 return 0;
1353
1354out_module_busy:
1355 err = -EAFNOSUPPORT;
1da177e4 1356out_module_put:
55737fda
SH
1357 sock->ops = NULL;
1358 module_put(pf->owner);
1359out_sock_release:
1da177e4 1360 sock_release(sock);
55737fda
SH
1361 return err;
1362
1363out_release:
1364 rcu_read_unlock();
1365 goto out_sock_release;
1da177e4 1366}
721db93a 1367EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1368
1369int sock_create(int family, int type, int protocol, struct socket **res)
1370{
1b8d7ae4 1371 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1372}
c6d409cf 1373EXPORT_SYMBOL(sock_create);
1da177e4
LT
1374
1375int sock_create_kern(int family, int type, int protocol, struct socket **res)
1376{
1b8d7ae4 1377 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1378}
c6d409cf 1379EXPORT_SYMBOL(sock_create_kern);
1da177e4 1380
3e0fa65f 1381SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1382{
1383 int retval;
1384 struct socket *sock;
a677a039
UD
1385 int flags;
1386
e38b36f3
UD
1387 /* Check the SOCK_* constants for consistency. */
1388 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1389 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1390 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1391 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1392
a677a039 1393 flags = type & ~SOCK_TYPE_MASK;
77d27200 1394 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1395 return -EINVAL;
1396 type &= SOCK_TYPE_MASK;
1da177e4 1397
aaca0bdc
UD
1398 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1399 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1400
1da177e4
LT
1401 retval = sock_create(family, type, protocol, &sock);
1402 if (retval < 0)
1403 goto out;
1404
77d27200 1405 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1406 if (retval < 0)
1407 goto out_release;
1408
1409out:
1410 /* It may be already another descriptor 8) Not kernel problem. */
1411 return retval;
1412
1413out_release:
1414 sock_release(sock);
1415 return retval;
1416}
1417
1418/*
1419 * Create a pair of connected sockets.
1420 */
1421
3e0fa65f
HC
1422SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1423 int __user *, usockvec)
1da177e4
LT
1424{
1425 struct socket *sock1, *sock2;
1426 int fd1, fd2, err;
db349509 1427 struct file *newfile1, *newfile2;
a677a039
UD
1428 int flags;
1429
1430 flags = type & ~SOCK_TYPE_MASK;
77d27200 1431 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1432 return -EINVAL;
1433 type &= SOCK_TYPE_MASK;
1da177e4 1434
aaca0bdc
UD
1435 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1436 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1437
1da177e4
LT
1438 /*
1439 * Obtain the first socket and check if the underlying protocol
1440 * supports the socketpair call.
1441 */
1442
1443 err = sock_create(family, type, protocol, &sock1);
1444 if (err < 0)
1445 goto out;
1446
1447 err = sock_create(family, type, protocol, &sock2);
1448 if (err < 0)
1449 goto out_release_1;
1450
1451 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1452 if (err < 0)
1da177e4
LT
1453 goto out_release_both;
1454
28407630 1455 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1456 if (unlikely(fd1 < 0)) {
1457 err = fd1;
db349509 1458 goto out_release_both;
bf3c23d1 1459 }
28407630 1460 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1461 if (unlikely(fd2 < 0)) {
1462 err = fd2;
28407630
AV
1463 put_unused_fd(fd1);
1464 goto out_release_both;
1465 }
1466
aab174f0 1467 newfile1 = sock_alloc_file(sock1, flags, NULL);
28407630
AV
1468 if (unlikely(IS_ERR(newfile1))) {
1469 err = PTR_ERR(newfile1);
1470 put_unused_fd(fd1);
1471 put_unused_fd(fd2);
1472 goto out_release_both;
1473 }
1474
aab174f0 1475 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1476 if (IS_ERR(newfile2)) {
1477 err = PTR_ERR(newfile2);
198de4d7
AV
1478 fput(newfile1);
1479 put_unused_fd(fd1);
28407630 1480 put_unused_fd(fd2);
198de4d7
AV
1481 sock_release(sock2);
1482 goto out;
db349509
AV
1483 }
1484
157cf649 1485 audit_fd_pair(fd1, fd2);
db349509
AV
1486 fd_install(fd1, newfile1);
1487 fd_install(fd2, newfile2);
1da177e4
LT
1488 /* fd1 and fd2 may be already another descriptors.
1489 * Not kernel problem.
1490 */
1491
89bddce5 1492 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1493 if (!err)
1494 err = put_user(fd2, &usockvec[1]);
1495 if (!err)
1496 return 0;
1497
1498 sys_close(fd2);
1499 sys_close(fd1);
1500 return err;
1501
1da177e4 1502out_release_both:
89bddce5 1503 sock_release(sock2);
1da177e4 1504out_release_1:
89bddce5 1505 sock_release(sock1);
1da177e4
LT
1506out:
1507 return err;
1508}
1509
1da177e4
LT
1510/*
1511 * Bind a name to a socket. Nothing much to do here since it's
1512 * the protocol's responsibility to handle the local address.
1513 *
1514 * We move the socket address to kernel space before we call
1515 * the protocol layer (having also checked the address is ok).
1516 */
1517
20f37034 1518SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1519{
1520 struct socket *sock;
230b1839 1521 struct sockaddr_storage address;
6cb153ca 1522 int err, fput_needed;
1da177e4 1523
89bddce5 1524 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1525 if (sock) {
43db362d 1526 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1527 if (err >= 0) {
1528 err = security_socket_bind(sock,
230b1839 1529 (struct sockaddr *)&address,
89bddce5 1530 addrlen);
6cb153ca
BL
1531 if (!err)
1532 err = sock->ops->bind(sock,
89bddce5 1533 (struct sockaddr *)
230b1839 1534 &address, addrlen);
1da177e4 1535 }
6cb153ca 1536 fput_light(sock->file, fput_needed);
89bddce5 1537 }
1da177e4
LT
1538 return err;
1539}
1540
1da177e4
LT
1541/*
1542 * Perform a listen. Basically, we allow the protocol to do anything
1543 * necessary for a listen, and if that works, we mark the socket as
1544 * ready for listening.
1545 */
1546
3e0fa65f 1547SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1548{
1549 struct socket *sock;
6cb153ca 1550 int err, fput_needed;
b8e1f9b5 1551 int somaxconn;
89bddce5
SH
1552
1553 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1554 if (sock) {
8efa6e93 1555 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1556 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1557 backlog = somaxconn;
1da177e4
LT
1558
1559 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1560 if (!err)
1561 err = sock->ops->listen(sock, backlog);
1da177e4 1562
6cb153ca 1563 fput_light(sock->file, fput_needed);
1da177e4
LT
1564 }
1565 return err;
1566}
1567
1da177e4
LT
1568/*
1569 * For accept, we attempt to create a new socket, set up the link
1570 * with the client, wake up the client, then return the new
1571 * connected fd. We collect the address of the connector in kernel
1572 * space and move it to user at the very end. This is unclean because
1573 * we open the socket then return an error.
1574 *
1575 * 1003.1g adds the ability to recvmsg() to query connection pending
1576 * status to recvmsg. We need to add that support in a way thats
1577 * clean when we restucture accept also.
1578 */
1579
20f37034
HC
1580SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1581 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1582{
1583 struct socket *sock, *newsock;
39d8c1b6 1584 struct file *newfile;
6cb153ca 1585 int err, len, newfd, fput_needed;
230b1839 1586 struct sockaddr_storage address;
1da177e4 1587
77d27200 1588 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1589 return -EINVAL;
1590
1591 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1592 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1593
6cb153ca 1594 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1595 if (!sock)
1596 goto out;
1597
1598 err = -ENFILE;
c6d409cf
ED
1599 newsock = sock_alloc();
1600 if (!newsock)
1da177e4
LT
1601 goto out_put;
1602
1603 newsock->type = sock->type;
1604 newsock->ops = sock->ops;
1605
1da177e4
LT
1606 /*
1607 * We don't need try_module_get here, as the listening socket (sock)
1608 * has the protocol module (sock->ops->owner) held.
1609 */
1610 __module_get(newsock->ops->owner);
1611
28407630 1612 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1613 if (unlikely(newfd < 0)) {
1614 err = newfd;
9a1875e6
DM
1615 sock_release(newsock);
1616 goto out_put;
39d8c1b6 1617 }
aab174f0 1618 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
28407630
AV
1619 if (unlikely(IS_ERR(newfile))) {
1620 err = PTR_ERR(newfile);
1621 put_unused_fd(newfd);
1622 sock_release(newsock);
1623 goto out_put;
1624 }
39d8c1b6 1625
a79af59e
FF
1626 err = security_socket_accept(sock, newsock);
1627 if (err)
39d8c1b6 1628 goto out_fd;
a79af59e 1629
1da177e4
LT
1630 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1631 if (err < 0)
39d8c1b6 1632 goto out_fd;
1da177e4
LT
1633
1634 if (upeer_sockaddr) {
230b1839 1635 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1636 &len, 2) < 0) {
1da177e4 1637 err = -ECONNABORTED;
39d8c1b6 1638 goto out_fd;
1da177e4 1639 }
43db362d 1640 err = move_addr_to_user(&address,
230b1839 1641 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1642 if (err < 0)
39d8c1b6 1643 goto out_fd;
1da177e4
LT
1644 }
1645
1646 /* File flags are not inherited via accept() unlike another OSes. */
1647
39d8c1b6
DM
1648 fd_install(newfd, newfile);
1649 err = newfd;
1da177e4 1650
1da177e4 1651out_put:
6cb153ca 1652 fput_light(sock->file, fput_needed);
1da177e4
LT
1653out:
1654 return err;
39d8c1b6 1655out_fd:
9606a216 1656 fput(newfile);
39d8c1b6 1657 put_unused_fd(newfd);
1da177e4
LT
1658 goto out_put;
1659}
1660
20f37034
HC
1661SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1662 int __user *, upeer_addrlen)
aaca0bdc 1663{
de11defe 1664 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1665}
1666
1da177e4
LT
1667/*
1668 * Attempt to connect to a socket with the server address. The address
1669 * is in user space so we verify it is OK and move it to kernel space.
1670 *
1671 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1672 * break bindings
1673 *
1674 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1675 * other SEQPACKET protocols that take time to connect() as it doesn't
1676 * include the -EINPROGRESS status for such sockets.
1677 */
1678
20f37034
HC
1679SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1680 int, addrlen)
1da177e4
LT
1681{
1682 struct socket *sock;
230b1839 1683 struct sockaddr_storage address;
6cb153ca 1684 int err, fput_needed;
1da177e4 1685
6cb153ca 1686 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1687 if (!sock)
1688 goto out;
43db362d 1689 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1690 if (err < 0)
1691 goto out_put;
1692
89bddce5 1693 err =
230b1839 1694 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1695 if (err)
1696 goto out_put;
1697
230b1839 1698 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1699 sock->file->f_flags);
1700out_put:
6cb153ca 1701 fput_light(sock->file, fput_needed);
1da177e4
LT
1702out:
1703 return err;
1704}
1705
1706/*
1707 * Get the local address ('name') of a socket object. Move the obtained
1708 * name to user space.
1709 */
1710
20f37034
HC
1711SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1712 int __user *, usockaddr_len)
1da177e4
LT
1713{
1714 struct socket *sock;
230b1839 1715 struct sockaddr_storage address;
6cb153ca 1716 int len, err, fput_needed;
89bddce5 1717
6cb153ca 1718 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1719 if (!sock)
1720 goto out;
1721
1722 err = security_socket_getsockname(sock);
1723 if (err)
1724 goto out_put;
1725
230b1839 1726 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1727 if (err)
1728 goto out_put;
43db362d 1729 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1730
1731out_put:
6cb153ca 1732 fput_light(sock->file, fput_needed);
1da177e4
LT
1733out:
1734 return err;
1735}
1736
1737/*
1738 * Get the remote address ('name') of a socket object. Move the obtained
1739 * name to user space.
1740 */
1741
20f37034
HC
1742SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1743 int __user *, usockaddr_len)
1da177e4
LT
1744{
1745 struct socket *sock;
230b1839 1746 struct sockaddr_storage address;
6cb153ca 1747 int len, err, fput_needed;
1da177e4 1748
89bddce5
SH
1749 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1750 if (sock != NULL) {
1da177e4
LT
1751 err = security_socket_getpeername(sock);
1752 if (err) {
6cb153ca 1753 fput_light(sock->file, fput_needed);
1da177e4
LT
1754 return err;
1755 }
1756
89bddce5 1757 err =
230b1839 1758 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1759 1);
1da177e4 1760 if (!err)
43db362d 1761 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1762 usockaddr_len);
6cb153ca 1763 fput_light(sock->file, fput_needed);
1da177e4
LT
1764 }
1765 return err;
1766}
1767
1768/*
1769 * Send a datagram to a given address. We move the address into kernel
1770 * space and check the user space data area is readable before invoking
1771 * the protocol.
1772 */
1773
3e0fa65f 1774SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1775 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1776 int, addr_len)
1da177e4
LT
1777{
1778 struct socket *sock;
230b1839 1779 struct sockaddr_storage address;
1da177e4
LT
1780 int err;
1781 struct msghdr msg;
1782 struct iovec iov;
6cb153ca 1783 int fput_needed;
6cb153ca 1784
253eacc0
LT
1785 if (len > INT_MAX)
1786 len = INT_MAX;
de0fa95c
PE
1787 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1788 if (!sock)
4387ff75 1789 goto out;
6cb153ca 1790
89bddce5
SH
1791 iov.iov_base = buff;
1792 iov.iov_len = len;
1793 msg.msg_name = NULL;
1794 msg.msg_iov = &iov;
1795 msg.msg_iovlen = 1;
1796 msg.msg_control = NULL;
1797 msg.msg_controllen = 0;
1798 msg.msg_namelen = 0;
6cb153ca 1799 if (addr) {
43db362d 1800 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1801 if (err < 0)
1802 goto out_put;
230b1839 1803 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1804 msg.msg_namelen = addr_len;
1da177e4
LT
1805 }
1806 if (sock->file->f_flags & O_NONBLOCK)
1807 flags |= MSG_DONTWAIT;
1808 msg.msg_flags = flags;
1809 err = sock_sendmsg(sock, &msg, len);
1810
89bddce5 1811out_put:
de0fa95c 1812 fput_light(sock->file, fput_needed);
4387ff75 1813out:
1da177e4
LT
1814 return err;
1815}
1816
1817/*
89bddce5 1818 * Send a datagram down a socket.
1da177e4
LT
1819 */
1820
3e0fa65f 1821SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1822 unsigned int, flags)
1da177e4
LT
1823{
1824 return sys_sendto(fd, buff, len, flags, NULL, 0);
1825}
1826
1827/*
89bddce5 1828 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1829 * sender. We verify the buffers are writable and if needed move the
1830 * sender address from kernel to user space.
1831 */
1832
3e0fa65f 1833SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1834 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1835 int __user *, addr_len)
1da177e4
LT
1836{
1837 struct socket *sock;
1838 struct iovec iov;
1839 struct msghdr msg;
230b1839 1840 struct sockaddr_storage address;
89bddce5 1841 int err, err2;
6cb153ca
BL
1842 int fput_needed;
1843
253eacc0
LT
1844 if (size > INT_MAX)
1845 size = INT_MAX;
de0fa95c 1846 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1847 if (!sock)
de0fa95c 1848 goto out;
1da177e4 1849
89bddce5
SH
1850 msg.msg_control = NULL;
1851 msg.msg_controllen = 0;
1852 msg.msg_iovlen = 1;
1853 msg.msg_iov = &iov;
1854 iov.iov_len = size;
1855 iov.iov_base = ubuf;
230b1839
YH
1856 msg.msg_name = (struct sockaddr *)&address;
1857 msg.msg_namelen = sizeof(address);
1da177e4
LT
1858 if (sock->file->f_flags & O_NONBLOCK)
1859 flags |= MSG_DONTWAIT;
89bddce5 1860 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1861
89bddce5 1862 if (err >= 0 && addr != NULL) {
43db362d 1863 err2 = move_addr_to_user(&address,
230b1839 1864 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1865 if (err2 < 0)
1866 err = err2;
1da177e4 1867 }
de0fa95c
PE
1868
1869 fput_light(sock->file, fput_needed);
4387ff75 1870out:
1da177e4
LT
1871 return err;
1872}
1873
1874/*
89bddce5 1875 * Receive a datagram from a socket.
1da177e4
LT
1876 */
1877
89bddce5 1878asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
95c96174 1879 unsigned int flags)
1da177e4
LT
1880{
1881 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1882}
1883
1884/*
1885 * Set a socket option. Because we don't know the option lengths we have
1886 * to pass the user mode parameter for the protocols to sort out.
1887 */
1888
20f37034
HC
1889SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1890 char __user *, optval, int, optlen)
1da177e4 1891{
6cb153ca 1892 int err, fput_needed;
1da177e4
LT
1893 struct socket *sock;
1894
1895 if (optlen < 0)
1896 return -EINVAL;
89bddce5
SH
1897
1898 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1899 if (sock != NULL) {
1900 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1901 if (err)
1902 goto out_put;
1da177e4
LT
1903
1904 if (level == SOL_SOCKET)
89bddce5
SH
1905 err =
1906 sock_setsockopt(sock, level, optname, optval,
1907 optlen);
1da177e4 1908 else
89bddce5
SH
1909 err =
1910 sock->ops->setsockopt(sock, level, optname, optval,
1911 optlen);
6cb153ca
BL
1912out_put:
1913 fput_light(sock->file, fput_needed);
1da177e4
LT
1914 }
1915 return err;
1916}
1917
1918/*
1919 * Get a socket option. Because we don't know the option lengths we have
1920 * to pass a user mode parameter for the protocols to sort out.
1921 */
1922
20f37034
HC
1923SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1924 char __user *, optval, int __user *, optlen)
1da177e4 1925{
6cb153ca 1926 int err, fput_needed;
1da177e4
LT
1927 struct socket *sock;
1928
89bddce5
SH
1929 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1930 if (sock != NULL) {
6cb153ca
BL
1931 err = security_socket_getsockopt(sock, level, optname);
1932 if (err)
1933 goto out_put;
1da177e4
LT
1934
1935 if (level == SOL_SOCKET)
89bddce5
SH
1936 err =
1937 sock_getsockopt(sock, level, optname, optval,
1938 optlen);
1da177e4 1939 else
89bddce5
SH
1940 err =
1941 sock->ops->getsockopt(sock, level, optname, optval,
1942 optlen);
6cb153ca
BL
1943out_put:
1944 fput_light(sock->file, fput_needed);
1da177e4
LT
1945 }
1946 return err;
1947}
1948
1da177e4
LT
1949/*
1950 * Shutdown a socket.
1951 */
1952
754fe8d2 1953SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1954{
6cb153ca 1955 int err, fput_needed;
1da177e4
LT
1956 struct socket *sock;
1957
89bddce5
SH
1958 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1959 if (sock != NULL) {
1da177e4 1960 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1961 if (!err)
1962 err = sock->ops->shutdown(sock, how);
1963 fput_light(sock->file, fput_needed);
1da177e4
LT
1964 }
1965 return err;
1966}
1967
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * These expand in the caller's scope: they expect a variable named
 * "flags" and, next to "msg", a variable named "msg_compat".
 */
#define COMPAT_MSG(msg, member)	\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1974
c71d8ebe
TH
1975struct used_address {
1976 struct sockaddr_storage name;
1977 unsigned int name_len;
1978};
1979
228e548e 1980static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1981 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1982 struct used_address *used_address)
1da177e4 1983{
89bddce5
SH
1984 struct compat_msghdr __user *msg_compat =
1985 (struct compat_msghdr __user *)msg;
230b1839 1986 struct sockaddr_storage address;
1da177e4 1987 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1988 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1989 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1990 /* 20 is size of ipv6_pktinfo */
1da177e4 1991 unsigned char *ctl_buf = ctl;
a74e9106 1992 int err, ctl_len, total_len;
89bddce5 1993
1da177e4
LT
1994 err = -EFAULT;
1995 if (MSG_CMSG_COMPAT & flags) {
228e548e 1996 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1997 return -EFAULT;
228e548e 1998 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1999 return -EFAULT;
2000
228e548e 2001 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2002 err = -EMSGSIZE;
2003 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2004 goto out;
2005 err = -ENOMEM;
2006 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2007 GFP_KERNEL);
1da177e4 2008 if (!iov)
228e548e 2009 goto out;
1da177e4
LT
2010 }
2011
2012 /* This will also move the address data into kernel space */
2013 if (MSG_CMSG_COMPAT & flags) {
43db362d 2014 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 2015 } else
43db362d 2016 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 2017 if (err < 0)
1da177e4
LT
2018 goto out_freeiov;
2019 total_len = err;
2020
2021 err = -ENOBUFS;
2022
228e548e 2023 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2024 goto out_freeiov;
228e548e 2025 ctl_len = msg_sys->msg_controllen;
1da177e4 2026 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2027 err =
228e548e 2028 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2029 sizeof(ctl));
1da177e4
LT
2030 if (err)
2031 goto out_freeiov;
228e548e
AB
2032 ctl_buf = msg_sys->msg_control;
2033 ctl_len = msg_sys->msg_controllen;
1da177e4 2034 } else if (ctl_len) {
89bddce5 2035 if (ctl_len > sizeof(ctl)) {
1da177e4 2036 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2037 if (ctl_buf == NULL)
1da177e4
LT
2038 goto out_freeiov;
2039 }
2040 err = -EFAULT;
2041 /*
228e548e 2042 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2043 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2044 * checking falls down on this.
2045 */
fb8621bb 2046 if (copy_from_user(ctl_buf,
228e548e 2047 (void __user __force *)msg_sys->msg_control,
89bddce5 2048 ctl_len))
1da177e4 2049 goto out_freectl;
228e548e 2050 msg_sys->msg_control = ctl_buf;
1da177e4 2051 }
228e548e 2052 msg_sys->msg_flags = flags;
1da177e4
LT
2053
2054 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2055 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2056 /*
2057 * If this is sendmmsg() and current destination address is same as
2058 * previously succeeded address, omit asking LSM's decision.
2059 * used_address->name_len is initialized to UINT_MAX so that the first
2060 * destination address never matches.
2061 */
bc909d9d
MD
2062 if (used_address && msg_sys->msg_name &&
2063 used_address->name_len == msg_sys->msg_namelen &&
2064 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
2065 used_address->name_len)) {
2066 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2067 goto out_freectl;
2068 }
2069 err = sock_sendmsg(sock, msg_sys, total_len);
2070 /*
2071 * If this is sendmmsg() and sending to current destination address was
2072 * successful, remember it.
2073 */
2074 if (used_address && err >= 0) {
2075 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2076 if (msg_sys->msg_name)
2077 memcpy(&used_address->name, msg_sys->msg_name,
2078 used_address->name_len);
c71d8ebe 2079 }
1da177e4
LT
2080
2081out_freectl:
89bddce5 2082 if (ctl_buf != ctl)
1da177e4
LT
2083 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2084out_freeiov:
2085 if (iov != iovstack)
a74e9106 2086 kfree(iov);
228e548e
AB
2087out:
2088 return err;
2089}
2090
2091/*
2092 * BSD sendmsg interface
2093 */
2094
95c96174 2095SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
228e548e
AB
2096{
2097 int fput_needed, err;
2098 struct msghdr msg_sys;
2099 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2100
2101 if (!sock)
2102 goto out;
2103
c71d8ebe 2104 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2105
6cb153ca 2106 fput_light(sock->file, fput_needed);
89bddce5 2107out:
1da177e4
LT
2108 return err;
2109}
2110
228e548e
AB
2111/*
2112 * Linux sendmmsg interface
2113 */
2114
2115int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2116 unsigned int flags)
2117{
2118 int fput_needed, err, datagrams;
2119 struct socket *sock;
2120 struct mmsghdr __user *entry;
2121 struct compat_mmsghdr __user *compat_entry;
2122 struct msghdr msg_sys;
c71d8ebe 2123 struct used_address used_address;
228e548e 2124
98382f41
AB
2125 if (vlen > UIO_MAXIOV)
2126 vlen = UIO_MAXIOV;
228e548e
AB
2127
2128 datagrams = 0;
2129
2130 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2131 if (!sock)
2132 return err;
2133
c71d8ebe 2134 used_address.name_len = UINT_MAX;
228e548e
AB
2135 entry = mmsg;
2136 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2137 err = 0;
228e548e
AB
2138
2139 while (datagrams < vlen) {
228e548e
AB
2140 if (MSG_CMSG_COMPAT & flags) {
2141 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
c71d8ebe 2142 &msg_sys, flags, &used_address);
228e548e
AB
2143 if (err < 0)
2144 break;
2145 err = __put_user(err, &compat_entry->msg_len);
2146 ++compat_entry;
2147 } else {
2148 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
c71d8ebe 2149 &msg_sys, flags, &used_address);
228e548e
AB
2150 if (err < 0)
2151 break;
2152 err = put_user(err, &entry->msg_len);
2153 ++entry;
2154 }
2155
2156 if (err)
2157 break;
2158 ++datagrams;
2159 }
2160
228e548e
AB
2161 fput_light(sock->file, fput_needed);
2162
728ffb86
AB
2163 /* We only return an error if no datagrams were able to be sent */
2164 if (datagrams != 0)
228e548e
AB
2165 return datagrams;
2166
228e548e
AB
2167 return err;
2168}
2169
2170SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2171 unsigned int, vlen, unsigned int, flags)
2172{
2173 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2174}
2175
a2e27255 2176static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2177 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2178{
89bddce5
SH
2179 struct compat_msghdr __user *msg_compat =
2180 (struct compat_msghdr __user *)msg;
1da177e4 2181 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2182 struct iovec *iov = iovstack;
1da177e4 2183 unsigned long cmsg_ptr;
a74e9106 2184 int err, total_len, len;
1da177e4
LT
2185
2186 /* kernel mode address */
230b1839 2187 struct sockaddr_storage addr;
1da177e4
LT
2188
2189 /* user mode address pointers */
2190 struct sockaddr __user *uaddr;
2191 int __user *uaddr_len;
89bddce5 2192
1da177e4 2193 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2194 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2195 return -EFAULT;
c6d409cf 2196 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2197 return -EFAULT;
1da177e4 2198
a2e27255 2199 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2200 err = -EMSGSIZE;
2201 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2202 goto out;
2203 err = -ENOMEM;
2204 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2205 GFP_KERNEL);
1da177e4 2206 if (!iov)
a2e27255 2207 goto out;
1da177e4
LT
2208 }
2209
2210 /*
89bddce5
SH
2211 * Save the user-mode address (verify_iovec will change the
2212 * kernel msghdr to use the kernel address space)
1da177e4 2213 */
89bddce5 2214
a2e27255 2215 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2216 uaddr_len = COMPAT_NAMELEN(msg);
2217 if (MSG_CMSG_COMPAT & flags) {
43db362d 2218 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4 2219 } else
43db362d 2220 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2221 if (err < 0)
2222 goto out_freeiov;
89bddce5 2223 total_len = err;
1da177e4 2224
a2e27255
ACM
2225 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2226 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2227
1da177e4
LT
2228 if (sock->file->f_flags & O_NONBLOCK)
2229 flags |= MSG_DONTWAIT;
a2e27255
ACM
2230 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2231 total_len, flags);
1da177e4
LT
2232 if (err < 0)
2233 goto out_freeiov;
2234 len = err;
2235
2236 if (uaddr != NULL) {
43db362d 2237 err = move_addr_to_user(&addr,
a2e27255 2238 msg_sys->msg_namelen, uaddr,
89bddce5 2239 uaddr_len);
1da177e4
LT
2240 if (err < 0)
2241 goto out_freeiov;
2242 }
a2e27255 2243 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2244 COMPAT_FLAGS(msg));
1da177e4
LT
2245 if (err)
2246 goto out_freeiov;
2247 if (MSG_CMSG_COMPAT & flags)
a2e27255 2248 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2249 &msg_compat->msg_controllen);
2250 else
a2e27255 2251 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2252 &msg->msg_controllen);
2253 if (err)
2254 goto out_freeiov;
2255 err = len;
2256
2257out_freeiov:
2258 if (iov != iovstack)
a74e9106 2259 kfree(iov);
a2e27255
ACM
2260out:
2261 return err;
2262}
2263
2264/*
2265 * BSD recvmsg interface
2266 */
2267
2268SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2269 unsigned int, flags)
2270{
2271 int fput_needed, err;
2272 struct msghdr msg_sys;
2273 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2274
2275 if (!sock)
2276 goto out;
2277
2278 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2279
6cb153ca 2280 fput_light(sock->file, fput_needed);
1da177e4
LT
2281out:
2282 return err;
2283}
2284
a2e27255
ACM
2285/*
2286 * Linux recvmmsg interface
2287 */
2288
2289int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2290 unsigned int flags, struct timespec *timeout)
2291{
2292 int fput_needed, err, datagrams;
2293 struct socket *sock;
2294 struct mmsghdr __user *entry;
d7256d0e 2295 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2296 struct msghdr msg_sys;
2297 struct timespec end_time;
2298
2299 if (timeout &&
2300 poll_select_set_timeout(&end_time, timeout->tv_sec,
2301 timeout->tv_nsec))
2302 return -EINVAL;
2303
2304 datagrams = 0;
2305
2306 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2307 if (!sock)
2308 return err;
2309
2310 err = sock_error(sock->sk);
2311 if (err)
2312 goto out_put;
2313
2314 entry = mmsg;
d7256d0e 2315 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2316
2317 while (datagrams < vlen) {
2318 /*
2319 * No need to ask LSM for more than the first datagram.
2320 */
d7256d0e
JMG
2321 if (MSG_CMSG_COMPAT & flags) {
2322 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
b9eb8b87
AB
2323 &msg_sys, flags & ~MSG_WAITFORONE,
2324 datagrams);
d7256d0e
JMG
2325 if (err < 0)
2326 break;
2327 err = __put_user(err, &compat_entry->msg_len);
2328 ++compat_entry;
2329 } else {
2330 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
b9eb8b87
AB
2331 &msg_sys, flags & ~MSG_WAITFORONE,
2332 datagrams);
d7256d0e
JMG
2333 if (err < 0)
2334 break;
2335 err = put_user(err, &entry->msg_len);
2336 ++entry;
2337 }
2338
a2e27255
ACM
2339 if (err)
2340 break;
a2e27255
ACM
2341 ++datagrams;
2342
71c5c159
BB
2343 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2344 if (flags & MSG_WAITFORONE)
2345 flags |= MSG_DONTWAIT;
2346
a2e27255
ACM
2347 if (timeout) {
2348 ktime_get_ts(timeout);
2349 *timeout = timespec_sub(end_time, *timeout);
2350 if (timeout->tv_sec < 0) {
2351 timeout->tv_sec = timeout->tv_nsec = 0;
2352 break;
2353 }
2354
2355 /* Timeout, return less than vlen datagrams */
2356 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2357 break;
2358 }
2359
2360 /* Out of band data, return right away */
2361 if (msg_sys.msg_flags & MSG_OOB)
2362 break;
2363 }
2364
2365out_put:
2366 fput_light(sock->file, fput_needed);
1da177e4 2367
a2e27255
ACM
2368 if (err == 0)
2369 return datagrams;
2370
2371 if (datagrams != 0) {
2372 /*
2373 * We may return less entries than requested (vlen) if the
2374 * sock is non block and there aren't enough datagrams...
2375 */
2376 if (err != -EAGAIN) {
2377 /*
2378 * ... or if recvmsg returns an error after we
2379 * received some datagrams, where we record the
2380 * error to return on the next call or if the
2381 * app asks about it using getsockopt(SO_ERROR).
2382 */
2383 sock->sk->sk_err = -err;
2384 }
2385
2386 return datagrams;
2387 }
2388
2389 return err;
2390}
2391
2392SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2393 unsigned int, vlen, unsigned int, flags,
2394 struct timespec __user *, timeout)
2395{
2396 int datagrams;
2397 struct timespec timeout_sys;
2398
2399 if (!timeout)
2400 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2401
2402 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2403 return -EFAULT;
2404
2405 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2406
2407 if (datagrams > 0 &&
2408 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2409 datagrams = -EFAULT;
2410
2411 return datagrams;
2412}
2413
2414#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2415/* Argument list sizes for sys_socketcall */
2416#define AL(x) ((x) * sizeof(unsigned long))
228e548e 2417static const unsigned char nargs[21] = {
c6d409cf
ED
2418 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2419 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2420 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
228e548e 2421 AL(4), AL(5), AL(4)
89bddce5
SH
2422};
2423
1da177e4
LT
2424#undef AL
2425
2426/*
89bddce5 2427 * System call vectors.
1da177e4
LT
2428 *
2429 * Argument checking cleaned up. Saved 20% in size.
2430 * This function doesn't need to set the kernel lock because
89bddce5 2431 * it is set by the callees.
1da177e4
LT
2432 */
2433
3e0fa65f 2434SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2435{
2436 unsigned long a[6];
89bddce5 2437 unsigned long a0, a1;
1da177e4 2438 int err;
47379052 2439 unsigned int len;
1da177e4 2440
228e548e 2441 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2442 return -EINVAL;
2443
47379052
AV
2444 len = nargs[call];
2445 if (len > sizeof(a))
2446 return -EINVAL;
2447
1da177e4 2448 /* copy_from_user should be SMP safe. */
47379052 2449 if (copy_from_user(a, args, len))
1da177e4 2450 return -EFAULT;
3ec3b2fb 2451
f3298dc4 2452 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2453
89bddce5
SH
2454 a0 = a[0];
2455 a1 = a[1];
2456
2457 switch (call) {
2458 case SYS_SOCKET:
2459 err = sys_socket(a0, a1, a[2]);
2460 break;
2461 case SYS_BIND:
2462 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2463 break;
2464 case SYS_CONNECT:
2465 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2466 break;
2467 case SYS_LISTEN:
2468 err = sys_listen(a0, a1);
2469 break;
2470 case SYS_ACCEPT:
de11defe
UD
2471 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2472 (int __user *)a[2], 0);
89bddce5
SH
2473 break;
2474 case SYS_GETSOCKNAME:
2475 err =
2476 sys_getsockname(a0, (struct sockaddr __user *)a1,
2477 (int __user *)a[2]);
2478 break;
2479 case SYS_GETPEERNAME:
2480 err =
2481 sys_getpeername(a0, (struct sockaddr __user *)a1,
2482 (int __user *)a[2]);
2483 break;
2484 case SYS_SOCKETPAIR:
2485 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2486 break;
2487 case SYS_SEND:
2488 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2489 break;
2490 case SYS_SENDTO:
2491 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2492 (struct sockaddr __user *)a[4], a[5]);
2493 break;
2494 case SYS_RECV:
2495 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2496 break;
2497 case SYS_RECVFROM:
2498 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2499 (struct sockaddr __user *)a[4],
2500 (int __user *)a[5]);
2501 break;
2502 case SYS_SHUTDOWN:
2503 err = sys_shutdown(a0, a1);
2504 break;
2505 case SYS_SETSOCKOPT:
2506 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2507 break;
2508 case SYS_GETSOCKOPT:
2509 err =
2510 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2511 (int __user *)a[4]);
2512 break;
2513 case SYS_SENDMSG:
2514 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2515 break;
228e548e
AB
2516 case SYS_SENDMMSG:
2517 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2518 break;
89bddce5
SH
2519 case SYS_RECVMSG:
2520 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2521 break;
a2e27255
ACM
2522 case SYS_RECVMMSG:
2523 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2524 (struct timespec __user *)a[4]);
2525 break;
de11defe
UD
2526 case SYS_ACCEPT4:
2527 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2528 (int __user *)a[2], a[3]);
aaca0bdc 2529 break;
89bddce5
SH
2530 default:
2531 err = -EINVAL;
2532 break;
1da177e4
LT
2533 }
2534 return err;
2535}
2536
89bddce5 2537#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2538
55737fda
SH
2539/**
2540 * sock_register - add a socket protocol handler
2541 * @ops: description of protocol
2542 *
1da177e4
LT
2543 * This function is called by a protocol handler that wants to
2544 * advertise its address family, and have it linked into the
55737fda
SH
2545 * socket interface. The value ops->family coresponds to the
2546 * socket system call protocol family.
1da177e4 2547 */
f0fd27d4 2548int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2549{
2550 int err;
2551
2552 if (ops->family >= NPROTO) {
89bddce5
SH
2553 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2554 NPROTO);
1da177e4
LT
2555 return -ENOBUFS;
2556 }
55737fda
SH
2557
2558 spin_lock(&net_family_lock);
190683a9
ED
2559 if (rcu_dereference_protected(net_families[ops->family],
2560 lockdep_is_held(&net_family_lock)))
55737fda
SH
2561 err = -EEXIST;
2562 else {
cf778b00 2563 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2564 err = 0;
2565 }
55737fda
SH
2566 spin_unlock(&net_family_lock);
2567
89bddce5 2568 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2569 return err;
2570}
c6d409cf 2571EXPORT_SYMBOL(sock_register);
1da177e4 2572
55737fda
SH
2573/**
2574 * sock_unregister - remove a protocol handler
2575 * @family: protocol family to remove
2576 *
1da177e4
LT
2577 * This function is called by a protocol handler that wants to
2578 * remove its address family, and have it unlinked from the
55737fda
SH
2579 * new socket creation.
2580 *
2581 * If protocol handler is a module, then it can use module reference
2582 * counts to protect against new references. If protocol handler is not
2583 * a module then it needs to provide its own protection in
2584 * the ops->create routine.
1da177e4 2585 */
f0fd27d4 2586void sock_unregister(int family)
1da177e4 2587{
f0fd27d4 2588 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2589
55737fda 2590 spin_lock(&net_family_lock);
a9b3cd7f 2591 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2592 spin_unlock(&net_family_lock);
2593
2594 synchronize_rcu();
2595
89bddce5 2596 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2597}
c6d409cf 2598EXPORT_SYMBOL(sock_unregister);
1da177e4 2599
77d76ea3 2600static int __init sock_init(void)
1da177e4 2601{
b3e19d92 2602 int err;
2ca794e5
EB
2603 /*
2604 * Initialize the network sysctl infrastructure.
2605 */
2606 err = net_sysctl_init();
2607 if (err)
2608 goto out;
b3e19d92 2609
1da177e4 2610 /*
89bddce5 2611 * Initialize skbuff SLAB cache
1da177e4
LT
2612 */
2613 skb_init();
1da177e4
LT
2614
2615 /*
89bddce5 2616 * Initialize the protocols module.
1da177e4
LT
2617 */
2618
2619 init_inodecache();
b3e19d92
NP
2620
2621 err = register_filesystem(&sock_fs_type);
2622 if (err)
2623 goto out_fs;
1da177e4 2624 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2625 if (IS_ERR(sock_mnt)) {
2626 err = PTR_ERR(sock_mnt);
2627 goto out_mount;
2628 }
77d76ea3
AK
2629
2630 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2631 */
2632
2633#ifdef CONFIG_NETFILTER
2634 netfilter_init();
2635#endif
cbeb321a 2636
c1f19b51
RC
2637#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2638 skb_timestamping_init();
2639#endif
2640
b3e19d92
NP
2641out:
2642 return err;
2643
2644out_mount:
2645 unregister_filesystem(&sock_fs_type);
2646out_fs:
2647 goto out;
1da177e4
LT
2648}
2649
77d76ea3
AK
2650core_initcall(sock_init); /* early initcall */
2651
1da177e4
LT
2652#ifdef CONFIG_PROC_FS
2653void socket_seq_show(struct seq_file *seq)
2654{
2655 int cpu;
2656 int counter = 0;
2657
6f912042 2658 for_each_possible_cpu(cpu)
89bddce5 2659 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2660
2661 /* It can be negative, by the way. 8) */
2662 if (counter < 0)
2663 counter = 0;
2664
2665 seq_printf(seq, "sockets: used %d\n", counter);
2666}
89bddce5 2667#endif /* CONFIG_PROC_FS */
1da177e4 2668
89bbfc95 2669#ifdef CONFIG_COMPAT
6b96018b 2670static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2671 unsigned int cmd, void __user *up)
7a229387 2672{
7a229387
AB
2673 mm_segment_t old_fs = get_fs();
2674 struct timeval ktv;
2675 int err;
2676
2677 set_fs(KERNEL_DS);
6b96018b 2678 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2679 set_fs(old_fs);
644595f8 2680 if (!err)
ed6fe9d6 2681 err = compat_put_timeval(&ktv, up);
644595f8 2682
7a229387
AB
2683 return err;
2684}
2685
6b96018b 2686static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2687 unsigned int cmd, void __user *up)
7a229387 2688{
7a229387
AB
2689 mm_segment_t old_fs = get_fs();
2690 struct timespec kts;
2691 int err;
2692
2693 set_fs(KERNEL_DS);
6b96018b 2694 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2695 set_fs(old_fs);
644595f8 2696 if (!err)
ed6fe9d6 2697 err = compat_put_timespec(&kts, up);
644595f8 2698
7a229387
AB
2699 return err;
2700}
2701
6b96018b 2702static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2703{
2704 struct ifreq __user *uifr;
2705 int err;
2706
2707 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2708 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2709 return -EFAULT;
2710
6b96018b 2711 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2712 if (err)
2713 return err;
2714
6b96018b 2715 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2716 return -EFAULT;
2717
2718 return 0;
2719}
2720
6b96018b 2721static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2722{
6b96018b 2723 struct compat_ifconf ifc32;
7a229387
AB
2724 struct ifconf ifc;
2725 struct ifconf __user *uifc;
6b96018b 2726 struct compat_ifreq __user *ifr32;
7a229387
AB
2727 struct ifreq __user *ifr;
2728 unsigned int i, j;
2729 int err;
2730
6b96018b 2731 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2732 return -EFAULT;
2733
43da5f2e 2734 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2735 if (ifc32.ifcbuf == 0) {
2736 ifc32.ifc_len = 0;
2737 ifc.ifc_len = 0;
2738 ifc.ifc_req = NULL;
2739 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2740 } else {
c6d409cf
ED
2741 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2742 sizeof(struct ifreq);
7a229387
AB
2743 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2744 ifc.ifc_len = len;
2745 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2746 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2747 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2748 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2749 return -EFAULT;
2750 ifr++;
2751 ifr32++;
2752 }
2753 }
2754 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2755 return -EFAULT;
2756
6b96018b 2757 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2758 if (err)
2759 return err;
2760
2761 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2762 return -EFAULT;
2763
2764 ifr = ifc.ifc_req;
2765 ifr32 = compat_ptr(ifc32.ifcbuf);
2766 for (i = 0, j = 0;
c6d409cf
ED
2767 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2768 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2769 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2770 return -EFAULT;
2771 ifr32++;
2772 ifr++;
2773 }
2774
2775 if (ifc32.ifcbuf == 0) {
2776 /* Translate from 64-bit structure multiple to
2777 * a 32-bit one.
2778 */
2779 i = ifc.ifc_len;
6b96018b 2780 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2781 ifc32.ifc_len = i;
2782 } else {
2783 ifc32.ifc_len = i;
2784 }
6b96018b 2785 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2786 return -EFAULT;
2787
2788 return 0;
2789}
2790
6b96018b 2791static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2792{
3a7da39d
BH
2793 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2794 bool convert_in = false, convert_out = false;
2795 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2796 struct ethtool_rxnfc __user *rxnfc;
7a229387 2797 struct ifreq __user *ifr;
3a7da39d
BH
2798 u32 rule_cnt = 0, actual_rule_cnt;
2799 u32 ethcmd;
7a229387 2800 u32 data;
3a7da39d 2801 int ret;
7a229387 2802
3a7da39d
BH
2803 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2804 return -EFAULT;
7a229387 2805
3a7da39d
BH
2806 compat_rxnfc = compat_ptr(data);
2807
2808 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2809 return -EFAULT;
2810
3a7da39d
BH
2811 /* Most ethtool structures are defined without padding.
2812 * Unfortunately struct ethtool_rxnfc is an exception.
2813 */
2814 switch (ethcmd) {
2815 default:
2816 break;
2817 case ETHTOOL_GRXCLSRLALL:
2818 /* Buffer size is variable */
2819 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2820 return -EFAULT;
2821 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2822 return -ENOMEM;
2823 buf_size += rule_cnt * sizeof(u32);
2824 /* fall through */
2825 case ETHTOOL_GRXRINGS:
2826 case ETHTOOL_GRXCLSRLCNT:
2827 case ETHTOOL_GRXCLSRULE:
55664f32 2828 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2829 convert_out = true;
2830 /* fall through */
2831 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2832 buf_size += sizeof(struct ethtool_rxnfc);
2833 convert_in = true;
2834 break;
2835 }
2836
2837 ifr = compat_alloc_user_space(buf_size);
954b1244 2838 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2839
2840 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2841 return -EFAULT;
2842
3a7da39d
BH
2843 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2844 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2845 return -EFAULT;
2846
3a7da39d 2847 if (convert_in) {
127fe533 2848 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2849 * fs.ring_cookie and at the end of fs, but nowhere else.
2850 */
127fe533
AD
2851 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2852 sizeof(compat_rxnfc->fs.m_ext) !=
2853 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2854 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2855 BUILD_BUG_ON(
2856 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2857 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2858 offsetof(struct ethtool_rxnfc, fs.location) -
2859 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2860
2861 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2862 (void __user *)(&rxnfc->fs.m_ext + 1) -
2863 (void __user *)rxnfc) ||
3a7da39d
BH
2864 copy_in_user(&rxnfc->fs.ring_cookie,
2865 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2866 (void __user *)(&rxnfc->fs.location + 1) -
2867 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2868 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2869 sizeof(rxnfc->rule_cnt)))
2870 return -EFAULT;
2871 }
2872
2873 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2874 if (ret)
2875 return ret;
2876
2877 if (convert_out) {
2878 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2879 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2880 (const void __user *)rxnfc) ||
3a7da39d
BH
2881 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2882 &rxnfc->fs.ring_cookie,
954b1244
SH
2883 (const void __user *)(&rxnfc->fs.location + 1) -
2884 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2885 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2886 sizeof(rxnfc->rule_cnt)))
2887 return -EFAULT;
2888
2889 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2890 /* As an optimisation, we only copy the actual
2891 * number of rules that the underlying
2892 * function returned. Since Mallory might
2893 * change the rule count in user memory, we
2894 * check that it is less than the rule count
2895 * originally given (as the user buffer size),
2896 * which has been range-checked.
2897 */
2898 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2899 return -EFAULT;
2900 if (actual_rule_cnt < rule_cnt)
2901 rule_cnt = actual_rule_cnt;
2902 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2903 &rxnfc->rule_locs[0],
2904 rule_cnt * sizeof(u32)))
2905 return -EFAULT;
2906 }
2907 }
2908
2909 return 0;
7a229387
AB
2910}
2911
7a50a240
AB
2912static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2913{
2914 void __user *uptr;
2915 compat_uptr_t uptr32;
2916 struct ifreq __user *uifr;
2917
c6d409cf 2918 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2919 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2920 return -EFAULT;
2921
2922 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2923 return -EFAULT;
2924
2925 uptr = compat_ptr(uptr32);
2926
2927 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2928 return -EFAULT;
2929
2930 return dev_ioctl(net, SIOCWANDEV, uifr);
2931}
2932
6b96018b
AB
2933static int bond_ioctl(struct net *net, unsigned int cmd,
2934 struct compat_ifreq __user *ifr32)
7a229387
AB
2935{
2936 struct ifreq kifr;
2937 struct ifreq __user *uifr;
7a229387
AB
2938 mm_segment_t old_fs;
2939 int err;
2940 u32 data;
2941 void __user *datap;
2942
2943 switch (cmd) {
2944 case SIOCBONDENSLAVE:
2945 case SIOCBONDRELEASE:
2946 case SIOCBONDSETHWADDR:
2947 case SIOCBONDCHANGEACTIVE:
6b96018b 2948 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2949 return -EFAULT;
2950
2951 old_fs = get_fs();
c6d409cf 2952 set_fs(KERNEL_DS);
c3f52ae6 2953 err = dev_ioctl(net, cmd,
2954 (struct ifreq __user __force *) &kifr);
c6d409cf 2955 set_fs(old_fs);
7a229387
AB
2956
2957 return err;
2958 case SIOCBONDSLAVEINFOQUERY:
2959 case SIOCBONDINFOQUERY:
2960 uifr = compat_alloc_user_space(sizeof(*uifr));
2961 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2962 return -EFAULT;
2963
2964 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2965 return -EFAULT;
2966
2967 datap = compat_ptr(data);
2968 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2969 return -EFAULT;
2970
6b96018b 2971 return dev_ioctl(net, cmd, uifr);
7a229387 2972 default:
07d106d0 2973 return -ENOIOCTLCMD;
ccbd6a5a 2974 }
7a229387
AB
2975}
2976
6b96018b
AB
2977static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2978 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2979{
2980 struct ifreq __user *u_ifreq64;
7a229387
AB
2981 char tmp_buf[IFNAMSIZ];
2982 void __user *data64;
2983 u32 data32;
2984
2985 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2986 IFNAMSIZ))
2987 return -EFAULT;
2988 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2989 return -EFAULT;
2990 data64 = compat_ptr(data32);
2991
2992 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2993
2994 /* Don't check these user accesses, just let that get trapped
2995 * in the ioctl handler instead.
2996 */
2997 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2998 IFNAMSIZ))
2999 return -EFAULT;
3000 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
3001 return -EFAULT;
3002
6b96018b 3003 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
3004}
3005
6b96018b
AB
3006static int dev_ifsioc(struct net *net, struct socket *sock,
3007 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 3008{
a2116ed2 3009 struct ifreq __user *uifr;
7a229387
AB
3010 int err;
3011
a2116ed2
AB
3012 uifr = compat_alloc_user_space(sizeof(*uifr));
3013 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3014 return -EFAULT;
3015
3016 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3017
7a229387
AB
3018 if (!err) {
3019 switch (cmd) {
3020 case SIOCGIFFLAGS:
3021 case SIOCGIFMETRIC:
3022 case SIOCGIFMTU:
3023 case SIOCGIFMEM:
3024 case SIOCGIFHWADDR:
3025 case SIOCGIFINDEX:
3026 case SIOCGIFADDR:
3027 case SIOCGIFBRDADDR:
3028 case SIOCGIFDSTADDR:
3029 case SIOCGIFNETMASK:
fab2532b 3030 case SIOCGIFPFLAGS:
7a229387 3031 case SIOCGIFTXQLEN:
fab2532b
AB
3032 case SIOCGMIIPHY:
3033 case SIOCGMIIREG:
a2116ed2 3034 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3035 err = -EFAULT;
3036 break;
3037 }
3038 }
3039 return err;
3040}
3041
a2116ed2
AB
3042static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3043 struct compat_ifreq __user *uifr32)
3044{
3045 struct ifreq ifr;
3046 struct compat_ifmap __user *uifmap32;
3047 mm_segment_t old_fs;
3048 int err;
3049
3050 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3051 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3052 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3053 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3054 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3055 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
3056 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
3057 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
3058 if (err)
3059 return -EFAULT;
3060
3061 old_fs = get_fs();
c6d409cf 3062 set_fs(KERNEL_DS);
c3f52ae6 3063 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3064 set_fs(old_fs);
a2116ed2
AB
3065
3066 if (cmd == SIOCGIFMAP && !err) {
3067 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3068 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3069 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3070 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3071 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
3072 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
3073 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
3074 if (err)
3075 err = -EFAULT;
3076 }
3077 return err;
3078}
3079
3080static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
3081{
3082 void __user *uptr;
3083 compat_uptr_t uptr32;
3084 struct ifreq __user *uifr;
3085
c6d409cf 3086 uifr = compat_alloc_user_space(sizeof(*uifr));
a2116ed2
AB
3087 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
3088 return -EFAULT;
3089
3090 if (get_user(uptr32, &uifr32->ifr_data))
3091 return -EFAULT;
3092
3093 uptr = compat_ptr(uptr32);
3094
3095 if (put_user(uptr, &uifr->ifr_data))
3096 return -EFAULT;
3097
3098 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
3099}
3100
7a229387 3101struct rtentry32 {
c6d409cf 3102 u32 rt_pad1;
7a229387
AB
3103 struct sockaddr rt_dst; /* target address */
3104 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3105 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3106 unsigned short rt_flags;
3107 short rt_pad2;
3108 u32 rt_pad3;
3109 unsigned char rt_tos;
3110 unsigned char rt_class;
3111 short rt_pad4;
3112 short rt_metric; /* +1 for binary compatibility! */
7a229387 3113 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3114 u32 rt_mtu; /* per route MTU/Window */
3115 u32 rt_window; /* Window clamping */
7a229387
AB
3116 unsigned short rt_irtt; /* Initial RTT */
3117};
3118
3119struct in6_rtmsg32 {
3120 struct in6_addr rtmsg_dst;
3121 struct in6_addr rtmsg_src;
3122 struct in6_addr rtmsg_gateway;
3123 u32 rtmsg_type;
3124 u16 rtmsg_dst_len;
3125 u16 rtmsg_src_len;
3126 u32 rtmsg_metric;
3127 u32 rtmsg_info;
3128 u32 rtmsg_flags;
3129 s32 rtmsg_ifindex;
3130};
3131
6b96018b
AB
3132static int routing_ioctl(struct net *net, struct socket *sock,
3133 unsigned int cmd, void __user *argp)
7a229387
AB
3134{
3135 int ret;
3136 void *r = NULL;
3137 struct in6_rtmsg r6;
3138 struct rtentry r4;
3139 char devname[16];
3140 u32 rtdev;
3141 mm_segment_t old_fs = get_fs();
3142
6b96018b
AB
3143 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3144 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3145 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3146 3 * sizeof(struct in6_addr));
c6d409cf
ED
3147 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3148 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3149 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3150 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3151 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3152 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3153 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3154
3155 r = (void *) &r6;
3156 } else { /* ipv4 */
6b96018b 3157 struct rtentry32 __user *ur4 = argp;
c6d409cf 3158 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3159 3 * sizeof(struct sockaddr));
c6d409cf
ED
3160 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
3161 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
3162 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
3163 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
3164 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
3165 ret |= __get_user(rtdev, &(ur4->rt_dev));
7a229387 3166 if (rtdev) {
c6d409cf 3167 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3168 r4.rt_dev = (char __user __force *)devname;
3169 devname[15] = 0;
7a229387
AB
3170 } else
3171 r4.rt_dev = NULL;
3172
3173 r = (void *) &r4;
3174 }
3175
3176 if (ret) {
3177 ret = -EFAULT;
3178 goto out;
3179 }
3180
c6d409cf 3181 set_fs(KERNEL_DS);
6b96018b 3182 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3183 set_fs(old_fs);
7a229387
AB
3184
3185out:
7a229387
AB
3186 return ret;
3187}
3188
3189/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3190 * for some operations; this forces use of the newer bridge-utils that
25985edc 3191 * use compatible ioctls
7a229387 3192 */
6b96018b 3193static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3194{
6b96018b 3195 compat_ulong_t tmp;
7a229387 3196
6b96018b 3197 if (get_user(tmp, argp))
7a229387
AB
3198 return -EFAULT;
3199 if (tmp == BRCTL_GET_VERSION)
3200 return BRCTL_VERSION + 1;
3201 return -EINVAL;
3202}
3203
6b96018b
AB
3204static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3205 unsigned int cmd, unsigned long arg)
3206{
3207 void __user *argp = compat_ptr(arg);
3208 struct sock *sk = sock->sk;
3209 struct net *net = sock_net(sk);
7a229387 3210
6b96018b
AB
3211 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3212 return siocdevprivate_ioctl(net, cmd, argp);
3213
3214 switch (cmd) {
3215 case SIOCSIFBR:
3216 case SIOCGIFBR:
3217 return old_bridge_ioctl(argp);
3218 case SIOCGIFNAME:
3219 return dev_ifname32(net, argp);
3220 case SIOCGIFCONF:
3221 return dev_ifconf(net, argp);
3222 case SIOCETHTOOL:
3223 return ethtool_ioctl(net, argp);
7a50a240
AB
3224 case SIOCWANDEV:
3225 return compat_siocwandev(net, argp);
a2116ed2
AB
3226 case SIOCGIFMAP:
3227 case SIOCSIFMAP:
3228 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3229 case SIOCBONDENSLAVE:
3230 case SIOCBONDRELEASE:
3231 case SIOCBONDSETHWADDR:
3232 case SIOCBONDSLAVEINFOQUERY:
3233 case SIOCBONDINFOQUERY:
3234 case SIOCBONDCHANGEACTIVE:
3235 return bond_ioctl(net, cmd, argp);
3236 case SIOCADDRT:
3237 case SIOCDELRT:
3238 return routing_ioctl(net, sock, cmd, argp);
3239 case SIOCGSTAMP:
3240 return do_siocgstamp(net, sock, cmd, argp);
3241 case SIOCGSTAMPNS:
3242 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
3243 case SIOCSHWTSTAMP:
3244 return compat_siocshwtstamp(net, argp);
6b96018b
AB
3245
3246 case FIOSETOWN:
3247 case SIOCSPGRP:
3248 case FIOGETOWN:
3249 case SIOCGPGRP:
3250 case SIOCBRADDBR:
3251 case SIOCBRDELBR:
3252 case SIOCGIFVLAN:
3253 case SIOCSIFVLAN:
3254 case SIOCADDDLCI:
3255 case SIOCDELDLCI:
3256 return sock_ioctl(file, cmd, arg);
3257
3258 case SIOCGIFFLAGS:
3259 case SIOCSIFFLAGS:
3260 case SIOCGIFMETRIC:
3261 case SIOCSIFMETRIC:
3262 case SIOCGIFMTU:
3263 case SIOCSIFMTU:
3264 case SIOCGIFMEM:
3265 case SIOCSIFMEM:
3266 case SIOCGIFHWADDR:
3267 case SIOCSIFHWADDR:
3268 case SIOCADDMULTI:
3269 case SIOCDELMULTI:
3270 case SIOCGIFINDEX:
6b96018b
AB
3271 case SIOCGIFADDR:
3272 case SIOCSIFADDR:
3273 case SIOCSIFHWBROADCAST:
6b96018b 3274 case SIOCDIFADDR:
6b96018b
AB
3275 case SIOCGIFBRDADDR:
3276 case SIOCSIFBRDADDR:
3277 case SIOCGIFDSTADDR:
3278 case SIOCSIFDSTADDR:
3279 case SIOCGIFNETMASK:
3280 case SIOCSIFNETMASK:
3281 case SIOCSIFPFLAGS:
3282 case SIOCGIFPFLAGS:
3283 case SIOCGIFTXQLEN:
3284 case SIOCSIFTXQLEN:
3285 case SIOCBRADDIF:
3286 case SIOCBRDELIF:
9177efd3
AB
3287 case SIOCSIFNAME:
3288 case SIOCGMIIPHY:
3289 case SIOCGMIIREG:
3290 case SIOCSMIIREG:
6b96018b 3291 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3292
6b96018b
AB
3293 case SIOCSARP:
3294 case SIOCGARP:
3295 case SIOCDARP:
6b96018b 3296 case SIOCATMARK:
9177efd3
AB
3297 return sock_do_ioctl(net, sock, cmd, arg);
3298 }
3299
6b96018b
AB
3300 return -ENOIOCTLCMD;
3301}
7a229387 3302
95c96174 3303static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3304 unsigned long arg)
89bbfc95
SP
3305{
3306 struct socket *sock = file->private_data;
3307 int ret = -ENOIOCTLCMD;
87de87d5
DM
3308 struct sock *sk;
3309 struct net *net;
3310
3311 sk = sock->sk;
3312 net = sock_net(sk);
89bbfc95
SP
3313
3314 if (sock->ops->compat_ioctl)
3315 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3316
87de87d5
DM
3317 if (ret == -ENOIOCTLCMD &&
3318 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3319 ret = compat_wext_handle_ioctl(net, cmd, arg);
3320
6b96018b
AB
3321 if (ret == -ENOIOCTLCMD)
3322 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3323
89bbfc95
SP
3324 return ret;
3325}
3326#endif
3327
ac5a488e
SS
3328int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3329{
3330 return sock->ops->bind(sock, addr, addrlen);
3331}
c6d409cf 3332EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3333
3334int kernel_listen(struct socket *sock, int backlog)
3335{
3336 return sock->ops->listen(sock, backlog);
3337}
c6d409cf 3338EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3339
3340int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3341{
3342 struct sock *sk = sock->sk;
3343 int err;
3344
3345 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3346 newsock);
3347 if (err < 0)
3348 goto done;
3349
3350 err = sock->ops->accept(sock, *newsock, flags);
3351 if (err < 0) {
3352 sock_release(*newsock);
fa8705b0 3353 *newsock = NULL;
ac5a488e
SS
3354 goto done;
3355 }
3356
3357 (*newsock)->ops = sock->ops;
1b08534e 3358 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3359
3360done:
3361 return err;
3362}
c6d409cf 3363EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3364
3365int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3366 int flags)
ac5a488e
SS
3367{
3368 return sock->ops->connect(sock, addr, addrlen, flags);
3369}
c6d409cf 3370EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3371
3372int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3373 int *addrlen)
3374{
3375 return sock->ops->getname(sock, addr, addrlen, 0);
3376}
c6d409cf 3377EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3378
3379int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3380 int *addrlen)
3381{
3382 return sock->ops->getname(sock, addr, addrlen, 1);
3383}
c6d409cf 3384EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3385
3386int kernel_getsockopt(struct socket *sock, int level, int optname,
3387 char *optval, int *optlen)
3388{
3389 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3390 char __user *uoptval;
3391 int __user *uoptlen;
ac5a488e
SS
3392 int err;
3393
fb8621bb
NK
3394 uoptval = (char __user __force *) optval;
3395 uoptlen = (int __user __force *) optlen;
3396
ac5a488e
SS
3397 set_fs(KERNEL_DS);
3398 if (level == SOL_SOCKET)
fb8621bb 3399 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3400 else
fb8621bb
NK
3401 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3402 uoptlen);
ac5a488e
SS
3403 set_fs(oldfs);
3404 return err;
3405}
c6d409cf 3406EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3407
3408int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3409 char *optval, unsigned int optlen)
ac5a488e
SS
3410{
3411 mm_segment_t oldfs = get_fs();
fb8621bb 3412 char __user *uoptval;
ac5a488e
SS
3413 int err;
3414
fb8621bb
NK
3415 uoptval = (char __user __force *) optval;
3416
ac5a488e
SS
3417 set_fs(KERNEL_DS);
3418 if (level == SOL_SOCKET)
fb8621bb 3419 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3420 else
fb8621bb 3421 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3422 optlen);
3423 set_fs(oldfs);
3424 return err;
3425}
c6d409cf 3426EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3427
3428int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3429 size_t size, int flags)
3430{
3431 if (sock->ops->sendpage)
3432 return sock->ops->sendpage(sock, page, offset, size, flags);
3433
3434 return sock_no_sendpage(sock, page, offset, size, flags);
3435}
c6d409cf 3436EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3437
3438int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3439{
3440 mm_segment_t oldfs = get_fs();
3441 int err;
3442
3443 set_fs(KERNEL_DS);
3444 err = sock->ops->ioctl(sock, cmd, arg);
3445 set_fs(oldfs);
3446
3447 return err;
3448}
c6d409cf 3449EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3450
91cf45f0
TM
3451int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3452{
3453 return sock->ops->shutdown(sock, how);
3454}
91cf45f0 3455EXPORT_SYMBOL(kernel_sock_shutdown);
This page took 1.496161 seconds and 5 git commands to generate.