Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[deliverable/linux.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll globals; only defined when CONFIG_NET_RX_BUSY_POLL is enabled. */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 115
027445c3
BP
116static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
117 unsigned long nr_segs, loff_t pos);
118static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
119 unsigned long nr_segs, loff_t pos);
89bddce5 120static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
121
122static int sock_close(struct inode *inode, struct file *file);
123static unsigned int sock_poll(struct file *file,
124 struct poll_table_struct *wait);
89bddce5 125static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
126#ifdef CONFIG_COMPAT
127static long compat_sock_ioctl(struct file *file,
89bddce5 128 unsigned int cmd, unsigned long arg);
89bbfc95 129#endif
1da177e4 130static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
131static ssize_t sock_sendpage(struct file *file, struct page *page,
132 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 133static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 134 struct pipe_inode_info *pipe, size_t len,
9c55e01c 135 unsigned int flags);
1da177e4 136
1da177e4
LT
137/*
138 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
139 * in the operation structures but are done directly via the socketcall() multiplexor.
140 */
141
da7071d7 142static const struct file_operations socket_file_ops = {
1da177e4
LT
143 .owner = THIS_MODULE,
144 .llseek = no_llseek,
145 .aio_read = sock_aio_read,
146 .aio_write = sock_aio_write,
147 .poll = sock_poll,
148 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
149#ifdef CONFIG_COMPAT
150 .compat_ioctl = compat_sock_ioctl,
151#endif
1da177e4 152 .mmap = sock_mmap,
1da177e4
LT
153 .release = sock_close,
154 .fasync = sock_fasync,
5274f052
JA
155 .sendpage = sock_sendpage,
156 .splice_write = generic_splice_sendpage,
9c55e01c 157 .splice_read = sock_splice_read,
1da177e4
LT
158};
159
160/*
161 * The protocol list. Each protocol is registered in here.
162 */
163
1da177e4 164static DEFINE_SPINLOCK(net_family_lock);
190683a9 165static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 166
1da177e4
LT
167/*
168 * Statistics counters of the socket lists
169 */
170
c6d409cf 171static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
172
173/*
89bddce5
SH
174 * Support routines.
175 * Move socket addresses back and forth across the kernel/user
176 * divide and look after the messy bits.
1da177e4
LT
177 */
178
1da177e4
LT
179/**
180 * move_addr_to_kernel - copy a socket address into kernel space
181 * @uaddr: Address in user space
182 * @kaddr: Address in kernel space
183 * @ulen: Length in user space
184 *
185 * The address is copied into kernel space. If the provided address is
186 * too long an error code of -EINVAL is returned. If the copy gives
187 * invalid addresses -EFAULT is returned. On a success 0 is returned.
188 */
189
43db362d 190int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 191{
230b1839 192 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 193 return -EINVAL;
89bddce5 194 if (ulen == 0)
1da177e4 195 return 0;
89bddce5 196 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 197 return -EFAULT;
3ec3b2fb 198 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
199}
200
201/**
202 * move_addr_to_user - copy an address to user space
203 * @kaddr: kernel space address
204 * @klen: length of address in kernel
205 * @uaddr: user space address
206 * @ulen: pointer to user length field
207 *
208 * The value pointed to by ulen on entry is the buffer length available.
209 * This is overwritten with the buffer space used. -EINVAL is returned
210 * if an overlong buffer is specified or a negative buffer size. -EFAULT
211 * is returned if either the buffer or the length field are not
212 * accessible.
213 * After copying the data up to the limit the user specifies, the true
214 * length of the data is written over the length limit the user
215 * specified. Zero is returned for a success.
216 */
89bddce5 217
43db362d 218static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 219 void __user *uaddr, int __user *ulen)
1da177e4
LT
220{
221 int err;
222 int len;
223
68c6beb3 224 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
225 err = get_user(len, ulen);
226 if (err)
1da177e4 227 return err;
89bddce5
SH
228 if (len > klen)
229 len = klen;
68c6beb3 230 if (len < 0)
1da177e4 231 return -EINVAL;
89bddce5 232 if (len) {
d6fe3945
SG
233 if (audit_sockaddr(klen, kaddr))
234 return -ENOMEM;
89bddce5 235 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
236 return -EFAULT;
237 }
238 /*
89bddce5
SH
239 * "fromlen shall refer to the value before truncation.."
240 * 1003.1g
1da177e4
LT
241 */
242 return __put_user(klen, ulen);
243}
244
e18b890b 245static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
246
247static struct inode *sock_alloc_inode(struct super_block *sb)
248{
249 struct socket_alloc *ei;
eaefd110 250 struct socket_wq *wq;
89bddce5 251
e94b1766 252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
253 if (!ei)
254 return NULL;
eaefd110
ED
255 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
256 if (!wq) {
43815482
ED
257 kmem_cache_free(sock_inode_cachep, ei);
258 return NULL;
259 }
eaefd110
ED
260 init_waitqueue_head(&wq->wait);
261 wq->fasync_list = NULL;
262 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 263
1da177e4
LT
264 ei->socket.state = SS_UNCONNECTED;
265 ei->socket.flags = 0;
266 ei->socket.ops = NULL;
267 ei->socket.sk = NULL;
268 ei->socket.file = NULL;
1da177e4
LT
269
270 return &ei->vfs_inode;
271}
272
273static void sock_destroy_inode(struct inode *inode)
274{
43815482 275 struct socket_alloc *ei;
eaefd110 276 struct socket_wq *wq;
43815482
ED
277
278 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 279 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 280 kfree_rcu(wq, rcu);
43815482 281 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
282}
283
51cc5068 284static void init_once(void *foo)
1da177e4 285{
89bddce5 286 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 287
a35afb83 288 inode_init_once(&ei->vfs_inode);
1da177e4 289}
89bddce5 290
1da177e4
LT
291static int init_inodecache(void)
292{
293 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
294 sizeof(struct socket_alloc),
295 0,
296 (SLAB_HWCACHE_ALIGN |
297 SLAB_RECLAIM_ACCOUNT |
298 SLAB_MEM_SPREAD),
20c2df83 299 init_once);
1da177e4
LT
300 if (sock_inode_cachep == NULL)
301 return -ENOMEM;
302 return 0;
303}
304
b87221de 305static const struct super_operations sockfs_ops = {
c6d409cf
ED
306 .alloc_inode = sock_alloc_inode,
307 .destroy_inode = sock_destroy_inode,
308 .statfs = simple_statfs,
1da177e4
LT
309};
310
c23fbb6b
ED
311/*
312 * sockfs_dname() is called from d_path().
313 */
314static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
315{
316 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
317 dentry->d_inode->i_ino);
318}
319
3ba13d17 320static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 321 .d_dname = sockfs_dname,
1da177e4
LT
322};
323
c74a1cbb
AV
324static struct dentry *sockfs_mount(struct file_system_type *fs_type,
325 int flags, const char *dev_name, void *data)
326{
327 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
328 &sockfs_dentry_operations, SOCKFS_MAGIC);
329}
330
331static struct vfsmount *sock_mnt __read_mostly;
332
333static struct file_system_type sock_fs_type = {
334 .name = "sockfs",
335 .mount = sockfs_mount,
336 .kill_sb = kill_anon_super,
337};
338
1da177e4
LT
339/*
340 * Obtains the first available file descriptor and sets it up for use.
341 *
39d8c1b6
DM
342 * These functions create file structures and maps them to fd space
343 * of the current process. On success it returns file descriptor
1da177e4
LT
344 * and file struct implicitly stored in sock->file.
345 * Note that another thread may close file descriptor before we return
346 * from this function. We use the fact that now we do not refer
347 * to socket after mapping. If one day we will need it, this
348 * function will increment ref. count on file by 1.
349 *
350 * In any case returned fd MAY BE not valid!
351 * This race condition is unavoidable
352 * with shared fd spaces, we cannot solve it inside kernel,
353 * but we take care of internal coherence yet.
354 */
355
aab174f0 356struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 357{
7cbe66b6 358 struct qstr name = { .name = "" };
2c48b9c4 359 struct path path;
7cbe66b6 360 struct file *file;
1da177e4 361
600e1779
MY
362 if (dname) {
363 name.name = dname;
364 name.len = strlen(name.name);
365 } else if (sock->sk) {
366 name.name = sock->sk->sk_prot_creator->name;
367 name.len = strlen(name.name);
368 }
4b936885 369 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
370 if (unlikely(!path.dentry))
371 return ERR_PTR(-ENOMEM);
2c48b9c4 372 path.mnt = mntget(sock_mnt);
39d8c1b6 373
2c48b9c4 374 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 375 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 376
2c48b9c4 377 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 378 &socket_file_ops);
39b65252 379 if (unlikely(IS_ERR(file))) {
cc3808f8 380 /* drop dentry, keep inode */
7de9c6ee 381 ihold(path.dentry->d_inode);
2c48b9c4 382 path_put(&path);
39b65252 383 return file;
cc3808f8
AV
384 }
385
386 sock->file = file;
77d27200 387 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 388 file->private_data = sock;
28407630 389 return file;
39d8c1b6 390}
56b31d1c 391EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 392
56b31d1c 393static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
394{
395 struct file *newfile;
28407630
AV
396 int fd = get_unused_fd_flags(flags);
397 if (unlikely(fd < 0))
398 return fd;
39d8c1b6 399
aab174f0 400 newfile = sock_alloc_file(sock, flags, NULL);
28407630 401 if (likely(!IS_ERR(newfile))) {
39d8c1b6 402 fd_install(fd, newfile);
28407630
AV
403 return fd;
404 }
7cbe66b6 405
28407630
AV
406 put_unused_fd(fd);
407 return PTR_ERR(newfile);
1da177e4
LT
408}
409
406a3c63 410struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 411{
6cb153ca
BL
412 if (file->f_op == &socket_file_ops)
413 return file->private_data; /* set in sock_map_fd */
414
23bb80d2
ED
415 *err = -ENOTSOCK;
416 return NULL;
6cb153ca 417}
406a3c63 418EXPORT_SYMBOL(sock_from_file);
6cb153ca 419
1da177e4 420/**
c6d409cf 421 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
422 * @fd: file handle
423 * @err: pointer to an error code return
424 *
425 * The file handle passed in is locked and the socket it is bound
426 * too is returned. If an error occurs the err pointer is overwritten
427 * with a negative errno code and NULL is returned. The function checks
428 * for both invalid handles and passing a handle which is not a socket.
429 *
430 * On a success the socket object pointer is returned.
431 */
432
433struct socket *sockfd_lookup(int fd, int *err)
434{
435 struct file *file;
1da177e4
LT
436 struct socket *sock;
437
89bddce5
SH
438 file = fget(fd);
439 if (!file) {
1da177e4
LT
440 *err = -EBADF;
441 return NULL;
442 }
89bddce5 443
6cb153ca
BL
444 sock = sock_from_file(file, err);
445 if (!sock)
1da177e4 446 fput(file);
6cb153ca
BL
447 return sock;
448}
c6d409cf 449EXPORT_SYMBOL(sockfd_lookup);
1da177e4 450
6cb153ca
BL
451static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
452{
00e188ef 453 struct fd f = fdget(fd);
6cb153ca
BL
454 struct socket *sock;
455
3672558c 456 *err = -EBADF;
00e188ef
AV
457 if (f.file) {
458 sock = sock_from_file(f.file, err);
459 if (likely(sock)) {
460 *fput_needed = f.flags;
6cb153ca 461 return sock;
00e188ef
AV
462 }
463 fdput(f);
1da177e4 464 }
6cb153ca 465 return NULL;
1da177e4
LT
466}
467
600e1779
MY
468#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
469#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
470#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
471static ssize_t sockfs_getxattr(struct dentry *dentry,
472 const char *name, void *value, size_t size)
473{
474 const char *proto_name;
475 size_t proto_size;
476 int error;
477
478 error = -ENODATA;
479 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
480 proto_name = dentry->d_name.name;
481 proto_size = strlen(proto_name);
482
483 if (value) {
484 error = -ERANGE;
485 if (proto_size + 1 > size)
486 goto out;
487
488 strncpy(value, proto_name, proto_size + 1);
489 }
490 error = proto_size + 1;
491 }
492
493out:
494 return error;
495}
496
497static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
498 size_t size)
499{
500 ssize_t len;
501 ssize_t used = 0;
502
503 len = security_inode_listsecurity(dentry->d_inode, buffer, size);
504 if (len < 0)
505 return len;
506 used += len;
507 if (buffer) {
508 if (size < used)
509 return -ERANGE;
510 buffer += len;
511 }
512
513 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
514 used += len;
515 if (buffer) {
516 if (size < used)
517 return -ERANGE;
518 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
519 buffer += len;
520 }
521
522 return used;
523}
524
525static const struct inode_operations sockfs_inode_ops = {
526 .getxattr = sockfs_getxattr,
527 .listxattr = sockfs_listxattr,
528};
529
1da177e4
LT
530/**
531 * sock_alloc - allocate a socket
89bddce5 532 *
1da177e4
LT
533 * Allocate a new inode and socket object. The two are bound together
534 * and initialised. The socket is then returned. If we are out of inodes
535 * NULL is returned.
536 */
537
538static struct socket *sock_alloc(void)
539{
89bddce5
SH
540 struct inode *inode;
541 struct socket *sock;
1da177e4 542
a209dfc7 543 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
544 if (!inode)
545 return NULL;
546
547 sock = SOCKET_I(inode);
548
29a020d3 549 kmemcheck_annotate_bitfield(sock, type);
85fe4025 550 inode->i_ino = get_next_ino();
89bddce5 551 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
552 inode->i_uid = current_fsuid();
553 inode->i_gid = current_fsgid();
600e1779 554 inode->i_op = &sockfs_inode_ops;
1da177e4 555
19e8d69c 556 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
557 return sock;
558}
559
1da177e4
LT
560/**
561 * sock_release - close a socket
562 * @sock: socket to close
563 *
564 * The socket is released from the protocol stack if it has a release
565 * callback, and the inode is then released if the socket is bound to
89bddce5 566 * an inode not a file.
1da177e4 567 */
89bddce5 568
1da177e4
LT
569void sock_release(struct socket *sock)
570{
571 if (sock->ops) {
572 struct module *owner = sock->ops->owner;
573
574 sock->ops->release(sock);
575 sock->ops = NULL;
576 module_put(owner);
577 }
578
eaefd110 579 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 580 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 581
b09e786b
MP
582 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
583 return;
584
19e8d69c 585 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
586 if (!sock->file) {
587 iput(SOCK_INODE(sock));
588 return;
589 }
89bddce5 590 sock->file = NULL;
1da177e4 591}
c6d409cf 592EXPORT_SYMBOL(sock_release);
1da177e4 593
67cc0d40 594void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
20d49473 595{
140c55d4
ED
596 u8 flags = *tx_flags;
597
b9f40e21 598 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
599 flags |= SKBTX_HW_TSTAMP;
600
b9f40e21 601 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
602 flags |= SKBTX_SW_TSTAMP;
603
e7fd2885 604 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
605 flags |= SKBTX_SCHED_TSTAMP;
606
e1c8a607 607 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
140c55d4 608 flags |= SKBTX_ACK_TSTAMP;
e7fd2885 609
140c55d4 610 *tx_flags = flags;
20d49473 611}
67cc0d40 612EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 613
228e548e
AB
614static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
615 struct msghdr *msg, size_t size)
1da177e4
LT
616{
617 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4
LT
618
619 si->sock = sock;
620 si->scm = NULL;
621 si->msg = msg;
622 si->size = size;
623
1da177e4
LT
624 return sock->ops->sendmsg(iocb, sock, msg, size);
625}
626
228e548e
AB
627static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
628 struct msghdr *msg, size_t size)
629{
630 int err = security_socket_sendmsg(sock, msg, size);
631
632 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
633}
634
0cf00c6f
GZ
635static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg,
636 size_t size, bool nosec)
1da177e4
LT
637{
638 struct kiocb iocb;
639 struct sock_iocb siocb;
640 int ret;
641
642 init_sync_kiocb(&iocb, NULL);
643 iocb.private = &siocb;
0cf00c6f
GZ
644 ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
645 __sock_sendmsg(&iocb, sock, msg, size);
1da177e4
LT
646 if (-EIOCBQUEUED == ret)
647 ret = wait_on_sync_kiocb(&iocb);
648 return ret;
649}
0cf00c6f
GZ
650
651int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
652{
653 return do_sock_sendmsg(sock, msg, size, false);
654}
c6d409cf 655EXPORT_SYMBOL(sock_sendmsg);
1da177e4 656
894dc24c 657static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e 658{
0cf00c6f 659 return do_sock_sendmsg(sock, msg, size, true);
228e548e
AB
660}
661
1da177e4
LT
662int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
663 struct kvec *vec, size_t num, size_t size)
664{
665 mm_segment_t oldfs = get_fs();
666 int result;
667
668 set_fs(KERNEL_DS);
669 /*
670 * the following is safe, since for compiler definitions of kvec and
671 * iovec are identical, yielding the same in-core layout and alignment
672 */
c0371da6 673 iov_iter_init(&msg->msg_iter, WRITE, (struct iovec *)vec, num, size);
1da177e4
LT
674 result = sock_sendmsg(sock, msg, size);
675 set_fs(oldfs);
676 return result;
677}
c6d409cf 678EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 679
92f37fd2
ED
680/*
681 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
682 */
683void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
684 struct sk_buff *skb)
685{
20d49473 686 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 687 struct scm_timestamping tss;
20d49473
PO
688 int empty = 1;
689 struct skb_shared_hwtstamps *shhwtstamps =
690 skb_hwtstamps(skb);
691
692 /* Race occurred between timestamp enabling and packet
693 receiving. Fill in the current time for now. */
694 if (need_software_tstamp && skb->tstamp.tv64 == 0)
695 __net_timestamp(skb);
696
697 if (need_software_tstamp) {
698 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
699 struct timeval tv;
700 skb_get_timestamp(skb, &tv);
701 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
702 sizeof(tv), &tv);
703 } else {
f24b9be5
WB
704 struct timespec ts;
705 skb_get_timestampns(skb, &ts);
20d49473 706 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 707 sizeof(ts), &ts);
20d49473
PO
708 }
709 }
710
f24b9be5 711 memset(&tss, 0, sizeof(tss));
c199105d 712 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 713 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 714 empty = 0;
4d276eb6 715 if (shhwtstamps &&
b9f40e21 716 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 717 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 718 empty = 0;
20d49473
PO
719 if (!empty)
720 put_cmsg(msg, SOL_SOCKET,
f24b9be5 721 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 722}
7c81fd8b
ACM
723EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
724
6e3e939f
JB
725void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
726 struct sk_buff *skb)
727{
728 int ack;
729
730 if (!sock_flag(sk, SOCK_WIFI_STATUS))
731 return;
732 if (!skb->wifi_acked_valid)
733 return;
734
735 ack = skb->wifi_acked;
736
737 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
738}
739EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
740
11165f14 741static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
742 struct sk_buff *skb)
3b885787
NH
743{
744 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
745 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
746 sizeof(__u32), &skb->dropcount);
747}
748
/*
 * Attach both the timestamp and the drop-count control messages for
 * @skb to @msg, in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 756
a2e27255
ACM
757static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
758 struct msghdr *msg, size_t size, int flags)
1da177e4 759{
1da177e4
LT
760 struct sock_iocb *si = kiocb_to_siocb(iocb);
761
762 si->sock = sock;
763 si->scm = NULL;
764 si->msg = msg;
765 si->size = size;
766 si->flags = flags;
767
1da177e4
LT
768 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
769}
770
a2e27255
ACM
771static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
772 struct msghdr *msg, size_t size, int flags)
773{
774 int err = security_socket_recvmsg(sock, msg, size, flags);
775
776 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
777}
778
89bddce5 779int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
780 size_t size, int flags)
781{
782 struct kiocb iocb;
783 struct sock_iocb siocb;
784 int ret;
785
89bddce5 786 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
787 iocb.private = &siocb;
788 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
789 if (-EIOCBQUEUED == ret)
790 ret = wait_on_sync_kiocb(&iocb);
791 return ret;
792}
c6d409cf 793EXPORT_SYMBOL(sock_recvmsg);
1da177e4 794
a2e27255
ACM
795static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
796 size_t size, int flags)
797{
798 struct kiocb iocb;
799 struct sock_iocb siocb;
800 int ret;
801
802 init_sync_kiocb(&iocb, NULL);
803 iocb.private = &siocb;
804 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
805 if (-EIOCBQUEUED == ret)
806 ret = wait_on_sync_kiocb(&iocb);
807 return ret;
808}
809
c1249c0a
ML
810/**
811 * kernel_recvmsg - Receive a message from a socket (kernel space)
812 * @sock: The socket to receive the message from
813 * @msg: Received message
814 * @vec: Input s/g array for message data
815 * @num: Size of input s/g array
816 * @size: Number of bytes to read
817 * @flags: Message flags (MSG_DONTWAIT, etc...)
818 *
819 * On return the msg structure contains the scatter/gather array passed in the
820 * vec argument. The array is modified so that it consists of the unfilled
821 * portion of the original array.
822 *
823 * The returned value is the total number of bytes received, or an error.
824 */
89bddce5
SH
825int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
826 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
827{
828 mm_segment_t oldfs = get_fs();
829 int result;
830
831 set_fs(KERNEL_DS);
832 /*
833 * the following is safe, since for compiler definitions of kvec and
834 * iovec are identical, yielding the same in-core layout and alignment
835 */
c0371da6 836 iov_iter_init(&msg->msg_iter, READ, (struct iovec *)vec, num, size);
1da177e4
LT
837 result = sock_recvmsg(sock, msg, size, flags);
838 set_fs(oldfs);
839 return result;
840}
c6d409cf 841EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 842
ce1d4d3e
CH
843static ssize_t sock_sendpage(struct file *file, struct page *page,
844 int offset, size_t size, loff_t *ppos, int more)
1da177e4 845{
1da177e4
LT
846 struct socket *sock;
847 int flags;
848
ce1d4d3e
CH
849 sock = file->private_data;
850
35f9c09f
ED
851 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
852 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
853 flags |= more;
ce1d4d3e 854
e6949583 855 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 856}
1da177e4 857
9c55e01c 858static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 859 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
860 unsigned int flags)
861{
862 struct socket *sock = file->private_data;
863
997b37da
RDC
864 if (unlikely(!sock->ops->splice_read))
865 return -EINVAL;
866
9c55e01c
JA
867 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
868}
869
ce1d4d3e 870static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 871 struct sock_iocb *siocb)
ce1d4d3e 872{
d29c445b
KO
873 if (!is_sync_kiocb(iocb))
874 BUG();
1da177e4 875
ce1d4d3e 876 siocb->kiocb = iocb;
ce1d4d3e
CH
877 iocb->private = siocb;
878 return siocb;
1da177e4
LT
879}
880
ce1d4d3e 881static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
882 struct file *file, const struct iovec *iov,
883 unsigned long nr_segs)
ce1d4d3e
CH
884{
885 struct socket *sock = file->private_data;
886 size_t size = 0;
887 int i;
1da177e4 888
89bddce5
SH
889 for (i = 0; i < nr_segs; i++)
890 size += iov[i].iov_len;
1da177e4 891
ce1d4d3e
CH
892 msg->msg_name = NULL;
893 msg->msg_namelen = 0;
894 msg->msg_control = NULL;
895 msg->msg_controllen = 0;
c0371da6 896 iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, size);
ce1d4d3e
CH
897 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
898
899 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
900}
901
027445c3
BP
902static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
903 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
904{
905 struct sock_iocb siocb, *x;
906
1da177e4
LT
907 if (pos != 0)
908 return -ESPIPE;
027445c3 909
73a7075e 910 if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
1da177e4
LT
911 return 0;
912
027445c3
BP
913
914 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
915 if (!x)
916 return -ENOMEM;
027445c3 917 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
918}
919
ce1d4d3e 920static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
921 struct file *file, const struct iovec *iov,
922 unsigned long nr_segs)
1da177e4 923{
ce1d4d3e
CH
924 struct socket *sock = file->private_data;
925 size_t size = 0;
926 int i;
1da177e4 927
89bddce5
SH
928 for (i = 0; i < nr_segs; i++)
929 size += iov[i].iov_len;
1da177e4 930
ce1d4d3e
CH
931 msg->msg_name = NULL;
932 msg->msg_namelen = 0;
933 msg->msg_control = NULL;
934 msg->msg_controllen = 0;
c0371da6 935 iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, size);
ce1d4d3e
CH
936 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
937 if (sock->type == SOCK_SEQPACKET)
938 msg->msg_flags |= MSG_EOR;
1da177e4 939
ce1d4d3e 940 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
941}
942
027445c3
BP
943static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
944 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
945{
946 struct sock_iocb siocb, *x;
1da177e4 947
ce1d4d3e
CH
948 if (pos != 0)
949 return -ESPIPE;
027445c3 950
027445c3 951 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
952 if (!x)
953 return -ENOMEM;
1da177e4 954
027445c3 955 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
956}
957
1da177e4
LT
958/*
959 * Atomic setting of ioctl hooks to avoid race
960 * with module unload.
961 */
962
4a3e2f71 963static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 964static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 965
881d966b 966void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 967{
4a3e2f71 968 mutex_lock(&br_ioctl_mutex);
1da177e4 969 br_ioctl_hook = hook;
4a3e2f71 970 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
971}
972EXPORT_SYMBOL(brioctl_set);
973
4a3e2f71 974static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 975static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 976
881d966b 977void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 978{
4a3e2f71 979 mutex_lock(&vlan_ioctl_mutex);
1da177e4 980 vlan_ioctl_hook = hook;
4a3e2f71 981 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
982}
983EXPORT_SYMBOL(vlan_ioctl_set);
984
4a3e2f71 985static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 986static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 987
89bddce5 988void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 989{
4a3e2f71 990 mutex_lock(&dlci_ioctl_mutex);
1da177e4 991 dlci_ioctl_hook = hook;
4a3e2f71 992 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
993}
994EXPORT_SYMBOL(dlci_ioctl_set);
995
6b96018b
AB
996static long sock_do_ioctl(struct net *net, struct socket *sock,
997 unsigned int cmd, unsigned long arg)
998{
999 int err;
1000 void __user *argp = (void __user *)arg;
1001
1002 err = sock->ops->ioctl(sock, cmd, arg);
1003
1004 /*
1005 * If this ioctl is unknown try to hand it down
1006 * to the NIC driver.
1007 */
1008 if (err == -ENOIOCTLCMD)
1009 err = dev_ioctl(net, cmd, argp);
1010
1011 return err;
1012}
1013
1da177e4
LT
1014/*
1015 * With an ioctl, arg may well be a user mode pointer, but we don't know
1016 * what to do with it - that's up to the protocol still.
1017 */
1018
1019static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1020{
1021 struct socket *sock;
881d966b 1022 struct sock *sk;
1da177e4
LT
1023 void __user *argp = (void __user *)arg;
1024 int pid, err;
881d966b 1025 struct net *net;
1da177e4 1026
b69aee04 1027 sock = file->private_data;
881d966b 1028 sk = sock->sk;
3b1e0a65 1029 net = sock_net(sk);
1da177e4 1030 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1031 err = dev_ioctl(net, cmd, argp);
1da177e4 1032 } else
3d23e349 1033#ifdef CONFIG_WEXT_CORE
1da177e4 1034 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1035 err = dev_ioctl(net, cmd, argp);
1da177e4 1036 } else
3d23e349 1037#endif
89bddce5 1038 switch (cmd) {
1da177e4
LT
1039 case FIOSETOWN:
1040 case SIOCSPGRP:
1041 err = -EFAULT;
1042 if (get_user(pid, (int __user *)argp))
1043 break;
e0b93edd
JL
1044 f_setown(sock->file, pid, 1);
1045 err = 0;
1da177e4
LT
1046 break;
1047 case FIOGETOWN:
1048 case SIOCGPGRP:
609d7fa9 1049 err = put_user(f_getown(sock->file),
89bddce5 1050 (int __user *)argp);
1da177e4
LT
1051 break;
1052 case SIOCGIFBR:
1053 case SIOCSIFBR:
1054 case SIOCBRADDBR:
1055 case SIOCBRDELBR:
1056 err = -ENOPKG;
1057 if (!br_ioctl_hook)
1058 request_module("bridge");
1059
4a3e2f71 1060 mutex_lock(&br_ioctl_mutex);
89bddce5 1061 if (br_ioctl_hook)
881d966b 1062 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1063 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1064 break;
1065 case SIOCGIFVLAN:
1066 case SIOCSIFVLAN:
1067 err = -ENOPKG;
1068 if (!vlan_ioctl_hook)
1069 request_module("8021q");
1070
4a3e2f71 1071 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1072 if (vlan_ioctl_hook)
881d966b 1073 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1074 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1075 break;
1da177e4
LT
1076 case SIOCADDDLCI:
1077 case SIOCDELDLCI:
1078 err = -ENOPKG;
1079 if (!dlci_ioctl_hook)
1080 request_module("dlci");
1081
7512cbf6
PE
1082 mutex_lock(&dlci_ioctl_mutex);
1083 if (dlci_ioctl_hook)
1da177e4 1084 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1085 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1086 break;
1087 default:
6b96018b 1088 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1089 break;
89bddce5 1090 }
1da177e4
LT
1091 return err;
1092}
1093
1094int sock_create_lite(int family, int type, int protocol, struct socket **res)
1095{
1096 int err;
1097 struct socket *sock = NULL;
89bddce5 1098
1da177e4
LT
1099 err = security_socket_create(family, type, protocol, 1);
1100 if (err)
1101 goto out;
1102
1103 sock = sock_alloc();
1104 if (!sock) {
1105 err = -ENOMEM;
1106 goto out;
1107 }
1108
1da177e4 1109 sock->type = type;
7420ed23
VY
1110 err = security_socket_post_create(sock, family, type, protocol, 1);
1111 if (err)
1112 goto out_release;
1113
1da177e4
LT
1114out:
1115 *res = sock;
1116 return err;
7420ed23
VY
1117out_release:
1118 sock_release(sock);
1119 sock = NULL;
1120 goto out;
1da177e4 1121}
c6d409cf 1122EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1123
1124/* No kernel lock held - perfect */
89bddce5 1125static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1126{
cbf55001 1127 unsigned int busy_flag = 0;
1da177e4
LT
1128 struct socket *sock;
1129
1130 /*
89bddce5 1131 * We can't return errors to poll, so it's either yes or no.
1da177e4 1132 */
b69aee04 1133 sock = file->private_data;
2d48d67f 1134
cbf55001 1135 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1136 /* this socket can poll_ll so tell the system call */
cbf55001 1137 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1138
1139 /* once, only if requested by syscall */
cbf55001
ET
1140 if (wait && (wait->_key & POLL_BUSY_LOOP))
1141 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1142 }
1143
cbf55001 1144 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1145}
1146
89bddce5 1147static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1148{
b69aee04 1149 struct socket *sock = file->private_data;
1da177e4
LT
1150
1151 return sock->ops->mmap(file, sock, vma);
1152}
1153
/* Release the socket embedded in @inode; always returns 0. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1159
1160/*
1161 * Update the socket async list
1162 *
1163 * Fasync_list locking strategy.
1164 *
1165 * 1. fasync_list is modified only under process context socket lock
1166 * i.e. under semaphore.
1167 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1168 * or under socket lock
1da177e4
LT
1169 */
1170
1171static int sock_fasync(int fd, struct file *filp, int on)
1172{
989a2979
ED
1173 struct socket *sock = filp->private_data;
1174 struct sock *sk = sock->sk;
eaefd110 1175 struct socket_wq *wq;
1da177e4 1176
989a2979 1177 if (sk == NULL)
1da177e4 1178 return -EINVAL;
1da177e4
LT
1179
1180 lock_sock(sk);
eaefd110
ED
1181 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1182 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1183
eaefd110 1184 if (!wq->fasync_list)
989a2979
ED
1185 sock_reset_flag(sk, SOCK_FASYNC);
1186 else
bcdce719 1187 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1188
989a2979 1189 release_sock(sk);
1da177e4
LT
1190 return 0;
1191}
1192
43815482 1193/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1194
1195int sock_wake_async(struct socket *sock, int how, int band)
1196{
43815482
ED
1197 struct socket_wq *wq;
1198
1199 if (!sock)
1200 return -1;
1201 rcu_read_lock();
1202 wq = rcu_dereference(sock->wq);
1203 if (!wq || !wq->fasync_list) {
1204 rcu_read_unlock();
1da177e4 1205 return -1;
43815482 1206 }
89bddce5 1207 switch (how) {
8d8ad9d7 1208 case SOCK_WAKE_WAITD:
1da177e4
LT
1209 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1210 break;
1211 goto call_kill;
8d8ad9d7 1212 case SOCK_WAKE_SPACE:
1da177e4
LT
1213 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1214 break;
1215 /* fall through */
8d8ad9d7 1216 case SOCK_WAKE_IO:
89bddce5 1217call_kill:
43815482 1218 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1219 break;
8d8ad9d7 1220 case SOCK_WAKE_URG:
43815482 1221 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1222 }
43815482 1223 rcu_read_unlock();
1da177e4
LT
1224 return 0;
1225}
c6d409cf 1226EXPORT_SYMBOL(sock_wake_async);
1da177e4 1227
721db93a 1228int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1229 struct socket **res, int kern)
1da177e4
LT
1230{
1231 int err;
1232 struct socket *sock;
55737fda 1233 const struct net_proto_family *pf;
1da177e4
LT
1234
1235 /*
89bddce5 1236 * Check protocol is in range
1da177e4
LT
1237 */
1238 if (family < 0 || family >= NPROTO)
1239 return -EAFNOSUPPORT;
1240 if (type < 0 || type >= SOCK_MAX)
1241 return -EINVAL;
1242
1243 /* Compatibility.
1244
1245 This uglymoron is moved from INET layer to here to avoid
1246 deadlock in module load.
1247 */
1248 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1249 static int warned;
1da177e4
LT
1250 if (!warned) {
1251 warned = 1;
3410f22e
YY
1252 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1253 current->comm);
1da177e4
LT
1254 }
1255 family = PF_PACKET;
1256 }
1257
1258 err = security_socket_create(family, type, protocol, kern);
1259 if (err)
1260 return err;
89bddce5 1261
55737fda
SH
1262 /*
1263 * Allocate the socket and allow the family to set things up. if
1264 * the protocol is 0, the family is instructed to select an appropriate
1265 * default.
1266 */
1267 sock = sock_alloc();
1268 if (!sock) {
e87cc472 1269 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1270 return -ENFILE; /* Not exactly a match, but its the
1271 closest posix thing */
1272 }
1273
1274 sock->type = type;
1275
95a5afca 1276#ifdef CONFIG_MODULES
89bddce5
SH
1277 /* Attempt to load a protocol module if the find failed.
1278 *
1279 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1280 * requested real, full-featured networking support upon configuration.
1281 * Otherwise module support will break!
1282 */
190683a9 1283 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1284 request_module("net-pf-%d", family);
1da177e4
LT
1285#endif
1286
55737fda
SH
1287 rcu_read_lock();
1288 pf = rcu_dereference(net_families[family]);
1289 err = -EAFNOSUPPORT;
1290 if (!pf)
1291 goto out_release;
1da177e4
LT
1292
1293 /*
1294 * We will call the ->create function, that possibly is in a loadable
1295 * module, so we have to bump that loadable module refcnt first.
1296 */
55737fda 1297 if (!try_module_get(pf->owner))
1da177e4
LT
1298 goto out_release;
1299
55737fda
SH
1300 /* Now protected by module ref count */
1301 rcu_read_unlock();
1302
3f378b68 1303 err = pf->create(net, sock, protocol, kern);
55737fda 1304 if (err < 0)
1da177e4 1305 goto out_module_put;
a79af59e 1306
1da177e4
LT
1307 /*
1308 * Now to bump the refcnt of the [loadable] module that owns this
1309 * socket at sock_release time we decrement its refcnt.
1310 */
55737fda
SH
1311 if (!try_module_get(sock->ops->owner))
1312 goto out_module_busy;
1313
1da177e4
LT
1314 /*
1315 * Now that we're done with the ->create function, the [loadable]
1316 * module can have its refcnt decremented
1317 */
55737fda 1318 module_put(pf->owner);
7420ed23
VY
1319 err = security_socket_post_create(sock, family, type, protocol, kern);
1320 if (err)
3b185525 1321 goto out_sock_release;
55737fda 1322 *res = sock;
1da177e4 1323
55737fda
SH
1324 return 0;
1325
1326out_module_busy:
1327 err = -EAFNOSUPPORT;
1da177e4 1328out_module_put:
55737fda
SH
1329 sock->ops = NULL;
1330 module_put(pf->owner);
1331out_sock_release:
1da177e4 1332 sock_release(sock);
55737fda
SH
1333 return err;
1334
1335out_release:
1336 rcu_read_unlock();
1337 goto out_sock_release;
1da177e4 1338}
721db93a 1339EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1340
1341int sock_create(int family, int type, int protocol, struct socket **res)
1342{
1b8d7ae4 1343 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1344}
c6d409cf 1345EXPORT_SYMBOL(sock_create);
1da177e4
LT
1346
1347int sock_create_kern(int family, int type, int protocol, struct socket **res)
1348{
1b8d7ae4 1349 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1350}
c6d409cf 1351EXPORT_SYMBOL(sock_create_kern);
1da177e4 1352
3e0fa65f 1353SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1354{
1355 int retval;
1356 struct socket *sock;
a677a039
UD
1357 int flags;
1358
e38b36f3
UD
1359 /* Check the SOCK_* constants for consistency. */
1360 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1361 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1362 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1363 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1364
a677a039 1365 flags = type & ~SOCK_TYPE_MASK;
77d27200 1366 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1367 return -EINVAL;
1368 type &= SOCK_TYPE_MASK;
1da177e4 1369
aaca0bdc
UD
1370 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1371 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1372
1da177e4
LT
1373 retval = sock_create(family, type, protocol, &sock);
1374 if (retval < 0)
1375 goto out;
1376
77d27200 1377 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1378 if (retval < 0)
1379 goto out_release;
1380
1381out:
1382 /* It may be already another descriptor 8) Not kernel problem. */
1383 return retval;
1384
1385out_release:
1386 sock_release(sock);
1387 return retval;
1388}
1389
1390/*
1391 * Create a pair of connected sockets.
1392 */
1393
3e0fa65f
HC
1394SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1395 int __user *, usockvec)
1da177e4
LT
1396{
1397 struct socket *sock1, *sock2;
1398 int fd1, fd2, err;
db349509 1399 struct file *newfile1, *newfile2;
a677a039
UD
1400 int flags;
1401
1402 flags = type & ~SOCK_TYPE_MASK;
77d27200 1403 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1404 return -EINVAL;
1405 type &= SOCK_TYPE_MASK;
1da177e4 1406
aaca0bdc
UD
1407 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1408 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1409
1da177e4
LT
1410 /*
1411 * Obtain the first socket and check if the underlying protocol
1412 * supports the socketpair call.
1413 */
1414
1415 err = sock_create(family, type, protocol, &sock1);
1416 if (err < 0)
1417 goto out;
1418
1419 err = sock_create(family, type, protocol, &sock2);
1420 if (err < 0)
1421 goto out_release_1;
1422
1423 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1424 if (err < 0)
1da177e4
LT
1425 goto out_release_both;
1426
28407630 1427 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1428 if (unlikely(fd1 < 0)) {
1429 err = fd1;
db349509 1430 goto out_release_both;
bf3c23d1 1431 }
d73aa286 1432
28407630 1433 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1434 if (unlikely(fd2 < 0)) {
1435 err = fd2;
d73aa286 1436 goto out_put_unused_1;
28407630
AV
1437 }
1438
aab174f0 1439 newfile1 = sock_alloc_file(sock1, flags, NULL);
28407630
AV
1440 if (unlikely(IS_ERR(newfile1))) {
1441 err = PTR_ERR(newfile1);
d73aa286 1442 goto out_put_unused_both;
28407630
AV
1443 }
1444
aab174f0 1445 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1446 if (IS_ERR(newfile2)) {
1447 err = PTR_ERR(newfile2);
d73aa286 1448 goto out_fput_1;
db349509
AV
1449 }
1450
d73aa286
YD
1451 err = put_user(fd1, &usockvec[0]);
1452 if (err)
1453 goto out_fput_both;
1454
1455 err = put_user(fd2, &usockvec[1]);
1456 if (err)
1457 goto out_fput_both;
1458
157cf649 1459 audit_fd_pair(fd1, fd2);
d73aa286 1460
db349509
AV
1461 fd_install(fd1, newfile1);
1462 fd_install(fd2, newfile2);
1da177e4
LT
1463 /* fd1 and fd2 may be already another descriptors.
1464 * Not kernel problem.
1465 */
1466
d73aa286 1467 return 0;
1da177e4 1468
d73aa286
YD
1469out_fput_both:
1470 fput(newfile2);
1471 fput(newfile1);
1472 put_unused_fd(fd2);
1473 put_unused_fd(fd1);
1474 goto out;
1475
1476out_fput_1:
1477 fput(newfile1);
1478 put_unused_fd(fd2);
1479 put_unused_fd(fd1);
1480 sock_release(sock2);
1481 goto out;
1da177e4 1482
d73aa286
YD
1483out_put_unused_both:
1484 put_unused_fd(fd2);
1485out_put_unused_1:
1486 put_unused_fd(fd1);
1da177e4 1487out_release_both:
89bddce5 1488 sock_release(sock2);
1da177e4 1489out_release_1:
89bddce5 1490 sock_release(sock1);
1da177e4
LT
1491out:
1492 return err;
1493}
1494
1da177e4
LT
1495/*
1496 * Bind a name to a socket. Nothing much to do here since it's
1497 * the protocol's responsibility to handle the local address.
1498 *
1499 * We move the socket address to kernel space before we call
1500 * the protocol layer (having also checked the address is ok).
1501 */
1502
20f37034 1503SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1504{
1505 struct socket *sock;
230b1839 1506 struct sockaddr_storage address;
6cb153ca 1507 int err, fput_needed;
1da177e4 1508
89bddce5 1509 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1510 if (sock) {
43db362d 1511 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1512 if (err >= 0) {
1513 err = security_socket_bind(sock,
230b1839 1514 (struct sockaddr *)&address,
89bddce5 1515 addrlen);
6cb153ca
BL
1516 if (!err)
1517 err = sock->ops->bind(sock,
89bddce5 1518 (struct sockaddr *)
230b1839 1519 &address, addrlen);
1da177e4 1520 }
6cb153ca 1521 fput_light(sock->file, fput_needed);
89bddce5 1522 }
1da177e4
LT
1523 return err;
1524}
1525
1da177e4
LT
1526/*
1527 * Perform a listen. Basically, we allow the protocol to do anything
1528 * necessary for a listen, and if that works, we mark the socket as
1529 * ready for listening.
1530 */
1531
3e0fa65f 1532SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1533{
1534 struct socket *sock;
6cb153ca 1535 int err, fput_needed;
b8e1f9b5 1536 int somaxconn;
89bddce5
SH
1537
1538 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1539 if (sock) {
8efa6e93 1540 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1541 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1542 backlog = somaxconn;
1da177e4
LT
1543
1544 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1545 if (!err)
1546 err = sock->ops->listen(sock, backlog);
1da177e4 1547
6cb153ca 1548 fput_light(sock->file, fput_needed);
1da177e4
LT
1549 }
1550 return err;
1551}
1552
1da177e4
LT
1553/*
1554 * For accept, we attempt to create a new socket, set up the link
1555 * with the client, wake up the client, then return the new
1556 * connected fd. We collect the address of the connector in kernel
1557 * space and move it to user at the very end. This is unclean because
1558 * we open the socket then return an error.
1559 *
1560 * 1003.1g adds the ability to recvmsg() to query connection pending
1561 * status to recvmsg. We need to add that support in a way thats
1562 * clean when we restucture accept also.
1563 */
1564
20f37034
HC
1565SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1566 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1567{
1568 struct socket *sock, *newsock;
39d8c1b6 1569 struct file *newfile;
6cb153ca 1570 int err, len, newfd, fput_needed;
230b1839 1571 struct sockaddr_storage address;
1da177e4 1572
77d27200 1573 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1574 return -EINVAL;
1575
1576 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1577 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1578
6cb153ca 1579 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1580 if (!sock)
1581 goto out;
1582
1583 err = -ENFILE;
c6d409cf
ED
1584 newsock = sock_alloc();
1585 if (!newsock)
1da177e4
LT
1586 goto out_put;
1587
1588 newsock->type = sock->type;
1589 newsock->ops = sock->ops;
1590
1da177e4
LT
1591 /*
1592 * We don't need try_module_get here, as the listening socket (sock)
1593 * has the protocol module (sock->ops->owner) held.
1594 */
1595 __module_get(newsock->ops->owner);
1596
28407630 1597 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1598 if (unlikely(newfd < 0)) {
1599 err = newfd;
9a1875e6
DM
1600 sock_release(newsock);
1601 goto out_put;
39d8c1b6 1602 }
aab174f0 1603 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
28407630
AV
1604 if (unlikely(IS_ERR(newfile))) {
1605 err = PTR_ERR(newfile);
1606 put_unused_fd(newfd);
1607 sock_release(newsock);
1608 goto out_put;
1609 }
39d8c1b6 1610
a79af59e
FF
1611 err = security_socket_accept(sock, newsock);
1612 if (err)
39d8c1b6 1613 goto out_fd;
a79af59e 1614
1da177e4
LT
1615 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1616 if (err < 0)
39d8c1b6 1617 goto out_fd;
1da177e4
LT
1618
1619 if (upeer_sockaddr) {
230b1839 1620 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1621 &len, 2) < 0) {
1da177e4 1622 err = -ECONNABORTED;
39d8c1b6 1623 goto out_fd;
1da177e4 1624 }
43db362d 1625 err = move_addr_to_user(&address,
230b1839 1626 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1627 if (err < 0)
39d8c1b6 1628 goto out_fd;
1da177e4
LT
1629 }
1630
1631 /* File flags are not inherited via accept() unlike another OSes. */
1632
39d8c1b6
DM
1633 fd_install(newfd, newfile);
1634 err = newfd;
1da177e4 1635
1da177e4 1636out_put:
6cb153ca 1637 fput_light(sock->file, fput_needed);
1da177e4
LT
1638out:
1639 return err;
39d8c1b6 1640out_fd:
9606a216 1641 fput(newfile);
39d8c1b6 1642 put_unused_fd(newfd);
1da177e4
LT
1643 goto out_put;
1644}
1645
20f37034
HC
1646SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1647 int __user *, upeer_addrlen)
aaca0bdc 1648{
de11defe 1649 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1650}
1651
1da177e4
LT
1652/*
1653 * Attempt to connect to a socket with the server address. The address
1654 * is in user space so we verify it is OK and move it to kernel space.
1655 *
1656 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1657 * break bindings
1658 *
1659 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1660 * other SEQPACKET protocols that take time to connect() as it doesn't
1661 * include the -EINPROGRESS status for such sockets.
1662 */
1663
20f37034
HC
1664SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1665 int, addrlen)
1da177e4
LT
1666{
1667 struct socket *sock;
230b1839 1668 struct sockaddr_storage address;
6cb153ca 1669 int err, fput_needed;
1da177e4 1670
6cb153ca 1671 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1672 if (!sock)
1673 goto out;
43db362d 1674 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1675 if (err < 0)
1676 goto out_put;
1677
89bddce5 1678 err =
230b1839 1679 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1680 if (err)
1681 goto out_put;
1682
230b1839 1683 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1684 sock->file->f_flags);
1685out_put:
6cb153ca 1686 fput_light(sock->file, fput_needed);
1da177e4
LT
1687out:
1688 return err;
1689}
1690
1691/*
1692 * Get the local address ('name') of a socket object. Move the obtained
1693 * name to user space.
1694 */
1695
20f37034
HC
1696SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1697 int __user *, usockaddr_len)
1da177e4
LT
1698{
1699 struct socket *sock;
230b1839 1700 struct sockaddr_storage address;
6cb153ca 1701 int len, err, fput_needed;
89bddce5 1702
6cb153ca 1703 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1704 if (!sock)
1705 goto out;
1706
1707 err = security_socket_getsockname(sock);
1708 if (err)
1709 goto out_put;
1710
230b1839 1711 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1712 if (err)
1713 goto out_put;
43db362d 1714 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1715
1716out_put:
6cb153ca 1717 fput_light(sock->file, fput_needed);
1da177e4
LT
1718out:
1719 return err;
1720}
1721
1722/*
1723 * Get the remote address ('name') of a socket object. Move the obtained
1724 * name to user space.
1725 */
1726
20f37034
HC
1727SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1728 int __user *, usockaddr_len)
1da177e4
LT
1729{
1730 struct socket *sock;
230b1839 1731 struct sockaddr_storage address;
6cb153ca 1732 int len, err, fput_needed;
1da177e4 1733
89bddce5
SH
1734 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1735 if (sock != NULL) {
1da177e4
LT
1736 err = security_socket_getpeername(sock);
1737 if (err) {
6cb153ca 1738 fput_light(sock->file, fput_needed);
1da177e4
LT
1739 return err;
1740 }
1741
89bddce5 1742 err =
230b1839 1743 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1744 1);
1da177e4 1745 if (!err)
43db362d 1746 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1747 usockaddr_len);
6cb153ca 1748 fput_light(sock->file, fput_needed);
1da177e4
LT
1749 }
1750 return err;
1751}
1752
1753/*
1754 * Send a datagram to a given address. We move the address into kernel
1755 * space and check the user space data area is readable before invoking
1756 * the protocol.
1757 */
1758
3e0fa65f 1759SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1760 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1761 int, addr_len)
1da177e4
LT
1762{
1763 struct socket *sock;
230b1839 1764 struct sockaddr_storage address;
1da177e4
LT
1765 int err;
1766 struct msghdr msg;
1767 struct iovec iov;
6cb153ca 1768 int fput_needed;
6cb153ca 1769
253eacc0
LT
1770 if (len > INT_MAX)
1771 len = INT_MAX;
de0fa95c
PE
1772 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1773 if (!sock)
4387ff75 1774 goto out;
6cb153ca 1775
89bddce5
SH
1776 iov.iov_base = buff;
1777 iov.iov_len = len;
1778 msg.msg_name = NULL;
c0371da6 1779 iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, len);
89bddce5
SH
1780 msg.msg_control = NULL;
1781 msg.msg_controllen = 0;
1782 msg.msg_namelen = 0;
6cb153ca 1783 if (addr) {
43db362d 1784 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1785 if (err < 0)
1786 goto out_put;
230b1839 1787 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1788 msg.msg_namelen = addr_len;
1da177e4
LT
1789 }
1790 if (sock->file->f_flags & O_NONBLOCK)
1791 flags |= MSG_DONTWAIT;
1792 msg.msg_flags = flags;
1793 err = sock_sendmsg(sock, &msg, len);
1794
89bddce5 1795out_put:
de0fa95c 1796 fput_light(sock->file, fput_needed);
4387ff75 1797out:
1da177e4
LT
1798 return err;
1799}
1800
1801/*
89bddce5 1802 * Send a datagram down a socket.
1da177e4
LT
1803 */
1804
3e0fa65f 1805SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1806 unsigned int, flags)
1da177e4
LT
1807{
1808 return sys_sendto(fd, buff, len, flags, NULL, 0);
1809}
1810
1811/*
89bddce5 1812 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1813 * sender. We verify the buffers are writable and if needed move the
1814 * sender address from kernel to user space.
1815 */
1816
3e0fa65f 1817SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1818 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1819 int __user *, addr_len)
1da177e4
LT
1820{
1821 struct socket *sock;
1822 struct iovec iov;
1823 struct msghdr msg;
230b1839 1824 struct sockaddr_storage address;
89bddce5 1825 int err, err2;
6cb153ca
BL
1826 int fput_needed;
1827
253eacc0
LT
1828 if (size > INT_MAX)
1829 size = INT_MAX;
de0fa95c 1830 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1831 if (!sock)
de0fa95c 1832 goto out;
1da177e4 1833
89bddce5
SH
1834 msg.msg_control = NULL;
1835 msg.msg_controllen = 0;
89bddce5
SH
1836 iov.iov_len = size;
1837 iov.iov_base = ubuf;
c0371da6 1838 iov_iter_init(&msg.msg_iter, READ, &iov, 1, size);
f3d33426
HFS
1839 /* Save some cycles and don't copy the address if not needed */
1840 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1841 /* We assume all kernel code knows the size of sockaddr_storage */
1842 msg.msg_namelen = 0;
1da177e4
LT
1843 if (sock->file->f_flags & O_NONBLOCK)
1844 flags |= MSG_DONTWAIT;
89bddce5 1845 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1846
89bddce5 1847 if (err >= 0 && addr != NULL) {
43db362d 1848 err2 = move_addr_to_user(&address,
230b1839 1849 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1850 if (err2 < 0)
1851 err = err2;
1da177e4 1852 }
de0fa95c
PE
1853
1854 fput_light(sock->file, fput_needed);
4387ff75 1855out:
1da177e4
LT
1856 return err;
1857}
1858
1859/*
89bddce5 1860 * Receive a datagram from a socket.
1da177e4
LT
1861 */
1862
b7c0ddf5
JG
1863SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1864 unsigned int, flags)
1da177e4
LT
1865{
1866 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1867}
1868
1869/*
1870 * Set a socket option. Because we don't know the option lengths we have
1871 * to pass the user mode parameter for the protocols to sort out.
1872 */
1873
20f37034
HC
1874SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1875 char __user *, optval, int, optlen)
1da177e4 1876{
6cb153ca 1877 int err, fput_needed;
1da177e4
LT
1878 struct socket *sock;
1879
1880 if (optlen < 0)
1881 return -EINVAL;
89bddce5
SH
1882
1883 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1884 if (sock != NULL) {
1885 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1886 if (err)
1887 goto out_put;
1da177e4
LT
1888
1889 if (level == SOL_SOCKET)
89bddce5
SH
1890 err =
1891 sock_setsockopt(sock, level, optname, optval,
1892 optlen);
1da177e4 1893 else
89bddce5
SH
1894 err =
1895 sock->ops->setsockopt(sock, level, optname, optval,
1896 optlen);
6cb153ca
BL
1897out_put:
1898 fput_light(sock->file, fput_needed);
1da177e4
LT
1899 }
1900 return err;
1901}
1902
1903/*
1904 * Get a socket option. Because we don't know the option lengths we have
1905 * to pass a user mode parameter for the protocols to sort out.
1906 */
1907
20f37034
HC
1908SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1909 char __user *, optval, int __user *, optlen)
1da177e4 1910{
6cb153ca 1911 int err, fput_needed;
1da177e4
LT
1912 struct socket *sock;
1913
89bddce5
SH
1914 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1915 if (sock != NULL) {
6cb153ca
BL
1916 err = security_socket_getsockopt(sock, level, optname);
1917 if (err)
1918 goto out_put;
1da177e4
LT
1919
1920 if (level == SOL_SOCKET)
89bddce5
SH
1921 err =
1922 sock_getsockopt(sock, level, optname, optval,
1923 optlen);
1da177e4 1924 else
89bddce5
SH
1925 err =
1926 sock->ops->getsockopt(sock, level, optname, optval,
1927 optlen);
6cb153ca
BL
1928out_put:
1929 fput_light(sock->file, fput_needed);
1da177e4
LT
1930 }
1931 return err;
1932}
1933
1da177e4
LT
1934/*
1935 * Shutdown a socket.
1936 */
1937
754fe8d2 1938SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1939{
6cb153ca 1940 int err, fput_needed;
1da177e4
LT
1941 struct socket *sock;
1942
89bddce5
SH
1943 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1944 if (sock != NULL) {
1da177e4 1945 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1946 if (!err)
1947 err = sock->ops->shutdown(sock, how);
1948 fput_light(sock->file, fput_needed);
1da177e4
LT
1949 }
1950 return err;
1951}
1952
/*
 * Helper macros yielding the address of a msghdr field in either the
 * native or the compat (32-bit) layout; the fields have the same type
 * (int / unsigned) in both on our platforms.
 *
 * They expand to code that reads a variable named `flags` and, next to
 * the pointer `msg`, a pointer named `msg_compat`; both must be in
 * scope at the point of use.
 */
#define COMPAT_MSG(msg, member)					\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1959
c71d8ebe
TH
1960struct used_address {
1961 struct sockaddr_storage name;
1962 unsigned int name_len;
1963};
1964
08adb7da
AV
1965static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
1966 struct user_msghdr __user *umsg,
1967 struct sockaddr __user **save_addr,
1968 struct iovec **iov)
1661bf36 1969{
08adb7da
AV
1970 struct sockaddr __user *uaddr;
1971 struct iovec __user *uiov;
c0371da6 1972 size_t nr_segs;
08adb7da
AV
1973 ssize_t err;
1974
1975 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1976 __get_user(uaddr, &umsg->msg_name) ||
1977 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1978 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1979 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1980 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1981 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1982 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1983 return -EFAULT;
dbb490b9 1984
08adb7da 1985 if (!uaddr)
6a2a2b3a
AS
1986 kmsg->msg_namelen = 0;
1987
dbb490b9
ML
1988 if (kmsg->msg_namelen < 0)
1989 return -EINVAL;
1990
1661bf36 1991 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1992 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1993
1994 if (save_addr)
1995 *save_addr = uaddr;
1996
1997 if (uaddr && kmsg->msg_namelen) {
1998 if (!save_addr) {
1999 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
2000 kmsg->msg_name);
2001 if (err < 0)
2002 return err;
2003 }
2004 } else {
2005 kmsg->msg_name = NULL;
2006 kmsg->msg_namelen = 0;
2007 }
2008
c0371da6 2009 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
2010 return -EMSGSIZE;
2011
2012 err = rw_copy_check_uvector(save_addr ? READ : WRITE,
c0371da6 2013 uiov, nr_segs,
08adb7da
AV
2014 UIO_FASTIOV, *iov, iov);
2015 if (err >= 0)
c0371da6
AV
2016 iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
2017 *iov, nr_segs, err);
08adb7da 2018 return err;
1661bf36
DC
2019}
2020
666547ff 2021static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2022 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 2023 struct used_address *used_address)
1da177e4 2024{
89bddce5
SH
2025 struct compat_msghdr __user *msg_compat =
2026 (struct compat_msghdr __user *)msg;
230b1839 2027 struct sockaddr_storage address;
1da177e4 2028 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2029 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
2030 __attribute__ ((aligned(sizeof(__kernel_size_t))));
2031 /* 20 is size of ipv6_pktinfo */
1da177e4 2032 unsigned char *ctl_buf = ctl;
08adb7da
AV
2033 int ctl_len, total_len;
2034 ssize_t err;
89bddce5 2035
08adb7da 2036 msg_sys->msg_name = &address;
1da177e4 2037
08449320 2038 if (MSG_CMSG_COMPAT & flags)
08adb7da 2039 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2040 else
08adb7da 2041 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2042 if (err < 0)
1da177e4
LT
2043 goto out_freeiov;
2044 total_len = err;
2045
2046 err = -ENOBUFS;
2047
228e548e 2048 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2049 goto out_freeiov;
228e548e 2050 ctl_len = msg_sys->msg_controllen;
1da177e4 2051 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2052 err =
228e548e 2053 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2054 sizeof(ctl));
1da177e4
LT
2055 if (err)
2056 goto out_freeiov;
228e548e
AB
2057 ctl_buf = msg_sys->msg_control;
2058 ctl_len = msg_sys->msg_controllen;
1da177e4 2059 } else if (ctl_len) {
89bddce5 2060 if (ctl_len > sizeof(ctl)) {
1da177e4 2061 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2062 if (ctl_buf == NULL)
1da177e4
LT
2063 goto out_freeiov;
2064 }
2065 err = -EFAULT;
2066 /*
228e548e 2067 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2068 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2069 * checking falls down on this.
2070 */
fb8621bb 2071 if (copy_from_user(ctl_buf,
228e548e 2072 (void __user __force *)msg_sys->msg_control,
89bddce5 2073 ctl_len))
1da177e4 2074 goto out_freectl;
228e548e 2075 msg_sys->msg_control = ctl_buf;
1da177e4 2076 }
228e548e 2077 msg_sys->msg_flags = flags;
1da177e4
LT
2078
2079 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2080 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2081 /*
2082 * If this is sendmmsg() and current destination address is same as
2083 * previously succeeded address, omit asking LSM's decision.
2084 * used_address->name_len is initialized to UINT_MAX so that the first
2085 * destination address never matches.
2086 */
bc909d9d
MD
2087 if (used_address && msg_sys->msg_name &&
2088 used_address->name_len == msg_sys->msg_namelen &&
2089 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
2090 used_address->name_len)) {
2091 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2092 goto out_freectl;
2093 }
2094 err = sock_sendmsg(sock, msg_sys, total_len);
2095 /*
2096 * If this is sendmmsg() and sending to current destination address was
2097 * successful, remember it.
2098 */
2099 if (used_address && err >= 0) {
2100 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2101 if (msg_sys->msg_name)
2102 memcpy(&used_address->name, msg_sys->msg_name,
2103 used_address->name_len);
c71d8ebe 2104 }
1da177e4
LT
2105
2106out_freectl:
89bddce5 2107 if (ctl_buf != ctl)
1da177e4
LT
2108 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2109out_freeiov:
2110 if (iov != iovstack)
a74e9106 2111 kfree(iov);
228e548e
AB
2112 return err;
2113}
2114
2115/*
2116 * BSD sendmsg interface
2117 */
2118
666547ff 2119long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2120{
2121 int fput_needed, err;
2122 struct msghdr msg_sys;
1be374a0
AL
2123 struct socket *sock;
2124
1be374a0 2125 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2126 if (!sock)
2127 goto out;
2128
a7526eb5 2129 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2130
6cb153ca 2131 fput_light(sock->file, fput_needed);
89bddce5 2132out:
1da177e4
LT
2133 return err;
2134}
2135
666547ff 2136SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2137{
2138 if (flags & MSG_CMSG_COMPAT)
2139 return -EINVAL;
2140 return __sys_sendmsg(fd, msg, flags);
2141}
2142
228e548e
AB
2143/*
2144 * Linux sendmmsg interface
2145 */
2146
2147int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2148 unsigned int flags)
2149{
2150 int fput_needed, err, datagrams;
2151 struct socket *sock;
2152 struct mmsghdr __user *entry;
2153 struct compat_mmsghdr __user *compat_entry;
2154 struct msghdr msg_sys;
c71d8ebe 2155 struct used_address used_address;
228e548e 2156
98382f41
AB
2157 if (vlen > UIO_MAXIOV)
2158 vlen = UIO_MAXIOV;
228e548e
AB
2159
2160 datagrams = 0;
2161
2162 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2163 if (!sock)
2164 return err;
2165
c71d8ebe 2166 used_address.name_len = UINT_MAX;
228e548e
AB
2167 entry = mmsg;
2168 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2169 err = 0;
228e548e
AB
2170
2171 while (datagrams < vlen) {
228e548e 2172 if (MSG_CMSG_COMPAT & flags) {
666547ff 2173 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5 2174 &msg_sys, flags, &used_address);
228e548e
AB
2175 if (err < 0)
2176 break;
2177 err = __put_user(err, &compat_entry->msg_len);
2178 ++compat_entry;
2179 } else {
a7526eb5 2180 err = ___sys_sendmsg(sock,
666547ff 2181 (struct user_msghdr __user *)entry,
a7526eb5 2182 &msg_sys, flags, &used_address);
228e548e
AB
2183 if (err < 0)
2184 break;
2185 err = put_user(err, &entry->msg_len);
2186 ++entry;
2187 }
2188
2189 if (err)
2190 break;
2191 ++datagrams;
2192 }
2193
228e548e
AB
2194 fput_light(sock->file, fput_needed);
2195
728ffb86
AB
2196 /* We only return an error if no datagrams were able to be sent */
2197 if (datagrams != 0)
228e548e
AB
2198 return datagrams;
2199
228e548e
AB
2200 return err;
2201}
2202
2203SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2204 unsigned int, vlen, unsigned int, flags)
2205{
1be374a0
AL
2206 if (flags & MSG_CMSG_COMPAT)
2207 return -EINVAL;
228e548e
AB
2208 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2209}
2210
666547ff 2211static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2212 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2213{
89bddce5
SH
2214 struct compat_msghdr __user *msg_compat =
2215 (struct compat_msghdr __user *)msg;
1da177e4 2216 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2217 struct iovec *iov = iovstack;
1da177e4 2218 unsigned long cmsg_ptr;
08adb7da
AV
2219 int total_len, len;
2220 ssize_t err;
1da177e4
LT
2221
2222 /* kernel mode address */
230b1839 2223 struct sockaddr_storage addr;
1da177e4
LT
2224
2225 /* user mode address pointers */
2226 struct sockaddr __user *uaddr;
08adb7da 2227 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2228
08adb7da 2229 msg_sys->msg_name = &addr;
1da177e4 2230
f3d33426 2231 if (MSG_CMSG_COMPAT & flags)
08adb7da 2232 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2233 else
08adb7da 2234 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4
LT
2235 if (err < 0)
2236 goto out_freeiov;
89bddce5 2237 total_len = err;
1da177e4 2238
a2e27255
ACM
2239 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2240 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2241
f3d33426
HFS
2242 /* We assume all kernel code knows the size of sockaddr_storage */
2243 msg_sys->msg_namelen = 0;
2244
1da177e4
LT
2245 if (sock->file->f_flags & O_NONBLOCK)
2246 flags |= MSG_DONTWAIT;
a2e27255
ACM
2247 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2248 total_len, flags);
1da177e4
LT
2249 if (err < 0)
2250 goto out_freeiov;
2251 len = err;
2252
2253 if (uaddr != NULL) {
43db362d 2254 err = move_addr_to_user(&addr,
a2e27255 2255 msg_sys->msg_namelen, uaddr,
89bddce5 2256 uaddr_len);
1da177e4
LT
2257 if (err < 0)
2258 goto out_freeiov;
2259 }
a2e27255 2260 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2261 COMPAT_FLAGS(msg));
1da177e4
LT
2262 if (err)
2263 goto out_freeiov;
2264 if (MSG_CMSG_COMPAT & flags)
a2e27255 2265 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2266 &msg_compat->msg_controllen);
2267 else
a2e27255 2268 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2269 &msg->msg_controllen);
2270 if (err)
2271 goto out_freeiov;
2272 err = len;
2273
2274out_freeiov:
2275 if (iov != iovstack)
a74e9106 2276 kfree(iov);
a2e27255
ACM
2277 return err;
2278}
2279
2280/*
2281 * BSD recvmsg interface
2282 */
2283
666547ff 2284long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2285{
2286 int fput_needed, err;
2287 struct msghdr msg_sys;
1be374a0
AL
2288 struct socket *sock;
2289
1be374a0 2290 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2291 if (!sock)
2292 goto out;
2293
a7526eb5 2294 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2295
6cb153ca 2296 fput_light(sock->file, fput_needed);
1da177e4
LT
2297out:
2298 return err;
2299}
2300
666547ff 2301SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2302 unsigned int, flags)
2303{
2304 if (flags & MSG_CMSG_COMPAT)
2305 return -EINVAL;
2306 return __sys_recvmsg(fd, msg, flags);
2307}
2308
a2e27255
ACM
2309/*
2310 * Linux recvmmsg interface
2311 */
2312
2313int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2314 unsigned int flags, struct timespec *timeout)
2315{
2316 int fput_needed, err, datagrams;
2317 struct socket *sock;
2318 struct mmsghdr __user *entry;
d7256d0e 2319 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2320 struct msghdr msg_sys;
2321 struct timespec end_time;
2322
2323 if (timeout &&
2324 poll_select_set_timeout(&end_time, timeout->tv_sec,
2325 timeout->tv_nsec))
2326 return -EINVAL;
2327
2328 datagrams = 0;
2329
2330 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2331 if (!sock)
2332 return err;
2333
2334 err = sock_error(sock->sk);
2335 if (err)
2336 goto out_put;
2337
2338 entry = mmsg;
d7256d0e 2339 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2340
2341 while (datagrams < vlen) {
2342 /*
2343 * No need to ask LSM for more than the first datagram.
2344 */
d7256d0e 2345 if (MSG_CMSG_COMPAT & flags) {
666547ff 2346 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2347 &msg_sys, flags & ~MSG_WAITFORONE,
2348 datagrams);
d7256d0e
JMG
2349 if (err < 0)
2350 break;
2351 err = __put_user(err, &compat_entry->msg_len);
2352 ++compat_entry;
2353 } else {
a7526eb5 2354 err = ___sys_recvmsg(sock,
666547ff 2355 (struct user_msghdr __user *)entry,
a7526eb5
AL
2356 &msg_sys, flags & ~MSG_WAITFORONE,
2357 datagrams);
d7256d0e
JMG
2358 if (err < 0)
2359 break;
2360 err = put_user(err, &entry->msg_len);
2361 ++entry;
2362 }
2363
a2e27255
ACM
2364 if (err)
2365 break;
a2e27255
ACM
2366 ++datagrams;
2367
71c5c159
BB
2368 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2369 if (flags & MSG_WAITFORONE)
2370 flags |= MSG_DONTWAIT;
2371
a2e27255
ACM
2372 if (timeout) {
2373 ktime_get_ts(timeout);
2374 *timeout = timespec_sub(end_time, *timeout);
2375 if (timeout->tv_sec < 0) {
2376 timeout->tv_sec = timeout->tv_nsec = 0;
2377 break;
2378 }
2379
2380 /* Timeout, return less than vlen datagrams */
2381 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2382 break;
2383 }
2384
2385 /* Out of band data, return right away */
2386 if (msg_sys.msg_flags & MSG_OOB)
2387 break;
2388 }
2389
2390out_put:
2391 fput_light(sock->file, fput_needed);
1da177e4 2392
a2e27255
ACM
2393 if (err == 0)
2394 return datagrams;
2395
2396 if (datagrams != 0) {
2397 /*
2398 * We may return less entries than requested (vlen) if the
2399 * sock is non block and there aren't enough datagrams...
2400 */
2401 if (err != -EAGAIN) {
2402 /*
2403 * ... or if recvmsg returns an error after we
2404 * received some datagrams, where we record the
2405 * error to return on the next call or if the
2406 * app asks about it using getsockopt(SO_ERROR).
2407 */
2408 sock->sk->sk_err = -err;
2409 }
2410
2411 return datagrams;
2412 }
2413
2414 return err;
2415}
2416
2417SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2418 unsigned int, vlen, unsigned int, flags,
2419 struct timespec __user *, timeout)
2420{
2421 int datagrams;
2422 struct timespec timeout_sys;
2423
1be374a0
AL
2424 if (flags & MSG_CMSG_COMPAT)
2425 return -EINVAL;
2426
a2e27255
ACM
2427 if (!timeout)
2428 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2429
2430 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2431 return -EFAULT;
2432
2433 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2434
2435 if (datagrams > 0 &&
2436 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2437 datagrams = -EFAULT;
2438
2439 return datagrams;
2440}
2441
2442#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall.
 *
 * nargs[call] is the number of bytes of argument data that socketcall()
 * copies from user space for call number @call; each argument occupies one
 * unsigned long.  Entry 0 is unused (call numbers start at 1).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),			/*  0 */
	AL(3), AL(3), AL(3),	/*  1 -  3 */
	AL(2), AL(3), AL(3),	/*  4 -  6 */
	AL(3), AL(4), AL(4),	/*  7 -  9 */
	AL(4), AL(6), AL(6),	/* 10 - 12 */
	AL(2), AL(5), AL(5),	/* 13 - 15 */
	AL(3), AL(3), AL(4),	/* 16 - 18 */
	AL(5), AL(4)		/* 19 - 20 */
};

#undef AL
2453
2454/*
89bddce5 2455 * System call vectors.
1da177e4
LT
2456 *
2457 * Argument checking cleaned up. Saved 20% in size.
2458 * This function doesn't need to set the kernel lock because
89bddce5 2459 * it is set by the callees.
1da177e4
LT
2460 */
2461
3e0fa65f 2462SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2463{
2950fa9d 2464 unsigned long a[AUDITSC_ARGS];
89bddce5 2465 unsigned long a0, a1;
1da177e4 2466 int err;
47379052 2467 unsigned int len;
1da177e4 2468
228e548e 2469 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2470 return -EINVAL;
2471
47379052
AV
2472 len = nargs[call];
2473 if (len > sizeof(a))
2474 return -EINVAL;
2475
1da177e4 2476 /* copy_from_user should be SMP safe. */
47379052 2477 if (copy_from_user(a, args, len))
1da177e4 2478 return -EFAULT;
3ec3b2fb 2479
2950fa9d
CG
2480 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2481 if (err)
2482 return err;
3ec3b2fb 2483
89bddce5
SH
2484 a0 = a[0];
2485 a1 = a[1];
2486
2487 switch (call) {
2488 case SYS_SOCKET:
2489 err = sys_socket(a0, a1, a[2]);
2490 break;
2491 case SYS_BIND:
2492 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2493 break;
2494 case SYS_CONNECT:
2495 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2496 break;
2497 case SYS_LISTEN:
2498 err = sys_listen(a0, a1);
2499 break;
2500 case SYS_ACCEPT:
de11defe
UD
2501 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2502 (int __user *)a[2], 0);
89bddce5
SH
2503 break;
2504 case SYS_GETSOCKNAME:
2505 err =
2506 sys_getsockname(a0, (struct sockaddr __user *)a1,
2507 (int __user *)a[2]);
2508 break;
2509 case SYS_GETPEERNAME:
2510 err =
2511 sys_getpeername(a0, (struct sockaddr __user *)a1,
2512 (int __user *)a[2]);
2513 break;
2514 case SYS_SOCKETPAIR:
2515 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2516 break;
2517 case SYS_SEND:
2518 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2519 break;
2520 case SYS_SENDTO:
2521 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2522 (struct sockaddr __user *)a[4], a[5]);
2523 break;
2524 case SYS_RECV:
2525 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2526 break;
2527 case SYS_RECVFROM:
2528 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2529 (struct sockaddr __user *)a[4],
2530 (int __user *)a[5]);
2531 break;
2532 case SYS_SHUTDOWN:
2533 err = sys_shutdown(a0, a1);
2534 break;
2535 case SYS_SETSOCKOPT:
2536 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2537 break;
2538 case SYS_GETSOCKOPT:
2539 err =
2540 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2541 (int __user *)a[4]);
2542 break;
2543 case SYS_SENDMSG:
666547ff 2544 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2545 break;
228e548e
AB
2546 case SYS_SENDMMSG:
2547 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2548 break;
89bddce5 2549 case SYS_RECVMSG:
666547ff 2550 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2551 break;
a2e27255
ACM
2552 case SYS_RECVMMSG:
2553 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2554 (struct timespec __user *)a[4]);
2555 break;
de11defe
UD
2556 case SYS_ACCEPT4:
2557 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2558 (int __user *)a[2], a[3]);
aaca0bdc 2559 break;
89bddce5
SH
2560 default:
2561 err = -EINVAL;
2562 break;
1da177e4
LT
2563 }
2564 return err;
2565}
2566
89bddce5 2567#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2568
55737fda
SH
2569/**
2570 * sock_register - add a socket protocol handler
2571 * @ops: description of protocol
2572 *
1da177e4
LT
2573 * This function is called by a protocol handler that wants to
2574 * advertise its address family, and have it linked into the
e793c0f7 2575 * socket interface. The value ops->family corresponds to the
55737fda 2576 * socket system call protocol family.
1da177e4 2577 */
f0fd27d4 2578int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2579{
2580 int err;
2581
2582 if (ops->family >= NPROTO) {
3410f22e 2583 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2584 return -ENOBUFS;
2585 }
55737fda
SH
2586
2587 spin_lock(&net_family_lock);
190683a9
ED
2588 if (rcu_dereference_protected(net_families[ops->family],
2589 lockdep_is_held(&net_family_lock)))
55737fda
SH
2590 err = -EEXIST;
2591 else {
cf778b00 2592 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2593 err = 0;
2594 }
55737fda
SH
2595 spin_unlock(&net_family_lock);
2596
3410f22e 2597 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2598 return err;
2599}
c6d409cf 2600EXPORT_SYMBOL(sock_register);
1da177e4 2601
55737fda
SH
2602/**
2603 * sock_unregister - remove a protocol handler
2604 * @family: protocol family to remove
2605 *
1da177e4
LT
2606 * This function is called by a protocol handler that wants to
2607 * remove its address family, and have it unlinked from the
55737fda
SH
2608 * new socket creation.
2609 *
2610 * If protocol handler is a module, then it can use module reference
2611 * counts to protect against new references. If protocol handler is not
2612 * a module then it needs to provide its own protection in
2613 * the ops->create routine.
1da177e4 2614 */
f0fd27d4 2615void sock_unregister(int family)
1da177e4 2616{
f0fd27d4 2617 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2618
55737fda 2619 spin_lock(&net_family_lock);
a9b3cd7f 2620 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2621 spin_unlock(&net_family_lock);
2622
2623 synchronize_rcu();
2624
3410f22e 2625 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2626}
c6d409cf 2627EXPORT_SYMBOL(sock_unregister);
1da177e4 2628
77d76ea3 2629static int __init sock_init(void)
1da177e4 2630{
b3e19d92 2631 int err;
2ca794e5
EB
2632 /*
2633 * Initialize the network sysctl infrastructure.
2634 */
2635 err = net_sysctl_init();
2636 if (err)
2637 goto out;
b3e19d92 2638
1da177e4 2639 /*
89bddce5 2640 * Initialize skbuff SLAB cache
1da177e4
LT
2641 */
2642 skb_init();
1da177e4
LT
2643
2644 /*
89bddce5 2645 * Initialize the protocols module.
1da177e4
LT
2646 */
2647
2648 init_inodecache();
b3e19d92
NP
2649
2650 err = register_filesystem(&sock_fs_type);
2651 if (err)
2652 goto out_fs;
1da177e4 2653 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2654 if (IS_ERR(sock_mnt)) {
2655 err = PTR_ERR(sock_mnt);
2656 goto out_mount;
2657 }
77d76ea3
AK
2658
2659 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2660 */
2661
2662#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2663 err = netfilter_init();
2664 if (err)
2665 goto out;
1da177e4 2666#endif
cbeb321a 2667
408eccce 2668 ptp_classifier_init();
c1f19b51 2669
b3e19d92
NP
2670out:
2671 return err;
2672
2673out_mount:
2674 unregister_filesystem(&sock_fs_type);
2675out_fs:
2676 goto out;
1da177e4
LT
2677}
2678
77d76ea3
AK
2679core_initcall(sock_init); /* early initcall */
2680
1da177e4
LT
2681#ifdef CONFIG_PROC_FS
2682void socket_seq_show(struct seq_file *seq)
2683{
2684 int cpu;
2685 int counter = 0;
2686
6f912042 2687 for_each_possible_cpu(cpu)
89bddce5 2688 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2689
2690 /* It can be negative, by the way. 8) */
2691 if (counter < 0)
2692 counter = 0;
2693
2694 seq_printf(seq, "sockets: used %d\n", counter);
2695}
89bddce5 2696#endif /* CONFIG_PROC_FS */
1da177e4 2697
89bbfc95 2698#ifdef CONFIG_COMPAT
6b96018b 2699static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2700 unsigned int cmd, void __user *up)
7a229387 2701{
7a229387
AB
2702 mm_segment_t old_fs = get_fs();
2703 struct timeval ktv;
2704 int err;
2705
2706 set_fs(KERNEL_DS);
6b96018b 2707 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2708 set_fs(old_fs);
644595f8 2709 if (!err)
ed6fe9d6 2710 err = compat_put_timeval(&ktv, up);
644595f8 2711
7a229387
AB
2712 return err;
2713}
2714
6b96018b 2715static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2716 unsigned int cmd, void __user *up)
7a229387 2717{
7a229387
AB
2718 mm_segment_t old_fs = get_fs();
2719 struct timespec kts;
2720 int err;
2721
2722 set_fs(KERNEL_DS);
6b96018b 2723 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2724 set_fs(old_fs);
644595f8 2725 if (!err)
ed6fe9d6 2726 err = compat_put_timespec(&kts, up);
644595f8 2727
7a229387
AB
2728 return err;
2729}
2730
6b96018b 2731static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2732{
2733 struct ifreq __user *uifr;
2734 int err;
2735
2736 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2737 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2738 return -EFAULT;
2739
6b96018b 2740 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2741 if (err)
2742 return err;
2743
6b96018b 2744 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2745 return -EFAULT;
2746
2747 return 0;
2748}
2749
6b96018b 2750static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2751{
6b96018b 2752 struct compat_ifconf ifc32;
7a229387
AB
2753 struct ifconf ifc;
2754 struct ifconf __user *uifc;
6b96018b 2755 struct compat_ifreq __user *ifr32;
7a229387
AB
2756 struct ifreq __user *ifr;
2757 unsigned int i, j;
2758 int err;
2759
6b96018b 2760 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2761 return -EFAULT;
2762
43da5f2e 2763 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2764 if (ifc32.ifcbuf == 0) {
2765 ifc32.ifc_len = 0;
2766 ifc.ifc_len = 0;
2767 ifc.ifc_req = NULL;
2768 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2769 } else {
c6d409cf
ED
2770 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2771 sizeof(struct ifreq);
7a229387
AB
2772 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2773 ifc.ifc_len = len;
2774 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2775 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2776 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2777 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2778 return -EFAULT;
2779 ifr++;
2780 ifr32++;
2781 }
2782 }
2783 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2784 return -EFAULT;
2785
6b96018b 2786 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2787 if (err)
2788 return err;
2789
2790 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2791 return -EFAULT;
2792
2793 ifr = ifc.ifc_req;
2794 ifr32 = compat_ptr(ifc32.ifcbuf);
2795 for (i = 0, j = 0;
c6d409cf
ED
2796 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2797 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2798 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2799 return -EFAULT;
2800 ifr32++;
2801 ifr++;
2802 }
2803
2804 if (ifc32.ifcbuf == 0) {
2805 /* Translate from 64-bit structure multiple to
2806 * a 32-bit one.
2807 */
2808 i = ifc.ifc_len;
6b96018b 2809 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2810 ifc32.ifc_len = i;
2811 } else {
2812 ifc32.ifc_len = i;
2813 }
6b96018b 2814 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2815 return -EFAULT;
2816
2817 return 0;
2818}
2819
6b96018b 2820static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2821{
3a7da39d
BH
2822 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2823 bool convert_in = false, convert_out = false;
2824 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2825 struct ethtool_rxnfc __user *rxnfc;
7a229387 2826 struct ifreq __user *ifr;
3a7da39d
BH
2827 u32 rule_cnt = 0, actual_rule_cnt;
2828 u32 ethcmd;
7a229387 2829 u32 data;
3a7da39d 2830 int ret;
7a229387 2831
3a7da39d
BH
2832 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2833 return -EFAULT;
7a229387 2834
3a7da39d
BH
2835 compat_rxnfc = compat_ptr(data);
2836
2837 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2838 return -EFAULT;
2839
3a7da39d
BH
2840 /* Most ethtool structures are defined without padding.
2841 * Unfortunately struct ethtool_rxnfc is an exception.
2842 */
2843 switch (ethcmd) {
2844 default:
2845 break;
2846 case ETHTOOL_GRXCLSRLALL:
2847 /* Buffer size is variable */
2848 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2849 return -EFAULT;
2850 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2851 return -ENOMEM;
2852 buf_size += rule_cnt * sizeof(u32);
2853 /* fall through */
2854 case ETHTOOL_GRXRINGS:
2855 case ETHTOOL_GRXCLSRLCNT:
2856 case ETHTOOL_GRXCLSRULE:
55664f32 2857 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2858 convert_out = true;
2859 /* fall through */
2860 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2861 buf_size += sizeof(struct ethtool_rxnfc);
2862 convert_in = true;
2863 break;
2864 }
2865
2866 ifr = compat_alloc_user_space(buf_size);
954b1244 2867 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2868
2869 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2870 return -EFAULT;
2871
3a7da39d
BH
2872 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2873 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2874 return -EFAULT;
2875
3a7da39d 2876 if (convert_in) {
127fe533 2877 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2878 * fs.ring_cookie and at the end of fs, but nowhere else.
2879 */
127fe533
AD
2880 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2881 sizeof(compat_rxnfc->fs.m_ext) !=
2882 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2883 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2884 BUILD_BUG_ON(
2885 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2886 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2887 offsetof(struct ethtool_rxnfc, fs.location) -
2888 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2889
2890 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2891 (void __user *)(&rxnfc->fs.m_ext + 1) -
2892 (void __user *)rxnfc) ||
3a7da39d
BH
2893 copy_in_user(&rxnfc->fs.ring_cookie,
2894 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2895 (void __user *)(&rxnfc->fs.location + 1) -
2896 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2897 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2898 sizeof(rxnfc->rule_cnt)))
2899 return -EFAULT;
2900 }
2901
2902 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2903 if (ret)
2904 return ret;
2905
2906 if (convert_out) {
2907 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2908 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2909 (const void __user *)rxnfc) ||
3a7da39d
BH
2910 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2911 &rxnfc->fs.ring_cookie,
954b1244
SH
2912 (const void __user *)(&rxnfc->fs.location + 1) -
2913 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2914 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2915 sizeof(rxnfc->rule_cnt)))
2916 return -EFAULT;
2917
2918 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2919 /* As an optimisation, we only copy the actual
2920 * number of rules that the underlying
2921 * function returned. Since Mallory might
2922 * change the rule count in user memory, we
2923 * check that it is less than the rule count
2924 * originally given (as the user buffer size),
2925 * which has been range-checked.
2926 */
2927 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2928 return -EFAULT;
2929 if (actual_rule_cnt < rule_cnt)
2930 rule_cnt = actual_rule_cnt;
2931 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2932 &rxnfc->rule_locs[0],
2933 rule_cnt * sizeof(u32)))
2934 return -EFAULT;
2935 }
2936 }
2937
2938 return 0;
7a229387
AB
2939}
2940
7a50a240
AB
2941static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2942{
2943 void __user *uptr;
2944 compat_uptr_t uptr32;
2945 struct ifreq __user *uifr;
2946
c6d409cf 2947 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2948 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2949 return -EFAULT;
2950
2951 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2952 return -EFAULT;
2953
2954 uptr = compat_ptr(uptr32);
2955
2956 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2957 return -EFAULT;
2958
2959 return dev_ioctl(net, SIOCWANDEV, uifr);
2960}
2961
6b96018b
AB
2962static int bond_ioctl(struct net *net, unsigned int cmd,
2963 struct compat_ifreq __user *ifr32)
7a229387
AB
2964{
2965 struct ifreq kifr;
7a229387
AB
2966 mm_segment_t old_fs;
2967 int err;
7a229387
AB
2968
2969 switch (cmd) {
2970 case SIOCBONDENSLAVE:
2971 case SIOCBONDRELEASE:
2972 case SIOCBONDSETHWADDR:
2973 case SIOCBONDCHANGEACTIVE:
6b96018b 2974 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2975 return -EFAULT;
2976
2977 old_fs = get_fs();
c6d409cf 2978 set_fs(KERNEL_DS);
c3f52ae6 2979 err = dev_ioctl(net, cmd,
2980 (struct ifreq __user __force *) &kifr);
c6d409cf 2981 set_fs(old_fs);
7a229387
AB
2982
2983 return err;
7a229387 2984 default:
07d106d0 2985 return -ENOIOCTLCMD;
ccbd6a5a 2986 }
7a229387
AB
2987}
2988
590d4693
BH
2989/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2990static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2991 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2992{
2993 struct ifreq __user *u_ifreq64;
7a229387
AB
2994 char tmp_buf[IFNAMSIZ];
2995 void __user *data64;
2996 u32 data32;
2997
2998 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2999 IFNAMSIZ))
3000 return -EFAULT;
417c3522 3001 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
3002 return -EFAULT;
3003 data64 = compat_ptr(data32);
3004
3005 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
3006
7a229387
AB
3007 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
3008 IFNAMSIZ))
3009 return -EFAULT;
417c3522 3010 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
3011 return -EFAULT;
3012
6b96018b 3013 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
3014}
3015
6b96018b
AB
3016static int dev_ifsioc(struct net *net, struct socket *sock,
3017 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 3018{
a2116ed2 3019 struct ifreq __user *uifr;
7a229387
AB
3020 int err;
3021
a2116ed2
AB
3022 uifr = compat_alloc_user_space(sizeof(*uifr));
3023 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3024 return -EFAULT;
3025
3026 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3027
7a229387
AB
3028 if (!err) {
3029 switch (cmd) {
3030 case SIOCGIFFLAGS:
3031 case SIOCGIFMETRIC:
3032 case SIOCGIFMTU:
3033 case SIOCGIFMEM:
3034 case SIOCGIFHWADDR:
3035 case SIOCGIFINDEX:
3036 case SIOCGIFADDR:
3037 case SIOCGIFBRDADDR:
3038 case SIOCGIFDSTADDR:
3039 case SIOCGIFNETMASK:
fab2532b 3040 case SIOCGIFPFLAGS:
7a229387 3041 case SIOCGIFTXQLEN:
fab2532b
AB
3042 case SIOCGMIIPHY:
3043 case SIOCGMIIREG:
a2116ed2 3044 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3045 err = -EFAULT;
3046 break;
3047 }
3048 }
3049 return err;
3050}
3051
a2116ed2
AB
3052static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3053 struct compat_ifreq __user *uifr32)
3054{
3055 struct ifreq ifr;
3056 struct compat_ifmap __user *uifmap32;
3057 mm_segment_t old_fs;
3058 int err;
3059
3060 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3061 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3062 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3063 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3064 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3065 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3066 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3067 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3068 if (err)
3069 return -EFAULT;
3070
3071 old_fs = get_fs();
c6d409cf 3072 set_fs(KERNEL_DS);
c3f52ae6 3073 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3074 set_fs(old_fs);
a2116ed2
AB
3075
3076 if (cmd == SIOCGIFMAP && !err) {
3077 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3078 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3079 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3080 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3081 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3082 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3083 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3084 if (err)
3085 err = -EFAULT;
3086 }
3087 return err;
3088}
3089
7a229387 3090struct rtentry32 {
c6d409cf 3091 u32 rt_pad1;
7a229387
AB
3092 struct sockaddr rt_dst; /* target address */
3093 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3094 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3095 unsigned short rt_flags;
3096 short rt_pad2;
3097 u32 rt_pad3;
3098 unsigned char rt_tos;
3099 unsigned char rt_class;
3100 short rt_pad4;
3101 short rt_metric; /* +1 for binary compatibility! */
7a229387 3102 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3103 u32 rt_mtu; /* per route MTU/Window */
3104 u32 rt_window; /* Window clamping */
7a229387
AB
3105 unsigned short rt_irtt; /* Initial RTT */
3106};
3107
3108struct in6_rtmsg32 {
3109 struct in6_addr rtmsg_dst;
3110 struct in6_addr rtmsg_src;
3111 struct in6_addr rtmsg_gateway;
3112 u32 rtmsg_type;
3113 u16 rtmsg_dst_len;
3114 u16 rtmsg_src_len;
3115 u32 rtmsg_metric;
3116 u32 rtmsg_info;
3117 u32 rtmsg_flags;
3118 s32 rtmsg_ifindex;
3119};
3120
6b96018b
AB
3121static int routing_ioctl(struct net *net, struct socket *sock,
3122 unsigned int cmd, void __user *argp)
7a229387
AB
3123{
3124 int ret;
3125 void *r = NULL;
3126 struct in6_rtmsg r6;
3127 struct rtentry r4;
3128 char devname[16];
3129 u32 rtdev;
3130 mm_segment_t old_fs = get_fs();
3131
6b96018b
AB
3132 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3133 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3134 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3135 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3136 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3137 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3138 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3139 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3140 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3141 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3142 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3143
3144 r = (void *) &r6;
3145 } else { /* ipv4 */
6b96018b 3146 struct rtentry32 __user *ur4 = argp;
c6d409cf 3147 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3148 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3149 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3150 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3151 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3152 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3153 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3154 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3155 if (rtdev) {
c6d409cf 3156 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3157 r4.rt_dev = (char __user __force *)devname;
3158 devname[15] = 0;
7a229387
AB
3159 } else
3160 r4.rt_dev = NULL;
3161
3162 r = (void *) &r4;
3163 }
3164
3165 if (ret) {
3166 ret = -EFAULT;
3167 goto out;
3168 }
3169
c6d409cf 3170 set_fs(KERNEL_DS);
6b96018b 3171 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3172 set_fs(old_fs);
7a229387
AB
3173
3174out:
7a229387
AB
3175 return ret;
3176}
3177
3178/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3179 * for some operations; this forces use of the newer bridge-utils that
25985edc 3180 * use compatible ioctls
7a229387 3181 */
6b96018b 3182static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3183{
6b96018b 3184 compat_ulong_t tmp;
7a229387 3185
6b96018b 3186 if (get_user(tmp, argp))
7a229387
AB
3187 return -EFAULT;
3188 if (tmp == BRCTL_GET_VERSION)
3189 return BRCTL_VERSION + 1;
3190 return -EINVAL;
3191}
3192
6b96018b
AB
3193static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3194 unsigned int cmd, unsigned long arg)
3195{
3196 void __user *argp = compat_ptr(arg);
3197 struct sock *sk = sock->sk;
3198 struct net *net = sock_net(sk);
7a229387 3199
6b96018b 3200 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3201 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3202
3203 switch (cmd) {
3204 case SIOCSIFBR:
3205 case SIOCGIFBR:
3206 return old_bridge_ioctl(argp);
3207 case SIOCGIFNAME:
3208 return dev_ifname32(net, argp);
3209 case SIOCGIFCONF:
3210 return dev_ifconf(net, argp);
3211 case SIOCETHTOOL:
3212 return ethtool_ioctl(net, argp);
7a50a240
AB
3213 case SIOCWANDEV:
3214 return compat_siocwandev(net, argp);
a2116ed2
AB
3215 case SIOCGIFMAP:
3216 case SIOCSIFMAP:
3217 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3218 case SIOCBONDENSLAVE:
3219 case SIOCBONDRELEASE:
3220 case SIOCBONDSETHWADDR:
6b96018b
AB
3221 case SIOCBONDCHANGEACTIVE:
3222 return bond_ioctl(net, cmd, argp);
3223 case SIOCADDRT:
3224 case SIOCDELRT:
3225 return routing_ioctl(net, sock, cmd, argp);
3226 case SIOCGSTAMP:
3227 return do_siocgstamp(net, sock, cmd, argp);
3228 case SIOCGSTAMPNS:
3229 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3230 case SIOCBONDSLAVEINFOQUERY:
3231 case SIOCBONDINFOQUERY:
a2116ed2 3232 case SIOCSHWTSTAMP:
fd468c74 3233 case SIOCGHWTSTAMP:
590d4693 3234 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3235
3236 case FIOSETOWN:
3237 case SIOCSPGRP:
3238 case FIOGETOWN:
3239 case SIOCGPGRP:
3240 case SIOCBRADDBR:
3241 case SIOCBRDELBR:
3242 case SIOCGIFVLAN:
3243 case SIOCSIFVLAN:
3244 case SIOCADDDLCI:
3245 case SIOCDELDLCI:
3246 return sock_ioctl(file, cmd, arg);
3247
3248 case SIOCGIFFLAGS:
3249 case SIOCSIFFLAGS:
3250 case SIOCGIFMETRIC:
3251 case SIOCSIFMETRIC:
3252 case SIOCGIFMTU:
3253 case SIOCSIFMTU:
3254 case SIOCGIFMEM:
3255 case SIOCSIFMEM:
3256 case SIOCGIFHWADDR:
3257 case SIOCSIFHWADDR:
3258 case SIOCADDMULTI:
3259 case SIOCDELMULTI:
3260 case SIOCGIFINDEX:
6b96018b
AB
3261 case SIOCGIFADDR:
3262 case SIOCSIFADDR:
3263 case SIOCSIFHWBROADCAST:
6b96018b 3264 case SIOCDIFADDR:
6b96018b
AB
3265 case SIOCGIFBRDADDR:
3266 case SIOCSIFBRDADDR:
3267 case SIOCGIFDSTADDR:
3268 case SIOCSIFDSTADDR:
3269 case SIOCGIFNETMASK:
3270 case SIOCSIFNETMASK:
3271 case SIOCSIFPFLAGS:
3272 case SIOCGIFPFLAGS:
3273 case SIOCGIFTXQLEN:
3274 case SIOCSIFTXQLEN:
3275 case SIOCBRADDIF:
3276 case SIOCBRDELIF:
9177efd3
AB
3277 case SIOCSIFNAME:
3278 case SIOCGMIIPHY:
3279 case SIOCGMIIREG:
3280 case SIOCSMIIREG:
6b96018b 3281 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3282
6b96018b
AB
3283 case SIOCSARP:
3284 case SIOCGARP:
3285 case SIOCDARP:
6b96018b 3286 case SIOCATMARK:
9177efd3
AB
3287 return sock_do_ioctl(net, sock, cmd, arg);
3288 }
3289
6b96018b
AB
3290 return -ENOIOCTLCMD;
3291}
7a229387 3292
95c96174 3293static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3294 unsigned long arg)
89bbfc95
SP
3295{
3296 struct socket *sock = file->private_data;
3297 int ret = -ENOIOCTLCMD;
87de87d5
DM
3298 struct sock *sk;
3299 struct net *net;
3300
3301 sk = sock->sk;
3302 net = sock_net(sk);
89bbfc95
SP
3303
3304 if (sock->ops->compat_ioctl)
3305 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3306
87de87d5
DM
3307 if (ret == -ENOIOCTLCMD &&
3308 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3309 ret = compat_wext_handle_ioctl(net, cmd, arg);
3310
6b96018b
AB
3311 if (ret == -ENOIOCTLCMD)
3312 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3313
89bbfc95
SP
3314 return ret;
3315}
3316#endif
3317
ac5a488e
SS
3318int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3319{
3320 return sock->ops->bind(sock, addr, addrlen);
3321}
c6d409cf 3322EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3323
3324int kernel_listen(struct socket *sock, int backlog)
3325{
3326 return sock->ops->listen(sock, backlog);
3327}
c6d409cf 3328EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3329
3330int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3331{
3332 struct sock *sk = sock->sk;
3333 int err;
3334
3335 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3336 newsock);
3337 if (err < 0)
3338 goto done;
3339
3340 err = sock->ops->accept(sock, *newsock, flags);
3341 if (err < 0) {
3342 sock_release(*newsock);
fa8705b0 3343 *newsock = NULL;
ac5a488e
SS
3344 goto done;
3345 }
3346
3347 (*newsock)->ops = sock->ops;
1b08534e 3348 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3349
3350done:
3351 return err;
3352}
c6d409cf 3353EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3354
3355int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3356 int flags)
ac5a488e
SS
3357{
3358 return sock->ops->connect(sock, addr, addrlen, flags);
3359}
c6d409cf 3360EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3361
3362int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3363 int *addrlen)
3364{
3365 return sock->ops->getname(sock, addr, addrlen, 0);
3366}
c6d409cf 3367EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3368
3369int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3370 int *addrlen)
3371{
3372 return sock->ops->getname(sock, addr, addrlen, 1);
3373}
c6d409cf 3374EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3375
3376int kernel_getsockopt(struct socket *sock, int level, int optname,
3377 char *optval, int *optlen)
3378{
3379 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3380 char __user *uoptval;
3381 int __user *uoptlen;
ac5a488e
SS
3382 int err;
3383
fb8621bb
NK
3384 uoptval = (char __user __force *) optval;
3385 uoptlen = (int __user __force *) optlen;
3386
ac5a488e
SS
3387 set_fs(KERNEL_DS);
3388 if (level == SOL_SOCKET)
fb8621bb 3389 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3390 else
fb8621bb
NK
3391 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3392 uoptlen);
ac5a488e
SS
3393 set_fs(oldfs);
3394 return err;
3395}
c6d409cf 3396EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3397
3398int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3399 char *optval, unsigned int optlen)
ac5a488e
SS
3400{
3401 mm_segment_t oldfs = get_fs();
fb8621bb 3402 char __user *uoptval;
ac5a488e
SS
3403 int err;
3404
fb8621bb
NK
3405 uoptval = (char __user __force *) optval;
3406
ac5a488e
SS
3407 set_fs(KERNEL_DS);
3408 if (level == SOL_SOCKET)
fb8621bb 3409 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3410 else
fb8621bb 3411 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3412 optlen);
3413 set_fs(oldfs);
3414 return err;
3415}
c6d409cf 3416EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3417
3418int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3419 size_t size, int flags)
3420{
3421 if (sock->ops->sendpage)
3422 return sock->ops->sendpage(sock, page, offset, size, flags);
3423
3424 return sock_no_sendpage(sock, page, offset, size, flags);
3425}
c6d409cf 3426EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3427
3428int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3429{
3430 mm_segment_t oldfs = get_fs();
3431 int err;
3432
3433 set_fs(KERNEL_DS);
3434 err = sock->ops->ioctl(sock, cmd, arg);
3435 set_fs(oldfs);
3436
3437 return err;
3438}
c6d409cf 3439EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3440
91cf45f0
TM
3441int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3442{
3443 return sock->ops->shutdown(sock, how);
3444}
91cf45f0 3445EXPORT_SYMBOL(kernel_sock_shutdown);
This page took 1.151527 seconds and 5 git commands to generate.