Merge branch 'gianfar-next'
[deliverable/linux.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1fd7317d 88#include <linux/magic.h>
5a0e3ad6 89#include <linux/slab.h>
600e1779 90#include <linux/xattr.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
076bb0c8 107#include <net/busy_poll.h>
06021292 108
e0d1095a 109#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
110unsigned int sysctl_net_busy_read __read_mostly;
111unsigned int sysctl_net_busy_poll __read_mostly;
06021292 112#endif
6b96018b 113
1da177e4 114static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
115static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
116 unsigned long nr_segs, loff_t pos);
117static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
118 unsigned long nr_segs, loff_t pos);
89bddce5 119static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
120
121static int sock_close(struct inode *inode, struct file *file);
122static unsigned int sock_poll(struct file *file,
123 struct poll_table_struct *wait);
89bddce5 124static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
125#ifdef CONFIG_COMPAT
126static long compat_sock_ioctl(struct file *file,
89bddce5 127 unsigned int cmd, unsigned long arg);
89bbfc95 128#endif
1da177e4 129static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
130static ssize_t sock_sendpage(struct file *file, struct page *page,
131 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 132static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 133 struct pipe_inode_info *pipe, size_t len,
9c55e01c 134 unsigned int flags);
1da177e4 135
1da177e4
LT
136/*
137 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
138 * in the operation structures but are done directly via the socketcall() multiplexor.
139 */
140
da7071d7 141static const struct file_operations socket_file_ops = {
1da177e4
LT
142 .owner = THIS_MODULE,
143 .llseek = no_llseek,
144 .aio_read = sock_aio_read,
145 .aio_write = sock_aio_write,
146 .poll = sock_poll,
147 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
148#ifdef CONFIG_COMPAT
149 .compat_ioctl = compat_sock_ioctl,
150#endif
1da177e4
LT
151 .mmap = sock_mmap,
152 .open = sock_no_open, /* special open code to disallow open via /proc */
153 .release = sock_close,
154 .fasync = sock_fasync,
5274f052
JA
155 .sendpage = sock_sendpage,
156 .splice_write = generic_splice_sendpage,
9c55e01c 157 .splice_read = sock_splice_read,
1da177e4
LT
158};
159
160/*
161 * The protocol list. Each protocol is registered in here.
162 */
163
1da177e4 164static DEFINE_SPINLOCK(net_family_lock);
190683a9 165static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 166
1da177e4
LT
167/*
168 * Statistics counters of the socket lists
169 */
170
c6d409cf 171static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
172
173/*
89bddce5
SH
174 * Support routines.
175 * Move socket addresses back and forth across the kernel/user
176 * divide and look after the messy bits.
1da177e4
LT
177 */
178
1da177e4
LT
179/**
180 * move_addr_to_kernel - copy a socket address into kernel space
181 * @uaddr: Address in user space
182 * @kaddr: Address in kernel space
183 * @ulen: Length in user space
184 *
185 * The address is copied into kernel space. If the provided address is
186 * too long an error code of -EINVAL is returned. If the copy gives
187 * invalid addresses -EFAULT is returned. On a success 0 is returned.
188 */
189
43db362d 190int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 191{
230b1839 192 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 193 return -EINVAL;
89bddce5 194 if (ulen == 0)
1da177e4 195 return 0;
89bddce5 196 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 197 return -EFAULT;
3ec3b2fb 198 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
199}
200
201/**
202 * move_addr_to_user - copy an address to user space
203 * @kaddr: kernel space address
204 * @klen: length of address in kernel
205 * @uaddr: user space address
206 * @ulen: pointer to user length field
207 *
208 * The value pointed to by ulen on entry is the buffer length available.
209 * This is overwritten with the buffer space used. -EINVAL is returned
210 * if an overlong buffer is specified or a negative buffer size. -EFAULT
211 * is returned if either the buffer or the length field are not
212 * accessible.
213 * After copying the data up to the limit the user specifies, the true
214 * length of the data is written over the length limit the user
215 * specified. Zero is returned for a success.
216 */
89bddce5 217
43db362d 218static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 219 void __user *uaddr, int __user *ulen)
1da177e4
LT
220{
221 int err;
222 int len;
223
68c6beb3 224 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
225 err = get_user(len, ulen);
226 if (err)
1da177e4 227 return err;
89bddce5
SH
228 if (len > klen)
229 len = klen;
68c6beb3 230 if (len < 0)
1da177e4 231 return -EINVAL;
89bddce5 232 if (len) {
d6fe3945
SG
233 if (audit_sockaddr(klen, kaddr))
234 return -ENOMEM;
89bddce5 235 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
236 return -EFAULT;
237 }
238 /*
89bddce5
SH
239 * "fromlen shall refer to the value before truncation.."
240 * 1003.1g
1da177e4
LT
241 */
242 return __put_user(klen, ulen);
243}
244
e18b890b 245static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
246
247static struct inode *sock_alloc_inode(struct super_block *sb)
248{
249 struct socket_alloc *ei;
eaefd110 250 struct socket_wq *wq;
89bddce5 251
e94b1766 252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
253 if (!ei)
254 return NULL;
eaefd110
ED
255 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
256 if (!wq) {
43815482
ED
257 kmem_cache_free(sock_inode_cachep, ei);
258 return NULL;
259 }
eaefd110
ED
260 init_waitqueue_head(&wq->wait);
261 wq->fasync_list = NULL;
262 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 263
1da177e4
LT
264 ei->socket.state = SS_UNCONNECTED;
265 ei->socket.flags = 0;
266 ei->socket.ops = NULL;
267 ei->socket.sk = NULL;
268 ei->socket.file = NULL;
1da177e4
LT
269
270 return &ei->vfs_inode;
271}
272
273static void sock_destroy_inode(struct inode *inode)
274{
43815482 275 struct socket_alloc *ei;
eaefd110 276 struct socket_wq *wq;
43815482
ED
277
278 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 279 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 280 kfree_rcu(wq, rcu);
43815482 281 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
282}
283
51cc5068 284static void init_once(void *foo)
1da177e4 285{
89bddce5 286 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 287
a35afb83 288 inode_init_once(&ei->vfs_inode);
1da177e4 289}
89bddce5 290
1da177e4
LT
291static int init_inodecache(void)
292{
293 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
294 sizeof(struct socket_alloc),
295 0,
296 (SLAB_HWCACHE_ALIGN |
297 SLAB_RECLAIM_ACCOUNT |
298 SLAB_MEM_SPREAD),
20c2df83 299 init_once);
1da177e4
LT
300 if (sock_inode_cachep == NULL)
301 return -ENOMEM;
302 return 0;
303}
304
b87221de 305static const struct super_operations sockfs_ops = {
c6d409cf
ED
306 .alloc_inode = sock_alloc_inode,
307 .destroy_inode = sock_destroy_inode,
308 .statfs = simple_statfs,
1da177e4
LT
309};
310
c23fbb6b
ED
311/*
312 * sockfs_dname() is called from d_path().
313 */
314static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
315{
316 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
317 dentry->d_inode->i_ino);
318}
319
3ba13d17 320static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 321 .d_dname = sockfs_dname,
1da177e4
LT
322};
323
c74a1cbb
AV
324static struct dentry *sockfs_mount(struct file_system_type *fs_type,
325 int flags, const char *dev_name, void *data)
326{
327 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
328 &sockfs_dentry_operations, SOCKFS_MAGIC);
329}
330
331static struct vfsmount *sock_mnt __read_mostly;
332
333static struct file_system_type sock_fs_type = {
334 .name = "sockfs",
335 .mount = sockfs_mount,
336 .kill_sb = kill_anon_super,
337};
338
1da177e4
LT
339/*
340 * Obtains the first available file descriptor and sets it up for use.
341 *
39d8c1b6
DM
342 * These functions create file structures and map them to fd space
343 * of the current process. On success it returns file descriptor
1da177e4
LT
344 * and file struct implicitly stored in sock->file.
345 * Note that another thread may close file descriptor before we return
346 * from this function. We use the fact that now we do not refer
347 * to socket after mapping. If one day we will need it, this
348 * function will increment ref. count on file by 1.
349 *
350 * In any case returned fd MAY BE not valid!
351 * This race condition is unavoidable
352 * with shared fd spaces, we cannot solve it inside kernel,
353 * but we take care of internal coherence yet.
354 */
355
aab174f0 356struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 357{
7cbe66b6 358 struct qstr name = { .name = "" };
2c48b9c4 359 struct path path;
7cbe66b6 360 struct file *file;
1da177e4 361
600e1779
MY
362 if (dname) {
363 name.name = dname;
364 name.len = strlen(name.name);
365 } else if (sock->sk) {
366 name.name = sock->sk->sk_prot_creator->name;
367 name.len = strlen(name.name);
368 }
4b936885 369 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
370 if (unlikely(!path.dentry))
371 return ERR_PTR(-ENOMEM);
2c48b9c4 372 path.mnt = mntget(sock_mnt);
39d8c1b6 373
2c48b9c4 374 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 375 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 376
2c48b9c4 377 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 378 &socket_file_ops);
39b65252 379 if (unlikely(IS_ERR(file))) {
cc3808f8 380 /* drop dentry, keep inode */
7de9c6ee 381 ihold(path.dentry->d_inode);
2c48b9c4 382 path_put(&path);
39b65252 383 return file;
cc3808f8
AV
384 }
385
386 sock->file = file;
77d27200 387 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 388 file->private_data = sock;
28407630 389 return file;
39d8c1b6 390}
56b31d1c 391EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 392
56b31d1c 393static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
394{
395 struct file *newfile;
28407630
AV
396 int fd = get_unused_fd_flags(flags);
397 if (unlikely(fd < 0))
398 return fd;
39d8c1b6 399
aab174f0 400 newfile = sock_alloc_file(sock, flags, NULL);
28407630 401 if (likely(!IS_ERR(newfile))) {
39d8c1b6 402 fd_install(fd, newfile);
28407630
AV
403 return fd;
404 }
7cbe66b6 405
28407630
AV
406 put_unused_fd(fd);
407 return PTR_ERR(newfile);
1da177e4
LT
408}
409
406a3c63 410struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 411{
6cb153ca
BL
412 if (file->f_op == &socket_file_ops)
413 return file->private_data; /* set in sock_map_fd */
414
23bb80d2
ED
415 *err = -ENOTSOCK;
416 return NULL;
6cb153ca 417}
406a3c63 418EXPORT_SYMBOL(sock_from_file);
6cb153ca 419
1da177e4 420/**
c6d409cf 421 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
422 * @fd: file handle
423 * @err: pointer to an error code return
424 *
425 * The file handle passed in is locked and the socket it is bound
426 * too is returned. If an error occurs the err pointer is overwritten
427 * with a negative errno code and NULL is returned. The function checks
428 * for both invalid handles and passing a handle which is not a socket.
429 *
430 * On a success the socket object pointer is returned.
431 */
432
433struct socket *sockfd_lookup(int fd, int *err)
434{
435 struct file *file;
1da177e4
LT
436 struct socket *sock;
437
89bddce5
SH
438 file = fget(fd);
439 if (!file) {
1da177e4
LT
440 *err = -EBADF;
441 return NULL;
442 }
89bddce5 443
6cb153ca
BL
444 sock = sock_from_file(file, err);
445 if (!sock)
1da177e4 446 fput(file);
6cb153ca
BL
447 return sock;
448}
c6d409cf 449EXPORT_SYMBOL(sockfd_lookup);
1da177e4 450
6cb153ca
BL
451static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
452{
453 struct file *file;
454 struct socket *sock;
455
3672558c 456 *err = -EBADF;
6cb153ca
BL
457 file = fget_light(fd, fput_needed);
458 if (file) {
459 sock = sock_from_file(file, err);
460 if (sock)
461 return sock;
462 fput_light(file, *fput_needed);
1da177e4 463 }
6cb153ca 464 return NULL;
1da177e4
LT
465}
466
600e1779
MY
467#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
468#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
469#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
470static ssize_t sockfs_getxattr(struct dentry *dentry,
471 const char *name, void *value, size_t size)
472{
473 const char *proto_name;
474 size_t proto_size;
475 int error;
476
477 error = -ENODATA;
478 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
479 proto_name = dentry->d_name.name;
480 proto_size = strlen(proto_name);
481
482 if (value) {
483 error = -ERANGE;
484 if (proto_size + 1 > size)
485 goto out;
486
487 strncpy(value, proto_name, proto_size + 1);
488 }
489 error = proto_size + 1;
490 }
491
492out:
493 return error;
494}
495
496static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
497 size_t size)
498{
499 ssize_t len;
500 ssize_t used = 0;
501
502 len = security_inode_listsecurity(dentry->d_inode, buffer, size);
503 if (len < 0)
504 return len;
505 used += len;
506 if (buffer) {
507 if (size < used)
508 return -ERANGE;
509 buffer += len;
510 }
511
512 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
513 used += len;
514 if (buffer) {
515 if (size < used)
516 return -ERANGE;
517 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
518 buffer += len;
519 }
520
521 return used;
522}
523
524static const struct inode_operations sockfs_inode_ops = {
525 .getxattr = sockfs_getxattr,
526 .listxattr = sockfs_listxattr,
527};
528
1da177e4
LT
529/**
530 * sock_alloc - allocate a socket
89bddce5 531 *
1da177e4
LT
532 * Allocate a new inode and socket object. The two are bound together
533 * and initialised. The socket is then returned. If we are out of inodes
534 * NULL is returned.
535 */
536
537static struct socket *sock_alloc(void)
538{
89bddce5
SH
539 struct inode *inode;
540 struct socket *sock;
1da177e4 541
a209dfc7 542 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
543 if (!inode)
544 return NULL;
545
546 sock = SOCKET_I(inode);
547
29a020d3 548 kmemcheck_annotate_bitfield(sock, type);
85fe4025 549 inode->i_ino = get_next_ino();
89bddce5 550 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
551 inode->i_uid = current_fsuid();
552 inode->i_gid = current_fsgid();
600e1779 553 inode->i_op = &sockfs_inode_ops;
1da177e4 554
19e8d69c 555 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
556 return sock;
557}
558
559/*
560 * In theory you can't get an open on this inode, but /proc provides
561 * a back door. Remember to keep it shut otherwise you'll let the
562 * creepy crawlies in.
563 */
89bddce5 564
1da177e4
LT
565static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
566{
567 return -ENXIO;
568}
569
4b6f5d20 570const struct file_operations bad_sock_fops = {
1da177e4
LT
571 .owner = THIS_MODULE,
572 .open = sock_no_open,
6038f373 573 .llseek = noop_llseek,
1da177e4
LT
574};
575
576/**
577 * sock_release - close a socket
578 * @sock: socket to close
579 *
580 * The socket is released from the protocol stack if it has a release
581 * callback, and the inode is then released if the socket is bound to
89bddce5 582 * an inode not a file.
1da177e4 583 */
89bddce5 584
1da177e4
LT
585void sock_release(struct socket *sock)
586{
587 if (sock->ops) {
588 struct module *owner = sock->ops->owner;
589
590 sock->ops->release(sock);
591 sock->ops = NULL;
592 module_put(owner);
593 }
594
eaefd110 595 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 596 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 597
b09e786b
MP
598 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
599 return;
600
19e8d69c 601 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
602 if (!sock->file) {
603 iput(SOCK_INODE(sock));
604 return;
605 }
89bddce5 606 sock->file = NULL;
1da177e4 607}
c6d409cf 608EXPORT_SYMBOL(sock_release);
1da177e4 609
bf84a010 610void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 611{
2244d07b 612 *tx_flags = 0;
20d49473 613 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 614 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 615 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 616 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
617 if (sock_flag(sk, SOCK_WIFI_STATUS))
618 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
619}
620EXPORT_SYMBOL(sock_tx_timestamp);
621
228e548e
AB
622static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
623 struct msghdr *msg, size_t size)
1da177e4
LT
624{
625 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4
LT
626
627 si->sock = sock;
628 si->scm = NULL;
629 si->msg = msg;
630 si->size = size;
631
1da177e4
LT
632 return sock->ops->sendmsg(iocb, sock, msg, size);
633}
634
228e548e
AB
635static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
636 struct msghdr *msg, size_t size)
637{
638 int err = security_socket_sendmsg(sock, msg, size);
639
640 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
641}
642
1da177e4
LT
643int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
644{
645 struct kiocb iocb;
646 struct sock_iocb siocb;
647 int ret;
648
649 init_sync_kiocb(&iocb, NULL);
650 iocb.private = &siocb;
651 ret = __sock_sendmsg(&iocb, sock, msg, size);
652 if (-EIOCBQUEUED == ret)
653 ret = wait_on_sync_kiocb(&iocb);
654 return ret;
655}
c6d409cf 656EXPORT_SYMBOL(sock_sendmsg);
1da177e4 657
894dc24c 658static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
659{
660 struct kiocb iocb;
661 struct sock_iocb siocb;
662 int ret;
663
664 init_sync_kiocb(&iocb, NULL);
665 iocb.private = &siocb;
666 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
667 if (-EIOCBQUEUED == ret)
668 ret = wait_on_sync_kiocb(&iocb);
669 return ret;
670}
671
1da177e4
LT
672int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
673 struct kvec *vec, size_t num, size_t size)
674{
675 mm_segment_t oldfs = get_fs();
676 int result;
677
678 set_fs(KERNEL_DS);
679 /*
680 * the following is safe, since for compiler definitions of kvec and
681 * iovec are identical, yielding the same in-core layout and alignment
682 */
89bddce5 683 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
684 msg->msg_iovlen = num;
685 result = sock_sendmsg(sock, msg, size);
686 set_fs(oldfs);
687 return result;
688}
c6d409cf 689EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 690
92f37fd2
ED
691/*
692 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
693 */
694void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
695 struct sk_buff *skb)
696{
20d49473
PO
697 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
698 struct timespec ts[3];
699 int empty = 1;
700 struct skb_shared_hwtstamps *shhwtstamps =
701 skb_hwtstamps(skb);
702
703 /* Race occurred between timestamp enabling and packet
704 receiving. Fill in the current time for now. */
705 if (need_software_tstamp && skb->tstamp.tv64 == 0)
706 __net_timestamp(skb);
707
708 if (need_software_tstamp) {
709 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
710 struct timeval tv;
711 skb_get_timestamp(skb, &tv);
712 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
713 sizeof(tv), &tv);
714 } else {
842509b8 715 skb_get_timestampns(skb, &ts[0]);
20d49473 716 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 717 sizeof(ts[0]), &ts[0]);
20d49473
PO
718 }
719 }
720
721
722 memset(ts, 0, sizeof(ts));
6e94d1ef
DB
723 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
724 ktime_to_timespec_cond(skb->tstamp, ts + 0))
20d49473 725 empty = 0;
20d49473
PO
726 if (shhwtstamps) {
727 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
6e94d1ef 728 ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))
20d49473
PO
729 empty = 0;
730 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
6e94d1ef 731 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
20d49473 732 empty = 0;
92f37fd2 733 }
20d49473
PO
734 if (!empty)
735 put_cmsg(msg, SOL_SOCKET,
736 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 737}
7c81fd8b
ACM
738EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
739
6e3e939f
JB
740void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
741 struct sk_buff *skb)
742{
743 int ack;
744
745 if (!sock_flag(sk, SOCK_WIFI_STATUS))
746 return;
747 if (!skb->wifi_acked_valid)
748 return;
749
750 ack = skb->wifi_acked;
751
752 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
753}
754EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
755
11165f14 756static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
757 struct sk_buff *skb)
3b885787
NH
758{
759 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
760 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
761 sizeof(__u32), &skb->dropcount);
762}
763
/*
 * Emit the receive timestamp control message(s) and then the drop count
 * control message for @skb, in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	/* timestamps first ... */
	sock_recv_timestamp(msg, sk, skb);
	/* ... then the receive queue overflow counter */
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 771
a2e27255
ACM
772static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
773 struct msghdr *msg, size_t size, int flags)
1da177e4 774{
1da177e4
LT
775 struct sock_iocb *si = kiocb_to_siocb(iocb);
776
777 si->sock = sock;
778 si->scm = NULL;
779 si->msg = msg;
780 si->size = size;
781 si->flags = flags;
782
1da177e4
LT
783 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
784}
785
a2e27255
ACM
786static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
787 struct msghdr *msg, size_t size, int flags)
788{
789 int err = security_socket_recvmsg(sock, msg, size, flags);
790
791 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
792}
793
89bddce5 794int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
795 size_t size, int flags)
796{
797 struct kiocb iocb;
798 struct sock_iocb siocb;
799 int ret;
800
89bddce5 801 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
802 iocb.private = &siocb;
803 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
804 if (-EIOCBQUEUED == ret)
805 ret = wait_on_sync_kiocb(&iocb);
806 return ret;
807}
c6d409cf 808EXPORT_SYMBOL(sock_recvmsg);
1da177e4 809
a2e27255
ACM
810static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
811 size_t size, int flags)
812{
813 struct kiocb iocb;
814 struct sock_iocb siocb;
815 int ret;
816
817 init_sync_kiocb(&iocb, NULL);
818 iocb.private = &siocb;
819 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
820 if (-EIOCBQUEUED == ret)
821 ret = wait_on_sync_kiocb(&iocb);
822 return ret;
823}
824
c1249c0a
ML
825/**
826 * kernel_recvmsg - Receive a message from a socket (kernel space)
827 * @sock: The socket to receive the message from
828 * @msg: Received message
829 * @vec: Input s/g array for message data
830 * @num: Size of input s/g array
831 * @size: Number of bytes to read
832 * @flags: Message flags (MSG_DONTWAIT, etc...)
833 *
834 * On return the msg structure contains the scatter/gather array passed in the
835 * vec argument. The array is modified so that it consists of the unfilled
836 * portion of the original array.
837 *
838 * The returned value is the total number of bytes received, or an error.
839 */
89bddce5
SH
840int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
841 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
842{
843 mm_segment_t oldfs = get_fs();
844 int result;
845
846 set_fs(KERNEL_DS);
847 /*
848 * the following is safe, since for compiler definitions of kvec and
849 * iovec are identical, yielding the same in-core layout and alignment
850 */
89bddce5 851 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
852 result = sock_recvmsg(sock, msg, size, flags);
853 set_fs(oldfs);
854 return result;
855}
c6d409cf 856EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 857
ce1d4d3e
CH
858static ssize_t sock_sendpage(struct file *file, struct page *page,
859 int offset, size_t size, loff_t *ppos, int more)
1da177e4 860{
1da177e4
LT
861 struct socket *sock;
862 int flags;
863
ce1d4d3e
CH
864 sock = file->private_data;
865
35f9c09f
ED
866 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
867 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
868 flags |= more;
ce1d4d3e 869
e6949583 870 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 871}
1da177e4 872
9c55e01c 873static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 874 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
875 unsigned int flags)
876{
877 struct socket *sock = file->private_data;
878
997b37da
RDC
879 if (unlikely(!sock->ops->splice_read))
880 return -EINVAL;
881
9c55e01c
JA
882 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
883}
884
ce1d4d3e 885static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 886 struct sock_iocb *siocb)
ce1d4d3e 887{
d29c445b
KO
888 if (!is_sync_kiocb(iocb))
889 BUG();
1da177e4 890
ce1d4d3e 891 siocb->kiocb = iocb;
ce1d4d3e
CH
892 iocb->private = siocb;
893 return siocb;
1da177e4
LT
894}
895
ce1d4d3e 896static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
897 struct file *file, const struct iovec *iov,
898 unsigned long nr_segs)
ce1d4d3e
CH
899{
900 struct socket *sock = file->private_data;
901 size_t size = 0;
902 int i;
1da177e4 903
89bddce5
SH
904 for (i = 0; i < nr_segs; i++)
905 size += iov[i].iov_len;
1da177e4 906
ce1d4d3e
CH
907 msg->msg_name = NULL;
908 msg->msg_namelen = 0;
909 msg->msg_control = NULL;
910 msg->msg_controllen = 0;
89bddce5 911 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
912 msg->msg_iovlen = nr_segs;
913 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
914
915 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
916}
917
027445c3
BP
918static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
919 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
920{
921 struct sock_iocb siocb, *x;
922
1da177e4
LT
923 if (pos != 0)
924 return -ESPIPE;
027445c3 925
73a7075e 926 if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
1da177e4
LT
927 return 0;
928
027445c3
BP
929
930 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
931 if (!x)
932 return -ENOMEM;
027445c3 933 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
934}
935
ce1d4d3e 936static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
937 struct file *file, const struct iovec *iov,
938 unsigned long nr_segs)
1da177e4 939{
ce1d4d3e
CH
940 struct socket *sock = file->private_data;
941 size_t size = 0;
942 int i;
1da177e4 943
89bddce5
SH
944 for (i = 0; i < nr_segs; i++)
945 size += iov[i].iov_len;
1da177e4 946
ce1d4d3e
CH
947 msg->msg_name = NULL;
948 msg->msg_namelen = 0;
949 msg->msg_control = NULL;
950 msg->msg_controllen = 0;
89bddce5 951 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
952 msg->msg_iovlen = nr_segs;
953 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
954 if (sock->type == SOCK_SEQPACKET)
955 msg->msg_flags |= MSG_EOR;
1da177e4 956
ce1d4d3e 957 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
958}
959
027445c3
BP
960static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
961 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
962{
963 struct sock_iocb siocb, *x;
1da177e4 964
ce1d4d3e
CH
965 if (pos != 0)
966 return -ESPIPE;
027445c3 967
027445c3 968 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
969 if (!x)
970 return -ENOMEM;
1da177e4 971
027445c3 972 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
973}
974
1da177e4
LT
975/*
976 * Atomic setting of ioctl hooks to avoid race
977 * with module unload.
978 */
979
4a3e2f71 980static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 981static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 982
881d966b 983void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 984{
4a3e2f71 985 mutex_lock(&br_ioctl_mutex);
1da177e4 986 br_ioctl_hook = hook;
4a3e2f71 987 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
988}
989EXPORT_SYMBOL(brioctl_set);
990
4a3e2f71 991static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 992static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 993
881d966b 994void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 995{
4a3e2f71 996 mutex_lock(&vlan_ioctl_mutex);
1da177e4 997 vlan_ioctl_hook = hook;
4a3e2f71 998 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
999}
1000EXPORT_SYMBOL(vlan_ioctl_set);
1001
4a3e2f71 1002static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1003static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1004
89bddce5 1005void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1006{
4a3e2f71 1007 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1008 dlci_ioctl_hook = hook;
4a3e2f71 1009 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1010}
1011EXPORT_SYMBOL(dlci_ioctl_set);
1012
6b96018b
AB
1013static long sock_do_ioctl(struct net *net, struct socket *sock,
1014 unsigned int cmd, unsigned long arg)
1015{
1016 int err;
1017 void __user *argp = (void __user *)arg;
1018
1019 err = sock->ops->ioctl(sock, cmd, arg);
1020
1021 /*
1022 * If this ioctl is unknown try to hand it down
1023 * to the NIC driver.
1024 */
1025 if (err == -ENOIOCTLCMD)
1026 err = dev_ioctl(net, cmd, argp);
1027
1028 return err;
1029}
1030
1da177e4
LT
1031/*
1032 * With an ioctl, arg may well be a user mode pointer, but we don't know
1033 * what to do with it - that's up to the protocol still.
1034 */
1035
1036static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1037{
1038 struct socket *sock;
881d966b 1039 struct sock *sk;
1da177e4
LT
1040 void __user *argp = (void __user *)arg;
1041 int pid, err;
881d966b 1042 struct net *net;
1da177e4 1043
b69aee04 1044 sock = file->private_data;
881d966b 1045 sk = sock->sk;
3b1e0a65 1046 net = sock_net(sk);
1da177e4 1047 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1048 err = dev_ioctl(net, cmd, argp);
1da177e4 1049 } else
3d23e349 1050#ifdef CONFIG_WEXT_CORE
1da177e4 1051 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1052 err = dev_ioctl(net, cmd, argp);
1da177e4 1053 } else
3d23e349 1054#endif
89bddce5 1055 switch (cmd) {
1da177e4
LT
1056 case FIOSETOWN:
1057 case SIOCSPGRP:
1058 err = -EFAULT;
1059 if (get_user(pid, (int __user *)argp))
1060 break;
1061 err = f_setown(sock->file, pid, 1);
1062 break;
1063 case FIOGETOWN:
1064 case SIOCGPGRP:
609d7fa9 1065 err = put_user(f_getown(sock->file),
89bddce5 1066 (int __user *)argp);
1da177e4
LT
1067 break;
1068 case SIOCGIFBR:
1069 case SIOCSIFBR:
1070 case SIOCBRADDBR:
1071 case SIOCBRDELBR:
1072 err = -ENOPKG;
1073 if (!br_ioctl_hook)
1074 request_module("bridge");
1075
4a3e2f71 1076 mutex_lock(&br_ioctl_mutex);
89bddce5 1077 if (br_ioctl_hook)
881d966b 1078 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1079 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1080 break;
1081 case SIOCGIFVLAN:
1082 case SIOCSIFVLAN:
1083 err = -ENOPKG;
1084 if (!vlan_ioctl_hook)
1085 request_module("8021q");
1086
4a3e2f71 1087 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1088 if (vlan_ioctl_hook)
881d966b 1089 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1090 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1091 break;
1da177e4
LT
1092 case SIOCADDDLCI:
1093 case SIOCDELDLCI:
1094 err = -ENOPKG;
1095 if (!dlci_ioctl_hook)
1096 request_module("dlci");
1097
7512cbf6
PE
1098 mutex_lock(&dlci_ioctl_mutex);
1099 if (dlci_ioctl_hook)
1da177e4 1100 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1101 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1102 break;
1103 default:
6b96018b 1104 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1105 break;
89bddce5 1106 }
1da177e4
LT
1107 return err;
1108}
1109
1110int sock_create_lite(int family, int type, int protocol, struct socket **res)
1111{
1112 int err;
1113 struct socket *sock = NULL;
89bddce5 1114
1da177e4
LT
1115 err = security_socket_create(family, type, protocol, 1);
1116 if (err)
1117 goto out;
1118
1119 sock = sock_alloc();
1120 if (!sock) {
1121 err = -ENOMEM;
1122 goto out;
1123 }
1124
1da177e4 1125 sock->type = type;
7420ed23
VY
1126 err = security_socket_post_create(sock, family, type, protocol, 1);
1127 if (err)
1128 goto out_release;
1129
1da177e4
LT
1130out:
1131 *res = sock;
1132 return err;
7420ed23
VY
1133out_release:
1134 sock_release(sock);
1135 sock = NULL;
1136 goto out;
1da177e4 1137}
c6d409cf 1138EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1139
1140/* No kernel lock held - perfect */
89bddce5 1141static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1142{
cbf55001 1143 unsigned int busy_flag = 0;
1da177e4
LT
1144 struct socket *sock;
1145
1146 /*
89bddce5 1147 * We can't return errors to poll, so it's either yes or no.
1da177e4 1148 */
b69aee04 1149 sock = file->private_data;
2d48d67f 1150
cbf55001 1151 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1152 /* this socket can poll_ll so tell the system call */
cbf55001 1153 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1154
1155 /* once, only if requested by syscall */
cbf55001
ET
1156 if (wait && (wait->_key & POLL_BUSY_LOOP))
1157 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1158 }
1159
cbf55001 1160 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1161}
1162
89bddce5 1163static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1164{
b69aee04 1165 struct socket *sock = file->private_data;
1da177e4
LT
1166
1167 return sock->ops->mmap(file, sock, vma);
1168}
1169
/* Release the socket embedded in @inode; always returns 0. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1175
1176/*
1177 * Update the socket async list
1178 *
1179 * Fasync_list locking strategy.
1180 *
1181 * 1. fasync_list is modified only under process context socket lock
1182 * i.e. under semaphore.
1183 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1184 * or under socket lock
1da177e4
LT
1185 */
1186
1187static int sock_fasync(int fd, struct file *filp, int on)
1188{
989a2979
ED
1189 struct socket *sock = filp->private_data;
1190 struct sock *sk = sock->sk;
eaefd110 1191 struct socket_wq *wq;
1da177e4 1192
989a2979 1193 if (sk == NULL)
1da177e4 1194 return -EINVAL;
1da177e4
LT
1195
1196 lock_sock(sk);
eaefd110
ED
1197 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1198 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1199
eaefd110 1200 if (!wq->fasync_list)
989a2979
ED
1201 sock_reset_flag(sk, SOCK_FASYNC);
1202 else
bcdce719 1203 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1204
989a2979 1205 release_sock(sk);
1da177e4
LT
1206 return 0;
1207}
1208
43815482 1209/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1210
1211int sock_wake_async(struct socket *sock, int how, int band)
1212{
43815482
ED
1213 struct socket_wq *wq;
1214
1215 if (!sock)
1216 return -1;
1217 rcu_read_lock();
1218 wq = rcu_dereference(sock->wq);
1219 if (!wq || !wq->fasync_list) {
1220 rcu_read_unlock();
1da177e4 1221 return -1;
43815482 1222 }
89bddce5 1223 switch (how) {
8d8ad9d7 1224 case SOCK_WAKE_WAITD:
1da177e4
LT
1225 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1226 break;
1227 goto call_kill;
8d8ad9d7 1228 case SOCK_WAKE_SPACE:
1da177e4
LT
1229 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1230 break;
1231 /* fall through */
8d8ad9d7 1232 case SOCK_WAKE_IO:
89bddce5 1233call_kill:
43815482 1234 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1235 break;
8d8ad9d7 1236 case SOCK_WAKE_URG:
43815482 1237 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1238 }
43815482 1239 rcu_read_unlock();
1da177e4
LT
1240 return 0;
1241}
c6d409cf 1242EXPORT_SYMBOL(sock_wake_async);
1da177e4 1243
721db93a 1244int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1245 struct socket **res, int kern)
1da177e4
LT
1246{
1247 int err;
1248 struct socket *sock;
55737fda 1249 const struct net_proto_family *pf;
1da177e4
LT
1250
1251 /*
89bddce5 1252 * Check protocol is in range
1da177e4
LT
1253 */
1254 if (family < 0 || family >= NPROTO)
1255 return -EAFNOSUPPORT;
1256 if (type < 0 || type >= SOCK_MAX)
1257 return -EINVAL;
1258
1259 /* Compatibility.
1260
1261 This uglymoron is moved from INET layer to here to avoid
1262 deadlock in module load.
1263 */
1264 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1265 static int warned;
1da177e4
LT
1266 if (!warned) {
1267 warned = 1;
3410f22e
YY
1268 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1269 current->comm);
1da177e4
LT
1270 }
1271 family = PF_PACKET;
1272 }
1273
1274 err = security_socket_create(family, type, protocol, kern);
1275 if (err)
1276 return err;
89bddce5 1277
55737fda
SH
1278 /*
1279 * Allocate the socket and allow the family to set things up. if
1280 * the protocol is 0, the family is instructed to select an appropriate
1281 * default.
1282 */
1283 sock = sock_alloc();
1284 if (!sock) {
e87cc472 1285 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1286 return -ENFILE; /* Not exactly a match, but its the
1287 closest posix thing */
1288 }
1289
1290 sock->type = type;
1291
95a5afca 1292#ifdef CONFIG_MODULES
89bddce5
SH
1293 /* Attempt to load a protocol module if the find failed.
1294 *
1295 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1296 * requested real, full-featured networking support upon configuration.
1297 * Otherwise module support will break!
1298 */
190683a9 1299 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1300 request_module("net-pf-%d", family);
1da177e4
LT
1301#endif
1302
55737fda
SH
1303 rcu_read_lock();
1304 pf = rcu_dereference(net_families[family]);
1305 err = -EAFNOSUPPORT;
1306 if (!pf)
1307 goto out_release;
1da177e4
LT
1308
1309 /*
1310 * We will call the ->create function, that possibly is in a loadable
1311 * module, so we have to bump that loadable module refcnt first.
1312 */
55737fda 1313 if (!try_module_get(pf->owner))
1da177e4
LT
1314 goto out_release;
1315
55737fda
SH
1316 /* Now protected by module ref count */
1317 rcu_read_unlock();
1318
3f378b68 1319 err = pf->create(net, sock, protocol, kern);
55737fda 1320 if (err < 0)
1da177e4 1321 goto out_module_put;
a79af59e 1322
1da177e4
LT
1323 /*
1324 * Now to bump the refcnt of the [loadable] module that owns this
1325 * socket at sock_release time we decrement its refcnt.
1326 */
55737fda
SH
1327 if (!try_module_get(sock->ops->owner))
1328 goto out_module_busy;
1329
1da177e4
LT
1330 /*
1331 * Now that we're done with the ->create function, the [loadable]
1332 * module can have its refcnt decremented
1333 */
55737fda 1334 module_put(pf->owner);
7420ed23
VY
1335 err = security_socket_post_create(sock, family, type, protocol, kern);
1336 if (err)
3b185525 1337 goto out_sock_release;
55737fda 1338 *res = sock;
1da177e4 1339
55737fda
SH
1340 return 0;
1341
1342out_module_busy:
1343 err = -EAFNOSUPPORT;
1da177e4 1344out_module_put:
55737fda
SH
1345 sock->ops = NULL;
1346 module_put(pf->owner);
1347out_sock_release:
1da177e4 1348 sock_release(sock);
55737fda
SH
1349 return err;
1350
1351out_release:
1352 rcu_read_unlock();
1353 goto out_sock_release;
1da177e4 1354}
721db93a 1355EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1356
1357int sock_create(int family, int type, int protocol, struct socket **res)
1358{
1b8d7ae4 1359 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1360}
c6d409cf 1361EXPORT_SYMBOL(sock_create);
1da177e4
LT
1362
1363int sock_create_kern(int family, int type, int protocol, struct socket **res)
1364{
1b8d7ae4 1365 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1366}
c6d409cf 1367EXPORT_SYMBOL(sock_create_kern);
1da177e4 1368
3e0fa65f 1369SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1370{
1371 int retval;
1372 struct socket *sock;
a677a039
UD
1373 int flags;
1374
e38b36f3
UD
1375 /* Check the SOCK_* constants for consistency. */
1376 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1377 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1378 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1379 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1380
a677a039 1381 flags = type & ~SOCK_TYPE_MASK;
77d27200 1382 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1383 return -EINVAL;
1384 type &= SOCK_TYPE_MASK;
1da177e4 1385
aaca0bdc
UD
1386 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1387 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1388
1da177e4
LT
1389 retval = sock_create(family, type, protocol, &sock);
1390 if (retval < 0)
1391 goto out;
1392
77d27200 1393 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1394 if (retval < 0)
1395 goto out_release;
1396
1397out:
1398 /* It may be already another descriptor 8) Not kernel problem. */
1399 return retval;
1400
1401out_release:
1402 sock_release(sock);
1403 return retval;
1404}
1405
1406/*
1407 * Create a pair of connected sockets.
1408 */
1409
3e0fa65f
HC
1410SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1411 int __user *, usockvec)
1da177e4
LT
1412{
1413 struct socket *sock1, *sock2;
1414 int fd1, fd2, err;
db349509 1415 struct file *newfile1, *newfile2;
a677a039
UD
1416 int flags;
1417
1418 flags = type & ~SOCK_TYPE_MASK;
77d27200 1419 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1420 return -EINVAL;
1421 type &= SOCK_TYPE_MASK;
1da177e4 1422
aaca0bdc
UD
1423 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1424 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1425
1da177e4
LT
1426 /*
1427 * Obtain the first socket and check if the underlying protocol
1428 * supports the socketpair call.
1429 */
1430
1431 err = sock_create(family, type, protocol, &sock1);
1432 if (err < 0)
1433 goto out;
1434
1435 err = sock_create(family, type, protocol, &sock2);
1436 if (err < 0)
1437 goto out_release_1;
1438
1439 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1440 if (err < 0)
1da177e4
LT
1441 goto out_release_both;
1442
28407630 1443 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1444 if (unlikely(fd1 < 0)) {
1445 err = fd1;
db349509 1446 goto out_release_both;
bf3c23d1 1447 }
d73aa286 1448
28407630 1449 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1450 if (unlikely(fd2 < 0)) {
1451 err = fd2;
d73aa286 1452 goto out_put_unused_1;
28407630
AV
1453 }
1454
aab174f0 1455 newfile1 = sock_alloc_file(sock1, flags, NULL);
28407630
AV
1456 if (unlikely(IS_ERR(newfile1))) {
1457 err = PTR_ERR(newfile1);
d73aa286 1458 goto out_put_unused_both;
28407630
AV
1459 }
1460
aab174f0 1461 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1462 if (IS_ERR(newfile2)) {
1463 err = PTR_ERR(newfile2);
d73aa286 1464 goto out_fput_1;
db349509
AV
1465 }
1466
d73aa286
YD
1467 err = put_user(fd1, &usockvec[0]);
1468 if (err)
1469 goto out_fput_both;
1470
1471 err = put_user(fd2, &usockvec[1]);
1472 if (err)
1473 goto out_fput_both;
1474
157cf649 1475 audit_fd_pair(fd1, fd2);
d73aa286 1476
db349509
AV
1477 fd_install(fd1, newfile1);
1478 fd_install(fd2, newfile2);
1da177e4
LT
1479 /* fd1 and fd2 may be already another descriptors.
1480 * Not kernel problem.
1481 */
1482
d73aa286 1483 return 0;
1da177e4 1484
d73aa286
YD
1485out_fput_both:
1486 fput(newfile2);
1487 fput(newfile1);
1488 put_unused_fd(fd2);
1489 put_unused_fd(fd1);
1490 goto out;
1491
1492out_fput_1:
1493 fput(newfile1);
1494 put_unused_fd(fd2);
1495 put_unused_fd(fd1);
1496 sock_release(sock2);
1497 goto out;
1da177e4 1498
d73aa286
YD
1499out_put_unused_both:
1500 put_unused_fd(fd2);
1501out_put_unused_1:
1502 put_unused_fd(fd1);
1da177e4 1503out_release_both:
89bddce5 1504 sock_release(sock2);
1da177e4 1505out_release_1:
89bddce5 1506 sock_release(sock1);
1da177e4
LT
1507out:
1508 return err;
1509}
1510
1da177e4
LT
1511/*
1512 * Bind a name to a socket. Nothing much to do here since it's
1513 * the protocol's responsibility to handle the local address.
1514 *
1515 * We move the socket address to kernel space before we call
1516 * the protocol layer (having also checked the address is ok).
1517 */
1518
20f37034 1519SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1520{
1521 struct socket *sock;
230b1839 1522 struct sockaddr_storage address;
6cb153ca 1523 int err, fput_needed;
1da177e4 1524
89bddce5 1525 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1526 if (sock) {
43db362d 1527 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1528 if (err >= 0) {
1529 err = security_socket_bind(sock,
230b1839 1530 (struct sockaddr *)&address,
89bddce5 1531 addrlen);
6cb153ca
BL
1532 if (!err)
1533 err = sock->ops->bind(sock,
89bddce5 1534 (struct sockaddr *)
230b1839 1535 &address, addrlen);
1da177e4 1536 }
6cb153ca 1537 fput_light(sock->file, fput_needed);
89bddce5 1538 }
1da177e4
LT
1539 return err;
1540}
1541
1da177e4
LT
1542/*
1543 * Perform a listen. Basically, we allow the protocol to do anything
1544 * necessary for a listen, and if that works, we mark the socket as
1545 * ready for listening.
1546 */
1547
3e0fa65f 1548SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1549{
1550 struct socket *sock;
6cb153ca 1551 int err, fput_needed;
b8e1f9b5 1552 int somaxconn;
89bddce5
SH
1553
1554 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1555 if (sock) {
8efa6e93 1556 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1557 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1558 backlog = somaxconn;
1da177e4
LT
1559
1560 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1561 if (!err)
1562 err = sock->ops->listen(sock, backlog);
1da177e4 1563
6cb153ca 1564 fput_light(sock->file, fput_needed);
1da177e4
LT
1565 }
1566 return err;
1567}
1568
1da177e4
LT
1569/*
1570 * For accept, we attempt to create a new socket, set up the link
1571 * with the client, wake up the client, then return the new
1572 * connected fd. We collect the address of the connector in kernel
1573 * space and move it to user at the very end. This is unclean because
1574 * we open the socket then return an error.
1575 *
1576 * 1003.1g adds the ability to recvmsg() to query connection pending
1577 * status to recvmsg. We need to add that support in a way that's
1578 * clean when we restructure accept also.
1579 */
1580
20f37034
HC
1581SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1582 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1583{
1584 struct socket *sock, *newsock;
39d8c1b6 1585 struct file *newfile;
6cb153ca 1586 int err, len, newfd, fput_needed;
230b1839 1587 struct sockaddr_storage address;
1da177e4 1588
77d27200 1589 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1590 return -EINVAL;
1591
1592 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1593 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1594
6cb153ca 1595 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1596 if (!sock)
1597 goto out;
1598
1599 err = -ENFILE;
c6d409cf
ED
1600 newsock = sock_alloc();
1601 if (!newsock)
1da177e4
LT
1602 goto out_put;
1603
1604 newsock->type = sock->type;
1605 newsock->ops = sock->ops;
1606
1da177e4
LT
1607 /*
1608 * We don't need try_module_get here, as the listening socket (sock)
1609 * has the protocol module (sock->ops->owner) held.
1610 */
1611 __module_get(newsock->ops->owner);
1612
28407630 1613 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1614 if (unlikely(newfd < 0)) {
1615 err = newfd;
9a1875e6
DM
1616 sock_release(newsock);
1617 goto out_put;
39d8c1b6 1618 }
aab174f0 1619 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
28407630
AV
1620 if (unlikely(IS_ERR(newfile))) {
1621 err = PTR_ERR(newfile);
1622 put_unused_fd(newfd);
1623 sock_release(newsock);
1624 goto out_put;
1625 }
39d8c1b6 1626
a79af59e
FF
1627 err = security_socket_accept(sock, newsock);
1628 if (err)
39d8c1b6 1629 goto out_fd;
a79af59e 1630
1da177e4
LT
1631 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1632 if (err < 0)
39d8c1b6 1633 goto out_fd;
1da177e4
LT
1634
1635 if (upeer_sockaddr) {
230b1839 1636 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1637 &len, 2) < 0) {
1da177e4 1638 err = -ECONNABORTED;
39d8c1b6 1639 goto out_fd;
1da177e4 1640 }
43db362d 1641 err = move_addr_to_user(&address,
230b1839 1642 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1643 if (err < 0)
39d8c1b6 1644 goto out_fd;
1da177e4
LT
1645 }
1646
1647 /* File flags are not inherited via accept() unlike another OSes. */
1648
39d8c1b6
DM
1649 fd_install(newfd, newfile);
1650 err = newfd;
1da177e4 1651
1da177e4 1652out_put:
6cb153ca 1653 fput_light(sock->file, fput_needed);
1da177e4
LT
1654out:
1655 return err;
39d8c1b6 1656out_fd:
9606a216 1657 fput(newfile);
39d8c1b6 1658 put_unused_fd(newfd);
1da177e4
LT
1659 goto out_put;
1660}
1661
20f37034
HC
1662SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1663 int __user *, upeer_addrlen)
aaca0bdc 1664{
de11defe 1665 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1666}
1667
1da177e4
LT
1668/*
1669 * Attempt to connect to a socket with the server address. The address
1670 * is in user space so we verify it is OK and move it to kernel space.
1671 *
1672 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1673 * break bindings
1674 *
1675 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1676 * other SEQPACKET protocols that take time to connect() as it doesn't
1677 * include the -EINPROGRESS status for such sockets.
1678 */
1679
20f37034
HC
1680SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1681 int, addrlen)
1da177e4
LT
1682{
1683 struct socket *sock;
230b1839 1684 struct sockaddr_storage address;
6cb153ca 1685 int err, fput_needed;
1da177e4 1686
6cb153ca 1687 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1688 if (!sock)
1689 goto out;
43db362d 1690 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1691 if (err < 0)
1692 goto out_put;
1693
89bddce5 1694 err =
230b1839 1695 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1696 if (err)
1697 goto out_put;
1698
230b1839 1699 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1700 sock->file->f_flags);
1701out_put:
6cb153ca 1702 fput_light(sock->file, fput_needed);
1da177e4
LT
1703out:
1704 return err;
1705}
1706
1707/*
1708 * Get the local address ('name') of a socket object. Move the obtained
1709 * name to user space.
1710 */
1711
20f37034
HC
1712SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1713 int __user *, usockaddr_len)
1da177e4
LT
1714{
1715 struct socket *sock;
230b1839 1716 struct sockaddr_storage address;
6cb153ca 1717 int len, err, fput_needed;
89bddce5 1718
6cb153ca 1719 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1720 if (!sock)
1721 goto out;
1722
1723 err = security_socket_getsockname(sock);
1724 if (err)
1725 goto out_put;
1726
230b1839 1727 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1728 if (err)
1729 goto out_put;
43db362d 1730 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1731
1732out_put:
6cb153ca 1733 fput_light(sock->file, fput_needed);
1da177e4
LT
1734out:
1735 return err;
1736}
1737
1738/*
1739 * Get the remote address ('name') of a socket object. Move the obtained
1740 * name to user space.
1741 */
1742
20f37034
HC
1743SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1744 int __user *, usockaddr_len)
1da177e4
LT
1745{
1746 struct socket *sock;
230b1839 1747 struct sockaddr_storage address;
6cb153ca 1748 int len, err, fput_needed;
1da177e4 1749
89bddce5
SH
1750 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1751 if (sock != NULL) {
1da177e4
LT
1752 err = security_socket_getpeername(sock);
1753 if (err) {
6cb153ca 1754 fput_light(sock->file, fput_needed);
1da177e4
LT
1755 return err;
1756 }
1757
89bddce5 1758 err =
230b1839 1759 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1760 1);
1da177e4 1761 if (!err)
43db362d 1762 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1763 usockaddr_len);
6cb153ca 1764 fput_light(sock->file, fput_needed);
1da177e4
LT
1765 }
1766 return err;
1767}
1768
1769/*
1770 * Send a datagram to a given address. We move the address into kernel
1771 * space and check the user space data area is readable before invoking
1772 * the protocol.
1773 */
1774
3e0fa65f 1775SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1776 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1777 int, addr_len)
1da177e4
LT
1778{
1779 struct socket *sock;
230b1839 1780 struct sockaddr_storage address;
1da177e4
LT
1781 int err;
1782 struct msghdr msg;
1783 struct iovec iov;
6cb153ca 1784 int fput_needed;
6cb153ca 1785
253eacc0
LT
1786 if (len > INT_MAX)
1787 len = INT_MAX;
de0fa95c
PE
1788 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1789 if (!sock)
4387ff75 1790 goto out;
6cb153ca 1791
89bddce5
SH
1792 iov.iov_base = buff;
1793 iov.iov_len = len;
1794 msg.msg_name = NULL;
1795 msg.msg_iov = &iov;
1796 msg.msg_iovlen = 1;
1797 msg.msg_control = NULL;
1798 msg.msg_controllen = 0;
1799 msg.msg_namelen = 0;
6cb153ca 1800 if (addr) {
43db362d 1801 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1802 if (err < 0)
1803 goto out_put;
230b1839 1804 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1805 msg.msg_namelen = addr_len;
1da177e4
LT
1806 }
1807 if (sock->file->f_flags & O_NONBLOCK)
1808 flags |= MSG_DONTWAIT;
1809 msg.msg_flags = flags;
1810 err = sock_sendmsg(sock, &msg, len);
1811
89bddce5 1812out_put:
de0fa95c 1813 fput_light(sock->file, fput_needed);
4387ff75 1814out:
1da177e4
LT
1815 return err;
1816}
1817
1818/*
89bddce5 1819 * Send a datagram down a socket.
1da177e4
LT
1820 */
1821
3e0fa65f 1822SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1823 unsigned int, flags)
1da177e4
LT
1824{
1825 return sys_sendto(fd, buff, len, flags, NULL, 0);
1826}
1827
1828/*
89bddce5 1829 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1830 * sender. We verify the buffers are writable and if needed move the
1831 * sender address from kernel to user space.
1832 */
1833
3e0fa65f 1834SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1835 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1836 int __user *, addr_len)
1da177e4
LT
1837{
1838 struct socket *sock;
1839 struct iovec iov;
1840 struct msghdr msg;
230b1839 1841 struct sockaddr_storage address;
89bddce5 1842 int err, err2;
6cb153ca
BL
1843 int fput_needed;
1844
253eacc0
LT
1845 if (size > INT_MAX)
1846 size = INT_MAX;
de0fa95c 1847 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1848 if (!sock)
de0fa95c 1849 goto out;
1da177e4 1850
89bddce5
SH
1851 msg.msg_control = NULL;
1852 msg.msg_controllen = 0;
1853 msg.msg_iovlen = 1;
1854 msg.msg_iov = &iov;
1855 iov.iov_len = size;
1856 iov.iov_base = ubuf;
f3d33426
HFS
1857 /* Save some cycles and don't copy the address if not needed */
1858 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1859 /* We assume all kernel code knows the size of sockaddr_storage */
1860 msg.msg_namelen = 0;
1da177e4
LT
1861 if (sock->file->f_flags & O_NONBLOCK)
1862 flags |= MSG_DONTWAIT;
89bddce5 1863 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1864
89bddce5 1865 if (err >= 0 && addr != NULL) {
43db362d 1866 err2 = move_addr_to_user(&address,
230b1839 1867 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1868 if (err2 < 0)
1869 err = err2;
1da177e4 1870 }
de0fa95c
PE
1871
1872 fput_light(sock->file, fput_needed);
4387ff75 1873out:
1da177e4
LT
1874 return err;
1875}
1876
1877/*
89bddce5 1878 * Receive a datagram from a socket.
1da177e4
LT
1879 */
1880
89bddce5 1881asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
95c96174 1882 unsigned int flags)
1da177e4
LT
1883{
1884 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1885}
1886
1887/*
1888 * Set a socket option. Because we don't know the option lengths we have
1889 * to pass the user mode parameter for the protocols to sort out.
1890 */
1891
20f37034
HC
1892SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1893 char __user *, optval, int, optlen)
1da177e4 1894{
6cb153ca 1895 int err, fput_needed;
1da177e4
LT
1896 struct socket *sock;
1897
1898 if (optlen < 0)
1899 return -EINVAL;
89bddce5
SH
1900
1901 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1902 if (sock != NULL) {
1903 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1904 if (err)
1905 goto out_put;
1da177e4
LT
1906
1907 if (level == SOL_SOCKET)
89bddce5
SH
1908 err =
1909 sock_setsockopt(sock, level, optname, optval,
1910 optlen);
1da177e4 1911 else
89bddce5
SH
1912 err =
1913 sock->ops->setsockopt(sock, level, optname, optval,
1914 optlen);
6cb153ca
BL
1915out_put:
1916 fput_light(sock->file, fput_needed);
1da177e4
LT
1917 }
1918 return err;
1919}
1920
1921/*
1922 * Get a socket option. Because we don't know the option lengths we have
1923 * to pass a user mode parameter for the protocols to sort out.
1924 */
1925
20f37034
HC
1926SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1927 char __user *, optval, int __user *, optlen)
1da177e4 1928{
6cb153ca 1929 int err, fput_needed;
1da177e4
LT
1930 struct socket *sock;
1931
89bddce5
SH
1932 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1933 if (sock != NULL) {
6cb153ca
BL
1934 err = security_socket_getsockopt(sock, level, optname);
1935 if (err)
1936 goto out_put;
1da177e4
LT
1937
1938 if (level == SOL_SOCKET)
89bddce5
SH
1939 err =
1940 sock_getsockopt(sock, level, optname, optval,
1941 optlen);
1da177e4 1942 else
89bddce5
SH
1943 err =
1944 sock->ops->getsockopt(sock, level, optname, optval,
1945 optlen);
6cb153ca
BL
1946out_put:
1947 fput_light(sock->file, fput_needed);
1da177e4
LT
1948 }
1949 return err;
1950}
1951
1da177e4
LT
1952/*
1953 * Shutdown a socket.
1954 */
1955
754fe8d2 1956SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1957{
6cb153ca 1958 int err, fput_needed;
1da177e4
LT
1959 struct socket *sock;
1960
89bddce5
SH
1961 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1962 if (sock != NULL) {
1da177e4 1963 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1964 if (!err)
1965 err = sock->ops->shutdown(sock, how);
1966 fput_light(sock->file, fput_needed);
1da177e4
LT
1967 }
1968 return err;
1969}
1970
89bddce5 1971/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1972 * fields which are the same type (int / unsigned) on our platforms.
1973 */
/*
 * These expand in the caller's scope: they use a local named "flags"
 * and, for an argument "msg", a second pointer named "msg_compat".
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1977
c71d8ebe
TH
1978struct used_address {
1979 struct sockaddr_storage name;
1980 unsigned int name_len;
1981};
1982
1661bf36
DC
1983static int copy_msghdr_from_user(struct msghdr *kmsg,
1984 struct msghdr __user *umsg)
1985{
1986 if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
1987 return -EFAULT;
1988 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1989 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
1661bf36
DC
1990 return 0;
1991}
1992
a7526eb5 1993static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1994 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1995 struct used_address *used_address)
1da177e4 1996{
89bddce5
SH
1997 struct compat_msghdr __user *msg_compat =
1998 (struct compat_msghdr __user *)msg;
230b1839 1999 struct sockaddr_storage address;
1da177e4 2000 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2001 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
2002 __attribute__ ((aligned(sizeof(__kernel_size_t))));
2003 /* 20 is size of ipv6_pktinfo */
1da177e4 2004 unsigned char *ctl_buf = ctl;
a74e9106 2005 int err, ctl_len, total_len;
89bddce5 2006
1da177e4
LT
2007 err = -EFAULT;
2008 if (MSG_CMSG_COMPAT & flags) {
228e548e 2009 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2010 return -EFAULT;
1661bf36
DC
2011 } else {
2012 err = copy_msghdr_from_user(msg_sys, msg);
2013 if (err)
2014 return err;
2015 }
1da177e4 2016
228e548e 2017 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2018 err = -EMSGSIZE;
2019 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2020 goto out;
2021 err = -ENOMEM;
2022 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2023 GFP_KERNEL);
1da177e4 2024 if (!iov)
228e548e 2025 goto out;
1da177e4
LT
2026 }
2027
2028 /* This will also move the address data into kernel space */
2029 if (MSG_CMSG_COMPAT & flags) {
43db362d 2030 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 2031 } else
43db362d 2032 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 2033 if (err < 0)
1da177e4
LT
2034 goto out_freeiov;
2035 total_len = err;
2036
2037 err = -ENOBUFS;
2038
228e548e 2039 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2040 goto out_freeiov;
228e548e 2041 ctl_len = msg_sys->msg_controllen;
1da177e4 2042 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2043 err =
228e548e 2044 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2045 sizeof(ctl));
1da177e4
LT
2046 if (err)
2047 goto out_freeiov;
228e548e
AB
2048 ctl_buf = msg_sys->msg_control;
2049 ctl_len = msg_sys->msg_controllen;
1da177e4 2050 } else if (ctl_len) {
89bddce5 2051 if (ctl_len > sizeof(ctl)) {
1da177e4 2052 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2053 if (ctl_buf == NULL)
1da177e4
LT
2054 goto out_freeiov;
2055 }
2056 err = -EFAULT;
2057 /*
228e548e 2058 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2059 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2060 * checking falls down on this.
2061 */
fb8621bb 2062 if (copy_from_user(ctl_buf,
228e548e 2063 (void __user __force *)msg_sys->msg_control,
89bddce5 2064 ctl_len))
1da177e4 2065 goto out_freectl;
228e548e 2066 msg_sys->msg_control = ctl_buf;
1da177e4 2067 }
228e548e 2068 msg_sys->msg_flags = flags;
1da177e4
LT
2069
2070 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2071 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2072 /*
2073 * If this is sendmmsg() and current destination address is same as
2074 * previously succeeded address, omit asking LSM's decision.
2075 * used_address->name_len is initialized to UINT_MAX so that the first
2076 * destination address never matches.
2077 */
bc909d9d
MD
2078 if (used_address && msg_sys->msg_name &&
2079 used_address->name_len == msg_sys->msg_namelen &&
2080 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
2081 used_address->name_len)) {
2082 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2083 goto out_freectl;
2084 }
2085 err = sock_sendmsg(sock, msg_sys, total_len);
2086 /*
2087 * If this is sendmmsg() and sending to current destination address was
2088 * successful, remember it.
2089 */
2090 if (used_address && err >= 0) {
2091 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2092 if (msg_sys->msg_name)
2093 memcpy(&used_address->name, msg_sys->msg_name,
2094 used_address->name_len);
c71d8ebe 2095 }
1da177e4
LT
2096
2097out_freectl:
89bddce5 2098 if (ctl_buf != ctl)
1da177e4
LT
2099 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2100out_freeiov:
2101 if (iov != iovstack)
a74e9106 2102 kfree(iov);
228e548e
AB
2103out:
2104 return err;
2105}
2106
2107/*
2108 * BSD sendmsg interface
2109 */
2110
a7526eb5 2111long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
228e548e
AB
2112{
2113 int fput_needed, err;
2114 struct msghdr msg_sys;
1be374a0
AL
2115 struct socket *sock;
2116
1be374a0 2117 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2118 if (!sock)
2119 goto out;
2120
a7526eb5 2121 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2122
6cb153ca 2123 fput_light(sock->file, fput_needed);
89bddce5 2124out:
1da177e4
LT
2125 return err;
2126}
2127
a7526eb5
AL
2128SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
2129{
2130 if (flags & MSG_CMSG_COMPAT)
2131 return -EINVAL;
2132 return __sys_sendmsg(fd, msg, flags);
2133}
2134
228e548e
AB
2135/*
2136 * Linux sendmmsg interface
2137 */
2138
2139int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2140 unsigned int flags)
2141{
2142 int fput_needed, err, datagrams;
2143 struct socket *sock;
2144 struct mmsghdr __user *entry;
2145 struct compat_mmsghdr __user *compat_entry;
2146 struct msghdr msg_sys;
c71d8ebe 2147 struct used_address used_address;
228e548e 2148
98382f41
AB
2149 if (vlen > UIO_MAXIOV)
2150 vlen = UIO_MAXIOV;
228e548e
AB
2151
2152 datagrams = 0;
2153
2154 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2155 if (!sock)
2156 return err;
2157
c71d8ebe 2158 used_address.name_len = UINT_MAX;
228e548e
AB
2159 entry = mmsg;
2160 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2161 err = 0;
228e548e
AB
2162
2163 while (datagrams < vlen) {
228e548e 2164 if (MSG_CMSG_COMPAT & flags) {
a7526eb5
AL
2165 err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
2166 &msg_sys, flags, &used_address);
228e548e
AB
2167 if (err < 0)
2168 break;
2169 err = __put_user(err, &compat_entry->msg_len);
2170 ++compat_entry;
2171 } else {
a7526eb5
AL
2172 err = ___sys_sendmsg(sock,
2173 (struct msghdr __user *)entry,
2174 &msg_sys, flags, &used_address);
228e548e
AB
2175 if (err < 0)
2176 break;
2177 err = put_user(err, &entry->msg_len);
2178 ++entry;
2179 }
2180
2181 if (err)
2182 break;
2183 ++datagrams;
2184 }
2185
228e548e
AB
2186 fput_light(sock->file, fput_needed);
2187
728ffb86
AB
2188 /* We only return an error if no datagrams were able to be sent */
2189 if (datagrams != 0)
228e548e
AB
2190 return datagrams;
2191
228e548e
AB
2192 return err;
2193}
2194
2195SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2196 unsigned int, vlen, unsigned int, flags)
2197{
1be374a0
AL
2198 if (flags & MSG_CMSG_COMPAT)
2199 return -EINVAL;
228e548e
AB
2200 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2201}
2202
a7526eb5 2203static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2204 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2205{
89bddce5
SH
2206 struct compat_msghdr __user *msg_compat =
2207 (struct compat_msghdr __user *)msg;
1da177e4 2208 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2209 struct iovec *iov = iovstack;
1da177e4 2210 unsigned long cmsg_ptr;
a74e9106 2211 int err, total_len, len;
1da177e4
LT
2212
2213 /* kernel mode address */
230b1839 2214 struct sockaddr_storage addr;
1da177e4
LT
2215
2216 /* user mode address pointers */
2217 struct sockaddr __user *uaddr;
2218 int __user *uaddr_len;
89bddce5 2219
1da177e4 2220 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2221 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2222 return -EFAULT;
1661bf36
DC
2223 } else {
2224 err = copy_msghdr_from_user(msg_sys, msg);
2225 if (err)
2226 return err;
2227 }
1da177e4 2228
a2e27255 2229 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2230 err = -EMSGSIZE;
2231 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2232 goto out;
2233 err = -ENOMEM;
2234 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2235 GFP_KERNEL);
1da177e4 2236 if (!iov)
a2e27255 2237 goto out;
1da177e4
LT
2238 }
2239
f3d33426
HFS
2240 /* Save the user-mode address (verify_iovec will change the
2241 * kernel msghdr to use the kernel address space)
1da177e4 2242 */
a2e27255 2243 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4 2244 uaddr_len = COMPAT_NAMELEN(msg);
f3d33426 2245 if (MSG_CMSG_COMPAT & flags)
43db362d 2246 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
f3d33426 2247 else
43db362d 2248 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2249 if (err < 0)
2250 goto out_freeiov;
89bddce5 2251 total_len = err;
1da177e4 2252
a2e27255
ACM
2253 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2254 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2255
f3d33426
HFS
2256 /* We assume all kernel code knows the size of sockaddr_storage */
2257 msg_sys->msg_namelen = 0;
2258
1da177e4
LT
2259 if (sock->file->f_flags & O_NONBLOCK)
2260 flags |= MSG_DONTWAIT;
a2e27255
ACM
2261 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2262 total_len, flags);
1da177e4
LT
2263 if (err < 0)
2264 goto out_freeiov;
2265 len = err;
2266
2267 if (uaddr != NULL) {
43db362d 2268 err = move_addr_to_user(&addr,
a2e27255 2269 msg_sys->msg_namelen, uaddr,
89bddce5 2270 uaddr_len);
1da177e4
LT
2271 if (err < 0)
2272 goto out_freeiov;
2273 }
a2e27255 2274 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2275 COMPAT_FLAGS(msg));
1da177e4
LT
2276 if (err)
2277 goto out_freeiov;
2278 if (MSG_CMSG_COMPAT & flags)
a2e27255 2279 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2280 &msg_compat->msg_controllen);
2281 else
a2e27255 2282 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2283 &msg->msg_controllen);
2284 if (err)
2285 goto out_freeiov;
2286 err = len;
2287
2288out_freeiov:
2289 if (iov != iovstack)
a74e9106 2290 kfree(iov);
a2e27255
ACM
2291out:
2292 return err;
2293}
2294
2295/*
2296 * BSD recvmsg interface
2297 */
2298
a7526eb5 2299long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags)
a2e27255
ACM
2300{
2301 int fput_needed, err;
2302 struct msghdr msg_sys;
1be374a0
AL
2303 struct socket *sock;
2304
1be374a0 2305 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2306 if (!sock)
2307 goto out;
2308
a7526eb5 2309 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2310
6cb153ca 2311 fput_light(sock->file, fput_needed);
1da177e4
LT
2312out:
2313 return err;
2314}
2315
a7526eb5
AL
2316SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2317 unsigned int, flags)
2318{
2319 if (flags & MSG_CMSG_COMPAT)
2320 return -EINVAL;
2321 return __sys_recvmsg(fd, msg, flags);
2322}
2323
a2e27255
ACM
2324/*
2325 * Linux recvmmsg interface
2326 */
2327
2328int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2329 unsigned int flags, struct timespec *timeout)
2330{
2331 int fput_needed, err, datagrams;
2332 struct socket *sock;
2333 struct mmsghdr __user *entry;
d7256d0e 2334 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2335 struct msghdr msg_sys;
2336 struct timespec end_time;
2337
2338 if (timeout &&
2339 poll_select_set_timeout(&end_time, timeout->tv_sec,
2340 timeout->tv_nsec))
2341 return -EINVAL;
2342
2343 datagrams = 0;
2344
2345 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2346 if (!sock)
2347 return err;
2348
2349 err = sock_error(sock->sk);
2350 if (err)
2351 goto out_put;
2352
2353 entry = mmsg;
d7256d0e 2354 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2355
2356 while (datagrams < vlen) {
2357 /*
2358 * No need to ask LSM for more than the first datagram.
2359 */
d7256d0e 2360 if (MSG_CMSG_COMPAT & flags) {
a7526eb5
AL
2361 err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2362 &msg_sys, flags & ~MSG_WAITFORONE,
2363 datagrams);
d7256d0e
JMG
2364 if (err < 0)
2365 break;
2366 err = __put_user(err, &compat_entry->msg_len);
2367 ++compat_entry;
2368 } else {
a7526eb5
AL
2369 err = ___sys_recvmsg(sock,
2370 (struct msghdr __user *)entry,
2371 &msg_sys, flags & ~MSG_WAITFORONE,
2372 datagrams);
d7256d0e
JMG
2373 if (err < 0)
2374 break;
2375 err = put_user(err, &entry->msg_len);
2376 ++entry;
2377 }
2378
a2e27255
ACM
2379 if (err)
2380 break;
a2e27255
ACM
2381 ++datagrams;
2382
71c5c159
BB
2383 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2384 if (flags & MSG_WAITFORONE)
2385 flags |= MSG_DONTWAIT;
2386
a2e27255
ACM
2387 if (timeout) {
2388 ktime_get_ts(timeout);
2389 *timeout = timespec_sub(end_time, *timeout);
2390 if (timeout->tv_sec < 0) {
2391 timeout->tv_sec = timeout->tv_nsec = 0;
2392 break;
2393 }
2394
2395 /* Timeout, return less than vlen datagrams */
2396 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2397 break;
2398 }
2399
2400 /* Out of band data, return right away */
2401 if (msg_sys.msg_flags & MSG_OOB)
2402 break;
2403 }
2404
2405out_put:
2406 fput_light(sock->file, fput_needed);
1da177e4 2407
a2e27255
ACM
2408 if (err == 0)
2409 return datagrams;
2410
2411 if (datagrams != 0) {
2412 /*
2413 * We may return less entries than requested (vlen) if the
2414 * sock is non block and there aren't enough datagrams...
2415 */
2416 if (err != -EAGAIN) {
2417 /*
2418 * ... or if recvmsg returns an error after we
2419 * received some datagrams, where we record the
2420 * error to return on the next call or if the
2421 * app asks about it using getsockopt(SO_ERROR).
2422 */
2423 sock->sk->sk_err = -err;
2424 }
2425
2426 return datagrams;
2427 }
2428
2429 return err;
2430}
2431
2432SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2433 unsigned int, vlen, unsigned int, flags,
2434 struct timespec __user *, timeout)
2435{
2436 int datagrams;
2437 struct timespec timeout_sys;
2438
1be374a0
AL
2439 if (flags & MSG_CMSG_COMPAT)
2440 return -EINVAL;
2441
a2e27255
ACM
2442 if (!timeout)
2443 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2444
2445 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2446 return -EFAULT;
2447
2448 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2449
2450 if (datagrams > 0 &&
2451 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2452 datagrams = -EFAULT;
2453
2454 return datagrams;
2455}
2456
2457#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the argument block of each sys_socketcall sub-call,
 * indexed by call number.  AL(n) stands for n unsigned longs.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0 ..  6 */ AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),
	/*  7 .. 13 */ AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),
	/* 14 .. 20 */ AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2468
2469/*
89bddce5 2470 * System call vectors.
1da177e4
LT
2471 *
2472 * Argument checking cleaned up. Saved 20% in size.
2473 * This function doesn't need to set the kernel lock because
89bddce5 2474 * it is set by the callees.
1da177e4
LT
2475 */
2476
3e0fa65f 2477SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2478{
2950fa9d 2479 unsigned long a[AUDITSC_ARGS];
89bddce5 2480 unsigned long a0, a1;
1da177e4 2481 int err;
47379052 2482 unsigned int len;
1da177e4 2483
228e548e 2484 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2485 return -EINVAL;
2486
47379052
AV
2487 len = nargs[call];
2488 if (len > sizeof(a))
2489 return -EINVAL;
2490
1da177e4 2491 /* copy_from_user should be SMP safe. */
47379052 2492 if (copy_from_user(a, args, len))
1da177e4 2493 return -EFAULT;
3ec3b2fb 2494
2950fa9d
CG
2495 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2496 if (err)
2497 return err;
3ec3b2fb 2498
89bddce5
SH
2499 a0 = a[0];
2500 a1 = a[1];
2501
2502 switch (call) {
2503 case SYS_SOCKET:
2504 err = sys_socket(a0, a1, a[2]);
2505 break;
2506 case SYS_BIND:
2507 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2508 break;
2509 case SYS_CONNECT:
2510 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2511 break;
2512 case SYS_LISTEN:
2513 err = sys_listen(a0, a1);
2514 break;
2515 case SYS_ACCEPT:
de11defe
UD
2516 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2517 (int __user *)a[2], 0);
89bddce5
SH
2518 break;
2519 case SYS_GETSOCKNAME:
2520 err =
2521 sys_getsockname(a0, (struct sockaddr __user *)a1,
2522 (int __user *)a[2]);
2523 break;
2524 case SYS_GETPEERNAME:
2525 err =
2526 sys_getpeername(a0, (struct sockaddr __user *)a1,
2527 (int __user *)a[2]);
2528 break;
2529 case SYS_SOCKETPAIR:
2530 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2531 break;
2532 case SYS_SEND:
2533 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2534 break;
2535 case SYS_SENDTO:
2536 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2537 (struct sockaddr __user *)a[4], a[5]);
2538 break;
2539 case SYS_RECV:
2540 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2541 break;
2542 case SYS_RECVFROM:
2543 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2544 (struct sockaddr __user *)a[4],
2545 (int __user *)a[5]);
2546 break;
2547 case SYS_SHUTDOWN:
2548 err = sys_shutdown(a0, a1);
2549 break;
2550 case SYS_SETSOCKOPT:
2551 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2552 break;
2553 case SYS_GETSOCKOPT:
2554 err =
2555 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2556 (int __user *)a[4]);
2557 break;
2558 case SYS_SENDMSG:
2559 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2560 break;
228e548e
AB
2561 case SYS_SENDMMSG:
2562 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2563 break;
89bddce5
SH
2564 case SYS_RECVMSG:
2565 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2566 break;
a2e27255
ACM
2567 case SYS_RECVMMSG:
2568 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2569 (struct timespec __user *)a[4]);
2570 break;
de11defe
UD
2571 case SYS_ACCEPT4:
2572 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2573 (int __user *)a[2], a[3]);
aaca0bdc 2574 break;
89bddce5
SH
2575 default:
2576 err = -EINVAL;
2577 break;
1da177e4
LT
2578 }
2579 return err;
2580}
2581
89bddce5 2582#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2583
55737fda
SH
2584/**
2585 * sock_register - add a socket protocol handler
2586 * @ops: description of protocol
2587 *
1da177e4
LT
2588 * This function is called by a protocol handler that wants to
2589 * advertise its address family, and have it linked into the
55737fda
SH
2590 * socket interface. The value ops->family coresponds to the
2591 * socket system call protocol family.
1da177e4 2592 */
f0fd27d4 2593int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2594{
2595 int err;
2596
2597 if (ops->family >= NPROTO) {
3410f22e 2598 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2599 return -ENOBUFS;
2600 }
55737fda
SH
2601
2602 spin_lock(&net_family_lock);
190683a9
ED
2603 if (rcu_dereference_protected(net_families[ops->family],
2604 lockdep_is_held(&net_family_lock)))
55737fda
SH
2605 err = -EEXIST;
2606 else {
cf778b00 2607 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2608 err = 0;
2609 }
55737fda
SH
2610 spin_unlock(&net_family_lock);
2611
3410f22e 2612 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2613 return err;
2614}
c6d409cf 2615EXPORT_SYMBOL(sock_register);
1da177e4 2616
55737fda
SH
2617/**
2618 * sock_unregister - remove a protocol handler
2619 * @family: protocol family to remove
2620 *
1da177e4
LT
2621 * This function is called by a protocol handler that wants to
2622 * remove its address family, and have it unlinked from the
55737fda
SH
2623 * new socket creation.
2624 *
2625 * If protocol handler is a module, then it can use module reference
2626 * counts to protect against new references. If protocol handler is not
2627 * a module then it needs to provide its own protection in
2628 * the ops->create routine.
1da177e4 2629 */
f0fd27d4 2630void sock_unregister(int family)
1da177e4 2631{
f0fd27d4 2632 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2633
55737fda 2634 spin_lock(&net_family_lock);
a9b3cd7f 2635 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2636 spin_unlock(&net_family_lock);
2637
2638 synchronize_rcu();
2639
3410f22e 2640 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2641}
c6d409cf 2642EXPORT_SYMBOL(sock_unregister);
1da177e4 2643
77d76ea3 2644static int __init sock_init(void)
1da177e4 2645{
b3e19d92 2646 int err;
2ca794e5
EB
2647 /*
2648 * Initialize the network sysctl infrastructure.
2649 */
2650 err = net_sysctl_init();
2651 if (err)
2652 goto out;
b3e19d92 2653
1da177e4 2654 /*
89bddce5 2655 * Initialize skbuff SLAB cache
1da177e4
LT
2656 */
2657 skb_init();
1da177e4
LT
2658
2659 /*
89bddce5 2660 * Initialize the protocols module.
1da177e4
LT
2661 */
2662
2663 init_inodecache();
b3e19d92
NP
2664
2665 err = register_filesystem(&sock_fs_type);
2666 if (err)
2667 goto out_fs;
1da177e4 2668 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2669 if (IS_ERR(sock_mnt)) {
2670 err = PTR_ERR(sock_mnt);
2671 goto out_mount;
2672 }
77d76ea3
AK
2673
2674 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2675 */
2676
2677#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2678 err = netfilter_init();
2679 if (err)
2680 goto out;
1da177e4 2681#endif
cbeb321a 2682
c1f19b51
RC
2683#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2684 skb_timestamping_init();
2685#endif
2686
b3e19d92
NP
2687out:
2688 return err;
2689
2690out_mount:
2691 unregister_filesystem(&sock_fs_type);
2692out_fs:
2693 goto out;
1da177e4
LT
2694}
2695
77d76ea3
AK
2696core_initcall(sock_init); /* early initcall */
2697
1da177e4
LT
2698#ifdef CONFIG_PROC_FS
2699void socket_seq_show(struct seq_file *seq)
2700{
2701 int cpu;
2702 int counter = 0;
2703
6f912042 2704 for_each_possible_cpu(cpu)
89bddce5 2705 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2706
2707 /* It can be negative, by the way. 8) */
2708 if (counter < 0)
2709 counter = 0;
2710
2711 seq_printf(seq, "sockets: used %d\n", counter);
2712}
89bddce5 2713#endif /* CONFIG_PROC_FS */
1da177e4 2714
89bbfc95 2715#ifdef CONFIG_COMPAT
6b96018b 2716static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2717 unsigned int cmd, void __user *up)
7a229387 2718{
7a229387
AB
2719 mm_segment_t old_fs = get_fs();
2720 struct timeval ktv;
2721 int err;
2722
2723 set_fs(KERNEL_DS);
6b96018b 2724 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2725 set_fs(old_fs);
644595f8 2726 if (!err)
ed6fe9d6 2727 err = compat_put_timeval(&ktv, up);
644595f8 2728
7a229387
AB
2729 return err;
2730}
2731
6b96018b 2732static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2733 unsigned int cmd, void __user *up)
7a229387 2734{
7a229387
AB
2735 mm_segment_t old_fs = get_fs();
2736 struct timespec kts;
2737 int err;
2738
2739 set_fs(KERNEL_DS);
6b96018b 2740 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2741 set_fs(old_fs);
644595f8 2742 if (!err)
ed6fe9d6 2743 err = compat_put_timespec(&kts, up);
644595f8 2744
7a229387
AB
2745 return err;
2746}
2747
6b96018b 2748static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2749{
2750 struct ifreq __user *uifr;
2751 int err;
2752
2753 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2754 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2755 return -EFAULT;
2756
6b96018b 2757 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2758 if (err)
2759 return err;
2760
6b96018b 2761 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2762 return -EFAULT;
2763
2764 return 0;
2765}
2766
6b96018b 2767static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2768{
6b96018b 2769 struct compat_ifconf ifc32;
7a229387
AB
2770 struct ifconf ifc;
2771 struct ifconf __user *uifc;
6b96018b 2772 struct compat_ifreq __user *ifr32;
7a229387
AB
2773 struct ifreq __user *ifr;
2774 unsigned int i, j;
2775 int err;
2776
6b96018b 2777 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2778 return -EFAULT;
2779
43da5f2e 2780 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2781 if (ifc32.ifcbuf == 0) {
2782 ifc32.ifc_len = 0;
2783 ifc.ifc_len = 0;
2784 ifc.ifc_req = NULL;
2785 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2786 } else {
c6d409cf
ED
2787 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2788 sizeof(struct ifreq);
7a229387
AB
2789 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2790 ifc.ifc_len = len;
2791 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2792 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2793 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2794 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2795 return -EFAULT;
2796 ifr++;
2797 ifr32++;
2798 }
2799 }
2800 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2801 return -EFAULT;
2802
6b96018b 2803 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2804 if (err)
2805 return err;
2806
2807 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2808 return -EFAULT;
2809
2810 ifr = ifc.ifc_req;
2811 ifr32 = compat_ptr(ifc32.ifcbuf);
2812 for (i = 0, j = 0;
c6d409cf
ED
2813 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2814 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2815 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2816 return -EFAULT;
2817 ifr32++;
2818 ifr++;
2819 }
2820
2821 if (ifc32.ifcbuf == 0) {
2822 /* Translate from 64-bit structure multiple to
2823 * a 32-bit one.
2824 */
2825 i = ifc.ifc_len;
6b96018b 2826 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2827 ifc32.ifc_len = i;
2828 } else {
2829 ifc32.ifc_len = i;
2830 }
6b96018b 2831 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2832 return -EFAULT;
2833
2834 return 0;
2835}
2836
6b96018b 2837static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2838{
3a7da39d
BH
2839 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2840 bool convert_in = false, convert_out = false;
2841 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2842 struct ethtool_rxnfc __user *rxnfc;
7a229387 2843 struct ifreq __user *ifr;
3a7da39d
BH
2844 u32 rule_cnt = 0, actual_rule_cnt;
2845 u32 ethcmd;
7a229387 2846 u32 data;
3a7da39d 2847 int ret;
7a229387 2848
3a7da39d
BH
2849 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2850 return -EFAULT;
7a229387 2851
3a7da39d
BH
2852 compat_rxnfc = compat_ptr(data);
2853
2854 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2855 return -EFAULT;
2856
3a7da39d
BH
2857 /* Most ethtool structures are defined without padding.
2858 * Unfortunately struct ethtool_rxnfc is an exception.
2859 */
2860 switch (ethcmd) {
2861 default:
2862 break;
2863 case ETHTOOL_GRXCLSRLALL:
2864 /* Buffer size is variable */
2865 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2866 return -EFAULT;
2867 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2868 return -ENOMEM;
2869 buf_size += rule_cnt * sizeof(u32);
2870 /* fall through */
2871 case ETHTOOL_GRXRINGS:
2872 case ETHTOOL_GRXCLSRLCNT:
2873 case ETHTOOL_GRXCLSRULE:
55664f32 2874 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2875 convert_out = true;
2876 /* fall through */
2877 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2878 buf_size += sizeof(struct ethtool_rxnfc);
2879 convert_in = true;
2880 break;
2881 }
2882
2883 ifr = compat_alloc_user_space(buf_size);
954b1244 2884 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2885
2886 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2887 return -EFAULT;
2888
3a7da39d
BH
2889 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2890 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2891 return -EFAULT;
2892
3a7da39d 2893 if (convert_in) {
127fe533 2894 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2895 * fs.ring_cookie and at the end of fs, but nowhere else.
2896 */
127fe533
AD
2897 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2898 sizeof(compat_rxnfc->fs.m_ext) !=
2899 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2900 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2901 BUILD_BUG_ON(
2902 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2903 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2904 offsetof(struct ethtool_rxnfc, fs.location) -
2905 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2906
2907 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2908 (void __user *)(&rxnfc->fs.m_ext + 1) -
2909 (void __user *)rxnfc) ||
3a7da39d
BH
2910 copy_in_user(&rxnfc->fs.ring_cookie,
2911 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2912 (void __user *)(&rxnfc->fs.location + 1) -
2913 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2914 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2915 sizeof(rxnfc->rule_cnt)))
2916 return -EFAULT;
2917 }
2918
2919 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2920 if (ret)
2921 return ret;
2922
2923 if (convert_out) {
2924 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2925 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2926 (const void __user *)rxnfc) ||
3a7da39d
BH
2927 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2928 &rxnfc->fs.ring_cookie,
954b1244
SH
2929 (const void __user *)(&rxnfc->fs.location + 1) -
2930 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2931 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2932 sizeof(rxnfc->rule_cnt)))
2933 return -EFAULT;
2934
2935 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2936 /* As an optimisation, we only copy the actual
2937 * number of rules that the underlying
2938 * function returned. Since Mallory might
2939 * change the rule count in user memory, we
2940 * check that it is less than the rule count
2941 * originally given (as the user buffer size),
2942 * which has been range-checked.
2943 */
2944 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2945 return -EFAULT;
2946 if (actual_rule_cnt < rule_cnt)
2947 rule_cnt = actual_rule_cnt;
2948 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2949 &rxnfc->rule_locs[0],
2950 rule_cnt * sizeof(u32)))
2951 return -EFAULT;
2952 }
2953 }
2954
2955 return 0;
7a229387
AB
2956}
2957
7a50a240
AB
2958static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2959{
2960 void __user *uptr;
2961 compat_uptr_t uptr32;
2962 struct ifreq __user *uifr;
2963
c6d409cf 2964 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2965 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2966 return -EFAULT;
2967
2968 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2969 return -EFAULT;
2970
2971 uptr = compat_ptr(uptr32);
2972
2973 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2974 return -EFAULT;
2975
2976 return dev_ioctl(net, SIOCWANDEV, uifr);
2977}
2978
6b96018b
AB
2979static int bond_ioctl(struct net *net, unsigned int cmd,
2980 struct compat_ifreq __user *ifr32)
7a229387
AB
2981{
2982 struct ifreq kifr;
7a229387
AB
2983 mm_segment_t old_fs;
2984 int err;
7a229387
AB
2985
2986 switch (cmd) {
2987 case SIOCBONDENSLAVE:
2988 case SIOCBONDRELEASE:
2989 case SIOCBONDSETHWADDR:
2990 case SIOCBONDCHANGEACTIVE:
6b96018b 2991 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2992 return -EFAULT;
2993
2994 old_fs = get_fs();
c6d409cf 2995 set_fs(KERNEL_DS);
c3f52ae6 2996 err = dev_ioctl(net, cmd,
2997 (struct ifreq __user __force *) &kifr);
c6d409cf 2998 set_fs(old_fs);
7a229387
AB
2999
3000 return err;
7a229387 3001 default:
07d106d0 3002 return -ENOIOCTLCMD;
ccbd6a5a 3003 }
7a229387
AB
3004}
3005
590d4693
BH
3006/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3007static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3008 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
3009{
3010 struct ifreq __user *u_ifreq64;
7a229387
AB
3011 char tmp_buf[IFNAMSIZ];
3012 void __user *data64;
3013 u32 data32;
3014
3015 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
3016 IFNAMSIZ))
3017 return -EFAULT;
417c3522 3018 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
3019 return -EFAULT;
3020 data64 = compat_ptr(data32);
3021
3022 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
3023
7a229387
AB
3024 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
3025 IFNAMSIZ))
3026 return -EFAULT;
417c3522 3027 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
3028 return -EFAULT;
3029
6b96018b 3030 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
3031}
3032
6b96018b
AB
3033static int dev_ifsioc(struct net *net, struct socket *sock,
3034 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 3035{
a2116ed2 3036 struct ifreq __user *uifr;
7a229387
AB
3037 int err;
3038
a2116ed2
AB
3039 uifr = compat_alloc_user_space(sizeof(*uifr));
3040 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3041 return -EFAULT;
3042
3043 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3044
7a229387
AB
3045 if (!err) {
3046 switch (cmd) {
3047 case SIOCGIFFLAGS:
3048 case SIOCGIFMETRIC:
3049 case SIOCGIFMTU:
3050 case SIOCGIFMEM:
3051 case SIOCGIFHWADDR:
3052 case SIOCGIFINDEX:
3053 case SIOCGIFADDR:
3054 case SIOCGIFBRDADDR:
3055 case SIOCGIFDSTADDR:
3056 case SIOCGIFNETMASK:
fab2532b 3057 case SIOCGIFPFLAGS:
7a229387 3058 case SIOCGIFTXQLEN:
fab2532b
AB
3059 case SIOCGMIIPHY:
3060 case SIOCGMIIREG:
a2116ed2 3061 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3062 err = -EFAULT;
3063 break;
3064 }
3065 }
3066 return err;
3067}
3068
a2116ed2
AB
3069static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3070 struct compat_ifreq __user *uifr32)
3071{
3072 struct ifreq ifr;
3073 struct compat_ifmap __user *uifmap32;
3074 mm_segment_t old_fs;
3075 int err;
3076
3077 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3078 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3079 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3080 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3081 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3082 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3083 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3084 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3085 if (err)
3086 return -EFAULT;
3087
3088 old_fs = get_fs();
c6d409cf 3089 set_fs(KERNEL_DS);
c3f52ae6 3090 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3091 set_fs(old_fs);
a2116ed2
AB
3092
3093 if (cmd == SIOCGIFMAP && !err) {
3094 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3095 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3096 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3097 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3098 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3099 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3100 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3101 if (err)
3102 err = -EFAULT;
3103 }
3104 return err;
3105}
3106
/*
 * Layout of the IPv4 route request as passed by a 32-bit task.
 * routing_ioctl() interprets its user argument as this structure when the
 * socket is not AF_INET6.  rt_dev is a 32-bit user pointer that
 * routing_ioctl() widens with compat_ptr(); rt_mtu and rt_window are
 * fixed at 32 bits here.
 */
7a229387 3107struct rtentry32 {
c6d409cf 3108 u32 rt_pad1;
7a229387
AB
3109 struct sockaddr rt_dst; /* target address */
3110 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3111 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3112 unsigned short rt_flags;
3113 short rt_pad2;
3114 u32 rt_pad3;
3115 unsigned char rt_tos;
3116 unsigned char rt_class;
3117 short rt_pad4;
3118 short rt_metric; /* +1 for binary compatibility! */
7a229387 3119 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3120 u32 rt_mtu; /* per route MTU/Window */
3121 u32 rt_window; /* Window clamping */
7a229387
AB
3122 unsigned short rt_irtt; /* Initial RTT */
3123};
3124
/*
 * Layout of the IPv6 route request as passed by a 32-bit task.
 * routing_ioctl() interprets its user argument as this structure when the
 * socket family is AF_INET6: the three addresses are copied as one
 * contiguous run and each remaining member is fetched individually.
 */
3125struct in6_rtmsg32 {
3126 struct in6_addr rtmsg_dst;
3127 struct in6_addr rtmsg_src;
3128 struct in6_addr rtmsg_gateway;
3129 u32 rtmsg_type;
3130 u16 rtmsg_dst_len;
3131 u16 rtmsg_src_len;
3132 u32 rtmsg_metric;
3133 u32 rtmsg_info;
3134 u32 rtmsg_flags;
3135 s32 rtmsg_ifindex;
3136};
3137
6b96018b
AB
3138static int routing_ioctl(struct net *net, struct socket *sock,
3139 unsigned int cmd, void __user *argp)
7a229387
AB
3140{
3141 int ret;
3142 void *r = NULL;
3143 struct in6_rtmsg r6;
3144 struct rtentry r4;
3145 char devname[16];
3146 u32 rtdev;
3147 mm_segment_t old_fs = get_fs();
3148
6b96018b
AB
3149 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3150 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3151 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3152 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3153 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3154 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3155 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3156 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3157 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3158 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3159 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3160
3161 r = (void *) &r6;
3162 } else { /* ipv4 */
6b96018b 3163 struct rtentry32 __user *ur4 = argp;
c6d409cf 3164 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3165 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3166 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3167 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3168 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3169 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3170 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3171 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3172 if (rtdev) {
c6d409cf 3173 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3174 r4.rt_dev = (char __user __force *)devname;
3175 devname[15] = 0;
7a229387
AB
3176 } else
3177 r4.rt_dev = NULL;
3178
3179 r = (void *) &r4;
3180 }
3181
3182 if (ret) {
3183 ret = -EFAULT;
3184 goto out;
3185 }
3186
c6d409cf 3187 set_fs(KERNEL_DS);
6b96018b 3188 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3189 set_fs(old_fs);
7a229387
AB
3190
3191out:
7a229387
AB
3192 return ret;
3193}
3194
3195/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3196 * for some operations; this forces use of the newer bridge-utils that
25985edc 3197 * use compatible ioctls
7a229387 3198 */
6b96018b 3199static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3200{
6b96018b 3201 compat_ulong_t tmp;
7a229387 3202
6b96018b 3203 if (get_user(tmp, argp))
7a229387
AB
3204 return -EFAULT;
3205 if (tmp == BRCTL_GET_VERSION)
3206 return BRCTL_VERSION + 1;
3207 return -EINVAL;
3208}
3209
6b96018b
AB
3210static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3211 unsigned int cmd, unsigned long arg)
3212{
3213 void __user *argp = compat_ptr(arg);
3214 struct sock *sk = sock->sk;
3215 struct net *net = sock_net(sk);
7a229387 3216
6b96018b 3217 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3218 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3219
3220 switch (cmd) {
3221 case SIOCSIFBR:
3222 case SIOCGIFBR:
3223 return old_bridge_ioctl(argp);
3224 case SIOCGIFNAME:
3225 return dev_ifname32(net, argp);
3226 case SIOCGIFCONF:
3227 return dev_ifconf(net, argp);
3228 case SIOCETHTOOL:
3229 return ethtool_ioctl(net, argp);
7a50a240
AB
3230 case SIOCWANDEV:
3231 return compat_siocwandev(net, argp);
a2116ed2
AB
3232 case SIOCGIFMAP:
3233 case SIOCSIFMAP:
3234 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3235 case SIOCBONDENSLAVE:
3236 case SIOCBONDRELEASE:
3237 case SIOCBONDSETHWADDR:
6b96018b
AB
3238 case SIOCBONDCHANGEACTIVE:
3239 return bond_ioctl(net, cmd, argp);
3240 case SIOCADDRT:
3241 case SIOCDELRT:
3242 return routing_ioctl(net, sock, cmd, argp);
3243 case SIOCGSTAMP:
3244 return do_siocgstamp(net, sock, cmd, argp);
3245 case SIOCGSTAMPNS:
3246 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3247 case SIOCBONDSLAVEINFOQUERY:
3248 case SIOCBONDINFOQUERY:
a2116ed2 3249 case SIOCSHWTSTAMP:
fd468c74 3250 case SIOCGHWTSTAMP:
590d4693 3251 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3252
3253 case FIOSETOWN:
3254 case SIOCSPGRP:
3255 case FIOGETOWN:
3256 case SIOCGPGRP:
3257 case SIOCBRADDBR:
3258 case SIOCBRDELBR:
3259 case SIOCGIFVLAN:
3260 case SIOCSIFVLAN:
3261 case SIOCADDDLCI:
3262 case SIOCDELDLCI:
3263 return sock_ioctl(file, cmd, arg);
3264
3265 case SIOCGIFFLAGS:
3266 case SIOCSIFFLAGS:
3267 case SIOCGIFMETRIC:
3268 case SIOCSIFMETRIC:
3269 case SIOCGIFMTU:
3270 case SIOCSIFMTU:
3271 case SIOCGIFMEM:
3272 case SIOCSIFMEM:
3273 case SIOCGIFHWADDR:
3274 case SIOCSIFHWADDR:
3275 case SIOCADDMULTI:
3276 case SIOCDELMULTI:
3277 case SIOCGIFINDEX:
6b96018b
AB
3278 case SIOCGIFADDR:
3279 case SIOCSIFADDR:
3280 case SIOCSIFHWBROADCAST:
6b96018b 3281 case SIOCDIFADDR:
6b96018b
AB
3282 case SIOCGIFBRDADDR:
3283 case SIOCSIFBRDADDR:
3284 case SIOCGIFDSTADDR:
3285 case SIOCSIFDSTADDR:
3286 case SIOCGIFNETMASK:
3287 case SIOCSIFNETMASK:
3288 case SIOCSIFPFLAGS:
3289 case SIOCGIFPFLAGS:
3290 case SIOCGIFTXQLEN:
3291 case SIOCSIFTXQLEN:
3292 case SIOCBRADDIF:
3293 case SIOCBRDELIF:
9177efd3
AB
3294 case SIOCSIFNAME:
3295 case SIOCGMIIPHY:
3296 case SIOCGMIIREG:
3297 case SIOCSMIIREG:
6b96018b 3298 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3299
6b96018b
AB
3300 case SIOCSARP:
3301 case SIOCGARP:
3302 case SIOCDARP:
6b96018b 3303 case SIOCATMARK:
9177efd3
AB
3304 return sock_do_ioctl(net, sock, cmd, arg);
3305 }
3306
6b96018b
AB
3307 return -ENOIOCTLCMD;
3308}
7a229387 3309
95c96174 3310static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3311 unsigned long arg)
89bbfc95
SP
3312{
3313 struct socket *sock = file->private_data;
3314 int ret = -ENOIOCTLCMD;
87de87d5
DM
3315 struct sock *sk;
3316 struct net *net;
3317
3318 sk = sock->sk;
3319 net = sock_net(sk);
89bbfc95
SP
3320
3321 if (sock->ops->compat_ioctl)
3322 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3323
87de87d5
DM
3324 if (ret == -ENOIOCTLCMD &&
3325 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3326 ret = compat_wext_handle_ioctl(net, cmd, arg);
3327
6b96018b
AB
3328 if (ret == -ENOIOCTLCMD)
3329 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3330
89bbfc95
SP
3331 return ret;
3332}
3333#endif
3334
ac5a488e
SS
3335int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3336{
3337 return sock->ops->bind(sock, addr, addrlen);
3338}
c6d409cf 3339EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3340
3341int kernel_listen(struct socket *sock, int backlog)
3342{
3343 return sock->ops->listen(sock, backlog);
3344}
c6d409cf 3345EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3346
3347int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3348{
3349 struct sock *sk = sock->sk;
3350 int err;
3351
3352 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3353 newsock);
3354 if (err < 0)
3355 goto done;
3356
3357 err = sock->ops->accept(sock, *newsock, flags);
3358 if (err < 0) {
3359 sock_release(*newsock);
fa8705b0 3360 *newsock = NULL;
ac5a488e
SS
3361 goto done;
3362 }
3363
3364 (*newsock)->ops = sock->ops;
1b08534e 3365 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3366
3367done:
3368 return err;
3369}
c6d409cf 3370EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3371
3372int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3373 int flags)
ac5a488e
SS
3374{
3375 return sock->ops->connect(sock, addr, addrlen, flags);
3376}
c6d409cf 3377EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3378
3379int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3380 int *addrlen)
3381{
3382 return sock->ops->getname(sock, addr, addrlen, 0);
3383}
c6d409cf 3384EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3385
3386int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3387 int *addrlen)
3388{
3389 return sock->ops->getname(sock, addr, addrlen, 1);
3390}
c6d409cf 3391EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3392
3393int kernel_getsockopt(struct socket *sock, int level, int optname,
3394 char *optval, int *optlen)
3395{
3396 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3397 char __user *uoptval;
3398 int __user *uoptlen;
ac5a488e
SS
3399 int err;
3400
fb8621bb
NK
3401 uoptval = (char __user __force *) optval;
3402 uoptlen = (int __user __force *) optlen;
3403
ac5a488e
SS
3404 set_fs(KERNEL_DS);
3405 if (level == SOL_SOCKET)
fb8621bb 3406 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3407 else
fb8621bb
NK
3408 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3409 uoptlen);
ac5a488e
SS
3410 set_fs(oldfs);
3411 return err;
3412}
c6d409cf 3413EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3414
3415int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3416 char *optval, unsigned int optlen)
ac5a488e
SS
3417{
3418 mm_segment_t oldfs = get_fs();
fb8621bb 3419 char __user *uoptval;
ac5a488e
SS
3420 int err;
3421
fb8621bb
NK
3422 uoptval = (char __user __force *) optval;
3423
ac5a488e
SS
3424 set_fs(KERNEL_DS);
3425 if (level == SOL_SOCKET)
fb8621bb 3426 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3427 else
fb8621bb 3428 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3429 optlen);
3430 set_fs(oldfs);
3431 return err;
3432}
c6d409cf 3433EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3434
3435int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3436 size_t size, int flags)
3437{
3438 if (sock->ops->sendpage)
3439 return sock->ops->sendpage(sock, page, offset, size, flags);
3440
3441 return sock_no_sendpage(sock, page, offset, size, flags);
3442}
c6d409cf 3443EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3444
3445int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3446{
3447 mm_segment_t oldfs = get_fs();
3448 int err;
3449
3450 set_fs(KERNEL_DS);
3451 err = sock->ops->ioctl(sock, cmd, arg);
3452 set_fs(oldfs);
3453
3454 return err;
3455}
c6d409cf 3456EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3457
91cf45f0
TM
3458int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3459{
3460 return sock->ops->shutdown(sock, how);
3461}
91cf45f0 3462EXPORT_SYMBOL(kernel_sock_shutdown);
This page took 1.668483 seconds and 5 git commands to generate.