Merge tag 'pci-v3.15-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaa...
[deliverable/linux.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1fd7317d 88#include <linux/magic.h>
5a0e3ad6 89#include <linux/slab.h>
600e1779 90#include <linux/xattr.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
076bb0c8 107#include <net/busy_poll.h>
06021292 108
e0d1095a 109#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
110unsigned int sysctl_net_busy_read __read_mostly;
111unsigned int sysctl_net_busy_poll __read_mostly;
06021292 112#endif
6b96018b 113
1da177e4 114static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
115static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
116 unsigned long nr_segs, loff_t pos);
117static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
118 unsigned long nr_segs, loff_t pos);
89bddce5 119static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
120
121static int sock_close(struct inode *inode, struct file *file);
122static unsigned int sock_poll(struct file *file,
123 struct poll_table_struct *wait);
89bddce5 124static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
125#ifdef CONFIG_COMPAT
126static long compat_sock_ioctl(struct file *file,
89bddce5 127 unsigned int cmd, unsigned long arg);
89bbfc95 128#endif
1da177e4 129static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
130static ssize_t sock_sendpage(struct file *file, struct page *page,
131 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 132static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 133 struct pipe_inode_info *pipe, size_t len,
9c55e01c 134 unsigned int flags);
1da177e4 135
1da177e4
LT
136/*
137 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
138 * in the operation structures but are done directly via the socketcall() multiplexor.
139 */
140
da7071d7 141static const struct file_operations socket_file_ops = {
1da177e4
LT
142 .owner = THIS_MODULE,
143 .llseek = no_llseek,
144 .aio_read = sock_aio_read,
145 .aio_write = sock_aio_write,
146 .poll = sock_poll,
147 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
148#ifdef CONFIG_COMPAT
149 .compat_ioctl = compat_sock_ioctl,
150#endif
1da177e4
LT
151 .mmap = sock_mmap,
152 .open = sock_no_open, /* special open code to disallow open via /proc */
153 .release = sock_close,
154 .fasync = sock_fasync,
5274f052
JA
155 .sendpage = sock_sendpage,
156 .splice_write = generic_splice_sendpage,
9c55e01c 157 .splice_read = sock_splice_read,
1da177e4
LT
158};
159
160/*
161 * The protocol list. Each protocol is registered in here.
162 */
163
1da177e4 164static DEFINE_SPINLOCK(net_family_lock);
190683a9 165static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 166
1da177e4
LT
167/*
168 * Statistics counters of the socket lists
169 */
170
c6d409cf 171static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
172
173/*
89bddce5
SH
174 * Support routines.
175 * Move socket addresses back and forth across the kernel/user
176 * divide and look after the messy bits.
1da177e4
LT
177 */
178
1da177e4
LT
179/**
180 * move_addr_to_kernel - copy a socket address into kernel space
181 * @uaddr: Address in user space
182 * @kaddr: Address in kernel space
183 * @ulen: Length in user space
184 *
185 * The address is copied into kernel space. If the provided address is
186 * too long an error code of -EINVAL is returned. If the copy gives
187 * invalid addresses -EFAULT is returned. On a success 0 is returned.
188 */
189
43db362d 190int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 191{
230b1839 192 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 193 return -EINVAL;
89bddce5 194 if (ulen == 0)
1da177e4 195 return 0;
89bddce5 196 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 197 return -EFAULT;
3ec3b2fb 198 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
199}
200
201/**
202 * move_addr_to_user - copy an address to user space
203 * @kaddr: kernel space address
204 * @klen: length of address in kernel
205 * @uaddr: user space address
206 * @ulen: pointer to user length field
207 *
208 * The value pointed to by ulen on entry is the buffer length available.
209 * This is overwritten with the buffer space used. -EINVAL is returned
210 * if an overlong buffer is specified or a negative buffer size. -EFAULT
211 * is returned if either the buffer or the length field are not
212 * accessible.
213 * After copying the data up to the limit the user specifies, the true
214 * length of the data is written over the length limit the user
215 * specified. Zero is returned for a success.
216 */
89bddce5 217
43db362d 218static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 219 void __user *uaddr, int __user *ulen)
1da177e4
LT
220{
221 int err;
222 int len;
223
68c6beb3 224 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
225 err = get_user(len, ulen);
226 if (err)
1da177e4 227 return err;
89bddce5
SH
228 if (len > klen)
229 len = klen;
68c6beb3 230 if (len < 0)
1da177e4 231 return -EINVAL;
89bddce5 232 if (len) {
d6fe3945
SG
233 if (audit_sockaddr(klen, kaddr))
234 return -ENOMEM;
89bddce5 235 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
236 return -EFAULT;
237 }
238 /*
89bddce5
SH
239 * "fromlen shall refer to the value before truncation.."
240 * 1003.1g
1da177e4
LT
241 */
242 return __put_user(klen, ulen);
243}
244
e18b890b 245static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
246
247static struct inode *sock_alloc_inode(struct super_block *sb)
248{
249 struct socket_alloc *ei;
eaefd110 250 struct socket_wq *wq;
89bddce5 251
e94b1766 252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
253 if (!ei)
254 return NULL;
eaefd110
ED
255 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
256 if (!wq) {
43815482
ED
257 kmem_cache_free(sock_inode_cachep, ei);
258 return NULL;
259 }
eaefd110
ED
260 init_waitqueue_head(&wq->wait);
261 wq->fasync_list = NULL;
262 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 263
1da177e4
LT
264 ei->socket.state = SS_UNCONNECTED;
265 ei->socket.flags = 0;
266 ei->socket.ops = NULL;
267 ei->socket.sk = NULL;
268 ei->socket.file = NULL;
1da177e4
LT
269
270 return &ei->vfs_inode;
271}
272
273static void sock_destroy_inode(struct inode *inode)
274{
43815482 275 struct socket_alloc *ei;
eaefd110 276 struct socket_wq *wq;
43815482
ED
277
278 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 279 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 280 kfree_rcu(wq, rcu);
43815482 281 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
282}
283
51cc5068 284static void init_once(void *foo)
1da177e4 285{
89bddce5 286 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 287
a35afb83 288 inode_init_once(&ei->vfs_inode);
1da177e4 289}
89bddce5 290
1da177e4
LT
291static int init_inodecache(void)
292{
293 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
294 sizeof(struct socket_alloc),
295 0,
296 (SLAB_HWCACHE_ALIGN |
297 SLAB_RECLAIM_ACCOUNT |
298 SLAB_MEM_SPREAD),
20c2df83 299 init_once);
1da177e4
LT
300 if (sock_inode_cachep == NULL)
301 return -ENOMEM;
302 return 0;
303}
304
b87221de 305static const struct super_operations sockfs_ops = {
c6d409cf
ED
306 .alloc_inode = sock_alloc_inode,
307 .destroy_inode = sock_destroy_inode,
308 .statfs = simple_statfs,
1da177e4
LT
309};
310
c23fbb6b
ED
311/*
312 * sockfs_dname() is called from d_path().
313 */
314static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
315{
316 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
317 dentry->d_inode->i_ino);
318}
319
3ba13d17 320static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 321 .d_dname = sockfs_dname,
1da177e4
LT
322};
323
c74a1cbb
AV
324static struct dentry *sockfs_mount(struct file_system_type *fs_type,
325 int flags, const char *dev_name, void *data)
326{
327 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
328 &sockfs_dentry_operations, SOCKFS_MAGIC);
329}
330
331static struct vfsmount *sock_mnt __read_mostly;
332
333static struct file_system_type sock_fs_type = {
334 .name = "sockfs",
335 .mount = sockfs_mount,
336 .kill_sb = kill_anon_super,
337};
338
1da177e4
LT
339/*
340 * Obtains the first available file descriptor and sets it up for use.
341 *
39d8c1b6
DM
342 * These functions create file structures and maps them to fd space
343 * of the current process. On success it returns file descriptor
1da177e4
LT
344 * and file struct implicitly stored in sock->file.
345 * Note that another thread may close file descriptor before we return
346 * from this function. We use the fact that now we do not refer
347 * to socket after mapping. If one day we will need it, this
348 * function will increment ref. count on file by 1.
349 *
350 * In any case returned fd MAY BE not valid!
351 * This race condition is unavoidable
352 * with shared fd spaces, we cannot solve it inside kernel,
353 * but we take care of internal coherence yet.
354 */
355
aab174f0 356struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 357{
7cbe66b6 358 struct qstr name = { .name = "" };
2c48b9c4 359 struct path path;
7cbe66b6 360 struct file *file;
1da177e4 361
600e1779
MY
362 if (dname) {
363 name.name = dname;
364 name.len = strlen(name.name);
365 } else if (sock->sk) {
366 name.name = sock->sk->sk_prot_creator->name;
367 name.len = strlen(name.name);
368 }
4b936885 369 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
370 if (unlikely(!path.dentry))
371 return ERR_PTR(-ENOMEM);
2c48b9c4 372 path.mnt = mntget(sock_mnt);
39d8c1b6 373
2c48b9c4 374 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 375 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 376
2c48b9c4 377 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 378 &socket_file_ops);
39b65252 379 if (unlikely(IS_ERR(file))) {
cc3808f8 380 /* drop dentry, keep inode */
7de9c6ee 381 ihold(path.dentry->d_inode);
2c48b9c4 382 path_put(&path);
39b65252 383 return file;
cc3808f8
AV
384 }
385
386 sock->file = file;
77d27200 387 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 388 file->private_data = sock;
28407630 389 return file;
39d8c1b6 390}
56b31d1c 391EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 392
56b31d1c 393static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
394{
395 struct file *newfile;
28407630
AV
396 int fd = get_unused_fd_flags(flags);
397 if (unlikely(fd < 0))
398 return fd;
39d8c1b6 399
aab174f0 400 newfile = sock_alloc_file(sock, flags, NULL);
28407630 401 if (likely(!IS_ERR(newfile))) {
39d8c1b6 402 fd_install(fd, newfile);
28407630
AV
403 return fd;
404 }
7cbe66b6 405
28407630
AV
406 put_unused_fd(fd);
407 return PTR_ERR(newfile);
1da177e4
LT
408}
409
406a3c63 410struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 411{
6cb153ca
BL
412 if (file->f_op == &socket_file_ops)
413 return file->private_data; /* set in sock_map_fd */
414
23bb80d2
ED
415 *err = -ENOTSOCK;
416 return NULL;
6cb153ca 417}
406a3c63 418EXPORT_SYMBOL(sock_from_file);
6cb153ca 419
1da177e4 420/**
c6d409cf 421 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
422 * @fd: file handle
423 * @err: pointer to an error code return
424 *
425 * The file handle passed in is locked and the socket it is bound
426 * too is returned. If an error occurs the err pointer is overwritten
427 * with a negative errno code and NULL is returned. The function checks
428 * for both invalid handles and passing a handle which is not a socket.
429 *
430 * On a success the socket object pointer is returned.
431 */
432
433struct socket *sockfd_lookup(int fd, int *err)
434{
435 struct file *file;
1da177e4
LT
436 struct socket *sock;
437
89bddce5
SH
438 file = fget(fd);
439 if (!file) {
1da177e4
LT
440 *err = -EBADF;
441 return NULL;
442 }
89bddce5 443
6cb153ca
BL
444 sock = sock_from_file(file, err);
445 if (!sock)
1da177e4 446 fput(file);
6cb153ca
BL
447 return sock;
448}
c6d409cf 449EXPORT_SYMBOL(sockfd_lookup);
1da177e4 450
6cb153ca
BL
451static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
452{
00e188ef 453 struct fd f = fdget(fd);
6cb153ca
BL
454 struct socket *sock;
455
3672558c 456 *err = -EBADF;
00e188ef
AV
457 if (f.file) {
458 sock = sock_from_file(f.file, err);
459 if (likely(sock)) {
460 *fput_needed = f.flags;
6cb153ca 461 return sock;
00e188ef
AV
462 }
463 fdput(f);
1da177e4 464 }
6cb153ca 465 return NULL;
1da177e4
LT
466}
467
600e1779
MY
468#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
469#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
470#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
471static ssize_t sockfs_getxattr(struct dentry *dentry,
472 const char *name, void *value, size_t size)
473{
474 const char *proto_name;
475 size_t proto_size;
476 int error;
477
478 error = -ENODATA;
479 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
480 proto_name = dentry->d_name.name;
481 proto_size = strlen(proto_name);
482
483 if (value) {
484 error = -ERANGE;
485 if (proto_size + 1 > size)
486 goto out;
487
488 strncpy(value, proto_name, proto_size + 1);
489 }
490 error = proto_size + 1;
491 }
492
493out:
494 return error;
495}
496
497static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
498 size_t size)
499{
500 ssize_t len;
501 ssize_t used = 0;
502
503 len = security_inode_listsecurity(dentry->d_inode, buffer, size);
504 if (len < 0)
505 return len;
506 used += len;
507 if (buffer) {
508 if (size < used)
509 return -ERANGE;
510 buffer += len;
511 }
512
513 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
514 used += len;
515 if (buffer) {
516 if (size < used)
517 return -ERANGE;
518 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
519 buffer += len;
520 }
521
522 return used;
523}
524
525static const struct inode_operations sockfs_inode_ops = {
526 .getxattr = sockfs_getxattr,
527 .listxattr = sockfs_listxattr,
528};
529
1da177e4
LT
530/**
531 * sock_alloc - allocate a socket
89bddce5 532 *
1da177e4
LT
533 * Allocate a new inode and socket object. The two are bound together
534 * and initialised. The socket is then returned. If we are out of inodes
535 * NULL is returned.
536 */
537
538static struct socket *sock_alloc(void)
539{
89bddce5
SH
540 struct inode *inode;
541 struct socket *sock;
1da177e4 542
a209dfc7 543 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
544 if (!inode)
545 return NULL;
546
547 sock = SOCKET_I(inode);
548
29a020d3 549 kmemcheck_annotate_bitfield(sock, type);
85fe4025 550 inode->i_ino = get_next_ino();
89bddce5 551 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
552 inode->i_uid = current_fsuid();
553 inode->i_gid = current_fsgid();
600e1779 554 inode->i_op = &sockfs_inode_ops;
1da177e4 555
19e8d69c 556 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
557 return sock;
558}
559
560/*
561 * In theory you can't get an open on this inode, but /proc provides
562 * a back door. Remember to keep it shut otherwise you'll let the
563 * creepy crawlies in.
564 */
89bddce5 565
1da177e4
LT
566static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
567{
568 return -ENXIO;
569}
570
4b6f5d20 571const struct file_operations bad_sock_fops = {
1da177e4
LT
572 .owner = THIS_MODULE,
573 .open = sock_no_open,
6038f373 574 .llseek = noop_llseek,
1da177e4
LT
575};
576
577/**
578 * sock_release - close a socket
579 * @sock: socket to close
580 *
581 * The socket is released from the protocol stack if it has a release
582 * callback, and the inode is then released if the socket is bound to
89bddce5 583 * an inode not a file.
1da177e4 584 */
89bddce5 585
1da177e4
LT
586void sock_release(struct socket *sock)
587{
588 if (sock->ops) {
589 struct module *owner = sock->ops->owner;
590
591 sock->ops->release(sock);
592 sock->ops = NULL;
593 module_put(owner);
594 }
595
eaefd110 596 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
597 printk(KERN_ERR "sock_release: fasync list not empty!\n");
598
b09e786b
MP
599 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
600 return;
601
19e8d69c 602 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
603 if (!sock->file) {
604 iput(SOCK_INODE(sock));
605 return;
606 }
89bddce5 607 sock->file = NULL;
1da177e4 608}
c6d409cf 609EXPORT_SYMBOL(sock_release);
1da177e4 610
bf84a010 611void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 612{
2244d07b 613 *tx_flags = 0;
20d49473 614 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 615 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 616 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 617 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
618 if (sock_flag(sk, SOCK_WIFI_STATUS))
619 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
620}
621EXPORT_SYMBOL(sock_tx_timestamp);
622
228e548e
AB
623static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
624 struct msghdr *msg, size_t size)
1da177e4
LT
625{
626 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4
LT
627
628 si->sock = sock;
629 si->scm = NULL;
630 si->msg = msg;
631 si->size = size;
632
1da177e4
LT
633 return sock->ops->sendmsg(iocb, sock, msg, size);
634}
635
228e548e
AB
636static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
637 struct msghdr *msg, size_t size)
638{
639 int err = security_socket_sendmsg(sock, msg, size);
640
641 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
642}
643
1da177e4
LT
644int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
645{
646 struct kiocb iocb;
647 struct sock_iocb siocb;
648 int ret;
649
650 init_sync_kiocb(&iocb, NULL);
651 iocb.private = &siocb;
652 ret = __sock_sendmsg(&iocb, sock, msg, size);
653 if (-EIOCBQUEUED == ret)
654 ret = wait_on_sync_kiocb(&iocb);
655 return ret;
656}
c6d409cf 657EXPORT_SYMBOL(sock_sendmsg);
1da177e4 658
894dc24c 659static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
660{
661 struct kiocb iocb;
662 struct sock_iocb siocb;
663 int ret;
664
665 init_sync_kiocb(&iocb, NULL);
666 iocb.private = &siocb;
667 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
668 if (-EIOCBQUEUED == ret)
669 ret = wait_on_sync_kiocb(&iocb);
670 return ret;
671}
672
1da177e4
LT
673int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
674 struct kvec *vec, size_t num, size_t size)
675{
676 mm_segment_t oldfs = get_fs();
677 int result;
678
679 set_fs(KERNEL_DS);
680 /*
681 * the following is safe, since for compiler definitions of kvec and
682 * iovec are identical, yielding the same in-core layout and alignment
683 */
89bddce5 684 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
685 msg->msg_iovlen = num;
686 result = sock_sendmsg(sock, msg, size);
687 set_fs(oldfs);
688 return result;
689}
c6d409cf 690EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 691
92f37fd2
ED
692/*
693 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
694 */
695void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
696 struct sk_buff *skb)
697{
20d49473
PO
698 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
699 struct timespec ts[3];
700 int empty = 1;
701 struct skb_shared_hwtstamps *shhwtstamps =
702 skb_hwtstamps(skb);
703
704 /* Race occurred between timestamp enabling and packet
705 receiving. Fill in the current time for now. */
706 if (need_software_tstamp && skb->tstamp.tv64 == 0)
707 __net_timestamp(skb);
708
709 if (need_software_tstamp) {
710 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
711 struct timeval tv;
712 skb_get_timestamp(skb, &tv);
713 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
714 sizeof(tv), &tv);
715 } else {
842509b8 716 skb_get_timestampns(skb, &ts[0]);
20d49473 717 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 718 sizeof(ts[0]), &ts[0]);
20d49473
PO
719 }
720 }
721
722
723 memset(ts, 0, sizeof(ts));
6e94d1ef
DB
724 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
725 ktime_to_timespec_cond(skb->tstamp, ts + 0))
20d49473 726 empty = 0;
20d49473
PO
727 if (shhwtstamps) {
728 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
6e94d1ef 729 ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))
20d49473
PO
730 empty = 0;
731 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
6e94d1ef 732 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
20d49473 733 empty = 0;
92f37fd2 734 }
20d49473
PO
735 if (!empty)
736 put_cmsg(msg, SOL_SOCKET,
737 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 738}
7c81fd8b
ACM
739EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
740
6e3e939f
JB
741void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
742 struct sk_buff *skb)
743{
744 int ack;
745
746 if (!sock_flag(sk, SOCK_WIFI_STATUS))
747 return;
748 if (!skb->wifi_acked_valid)
749 return;
750
751 ack = skb->wifi_acked;
752
753 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
754}
755EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
756
11165f14 757static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
758 struct sk_buff *skb)
3b885787
NH
759{
760 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
761 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
762 sizeof(__u32), &skb->dropcount);
763}
764
/*
 * Convenience wrapper: emit the timestamp control messages first, then
 * the drop-count control message, for one received skb.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 772
a2e27255
ACM
773static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
774 struct msghdr *msg, size_t size, int flags)
1da177e4 775{
1da177e4
LT
776 struct sock_iocb *si = kiocb_to_siocb(iocb);
777
778 si->sock = sock;
779 si->scm = NULL;
780 si->msg = msg;
781 si->size = size;
782 si->flags = flags;
783
1da177e4
LT
784 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
785}
786
a2e27255
ACM
787static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
788 struct msghdr *msg, size_t size, int flags)
789{
790 int err = security_socket_recvmsg(sock, msg, size, flags);
791
792 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
793}
794
89bddce5 795int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
796 size_t size, int flags)
797{
798 struct kiocb iocb;
799 struct sock_iocb siocb;
800 int ret;
801
89bddce5 802 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
803 iocb.private = &siocb;
804 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
805 if (-EIOCBQUEUED == ret)
806 ret = wait_on_sync_kiocb(&iocb);
807 return ret;
808}
c6d409cf 809EXPORT_SYMBOL(sock_recvmsg);
1da177e4 810
a2e27255
ACM
811static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
812 size_t size, int flags)
813{
814 struct kiocb iocb;
815 struct sock_iocb siocb;
816 int ret;
817
818 init_sync_kiocb(&iocb, NULL);
819 iocb.private = &siocb;
820 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
821 if (-EIOCBQUEUED == ret)
822 ret = wait_on_sync_kiocb(&iocb);
823 return ret;
824}
825
c1249c0a
ML
826/**
827 * kernel_recvmsg - Receive a message from a socket (kernel space)
828 * @sock: The socket to receive the message from
829 * @msg: Received message
830 * @vec: Input s/g array for message data
831 * @num: Size of input s/g array
832 * @size: Number of bytes to read
833 * @flags: Message flags (MSG_DONTWAIT, etc...)
834 *
835 * On return the msg structure contains the scatter/gather array passed in the
836 * vec argument. The array is modified so that it consists of the unfilled
837 * portion of the original array.
838 *
839 * The returned value is the total number of bytes received, or an error.
840 */
89bddce5
SH
841int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
842 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
843{
844 mm_segment_t oldfs = get_fs();
845 int result;
846
847 set_fs(KERNEL_DS);
848 /*
849 * the following is safe, since for compiler definitions of kvec and
850 * iovec are identical, yielding the same in-core layout and alignment
851 */
89bddce5 852 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
853 result = sock_recvmsg(sock, msg, size, flags);
854 set_fs(oldfs);
855 return result;
856}
c6d409cf 857EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 858
ce1d4d3e
CH
859static ssize_t sock_sendpage(struct file *file, struct page *page,
860 int offset, size_t size, loff_t *ppos, int more)
1da177e4 861{
1da177e4
LT
862 struct socket *sock;
863 int flags;
864
ce1d4d3e
CH
865 sock = file->private_data;
866
35f9c09f
ED
867 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
868 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
869 flags |= more;
ce1d4d3e 870
e6949583 871 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 872}
1da177e4 873
9c55e01c 874static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 875 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
876 unsigned int flags)
877{
878 struct socket *sock = file->private_data;
879
997b37da
RDC
880 if (unlikely(!sock->ops->splice_read))
881 return -EINVAL;
882
9c55e01c
JA
883 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
884}
885
ce1d4d3e 886static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 887 struct sock_iocb *siocb)
ce1d4d3e 888{
d29c445b
KO
889 if (!is_sync_kiocb(iocb))
890 BUG();
1da177e4 891
ce1d4d3e 892 siocb->kiocb = iocb;
ce1d4d3e
CH
893 iocb->private = siocb;
894 return siocb;
1da177e4
LT
895}
896
ce1d4d3e 897static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
898 struct file *file, const struct iovec *iov,
899 unsigned long nr_segs)
ce1d4d3e
CH
900{
901 struct socket *sock = file->private_data;
902 size_t size = 0;
903 int i;
1da177e4 904
89bddce5
SH
905 for (i = 0; i < nr_segs; i++)
906 size += iov[i].iov_len;
1da177e4 907
ce1d4d3e
CH
908 msg->msg_name = NULL;
909 msg->msg_namelen = 0;
910 msg->msg_control = NULL;
911 msg->msg_controllen = 0;
89bddce5 912 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
913 msg->msg_iovlen = nr_segs;
914 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
915
916 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
917}
918
027445c3
BP
919static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
920 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
921{
922 struct sock_iocb siocb, *x;
923
1da177e4
LT
924 if (pos != 0)
925 return -ESPIPE;
027445c3 926
73a7075e 927 if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
1da177e4
LT
928 return 0;
929
027445c3
BP
930
931 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
932 if (!x)
933 return -ENOMEM;
027445c3 934 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
935}
936
ce1d4d3e 937static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
938 struct file *file, const struct iovec *iov,
939 unsigned long nr_segs)
1da177e4 940{
ce1d4d3e
CH
941 struct socket *sock = file->private_data;
942 size_t size = 0;
943 int i;
1da177e4 944
89bddce5
SH
945 for (i = 0; i < nr_segs; i++)
946 size += iov[i].iov_len;
1da177e4 947
ce1d4d3e
CH
948 msg->msg_name = NULL;
949 msg->msg_namelen = 0;
950 msg->msg_control = NULL;
951 msg->msg_controllen = 0;
89bddce5 952 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
953 msg->msg_iovlen = nr_segs;
954 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
955 if (sock->type == SOCK_SEQPACKET)
956 msg->msg_flags |= MSG_EOR;
1da177e4 957
ce1d4d3e 958 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
959}
960
027445c3
BP
961static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
962 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
963{
964 struct sock_iocb siocb, *x;
1da177e4 965
ce1d4d3e
CH
966 if (pos != 0)
967 return -ESPIPE;
027445c3 968
027445c3 969 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
970 if (!x)
971 return -ENOMEM;
1da177e4 972
027445c3 973 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
974}
975
1da177e4
LT
976/*
977 * Atomic setting of ioctl hooks to avoid race
978 * with module unload.
979 */
980
4a3e2f71 981static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 982static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 983
881d966b 984void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 985{
4a3e2f71 986 mutex_lock(&br_ioctl_mutex);
1da177e4 987 br_ioctl_hook = hook;
4a3e2f71 988 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
989}
990EXPORT_SYMBOL(brioctl_set);
991
4a3e2f71 992static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 993static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 994
881d966b 995void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 996{
4a3e2f71 997 mutex_lock(&vlan_ioctl_mutex);
1da177e4 998 vlan_ioctl_hook = hook;
4a3e2f71 999 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1000}
1001EXPORT_SYMBOL(vlan_ioctl_set);
1002
4a3e2f71 1003static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1004static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1005
89bddce5 1006void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1007{
4a3e2f71 1008 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1009 dlci_ioctl_hook = hook;
4a3e2f71 1010 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1011}
1012EXPORT_SYMBOL(dlci_ioctl_set);
1013
6b96018b
AB
1014static long sock_do_ioctl(struct net *net, struct socket *sock,
1015 unsigned int cmd, unsigned long arg)
1016{
1017 int err;
1018 void __user *argp = (void __user *)arg;
1019
1020 err = sock->ops->ioctl(sock, cmd, arg);
1021
1022 /*
1023 * If this ioctl is unknown try to hand it down
1024 * to the NIC driver.
1025 */
1026 if (err == -ENOIOCTLCMD)
1027 err = dev_ioctl(net, cmd, argp);
1028
1029 return err;
1030}
1031
1da177e4
LT
1032/*
1033 * With an ioctl, arg may well be a user mode pointer, but we don't know
1034 * what to do with it - that's up to the protocol still.
1035 */
1036
1037static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1038{
1039 struct socket *sock;
881d966b 1040 struct sock *sk;
1da177e4
LT
1041 void __user *argp = (void __user *)arg;
1042 int pid, err;
881d966b 1043 struct net *net;
1da177e4 1044
b69aee04 1045 sock = file->private_data;
881d966b 1046 sk = sock->sk;
3b1e0a65 1047 net = sock_net(sk);
1da177e4 1048 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1049 err = dev_ioctl(net, cmd, argp);
1da177e4 1050 } else
3d23e349 1051#ifdef CONFIG_WEXT_CORE
1da177e4 1052 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1053 err = dev_ioctl(net, cmd, argp);
1da177e4 1054 } else
3d23e349 1055#endif
89bddce5 1056 switch (cmd) {
1da177e4
LT
1057 case FIOSETOWN:
1058 case SIOCSPGRP:
1059 err = -EFAULT;
1060 if (get_user(pid, (int __user *)argp))
1061 break;
1062 err = f_setown(sock->file, pid, 1);
1063 break;
1064 case FIOGETOWN:
1065 case SIOCGPGRP:
609d7fa9 1066 err = put_user(f_getown(sock->file),
89bddce5 1067 (int __user *)argp);
1da177e4
LT
1068 break;
1069 case SIOCGIFBR:
1070 case SIOCSIFBR:
1071 case SIOCBRADDBR:
1072 case SIOCBRDELBR:
1073 err = -ENOPKG;
1074 if (!br_ioctl_hook)
1075 request_module("bridge");
1076
4a3e2f71 1077 mutex_lock(&br_ioctl_mutex);
89bddce5 1078 if (br_ioctl_hook)
881d966b 1079 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1080 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1081 break;
1082 case SIOCGIFVLAN:
1083 case SIOCSIFVLAN:
1084 err = -ENOPKG;
1085 if (!vlan_ioctl_hook)
1086 request_module("8021q");
1087
4a3e2f71 1088 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1089 if (vlan_ioctl_hook)
881d966b 1090 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1091 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1092 break;
1da177e4
LT
1093 case SIOCADDDLCI:
1094 case SIOCDELDLCI:
1095 err = -ENOPKG;
1096 if (!dlci_ioctl_hook)
1097 request_module("dlci");
1098
7512cbf6
PE
1099 mutex_lock(&dlci_ioctl_mutex);
1100 if (dlci_ioctl_hook)
1da177e4 1101 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1102 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1103 break;
1104 default:
6b96018b 1105 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1106 break;
89bddce5 1107 }
1da177e4
LT
1108 return err;
1109}
1110
1111int sock_create_lite(int family, int type, int protocol, struct socket **res)
1112{
1113 int err;
1114 struct socket *sock = NULL;
89bddce5 1115
1da177e4
LT
1116 err = security_socket_create(family, type, protocol, 1);
1117 if (err)
1118 goto out;
1119
1120 sock = sock_alloc();
1121 if (!sock) {
1122 err = -ENOMEM;
1123 goto out;
1124 }
1125
1da177e4 1126 sock->type = type;
7420ed23
VY
1127 err = security_socket_post_create(sock, family, type, protocol, 1);
1128 if (err)
1129 goto out_release;
1130
1da177e4
LT
1131out:
1132 *res = sock;
1133 return err;
7420ed23
VY
1134out_release:
1135 sock_release(sock);
1136 sock = NULL;
1137 goto out;
1da177e4 1138}
c6d409cf 1139EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1140
1141/* No kernel lock held - perfect */
89bddce5 1142static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1143{
cbf55001 1144 unsigned int busy_flag = 0;
1da177e4
LT
1145 struct socket *sock;
1146
1147 /*
89bddce5 1148 * We can't return errors to poll, so it's either yes or no.
1da177e4 1149 */
b69aee04 1150 sock = file->private_data;
2d48d67f 1151
cbf55001 1152 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1153 /* this socket can poll_ll so tell the system call */
cbf55001 1154 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1155
1156 /* once, only if requested by syscall */
cbf55001
ET
1157 if (wait && (wait->_key & POLL_BUSY_LOOP))
1158 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1159 }
1160
cbf55001 1161 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1162}
1163
89bddce5 1164static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1165{
b69aee04 1166 struct socket *sock = file->private_data;
1da177e4
LT
1167
1168 return sock->ops->mmap(file, sock, vma);
1169}
1170
/* Release the socket embedded in @inode; always reports success. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1176
1177/*
1178 * Update the socket async list
1179 *
1180 * Fasync_list locking strategy.
1181 *
1182 * 1. fasync_list is modified only under process context socket lock
1183 * i.e. under semaphore.
1184 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1185 * or under socket lock
1da177e4
LT
1186 */
1187
1188static int sock_fasync(int fd, struct file *filp, int on)
1189{
989a2979
ED
1190 struct socket *sock = filp->private_data;
1191 struct sock *sk = sock->sk;
eaefd110 1192 struct socket_wq *wq;
1da177e4 1193
989a2979 1194 if (sk == NULL)
1da177e4 1195 return -EINVAL;
1da177e4
LT
1196
1197 lock_sock(sk);
eaefd110
ED
1198 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1199 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1200
eaefd110 1201 if (!wq->fasync_list)
989a2979
ED
1202 sock_reset_flag(sk, SOCK_FASYNC);
1203 else
bcdce719 1204 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1205
989a2979 1206 release_sock(sk);
1da177e4
LT
1207 return 0;
1208}
1209
43815482 1210/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1211
1212int sock_wake_async(struct socket *sock, int how, int band)
1213{
43815482
ED
1214 struct socket_wq *wq;
1215
1216 if (!sock)
1217 return -1;
1218 rcu_read_lock();
1219 wq = rcu_dereference(sock->wq);
1220 if (!wq || !wq->fasync_list) {
1221 rcu_read_unlock();
1da177e4 1222 return -1;
43815482 1223 }
89bddce5 1224 switch (how) {
8d8ad9d7 1225 case SOCK_WAKE_WAITD:
1da177e4
LT
1226 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1227 break;
1228 goto call_kill;
8d8ad9d7 1229 case SOCK_WAKE_SPACE:
1da177e4
LT
1230 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1231 break;
1232 /* fall through */
8d8ad9d7 1233 case SOCK_WAKE_IO:
89bddce5 1234call_kill:
43815482 1235 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1236 break;
8d8ad9d7 1237 case SOCK_WAKE_URG:
43815482 1238 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1239 }
43815482 1240 rcu_read_unlock();
1da177e4
LT
1241 return 0;
1242}
c6d409cf 1243EXPORT_SYMBOL(sock_wake_async);
1da177e4 1244
721db93a 1245int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1246 struct socket **res, int kern)
1da177e4
LT
1247{
1248 int err;
1249 struct socket *sock;
55737fda 1250 const struct net_proto_family *pf;
1da177e4
LT
1251
1252 /*
89bddce5 1253 * Check protocol is in range
1da177e4
LT
1254 */
1255 if (family < 0 || family >= NPROTO)
1256 return -EAFNOSUPPORT;
1257 if (type < 0 || type >= SOCK_MAX)
1258 return -EINVAL;
1259
1260 /* Compatibility.
1261
1262 This uglymoron is moved from INET layer to here to avoid
1263 deadlock in module load.
1264 */
1265 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1266 static int warned;
1da177e4
LT
1267 if (!warned) {
1268 warned = 1;
89bddce5
SH
1269 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1270 current->comm);
1da177e4
LT
1271 }
1272 family = PF_PACKET;
1273 }
1274
1275 err = security_socket_create(family, type, protocol, kern);
1276 if (err)
1277 return err;
89bddce5 1278
55737fda
SH
1279 /*
1280 * Allocate the socket and allow the family to set things up. if
1281 * the protocol is 0, the family is instructed to select an appropriate
1282 * default.
1283 */
1284 sock = sock_alloc();
1285 if (!sock) {
e87cc472 1286 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1287 return -ENFILE; /* Not exactly a match, but its the
1288 closest posix thing */
1289 }
1290
1291 sock->type = type;
1292
95a5afca 1293#ifdef CONFIG_MODULES
89bddce5
SH
1294 /* Attempt to load a protocol module if the find failed.
1295 *
1296 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1297 * requested real, full-featured networking support upon configuration.
1298 * Otherwise module support will break!
1299 */
190683a9 1300 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1301 request_module("net-pf-%d", family);
1da177e4
LT
1302#endif
1303
55737fda
SH
1304 rcu_read_lock();
1305 pf = rcu_dereference(net_families[family]);
1306 err = -EAFNOSUPPORT;
1307 if (!pf)
1308 goto out_release;
1da177e4
LT
1309
1310 /*
1311 * We will call the ->create function, that possibly is in a loadable
1312 * module, so we have to bump that loadable module refcnt first.
1313 */
55737fda 1314 if (!try_module_get(pf->owner))
1da177e4
LT
1315 goto out_release;
1316
55737fda
SH
1317 /* Now protected by module ref count */
1318 rcu_read_unlock();
1319
3f378b68 1320 err = pf->create(net, sock, protocol, kern);
55737fda 1321 if (err < 0)
1da177e4 1322 goto out_module_put;
a79af59e 1323
1da177e4
LT
1324 /*
1325 * Now to bump the refcnt of the [loadable] module that owns this
1326 * socket at sock_release time we decrement its refcnt.
1327 */
55737fda
SH
1328 if (!try_module_get(sock->ops->owner))
1329 goto out_module_busy;
1330
1da177e4
LT
1331 /*
1332 * Now that we're done with the ->create function, the [loadable]
1333 * module can have its refcnt decremented
1334 */
55737fda 1335 module_put(pf->owner);
7420ed23
VY
1336 err = security_socket_post_create(sock, family, type, protocol, kern);
1337 if (err)
3b185525 1338 goto out_sock_release;
55737fda 1339 *res = sock;
1da177e4 1340
55737fda
SH
1341 return 0;
1342
1343out_module_busy:
1344 err = -EAFNOSUPPORT;
1da177e4 1345out_module_put:
55737fda
SH
1346 sock->ops = NULL;
1347 module_put(pf->owner);
1348out_sock_release:
1da177e4 1349 sock_release(sock);
55737fda
SH
1350 return err;
1351
1352out_release:
1353 rcu_read_unlock();
1354 goto out_sock_release;
1da177e4 1355}
721db93a 1356EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1357
1358int sock_create(int family, int type, int protocol, struct socket **res)
1359{
1b8d7ae4 1360 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1361}
c6d409cf 1362EXPORT_SYMBOL(sock_create);
1da177e4
LT
1363
1364int sock_create_kern(int family, int type, int protocol, struct socket **res)
1365{
1b8d7ae4 1366 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1367}
c6d409cf 1368EXPORT_SYMBOL(sock_create_kern);
1da177e4 1369
3e0fa65f 1370SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1371{
1372 int retval;
1373 struct socket *sock;
a677a039
UD
1374 int flags;
1375
e38b36f3
UD
1376 /* Check the SOCK_* constants for consistency. */
1377 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1378 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1379 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1380 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1381
a677a039 1382 flags = type & ~SOCK_TYPE_MASK;
77d27200 1383 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1384 return -EINVAL;
1385 type &= SOCK_TYPE_MASK;
1da177e4 1386
aaca0bdc
UD
1387 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1388 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1389
1da177e4
LT
1390 retval = sock_create(family, type, protocol, &sock);
1391 if (retval < 0)
1392 goto out;
1393
77d27200 1394 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1395 if (retval < 0)
1396 goto out_release;
1397
1398out:
1399 /* It may be already another descriptor 8) Not kernel problem. */
1400 return retval;
1401
1402out_release:
1403 sock_release(sock);
1404 return retval;
1405}
1406
1407/*
1408 * Create a pair of connected sockets.
1409 */
1410
3e0fa65f
HC
1411SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1412 int __user *, usockvec)
1da177e4
LT
1413{
1414 struct socket *sock1, *sock2;
1415 int fd1, fd2, err;
db349509 1416 struct file *newfile1, *newfile2;
a677a039
UD
1417 int flags;
1418
1419 flags = type & ~SOCK_TYPE_MASK;
77d27200 1420 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1421 return -EINVAL;
1422 type &= SOCK_TYPE_MASK;
1da177e4 1423
aaca0bdc
UD
1424 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1425 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1426
1da177e4
LT
1427 /*
1428 * Obtain the first socket and check if the underlying protocol
1429 * supports the socketpair call.
1430 */
1431
1432 err = sock_create(family, type, protocol, &sock1);
1433 if (err < 0)
1434 goto out;
1435
1436 err = sock_create(family, type, protocol, &sock2);
1437 if (err < 0)
1438 goto out_release_1;
1439
1440 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1441 if (err < 0)
1da177e4
LT
1442 goto out_release_both;
1443
28407630 1444 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1445 if (unlikely(fd1 < 0)) {
1446 err = fd1;
db349509 1447 goto out_release_both;
bf3c23d1 1448 }
d73aa286 1449
28407630 1450 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1451 if (unlikely(fd2 < 0)) {
1452 err = fd2;
d73aa286 1453 goto out_put_unused_1;
28407630
AV
1454 }
1455
aab174f0 1456 newfile1 = sock_alloc_file(sock1, flags, NULL);
28407630
AV
1457 if (unlikely(IS_ERR(newfile1))) {
1458 err = PTR_ERR(newfile1);
d73aa286 1459 goto out_put_unused_both;
28407630
AV
1460 }
1461
aab174f0 1462 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1463 if (IS_ERR(newfile2)) {
1464 err = PTR_ERR(newfile2);
d73aa286 1465 goto out_fput_1;
db349509
AV
1466 }
1467
d73aa286
YD
1468 err = put_user(fd1, &usockvec[0]);
1469 if (err)
1470 goto out_fput_both;
1471
1472 err = put_user(fd2, &usockvec[1]);
1473 if (err)
1474 goto out_fput_both;
1475
157cf649 1476 audit_fd_pair(fd1, fd2);
d73aa286 1477
db349509
AV
1478 fd_install(fd1, newfile1);
1479 fd_install(fd2, newfile2);
1da177e4
LT
1480 /* fd1 and fd2 may be already another descriptors.
1481 * Not kernel problem.
1482 */
1483
d73aa286 1484 return 0;
1da177e4 1485
d73aa286
YD
1486out_fput_both:
1487 fput(newfile2);
1488 fput(newfile1);
1489 put_unused_fd(fd2);
1490 put_unused_fd(fd1);
1491 goto out;
1492
1493out_fput_1:
1494 fput(newfile1);
1495 put_unused_fd(fd2);
1496 put_unused_fd(fd1);
1497 sock_release(sock2);
1498 goto out;
1da177e4 1499
d73aa286
YD
1500out_put_unused_both:
1501 put_unused_fd(fd2);
1502out_put_unused_1:
1503 put_unused_fd(fd1);
1da177e4 1504out_release_both:
89bddce5 1505 sock_release(sock2);
1da177e4 1506out_release_1:
89bddce5 1507 sock_release(sock1);
1da177e4
LT
1508out:
1509 return err;
1510}
1511
1da177e4
LT
1512/*
1513 * Bind a name to a socket. Nothing much to do here since it's
1514 * the protocol's responsibility to handle the local address.
1515 *
1516 * We move the socket address to kernel space before we call
1517 * the protocol layer (having also checked the address is ok).
1518 */
1519
20f37034 1520SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1521{
1522 struct socket *sock;
230b1839 1523 struct sockaddr_storage address;
6cb153ca 1524 int err, fput_needed;
1da177e4 1525
89bddce5 1526 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1527 if (sock) {
43db362d 1528 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1529 if (err >= 0) {
1530 err = security_socket_bind(sock,
230b1839 1531 (struct sockaddr *)&address,
89bddce5 1532 addrlen);
6cb153ca
BL
1533 if (!err)
1534 err = sock->ops->bind(sock,
89bddce5 1535 (struct sockaddr *)
230b1839 1536 &address, addrlen);
1da177e4 1537 }
6cb153ca 1538 fput_light(sock->file, fput_needed);
89bddce5 1539 }
1da177e4
LT
1540 return err;
1541}
1542
1da177e4
LT
1543/*
1544 * Perform a listen. Basically, we allow the protocol to do anything
1545 * necessary for a listen, and if that works, we mark the socket as
1546 * ready for listening.
1547 */
1548
3e0fa65f 1549SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1550{
1551 struct socket *sock;
6cb153ca 1552 int err, fput_needed;
b8e1f9b5 1553 int somaxconn;
89bddce5
SH
1554
1555 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1556 if (sock) {
8efa6e93 1557 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1558 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1559 backlog = somaxconn;
1da177e4
LT
1560
1561 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1562 if (!err)
1563 err = sock->ops->listen(sock, backlog);
1da177e4 1564
6cb153ca 1565 fput_light(sock->file, fput_needed);
1da177e4
LT
1566 }
1567 return err;
1568}
1569
1da177e4
LT
1570/*
1571 * For accept, we attempt to create a new socket, set up the link
1572 * with the client, wake up the client, then return the new
1573 * connected fd. We collect the address of the connector in kernel
1574 * space and move it to user at the very end. This is unclean because
1575 * we open the socket then return an error.
1576 *
1577 * 1003.1g adds the ability to recvmsg() to query connection pending
1578 * status to recvmsg. We need to add that support in a way that's
1579 * clean when we restructure accept also.
1580 */
1581
20f37034
HC
1582SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1583 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1584{
1585 struct socket *sock, *newsock;
39d8c1b6 1586 struct file *newfile;
6cb153ca 1587 int err, len, newfd, fput_needed;
230b1839 1588 struct sockaddr_storage address;
1da177e4 1589
77d27200 1590 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1591 return -EINVAL;
1592
1593 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1594 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1595
6cb153ca 1596 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1597 if (!sock)
1598 goto out;
1599
1600 err = -ENFILE;
c6d409cf
ED
1601 newsock = sock_alloc();
1602 if (!newsock)
1da177e4
LT
1603 goto out_put;
1604
1605 newsock->type = sock->type;
1606 newsock->ops = sock->ops;
1607
1da177e4
LT
1608 /*
1609 * We don't need try_module_get here, as the listening socket (sock)
1610 * has the protocol module (sock->ops->owner) held.
1611 */
1612 __module_get(newsock->ops->owner);
1613
28407630 1614 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1615 if (unlikely(newfd < 0)) {
1616 err = newfd;
9a1875e6
DM
1617 sock_release(newsock);
1618 goto out_put;
39d8c1b6 1619 }
aab174f0 1620 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
28407630
AV
1621 if (unlikely(IS_ERR(newfile))) {
1622 err = PTR_ERR(newfile);
1623 put_unused_fd(newfd);
1624 sock_release(newsock);
1625 goto out_put;
1626 }
39d8c1b6 1627
a79af59e
FF
1628 err = security_socket_accept(sock, newsock);
1629 if (err)
39d8c1b6 1630 goto out_fd;
a79af59e 1631
1da177e4
LT
1632 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1633 if (err < 0)
39d8c1b6 1634 goto out_fd;
1da177e4
LT
1635
1636 if (upeer_sockaddr) {
230b1839 1637 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1638 &len, 2) < 0) {
1da177e4 1639 err = -ECONNABORTED;
39d8c1b6 1640 goto out_fd;
1da177e4 1641 }
43db362d 1642 err = move_addr_to_user(&address,
230b1839 1643 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1644 if (err < 0)
39d8c1b6 1645 goto out_fd;
1da177e4
LT
1646 }
1647
1648 /* File flags are not inherited via accept() unlike another OSes. */
1649
39d8c1b6
DM
1650 fd_install(newfd, newfile);
1651 err = newfd;
1da177e4 1652
1da177e4 1653out_put:
6cb153ca 1654 fput_light(sock->file, fput_needed);
1da177e4
LT
1655out:
1656 return err;
39d8c1b6 1657out_fd:
9606a216 1658 fput(newfile);
39d8c1b6 1659 put_unused_fd(newfd);
1da177e4
LT
1660 goto out_put;
1661}
1662
20f37034
HC
1663SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1664 int __user *, upeer_addrlen)
aaca0bdc 1665{
de11defe 1666 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1667}
1668
1da177e4
LT
1669/*
1670 * Attempt to connect to a socket with the server address. The address
1671 * is in user space so we verify it is OK and move it to kernel space.
1672 *
1673 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1674 * break bindings
1675 *
1676 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1677 * other SEQPACKET protocols that take time to connect() as it doesn't
1678 * include the -EINPROGRESS status for such sockets.
1679 */
1680
20f37034
HC
1681SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1682 int, addrlen)
1da177e4
LT
1683{
1684 struct socket *sock;
230b1839 1685 struct sockaddr_storage address;
6cb153ca 1686 int err, fput_needed;
1da177e4 1687
6cb153ca 1688 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1689 if (!sock)
1690 goto out;
43db362d 1691 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1692 if (err < 0)
1693 goto out_put;
1694
89bddce5 1695 err =
230b1839 1696 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1697 if (err)
1698 goto out_put;
1699
230b1839 1700 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1701 sock->file->f_flags);
1702out_put:
6cb153ca 1703 fput_light(sock->file, fput_needed);
1da177e4
LT
1704out:
1705 return err;
1706}
1707
1708/*
1709 * Get the local address ('name') of a socket object. Move the obtained
1710 * name to user space.
1711 */
1712
20f37034
HC
1713SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1714 int __user *, usockaddr_len)
1da177e4
LT
1715{
1716 struct socket *sock;
230b1839 1717 struct sockaddr_storage address;
6cb153ca 1718 int len, err, fput_needed;
89bddce5 1719
6cb153ca 1720 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1721 if (!sock)
1722 goto out;
1723
1724 err = security_socket_getsockname(sock);
1725 if (err)
1726 goto out_put;
1727
230b1839 1728 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1729 if (err)
1730 goto out_put;
43db362d 1731 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1732
1733out_put:
6cb153ca 1734 fput_light(sock->file, fput_needed);
1da177e4
LT
1735out:
1736 return err;
1737}
1738
1739/*
1740 * Get the remote address ('name') of a socket object. Move the obtained
1741 * name to user space.
1742 */
1743
20f37034
HC
1744SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1745 int __user *, usockaddr_len)
1da177e4
LT
1746{
1747 struct socket *sock;
230b1839 1748 struct sockaddr_storage address;
6cb153ca 1749 int len, err, fput_needed;
1da177e4 1750
89bddce5
SH
1751 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1752 if (sock != NULL) {
1da177e4
LT
1753 err = security_socket_getpeername(sock);
1754 if (err) {
6cb153ca 1755 fput_light(sock->file, fput_needed);
1da177e4
LT
1756 return err;
1757 }
1758
89bddce5 1759 err =
230b1839 1760 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1761 1);
1da177e4 1762 if (!err)
43db362d 1763 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1764 usockaddr_len);
6cb153ca 1765 fput_light(sock->file, fput_needed);
1da177e4
LT
1766 }
1767 return err;
1768}
1769
1770/*
1771 * Send a datagram to a given address. We move the address into kernel
1772 * space and check the user space data area is readable before invoking
1773 * the protocol.
1774 */
1775
3e0fa65f 1776SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1777 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1778 int, addr_len)
1da177e4
LT
1779{
1780 struct socket *sock;
230b1839 1781 struct sockaddr_storage address;
1da177e4
LT
1782 int err;
1783 struct msghdr msg;
1784 struct iovec iov;
6cb153ca 1785 int fput_needed;
6cb153ca 1786
253eacc0
LT
1787 if (len > INT_MAX)
1788 len = INT_MAX;
de0fa95c
PE
1789 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1790 if (!sock)
4387ff75 1791 goto out;
6cb153ca 1792
89bddce5
SH
1793 iov.iov_base = buff;
1794 iov.iov_len = len;
1795 msg.msg_name = NULL;
1796 msg.msg_iov = &iov;
1797 msg.msg_iovlen = 1;
1798 msg.msg_control = NULL;
1799 msg.msg_controllen = 0;
1800 msg.msg_namelen = 0;
6cb153ca 1801 if (addr) {
43db362d 1802 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1803 if (err < 0)
1804 goto out_put;
230b1839 1805 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1806 msg.msg_namelen = addr_len;
1da177e4
LT
1807 }
1808 if (sock->file->f_flags & O_NONBLOCK)
1809 flags |= MSG_DONTWAIT;
1810 msg.msg_flags = flags;
1811 err = sock_sendmsg(sock, &msg, len);
1812
89bddce5 1813out_put:
de0fa95c 1814 fput_light(sock->file, fput_needed);
4387ff75 1815out:
1da177e4
LT
1816 return err;
1817}
1818
1819/*
89bddce5 1820 * Send a datagram down a socket.
1da177e4
LT
1821 */
1822
3e0fa65f 1823SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1824 unsigned int, flags)
1da177e4
LT
1825{
1826 return sys_sendto(fd, buff, len, flags, NULL, 0);
1827}
1828
1829/*
89bddce5 1830 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1831 * sender. We verify the buffers are writable and if needed move the
1832 * sender address from kernel to user space.
1833 */
1834
3e0fa65f 1835SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1836 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1837 int __user *, addr_len)
1da177e4
LT
1838{
1839 struct socket *sock;
1840 struct iovec iov;
1841 struct msghdr msg;
230b1839 1842 struct sockaddr_storage address;
89bddce5 1843 int err, err2;
6cb153ca
BL
1844 int fput_needed;
1845
253eacc0
LT
1846 if (size > INT_MAX)
1847 size = INT_MAX;
de0fa95c 1848 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1849 if (!sock)
de0fa95c 1850 goto out;
1da177e4 1851
89bddce5
SH
1852 msg.msg_control = NULL;
1853 msg.msg_controllen = 0;
1854 msg.msg_iovlen = 1;
1855 msg.msg_iov = &iov;
1856 iov.iov_len = size;
1857 iov.iov_base = ubuf;
f3d33426
HFS
1858 /* Save some cycles and don't copy the address if not needed */
1859 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1860 /* We assume all kernel code knows the size of sockaddr_storage */
1861 msg.msg_namelen = 0;
1da177e4
LT
1862 if (sock->file->f_flags & O_NONBLOCK)
1863 flags |= MSG_DONTWAIT;
89bddce5 1864 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1865
89bddce5 1866 if (err >= 0 && addr != NULL) {
43db362d 1867 err2 = move_addr_to_user(&address,
230b1839 1868 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1869 if (err2 < 0)
1870 err = err2;
1da177e4 1871 }
de0fa95c
PE
1872
1873 fput_light(sock->file, fput_needed);
4387ff75 1874out:
1da177e4
LT
1875 return err;
1876}
1877
1878/*
89bddce5 1879 * Receive a datagram from a socket.
1da177e4
LT
1880 */
1881
89bddce5 1882asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
95c96174 1883 unsigned int flags)
1da177e4
LT
1884{
1885 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1886}
1887
1888/*
1889 * Set a socket option. Because we don't know the option lengths we have
1890 * to pass the user mode parameter for the protocols to sort out.
1891 */
1892
20f37034
HC
1893SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1894 char __user *, optval, int, optlen)
1da177e4 1895{
6cb153ca 1896 int err, fput_needed;
1da177e4
LT
1897 struct socket *sock;
1898
1899 if (optlen < 0)
1900 return -EINVAL;
89bddce5
SH
1901
1902 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1903 if (sock != NULL) {
1904 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1905 if (err)
1906 goto out_put;
1da177e4
LT
1907
1908 if (level == SOL_SOCKET)
89bddce5
SH
1909 err =
1910 sock_setsockopt(sock, level, optname, optval,
1911 optlen);
1da177e4 1912 else
89bddce5
SH
1913 err =
1914 sock->ops->setsockopt(sock, level, optname, optval,
1915 optlen);
6cb153ca
BL
1916out_put:
1917 fput_light(sock->file, fput_needed);
1da177e4
LT
1918 }
1919 return err;
1920}
1921
1922/*
1923 * Get a socket option. Because we don't know the option lengths we have
1924 * to pass a user mode parameter for the protocols to sort out.
1925 */
1926
20f37034
HC
1927SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1928 char __user *, optval, int __user *, optlen)
1da177e4 1929{
6cb153ca 1930 int err, fput_needed;
1da177e4
LT
1931 struct socket *sock;
1932
89bddce5
SH
1933 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1934 if (sock != NULL) {
6cb153ca
BL
1935 err = security_socket_getsockopt(sock, level, optname);
1936 if (err)
1937 goto out_put;
1da177e4
LT
1938
1939 if (level == SOL_SOCKET)
89bddce5
SH
1940 err =
1941 sock_getsockopt(sock, level, optname, optval,
1942 optlen);
1da177e4 1943 else
89bddce5
SH
1944 err =
1945 sock->ops->getsockopt(sock, level, optname, optval,
1946 optlen);
6cb153ca
BL
1947out_put:
1948 fput_light(sock->file, fput_needed);
1da177e4
LT
1949 }
1950 return err;
1951}
1952
1da177e4
LT
1953/*
1954 * Shutdown a socket.
1955 */
1956
754fe8d2 1957SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1958{
6cb153ca 1959 int err, fput_needed;
1da177e4
LT
1960 struct socket *sock;
1961
89bddce5
SH
1962 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1963 if (sock != NULL) {
1da177e4 1964 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1965 if (!err)
1966 err = sock->ops->shutdown(sock, how);
1967 fput_light(sock->file, fput_needed);
1da177e4
LT
1968 }
1969 return err;
1970}
1971
89bddce5 1972/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1973 * fields which are the same type (int / unsigned) on our platforms.
1974 */
/*
 * NOTE: COMPAT_MSG() refers to a variable named "flags" and to a pointer
 * named <msg>_compat; both must be in scope wherever it is expanded.
 */
#define COMPAT_MSG(msg, member)					\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1978
c71d8ebe
TH
1979struct used_address {
1980 struct sockaddr_storage name;
1981 unsigned int name_len;
1982};
1983
1661bf36
DC
1984static int copy_msghdr_from_user(struct msghdr *kmsg,
1985 struct msghdr __user *umsg)
1986{
1987 if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
1988 return -EFAULT;
dbb490b9
ML
1989
1990 if (kmsg->msg_namelen < 0)
1991 return -EINVAL;
1992
1661bf36 1993 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1994 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
1661bf36
DC
1995 return 0;
1996}
1997
a7526eb5 1998static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1999 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 2000 struct used_address *used_address)
1da177e4 2001{
89bddce5
SH
2002 struct compat_msghdr __user *msg_compat =
2003 (struct compat_msghdr __user *)msg;
230b1839 2004 struct sockaddr_storage address;
1da177e4 2005 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2006 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
2007 __attribute__ ((aligned(sizeof(__kernel_size_t))));
2008 /* 20 is size of ipv6_pktinfo */
1da177e4 2009 unsigned char *ctl_buf = ctl;
a74e9106 2010 int err, ctl_len, total_len;
89bddce5 2011
1da177e4
LT
2012 err = -EFAULT;
2013 if (MSG_CMSG_COMPAT & flags) {
228e548e 2014 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2015 return -EFAULT;
1661bf36
DC
2016 } else {
2017 err = copy_msghdr_from_user(msg_sys, msg);
2018 if (err)
2019 return err;
2020 }
1da177e4 2021
228e548e 2022 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2023 err = -EMSGSIZE;
2024 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2025 goto out;
2026 err = -ENOMEM;
2027 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2028 GFP_KERNEL);
1da177e4 2029 if (!iov)
228e548e 2030 goto out;
1da177e4
LT
2031 }
2032
2033 /* This will also move the address data into kernel space */
2034 if (MSG_CMSG_COMPAT & flags) {
43db362d 2035 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 2036 } else
43db362d 2037 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 2038 if (err < 0)
1da177e4
LT
2039 goto out_freeiov;
2040 total_len = err;
2041
2042 err = -ENOBUFS;
2043
228e548e 2044 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2045 goto out_freeiov;
228e548e 2046 ctl_len = msg_sys->msg_controllen;
1da177e4 2047 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2048 err =
228e548e 2049 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2050 sizeof(ctl));
1da177e4
LT
2051 if (err)
2052 goto out_freeiov;
228e548e
AB
2053 ctl_buf = msg_sys->msg_control;
2054 ctl_len = msg_sys->msg_controllen;
1da177e4 2055 } else if (ctl_len) {
89bddce5 2056 if (ctl_len > sizeof(ctl)) {
1da177e4 2057 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2058 if (ctl_buf == NULL)
1da177e4
LT
2059 goto out_freeiov;
2060 }
2061 err = -EFAULT;
2062 /*
228e548e 2063 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2064 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2065 * checking falls down on this.
2066 */
fb8621bb 2067 if (copy_from_user(ctl_buf,
228e548e 2068 (void __user __force *)msg_sys->msg_control,
89bddce5 2069 ctl_len))
1da177e4 2070 goto out_freectl;
228e548e 2071 msg_sys->msg_control = ctl_buf;
1da177e4 2072 }
228e548e 2073 msg_sys->msg_flags = flags;
1da177e4
LT
2074
2075 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2076 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2077 /*
2078 * If this is sendmmsg() and current destination address is same as
2079 * previously succeeded address, omit asking LSM's decision.
2080 * used_address->name_len is initialized to UINT_MAX so that the first
2081 * destination address never matches.
2082 */
bc909d9d
MD
2083 if (used_address && msg_sys->msg_name &&
2084 used_address->name_len == msg_sys->msg_namelen &&
2085 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
2086 used_address->name_len)) {
2087 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2088 goto out_freectl;
2089 }
2090 err = sock_sendmsg(sock, msg_sys, total_len);
2091 /*
2092 * If this is sendmmsg() and sending to current destination address was
2093 * successful, remember it.
2094 */
2095 if (used_address && err >= 0) {
2096 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2097 if (msg_sys->msg_name)
2098 memcpy(&used_address->name, msg_sys->msg_name,
2099 used_address->name_len);
c71d8ebe 2100 }
1da177e4
LT
2101
2102out_freectl:
89bddce5 2103 if (ctl_buf != ctl)
1da177e4
LT
2104 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2105out_freeiov:
2106 if (iov != iovstack)
a74e9106 2107 kfree(iov);
228e548e
AB
2108out:
2109 return err;
2110}
2111
2112/*
2113 * BSD sendmsg interface
2114 */
2115
a7526eb5 2116long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
228e548e
AB
2117{
2118 int fput_needed, err;
2119 struct msghdr msg_sys;
1be374a0
AL
2120 struct socket *sock;
2121
1be374a0 2122 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2123 if (!sock)
2124 goto out;
2125
a7526eb5 2126 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2127
6cb153ca 2128 fput_light(sock->file, fput_needed);
89bddce5 2129out:
1da177e4
LT
2130 return err;
2131}
2132
a7526eb5
AL
2133SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
2134{
2135 if (flags & MSG_CMSG_COMPAT)
2136 return -EINVAL;
2137 return __sys_sendmsg(fd, msg, flags);
2138}
2139
228e548e
AB
2140/*
2141 * Linux sendmmsg interface
2142 */
2143
2144int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2145 unsigned int flags)
2146{
2147 int fput_needed, err, datagrams;
2148 struct socket *sock;
2149 struct mmsghdr __user *entry;
2150 struct compat_mmsghdr __user *compat_entry;
2151 struct msghdr msg_sys;
c71d8ebe 2152 struct used_address used_address;
228e548e 2153
98382f41
AB
2154 if (vlen > UIO_MAXIOV)
2155 vlen = UIO_MAXIOV;
228e548e
AB
2156
2157 datagrams = 0;
2158
2159 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2160 if (!sock)
2161 return err;
2162
c71d8ebe 2163 used_address.name_len = UINT_MAX;
228e548e
AB
2164 entry = mmsg;
2165 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2166 err = 0;
228e548e
AB
2167
2168 while (datagrams < vlen) {
228e548e 2169 if (MSG_CMSG_COMPAT & flags) {
a7526eb5
AL
2170 err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
2171 &msg_sys, flags, &used_address);
228e548e
AB
2172 if (err < 0)
2173 break;
2174 err = __put_user(err, &compat_entry->msg_len);
2175 ++compat_entry;
2176 } else {
a7526eb5
AL
2177 err = ___sys_sendmsg(sock,
2178 (struct msghdr __user *)entry,
2179 &msg_sys, flags, &used_address);
228e548e
AB
2180 if (err < 0)
2181 break;
2182 err = put_user(err, &entry->msg_len);
2183 ++entry;
2184 }
2185
2186 if (err)
2187 break;
2188 ++datagrams;
2189 }
2190
228e548e
AB
2191 fput_light(sock->file, fput_needed);
2192
728ffb86
AB
2193 /* We only return an error if no datagrams were able to be sent */
2194 if (datagrams != 0)
228e548e
AB
2195 return datagrams;
2196
228e548e
AB
2197 return err;
2198}
2199
2200SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2201 unsigned int, vlen, unsigned int, flags)
2202{
1be374a0
AL
2203 if (flags & MSG_CMSG_COMPAT)
2204 return -EINVAL;
228e548e
AB
2205 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2206}
2207
a7526eb5 2208static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2209 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2210{
89bddce5
SH
2211 struct compat_msghdr __user *msg_compat =
2212 (struct compat_msghdr __user *)msg;
1da177e4 2213 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2214 struct iovec *iov = iovstack;
1da177e4 2215 unsigned long cmsg_ptr;
a74e9106 2216 int err, total_len, len;
1da177e4
LT
2217
2218 /* kernel mode address */
230b1839 2219 struct sockaddr_storage addr;
1da177e4
LT
2220
2221 /* user mode address pointers */
2222 struct sockaddr __user *uaddr;
2223 int __user *uaddr_len;
89bddce5 2224
1da177e4 2225 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2226 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2227 return -EFAULT;
1661bf36
DC
2228 } else {
2229 err = copy_msghdr_from_user(msg_sys, msg);
2230 if (err)
2231 return err;
2232 }
1da177e4 2233
a2e27255 2234 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2235 err = -EMSGSIZE;
2236 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2237 goto out;
2238 err = -ENOMEM;
2239 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2240 GFP_KERNEL);
1da177e4 2241 if (!iov)
a2e27255 2242 goto out;
1da177e4
LT
2243 }
2244
f3d33426
HFS
2245 /* Save the user-mode address (verify_iovec will change the
2246 * kernel msghdr to use the kernel address space)
1da177e4 2247 */
a2e27255 2248 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4 2249 uaddr_len = COMPAT_NAMELEN(msg);
f3d33426 2250 if (MSG_CMSG_COMPAT & flags)
43db362d 2251 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
f3d33426 2252 else
43db362d 2253 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2254 if (err < 0)
2255 goto out_freeiov;
89bddce5 2256 total_len = err;
1da177e4 2257
a2e27255
ACM
2258 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2259 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2260
f3d33426
HFS
2261 /* We assume all kernel code knows the size of sockaddr_storage */
2262 msg_sys->msg_namelen = 0;
2263
1da177e4
LT
2264 if (sock->file->f_flags & O_NONBLOCK)
2265 flags |= MSG_DONTWAIT;
a2e27255
ACM
2266 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2267 total_len, flags);
1da177e4
LT
2268 if (err < 0)
2269 goto out_freeiov;
2270 len = err;
2271
2272 if (uaddr != NULL) {
43db362d 2273 err = move_addr_to_user(&addr,
a2e27255 2274 msg_sys->msg_namelen, uaddr,
89bddce5 2275 uaddr_len);
1da177e4
LT
2276 if (err < 0)
2277 goto out_freeiov;
2278 }
a2e27255 2279 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2280 COMPAT_FLAGS(msg));
1da177e4
LT
2281 if (err)
2282 goto out_freeiov;
2283 if (MSG_CMSG_COMPAT & flags)
a2e27255 2284 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2285 &msg_compat->msg_controllen);
2286 else
a2e27255 2287 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2288 &msg->msg_controllen);
2289 if (err)
2290 goto out_freeiov;
2291 err = len;
2292
2293out_freeiov:
2294 if (iov != iovstack)
a74e9106 2295 kfree(iov);
a2e27255
ACM
2296out:
2297 return err;
2298}
2299
2300/*
2301 * BSD recvmsg interface
2302 */
2303
a7526eb5 2304long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags)
a2e27255
ACM
2305{
2306 int fput_needed, err;
2307 struct msghdr msg_sys;
1be374a0
AL
2308 struct socket *sock;
2309
1be374a0 2310 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2311 if (!sock)
2312 goto out;
2313
a7526eb5 2314 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2315
6cb153ca 2316 fput_light(sock->file, fput_needed);
1da177e4
LT
2317out:
2318 return err;
2319}
2320
a7526eb5
AL
2321SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2322 unsigned int, flags)
2323{
2324 if (flags & MSG_CMSG_COMPAT)
2325 return -EINVAL;
2326 return __sys_recvmsg(fd, msg, flags);
2327}
2328
a2e27255
ACM
2329/*
2330 * Linux recvmmsg interface
2331 */
2332
2333int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2334 unsigned int flags, struct timespec *timeout)
2335{
2336 int fput_needed, err, datagrams;
2337 struct socket *sock;
2338 struct mmsghdr __user *entry;
d7256d0e 2339 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2340 struct msghdr msg_sys;
2341 struct timespec end_time;
2342
2343 if (timeout &&
2344 poll_select_set_timeout(&end_time, timeout->tv_sec,
2345 timeout->tv_nsec))
2346 return -EINVAL;
2347
2348 datagrams = 0;
2349
2350 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2351 if (!sock)
2352 return err;
2353
2354 err = sock_error(sock->sk);
2355 if (err)
2356 goto out_put;
2357
2358 entry = mmsg;
d7256d0e 2359 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2360
2361 while (datagrams < vlen) {
2362 /*
2363 * No need to ask LSM for more than the first datagram.
2364 */
d7256d0e 2365 if (MSG_CMSG_COMPAT & flags) {
a7526eb5
AL
2366 err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2367 &msg_sys, flags & ~MSG_WAITFORONE,
2368 datagrams);
d7256d0e
JMG
2369 if (err < 0)
2370 break;
2371 err = __put_user(err, &compat_entry->msg_len);
2372 ++compat_entry;
2373 } else {
a7526eb5
AL
2374 err = ___sys_recvmsg(sock,
2375 (struct msghdr __user *)entry,
2376 &msg_sys, flags & ~MSG_WAITFORONE,
2377 datagrams);
d7256d0e
JMG
2378 if (err < 0)
2379 break;
2380 err = put_user(err, &entry->msg_len);
2381 ++entry;
2382 }
2383
a2e27255
ACM
2384 if (err)
2385 break;
a2e27255
ACM
2386 ++datagrams;
2387
71c5c159
BB
2388 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2389 if (flags & MSG_WAITFORONE)
2390 flags |= MSG_DONTWAIT;
2391
a2e27255
ACM
2392 if (timeout) {
2393 ktime_get_ts(timeout);
2394 *timeout = timespec_sub(end_time, *timeout);
2395 if (timeout->tv_sec < 0) {
2396 timeout->tv_sec = timeout->tv_nsec = 0;
2397 break;
2398 }
2399
2400 /* Timeout, return less than vlen datagrams */
2401 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2402 break;
2403 }
2404
2405 /* Out of band data, return right away */
2406 if (msg_sys.msg_flags & MSG_OOB)
2407 break;
2408 }
2409
2410out_put:
2411 fput_light(sock->file, fput_needed);
1da177e4 2412
a2e27255
ACM
2413 if (err == 0)
2414 return datagrams;
2415
2416 if (datagrams != 0) {
2417 /*
2418 * We may return less entries than requested (vlen) if the
2419 * sock is non block and there aren't enough datagrams...
2420 */
2421 if (err != -EAGAIN) {
2422 /*
2423 * ... or if recvmsg returns an error after we
2424 * received some datagrams, where we record the
2425 * error to return on the next call or if the
2426 * app asks about it using getsockopt(SO_ERROR).
2427 */
2428 sock->sk->sk_err = -err;
2429 }
2430
2431 return datagrams;
2432 }
2433
2434 return err;
2435}
2436
2437SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2438 unsigned int, vlen, unsigned int, flags,
2439 struct timespec __user *, timeout)
2440{
2441 int datagrams;
2442 struct timespec timeout_sys;
2443
1be374a0
AL
2444 if (flags & MSG_CMSG_COMPAT)
2445 return -EINVAL;
2446
a2e27255
ACM
2447 if (!timeout)
2448 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2449
2450 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2451 return -EFAULT;
2452
2453 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2454
2455 if (datagrams > 0 &&
2456 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2457 datagrams = -EFAULT;
2458
2459 return datagrams;
2460}
2461
2462#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall.
 *
 * nargs[call] is the size in bytes of the user-space argument block for
 * socketcall number "call": the argument count times the size of an
 * unsigned long.  Slot 0 is a zero-sized placeholder.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),					/* call  0       */
	AL(3), AL(3), AL(3), AL(2), AL(3),	/* calls  1 -  5 */
	AL(3), AL(3), AL(4), AL(4), AL(4),	/* calls  6 - 10 */
	AL(6), AL(6), AL(2), AL(5), AL(5),	/* calls 11 - 15 */
	AL(3), AL(3), AL(4), AL(5), AL(4)	/* calls 16 - 20 */
};

#undef AL
2473
2474/*
89bddce5 2475 * System call vectors.
1da177e4
LT
2476 *
2477 * Argument checking cleaned up. Saved 20% in size.
2478 * This function doesn't need to set the kernel lock because
89bddce5 2479 * it is set by the callees.
1da177e4
LT
2480 */
2481
3e0fa65f 2482SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2483{
2950fa9d 2484 unsigned long a[AUDITSC_ARGS];
89bddce5 2485 unsigned long a0, a1;
1da177e4 2486 int err;
47379052 2487 unsigned int len;
1da177e4 2488
228e548e 2489 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2490 return -EINVAL;
2491
47379052
AV
2492 len = nargs[call];
2493 if (len > sizeof(a))
2494 return -EINVAL;
2495
1da177e4 2496 /* copy_from_user should be SMP safe. */
47379052 2497 if (copy_from_user(a, args, len))
1da177e4 2498 return -EFAULT;
3ec3b2fb 2499
2950fa9d
CG
2500 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2501 if (err)
2502 return err;
3ec3b2fb 2503
89bddce5
SH
2504 a0 = a[0];
2505 a1 = a[1];
2506
2507 switch (call) {
2508 case SYS_SOCKET:
2509 err = sys_socket(a0, a1, a[2]);
2510 break;
2511 case SYS_BIND:
2512 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2513 break;
2514 case SYS_CONNECT:
2515 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2516 break;
2517 case SYS_LISTEN:
2518 err = sys_listen(a0, a1);
2519 break;
2520 case SYS_ACCEPT:
de11defe
UD
2521 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2522 (int __user *)a[2], 0);
89bddce5
SH
2523 break;
2524 case SYS_GETSOCKNAME:
2525 err =
2526 sys_getsockname(a0, (struct sockaddr __user *)a1,
2527 (int __user *)a[2]);
2528 break;
2529 case SYS_GETPEERNAME:
2530 err =
2531 sys_getpeername(a0, (struct sockaddr __user *)a1,
2532 (int __user *)a[2]);
2533 break;
2534 case SYS_SOCKETPAIR:
2535 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2536 break;
2537 case SYS_SEND:
2538 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2539 break;
2540 case SYS_SENDTO:
2541 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2542 (struct sockaddr __user *)a[4], a[5]);
2543 break;
2544 case SYS_RECV:
2545 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2546 break;
2547 case SYS_RECVFROM:
2548 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2549 (struct sockaddr __user *)a[4],
2550 (int __user *)a[5]);
2551 break;
2552 case SYS_SHUTDOWN:
2553 err = sys_shutdown(a0, a1);
2554 break;
2555 case SYS_SETSOCKOPT:
2556 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2557 break;
2558 case SYS_GETSOCKOPT:
2559 err =
2560 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2561 (int __user *)a[4]);
2562 break;
2563 case SYS_SENDMSG:
2564 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2565 break;
228e548e
AB
2566 case SYS_SENDMMSG:
2567 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2568 break;
89bddce5
SH
2569 case SYS_RECVMSG:
2570 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2571 break;
a2e27255
ACM
2572 case SYS_RECVMMSG:
2573 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2574 (struct timespec __user *)a[4]);
2575 break;
de11defe
UD
2576 case SYS_ACCEPT4:
2577 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2578 (int __user *)a[2], a[3]);
aaca0bdc 2579 break;
89bddce5
SH
2580 default:
2581 err = -EINVAL;
2582 break;
1da177e4
LT
2583 }
2584 return err;
2585}
2586
89bddce5 2587#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2588
55737fda
SH
2589/**
2590 * sock_register - add a socket protocol handler
2591 * @ops: description of protocol
2592 *
1da177e4
LT
2593 * This function is called by a protocol handler that wants to
2594 * advertise its address family, and have it linked into the
55737fda
SH
2595 * socket interface. The value ops->family coresponds to the
2596 * socket system call protocol family.
1da177e4 2597 */
f0fd27d4 2598int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2599{
2600 int err;
2601
2602 if (ops->family >= NPROTO) {
89bddce5
SH
2603 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2604 NPROTO);
1da177e4
LT
2605 return -ENOBUFS;
2606 }
55737fda
SH
2607
2608 spin_lock(&net_family_lock);
190683a9
ED
2609 if (rcu_dereference_protected(net_families[ops->family],
2610 lockdep_is_held(&net_family_lock)))
55737fda
SH
2611 err = -EEXIST;
2612 else {
cf778b00 2613 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2614 err = 0;
2615 }
55737fda
SH
2616 spin_unlock(&net_family_lock);
2617
89bddce5 2618 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2619 return err;
2620}
c6d409cf 2621EXPORT_SYMBOL(sock_register);
1da177e4 2622
55737fda
SH
2623/**
2624 * sock_unregister - remove a protocol handler
2625 * @family: protocol family to remove
2626 *
1da177e4
LT
2627 * This function is called by a protocol handler that wants to
2628 * remove its address family, and have it unlinked from the
55737fda
SH
2629 * new socket creation.
2630 *
2631 * If protocol handler is a module, then it can use module reference
2632 * counts to protect against new references. If protocol handler is not
2633 * a module then it needs to provide its own protection in
2634 * the ops->create routine.
1da177e4 2635 */
f0fd27d4 2636void sock_unregister(int family)
1da177e4 2637{
f0fd27d4 2638 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2639
55737fda 2640 spin_lock(&net_family_lock);
a9b3cd7f 2641 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2642 spin_unlock(&net_family_lock);
2643
2644 synchronize_rcu();
2645
89bddce5 2646 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2647}
c6d409cf 2648EXPORT_SYMBOL(sock_unregister);
1da177e4 2649
77d76ea3 2650static int __init sock_init(void)
1da177e4 2651{
b3e19d92 2652 int err;
2ca794e5
EB
2653 /*
2654 * Initialize the network sysctl infrastructure.
2655 */
2656 err = net_sysctl_init();
2657 if (err)
2658 goto out;
b3e19d92 2659
1da177e4 2660 /*
89bddce5 2661 * Initialize skbuff SLAB cache
1da177e4
LT
2662 */
2663 skb_init();
1da177e4
LT
2664
2665 /*
89bddce5 2666 * Initialize the protocols module.
1da177e4
LT
2667 */
2668
2669 init_inodecache();
b3e19d92
NP
2670
2671 err = register_filesystem(&sock_fs_type);
2672 if (err)
2673 goto out_fs;
1da177e4 2674 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2675 if (IS_ERR(sock_mnt)) {
2676 err = PTR_ERR(sock_mnt);
2677 goto out_mount;
2678 }
77d76ea3
AK
2679
2680 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2681 */
2682
2683#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2684 err = netfilter_init();
2685 if (err)
2686 goto out;
1da177e4 2687#endif
cbeb321a 2688
c1f19b51
RC
2689#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2690 skb_timestamping_init();
2691#endif
2692
b3e19d92
NP
2693out:
2694 return err;
2695
2696out_mount:
2697 unregister_filesystem(&sock_fs_type);
2698out_fs:
2699 goto out;
1da177e4
LT
2700}
2701
77d76ea3
AK
2702core_initcall(sock_init); /* early initcall */
2703
1da177e4
LT
2704#ifdef CONFIG_PROC_FS
2705void socket_seq_show(struct seq_file *seq)
2706{
2707 int cpu;
2708 int counter = 0;
2709
6f912042 2710 for_each_possible_cpu(cpu)
89bddce5 2711 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2712
2713 /* It can be negative, by the way. 8) */
2714 if (counter < 0)
2715 counter = 0;
2716
2717 seq_printf(seq, "sockets: used %d\n", counter);
2718}
89bddce5 2719#endif /* CONFIG_PROC_FS */
1da177e4 2720
89bbfc95 2721#ifdef CONFIG_COMPAT
6b96018b 2722static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2723 unsigned int cmd, void __user *up)
7a229387 2724{
7a229387
AB
2725 mm_segment_t old_fs = get_fs();
2726 struct timeval ktv;
2727 int err;
2728
2729 set_fs(KERNEL_DS);
6b96018b 2730 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2731 set_fs(old_fs);
644595f8 2732 if (!err)
ed6fe9d6 2733 err = compat_put_timeval(&ktv, up);
644595f8 2734
7a229387
AB
2735 return err;
2736}
2737
6b96018b 2738static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2739 unsigned int cmd, void __user *up)
7a229387 2740{
7a229387
AB
2741 mm_segment_t old_fs = get_fs();
2742 struct timespec kts;
2743 int err;
2744
2745 set_fs(KERNEL_DS);
6b96018b 2746 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2747 set_fs(old_fs);
644595f8 2748 if (!err)
ed6fe9d6 2749 err = compat_put_timespec(&kts, up);
644595f8 2750
7a229387
AB
2751 return err;
2752}
2753
6b96018b 2754static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2755{
2756 struct ifreq __user *uifr;
2757 int err;
2758
2759 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2760 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2761 return -EFAULT;
2762
6b96018b 2763 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2764 if (err)
2765 return err;
2766
6b96018b 2767 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2768 return -EFAULT;
2769
2770 return 0;
2771}
2772
6b96018b 2773static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2774{
6b96018b 2775 struct compat_ifconf ifc32;
7a229387
AB
2776 struct ifconf ifc;
2777 struct ifconf __user *uifc;
6b96018b 2778 struct compat_ifreq __user *ifr32;
7a229387
AB
2779 struct ifreq __user *ifr;
2780 unsigned int i, j;
2781 int err;
2782
6b96018b 2783 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2784 return -EFAULT;
2785
43da5f2e 2786 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2787 if (ifc32.ifcbuf == 0) {
2788 ifc32.ifc_len = 0;
2789 ifc.ifc_len = 0;
2790 ifc.ifc_req = NULL;
2791 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2792 } else {
c6d409cf
ED
2793 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2794 sizeof(struct ifreq);
7a229387
AB
2795 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2796 ifc.ifc_len = len;
2797 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2798 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2799 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2800 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2801 return -EFAULT;
2802 ifr++;
2803 ifr32++;
2804 }
2805 }
2806 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2807 return -EFAULT;
2808
6b96018b 2809 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2810 if (err)
2811 return err;
2812
2813 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2814 return -EFAULT;
2815
2816 ifr = ifc.ifc_req;
2817 ifr32 = compat_ptr(ifc32.ifcbuf);
2818 for (i = 0, j = 0;
c6d409cf
ED
2819 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2820 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2821 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2822 return -EFAULT;
2823 ifr32++;
2824 ifr++;
2825 }
2826
2827 if (ifc32.ifcbuf == 0) {
2828 /* Translate from 64-bit structure multiple to
2829 * a 32-bit one.
2830 */
2831 i = ifc.ifc_len;
6b96018b 2832 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2833 ifc32.ifc_len = i;
2834 } else {
2835 ifc32.ifc_len = i;
2836 }
6b96018b 2837 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2838 return -EFAULT;
2839
2840 return 0;
2841}
2842
6b96018b 2843static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2844{
3a7da39d
BH
2845 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2846 bool convert_in = false, convert_out = false;
2847 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2848 struct ethtool_rxnfc __user *rxnfc;
7a229387 2849 struct ifreq __user *ifr;
3a7da39d
BH
2850 u32 rule_cnt = 0, actual_rule_cnt;
2851 u32 ethcmd;
7a229387 2852 u32 data;
3a7da39d 2853 int ret;
7a229387 2854
3a7da39d
BH
2855 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2856 return -EFAULT;
7a229387 2857
3a7da39d
BH
2858 compat_rxnfc = compat_ptr(data);
2859
2860 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2861 return -EFAULT;
2862
3a7da39d
BH
2863 /* Most ethtool structures are defined without padding.
2864 * Unfortunately struct ethtool_rxnfc is an exception.
2865 */
2866 switch (ethcmd) {
2867 default:
2868 break;
2869 case ETHTOOL_GRXCLSRLALL:
2870 /* Buffer size is variable */
2871 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2872 return -EFAULT;
2873 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2874 return -ENOMEM;
2875 buf_size += rule_cnt * sizeof(u32);
2876 /* fall through */
2877 case ETHTOOL_GRXRINGS:
2878 case ETHTOOL_GRXCLSRLCNT:
2879 case ETHTOOL_GRXCLSRULE:
55664f32 2880 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2881 convert_out = true;
2882 /* fall through */
2883 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2884 buf_size += sizeof(struct ethtool_rxnfc);
2885 convert_in = true;
2886 break;
2887 }
2888
2889 ifr = compat_alloc_user_space(buf_size);
954b1244 2890 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2891
2892 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2893 return -EFAULT;
2894
3a7da39d
BH
2895 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2896 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2897 return -EFAULT;
2898
3a7da39d 2899 if (convert_in) {
127fe533 2900 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2901 * fs.ring_cookie and at the end of fs, but nowhere else.
2902 */
127fe533
AD
2903 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2904 sizeof(compat_rxnfc->fs.m_ext) !=
2905 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2906 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2907 BUILD_BUG_ON(
2908 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2909 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2910 offsetof(struct ethtool_rxnfc, fs.location) -
2911 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2912
2913 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2914 (void __user *)(&rxnfc->fs.m_ext + 1) -
2915 (void __user *)rxnfc) ||
3a7da39d
BH
2916 copy_in_user(&rxnfc->fs.ring_cookie,
2917 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2918 (void __user *)(&rxnfc->fs.location + 1) -
2919 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2920 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2921 sizeof(rxnfc->rule_cnt)))
2922 return -EFAULT;
2923 }
2924
2925 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2926 if (ret)
2927 return ret;
2928
2929 if (convert_out) {
2930 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2931 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2932 (const void __user *)rxnfc) ||
3a7da39d
BH
2933 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2934 &rxnfc->fs.ring_cookie,
954b1244
SH
2935 (const void __user *)(&rxnfc->fs.location + 1) -
2936 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2937 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2938 sizeof(rxnfc->rule_cnt)))
2939 return -EFAULT;
2940
2941 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2942 /* As an optimisation, we only copy the actual
2943 * number of rules that the underlying
2944 * function returned. Since Mallory might
2945 * change the rule count in user memory, we
2946 * check that it is less than the rule count
2947 * originally given (as the user buffer size),
2948 * which has been range-checked.
2949 */
2950 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2951 return -EFAULT;
2952 if (actual_rule_cnt < rule_cnt)
2953 rule_cnt = actual_rule_cnt;
2954 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2955 &rxnfc->rule_locs[0],
2956 rule_cnt * sizeof(u32)))
2957 return -EFAULT;
2958 }
2959 }
2960
2961 return 0;
7a229387
AB
2962}
2963
7a50a240
AB
2964static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2965{
2966 void __user *uptr;
2967 compat_uptr_t uptr32;
2968 struct ifreq __user *uifr;
2969
c6d409cf 2970 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2971 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2972 return -EFAULT;
2973
2974 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2975 return -EFAULT;
2976
2977 uptr = compat_ptr(uptr32);
2978
2979 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2980 return -EFAULT;
2981
2982 return dev_ioctl(net, SIOCWANDEV, uifr);
2983}
2984
6b96018b
AB
2985static int bond_ioctl(struct net *net, unsigned int cmd,
2986 struct compat_ifreq __user *ifr32)
7a229387
AB
2987{
2988 struct ifreq kifr;
7a229387
AB
2989 mm_segment_t old_fs;
2990 int err;
7a229387
AB
2991
2992 switch (cmd) {
2993 case SIOCBONDENSLAVE:
2994 case SIOCBONDRELEASE:
2995 case SIOCBONDSETHWADDR:
2996 case SIOCBONDCHANGEACTIVE:
6b96018b 2997 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2998 return -EFAULT;
2999
3000 old_fs = get_fs();
c6d409cf 3001 set_fs(KERNEL_DS);
c3f52ae6 3002 err = dev_ioctl(net, cmd,
3003 (struct ifreq __user __force *) &kifr);
c6d409cf 3004 set_fs(old_fs);
7a229387
AB
3005
3006 return err;
7a229387 3007 default:
07d106d0 3008 return -ENOIOCTLCMD;
ccbd6a5a 3009 }
7a229387
AB
3010}
3011
590d4693
BH
3012/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3013static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3014 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
3015{
3016 struct ifreq __user *u_ifreq64;
7a229387
AB
3017 char tmp_buf[IFNAMSIZ];
3018 void __user *data64;
3019 u32 data32;
3020
3021 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
3022 IFNAMSIZ))
3023 return -EFAULT;
417c3522 3024 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
3025 return -EFAULT;
3026 data64 = compat_ptr(data32);
3027
3028 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
3029
7a229387
AB
3030 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
3031 IFNAMSIZ))
3032 return -EFAULT;
417c3522 3033 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
3034 return -EFAULT;
3035
6b96018b 3036 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
3037}
3038
6b96018b
AB
3039static int dev_ifsioc(struct net *net, struct socket *sock,
3040 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 3041{
a2116ed2 3042 struct ifreq __user *uifr;
7a229387
AB
3043 int err;
3044
a2116ed2
AB
3045 uifr = compat_alloc_user_space(sizeof(*uifr));
3046 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3047 return -EFAULT;
3048
3049 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3050
7a229387
AB
3051 if (!err) {
3052 switch (cmd) {
3053 case SIOCGIFFLAGS:
3054 case SIOCGIFMETRIC:
3055 case SIOCGIFMTU:
3056 case SIOCGIFMEM:
3057 case SIOCGIFHWADDR:
3058 case SIOCGIFINDEX:
3059 case SIOCGIFADDR:
3060 case SIOCGIFBRDADDR:
3061 case SIOCGIFDSTADDR:
3062 case SIOCGIFNETMASK:
fab2532b 3063 case SIOCGIFPFLAGS:
7a229387 3064 case SIOCGIFTXQLEN:
fab2532b
AB
3065 case SIOCGMIIPHY:
3066 case SIOCGMIIREG:
a2116ed2 3067 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3068 err = -EFAULT;
3069 break;
3070 }
3071 }
3072 return err;
3073}
3074
a2116ed2
AB
3075static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3076 struct compat_ifreq __user *uifr32)
3077{
3078 struct ifreq ifr;
3079 struct compat_ifmap __user *uifmap32;
3080 mm_segment_t old_fs;
3081 int err;
3082
3083 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3084 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3085 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3086 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3087 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3088 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3089 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3090 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3091 if (err)
3092 return -EFAULT;
3093
3094 old_fs = get_fs();
c6d409cf 3095 set_fs(KERNEL_DS);
c3f52ae6 3096 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3097 set_fs(old_fs);
a2116ed2
AB
3098
3099 if (cmd == SIOCGIFMAP && !err) {
3100 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3101 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3102 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3103 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3104 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3105 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3106 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3107 if (err)
3108 err = -EFAULT;
3109 }
3110 return err;
3111}
3112
7a229387 3113struct rtentry32 {
c6d409cf 3114 u32 rt_pad1;
7a229387
AB
3115 struct sockaddr rt_dst; /* target address */
3116 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3117 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3118 unsigned short rt_flags;
3119 short rt_pad2;
3120 u32 rt_pad3;
3121 unsigned char rt_tos;
3122 unsigned char rt_class;
3123 short rt_pad4;
3124 short rt_metric; /* +1 for binary compatibility! */
7a229387 3125 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3126 u32 rt_mtu; /* per route MTU/Window */
3127 u32 rt_window; /* Window clamping */
7a229387
AB
3128 unsigned short rt_irtt; /* Initial RTT */
3129};
3130
3131struct in6_rtmsg32 {
3132 struct in6_addr rtmsg_dst;
3133 struct in6_addr rtmsg_src;
3134 struct in6_addr rtmsg_gateway;
3135 u32 rtmsg_type;
3136 u16 rtmsg_dst_len;
3137 u16 rtmsg_src_len;
3138 u32 rtmsg_metric;
3139 u32 rtmsg_info;
3140 u32 rtmsg_flags;
3141 s32 rtmsg_ifindex;
3142};
3143
6b96018b
AB
3144static int routing_ioctl(struct net *net, struct socket *sock,
3145 unsigned int cmd, void __user *argp)
7a229387
AB
3146{
3147 int ret;
3148 void *r = NULL;
3149 struct in6_rtmsg r6;
3150 struct rtentry r4;
3151 char devname[16];
3152 u32 rtdev;
3153 mm_segment_t old_fs = get_fs();
3154
6b96018b
AB
3155 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3156 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3157 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3158 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3159 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3160 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3161 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3162 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3163 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3164 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3165 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3166
3167 r = (void *) &r6;
3168 } else { /* ipv4 */
6b96018b 3169 struct rtentry32 __user *ur4 = argp;
c6d409cf 3170 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3171 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3172 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3173 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3174 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3175 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3176 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3177 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3178 if (rtdev) {
c6d409cf 3179 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3180 r4.rt_dev = (char __user __force *)devname;
3181 devname[15] = 0;
7a229387
AB
3182 } else
3183 r4.rt_dev = NULL;
3184
3185 r = (void *) &r4;
3186 }
3187
3188 if (ret) {
3189 ret = -EFAULT;
3190 goto out;
3191 }
3192
c6d409cf 3193 set_fs(KERNEL_DS);
6b96018b 3194 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3195 set_fs(old_fs);
7a229387
AB
3196
3197out:
7a229387
AB
3198 return ret;
3199}
3200
3201/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3202 * for some operations; this forces use of the newer bridge-utils that
25985edc 3203 * use compatible ioctls
7a229387 3204 */
6b96018b 3205static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3206{
6b96018b 3207 compat_ulong_t tmp;
7a229387 3208
6b96018b 3209 if (get_user(tmp, argp))
7a229387
AB
3210 return -EFAULT;
3211 if (tmp == BRCTL_GET_VERSION)
3212 return BRCTL_VERSION + 1;
3213 return -EINVAL;
3214}
3215
6b96018b
AB
3216static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3217 unsigned int cmd, unsigned long arg)
3218{
3219 void __user *argp = compat_ptr(arg);
3220 struct sock *sk = sock->sk;
3221 struct net *net = sock_net(sk);
7a229387 3222
6b96018b 3223 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3224 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3225
3226 switch (cmd) {
3227 case SIOCSIFBR:
3228 case SIOCGIFBR:
3229 return old_bridge_ioctl(argp);
3230 case SIOCGIFNAME:
3231 return dev_ifname32(net, argp);
3232 case SIOCGIFCONF:
3233 return dev_ifconf(net, argp);
3234 case SIOCETHTOOL:
3235 return ethtool_ioctl(net, argp);
7a50a240
AB
3236 case SIOCWANDEV:
3237 return compat_siocwandev(net, argp);
a2116ed2
AB
3238 case SIOCGIFMAP:
3239 case SIOCSIFMAP:
3240 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3241 case SIOCBONDENSLAVE:
3242 case SIOCBONDRELEASE:
3243 case SIOCBONDSETHWADDR:
6b96018b
AB
3244 case SIOCBONDCHANGEACTIVE:
3245 return bond_ioctl(net, cmd, argp);
3246 case SIOCADDRT:
3247 case SIOCDELRT:
3248 return routing_ioctl(net, sock, cmd, argp);
3249 case SIOCGSTAMP:
3250 return do_siocgstamp(net, sock, cmd, argp);
3251 case SIOCGSTAMPNS:
3252 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3253 case SIOCBONDSLAVEINFOQUERY:
3254 case SIOCBONDINFOQUERY:
a2116ed2 3255 case SIOCSHWTSTAMP:
fd468c74 3256 case SIOCGHWTSTAMP:
590d4693 3257 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3258
3259 case FIOSETOWN:
3260 case SIOCSPGRP:
3261 case FIOGETOWN:
3262 case SIOCGPGRP:
3263 case SIOCBRADDBR:
3264 case SIOCBRDELBR:
3265 case SIOCGIFVLAN:
3266 case SIOCSIFVLAN:
3267 case SIOCADDDLCI:
3268 case SIOCDELDLCI:
3269 return sock_ioctl(file, cmd, arg);
3270
3271 case SIOCGIFFLAGS:
3272 case SIOCSIFFLAGS:
3273 case SIOCGIFMETRIC:
3274 case SIOCSIFMETRIC:
3275 case SIOCGIFMTU:
3276 case SIOCSIFMTU:
3277 case SIOCGIFMEM:
3278 case SIOCSIFMEM:
3279 case SIOCGIFHWADDR:
3280 case SIOCSIFHWADDR:
3281 case SIOCADDMULTI:
3282 case SIOCDELMULTI:
3283 case SIOCGIFINDEX:
6b96018b
AB
3284 case SIOCGIFADDR:
3285 case SIOCSIFADDR:
3286 case SIOCSIFHWBROADCAST:
6b96018b 3287 case SIOCDIFADDR:
6b96018b
AB
3288 case SIOCGIFBRDADDR:
3289 case SIOCSIFBRDADDR:
3290 case SIOCGIFDSTADDR:
3291 case SIOCSIFDSTADDR:
3292 case SIOCGIFNETMASK:
3293 case SIOCSIFNETMASK:
3294 case SIOCSIFPFLAGS:
3295 case SIOCGIFPFLAGS:
3296 case SIOCGIFTXQLEN:
3297 case SIOCSIFTXQLEN:
3298 case SIOCBRADDIF:
3299 case SIOCBRDELIF:
9177efd3
AB
3300 case SIOCSIFNAME:
3301 case SIOCGMIIPHY:
3302 case SIOCGMIIREG:
3303 case SIOCSMIIREG:
6b96018b 3304 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3305
6b96018b
AB
3306 case SIOCSARP:
3307 case SIOCGARP:
3308 case SIOCDARP:
6b96018b 3309 case SIOCATMARK:
9177efd3
AB
3310 return sock_do_ioctl(net, sock, cmd, arg);
3311 }
3312
6b96018b
AB
3313 return -ENOIOCTLCMD;
3314}
7a229387 3315
95c96174 3316static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3317 unsigned long arg)
89bbfc95
SP
3318{
3319 struct socket *sock = file->private_data;
3320 int ret = -ENOIOCTLCMD;
87de87d5
DM
3321 struct sock *sk;
3322 struct net *net;
3323
3324 sk = sock->sk;
3325 net = sock_net(sk);
89bbfc95
SP
3326
3327 if (sock->ops->compat_ioctl)
3328 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3329
87de87d5
DM
3330 if (ret == -ENOIOCTLCMD &&
3331 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3332 ret = compat_wext_handle_ioctl(net, cmd, arg);
3333
6b96018b
AB
3334 if (ret == -ENOIOCTLCMD)
3335 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3336
89bbfc95
SP
3337 return ret;
3338}
3339#endif
3340
ac5a488e
SS
3341int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3342{
3343 return sock->ops->bind(sock, addr, addrlen);
3344}
c6d409cf 3345EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3346
3347int kernel_listen(struct socket *sock, int backlog)
3348{
3349 return sock->ops->listen(sock, backlog);
3350}
c6d409cf 3351EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3352
3353int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3354{
3355 struct sock *sk = sock->sk;
3356 int err;
3357
3358 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3359 newsock);
3360 if (err < 0)
3361 goto done;
3362
3363 err = sock->ops->accept(sock, *newsock, flags);
3364 if (err < 0) {
3365 sock_release(*newsock);
fa8705b0 3366 *newsock = NULL;
ac5a488e
SS
3367 goto done;
3368 }
3369
3370 (*newsock)->ops = sock->ops;
1b08534e 3371 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3372
3373done:
3374 return err;
3375}
c6d409cf 3376EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3377
3378int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3379 int flags)
ac5a488e
SS
3380{
3381 return sock->ops->connect(sock, addr, addrlen, flags);
3382}
c6d409cf 3383EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3384
3385int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3386 int *addrlen)
3387{
3388 return sock->ops->getname(sock, addr, addrlen, 0);
3389}
c6d409cf 3390EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3391
3392int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3393 int *addrlen)
3394{
3395 return sock->ops->getname(sock, addr, addrlen, 1);
3396}
c6d409cf 3397EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3398
3399int kernel_getsockopt(struct socket *sock, int level, int optname,
3400 char *optval, int *optlen)
3401{
3402 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3403 char __user *uoptval;
3404 int __user *uoptlen;
ac5a488e
SS
3405 int err;
3406
fb8621bb
NK
3407 uoptval = (char __user __force *) optval;
3408 uoptlen = (int __user __force *) optlen;
3409
ac5a488e
SS
3410 set_fs(KERNEL_DS);
3411 if (level == SOL_SOCKET)
fb8621bb 3412 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3413 else
fb8621bb
NK
3414 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3415 uoptlen);
ac5a488e
SS
3416 set_fs(oldfs);
3417 return err;
3418}
c6d409cf 3419EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3420
3421int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3422 char *optval, unsigned int optlen)
ac5a488e
SS
3423{
3424 mm_segment_t oldfs = get_fs();
fb8621bb 3425 char __user *uoptval;
ac5a488e
SS
3426 int err;
3427
fb8621bb
NK
3428 uoptval = (char __user __force *) optval;
3429
ac5a488e
SS
3430 set_fs(KERNEL_DS);
3431 if (level == SOL_SOCKET)
fb8621bb 3432 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3433 else
fb8621bb 3434 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3435 optlen);
3436 set_fs(oldfs);
3437 return err;
3438}
c6d409cf 3439EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3440
3441int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3442 size_t size, int flags)
3443{
3444 if (sock->ops->sendpage)
3445 return sock->ops->sendpage(sock, page, offset, size, flags);
3446
3447 return sock_no_sendpage(sock, page, offset, size, flags);
3448}
c6d409cf 3449EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3450
3451int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3452{
3453 mm_segment_t oldfs = get_fs();
3454 int err;
3455
3456 set_fs(KERNEL_DS);
3457 err = sock->ops->ioctl(sock, cmd, arg);
3458 set_fs(oldfs);
3459
3460 return err;
3461}
c6d409cf 3462EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3463
91cf45f0
TM
3464int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3465{
3466 return sock->ops->shutdown(sock, how);
3467}
91cf45f0 3468EXPORT_SYMBOL(kernel_sock_shutdown);
This page took 1.154699 seconds and 5 git commands to generate.