[IPV4]: Fix build without procfs.
[deliverable/linux.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1da177e4
LT
87
88#include <asm/uaccess.h>
89#include <asm/unistd.h>
90
91#include <net/compat.h>
92
93#include <net/sock.h>
94#include <linux/netfilter.h>
95
96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
98 unsigned long nr_segs, loff_t pos);
99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
89bddce5 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
102
103static int sock_close(struct inode *inode, struct file *file);
104static unsigned int sock_poll(struct file *file,
105 struct poll_table_struct *wait);
89bddce5 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
107#ifdef CONFIG_COMPAT
108static long compat_sock_ioctl(struct file *file,
89bddce5 109 unsigned int cmd, unsigned long arg);
89bbfc95 110#endif
1da177e4 111static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
112static ssize_t sock_sendpage(struct file *file, struct page *page,
113 int offset, size_t size, loff_t *ppos, int more);
114
1da177e4
LT
115/*
116 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
117 * in the operation structures but are done directly via the socketcall() multiplexor.
118 */
119
da7071d7 120static const struct file_operations socket_file_ops = {
1da177e4
LT
121 .owner = THIS_MODULE,
122 .llseek = no_llseek,
123 .aio_read = sock_aio_read,
124 .aio_write = sock_aio_write,
125 .poll = sock_poll,
126 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
127#ifdef CONFIG_COMPAT
128 .compat_ioctl = compat_sock_ioctl,
129#endif
1da177e4
LT
130 .mmap = sock_mmap,
131 .open = sock_no_open, /* special open code to disallow open via /proc */
132 .release = sock_close,
133 .fasync = sock_fasync,
5274f052
JA
134 .sendpage = sock_sendpage,
135 .splice_write = generic_splice_sendpage,
1da177e4
LT
136};
137
138/*
139 * The protocol list. Each protocol is registered in here.
140 */
141
1da177e4 142static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 144
1da177e4
LT
145/*
146 * Statistics counters of the socket lists
147 */
148
149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
150
151/*
89bddce5
SH
152 * Support routines.
153 * Move socket addresses back and forth across the kernel/user
154 * divide and look after the messy bits.
1da177e4
LT
155 */
156
89bddce5 157#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
158 16 for IP, 16 for IPX,
159 24 for IPv6,
89bddce5 160 about 80 for AX.25
1da177e4
LT
161 must be at least one bigger than
162 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 163 :unix_mkname()).
1da177e4 164 */
89bddce5 165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
178{
89bddce5 179 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5
SH
204
205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
206 int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
89bddce5
SH
211 err = get_user(len, ulen);
212 if (err)
1da177e4 213 return err;
89bddce5
SH
214 if (len > klen)
215 len = klen;
216 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 217 return -EINVAL;
89bddce5 218 if (len) {
d6fe3945
SG
219 if (audit_sockaddr(klen, kaddr))
220 return -ENOMEM;
89bddce5 221 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
222 return -EFAULT;
223 }
224 /*
89bddce5
SH
225 * "fromlen shall refer to the value before truncation.."
226 * 1003.1g
1da177e4
LT
227 */
228 return __put_user(klen, ulen);
229}
230
231#define SOCKFS_MAGIC 0x534F434B
232
e18b890b 233static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
242 init_waitqueue_head(&ei->socket.wait);
89bddce5 243
1da177e4
LT
244 ei->socket.fasync_list = NULL;
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
254static void sock_destroy_inode(struct inode *inode)
255{
256 kmem_cache_free(sock_inode_cachep,
257 container_of(inode, struct socket_alloc, vfs_inode));
258}
259
e18b890b 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 261{
89bddce5 262 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 263
89bddce5
SH
264 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
265 == SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
266 inode_init_once(&ei->vfs_inode);
267}
89bddce5 268
1da177e4
LT
269static int init_inodecache(void)
270{
271 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
272 sizeof(struct socket_alloc),
273 0,
274 (SLAB_HWCACHE_ALIGN |
275 SLAB_RECLAIM_ACCOUNT |
276 SLAB_MEM_SPREAD),
277 init_once,
278 NULL);
1da177e4
LT
279 if (sock_inode_cachep == NULL)
280 return -ENOMEM;
281 return 0;
282}
283
284static struct super_operations sockfs_ops = {
285 .alloc_inode = sock_alloc_inode,
286 .destroy_inode =sock_destroy_inode,
287 .statfs = simple_statfs,
288};
289
454e2398 290static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
291 int flags, const char *dev_name, void *data,
292 struct vfsmount *mnt)
1da177e4 293{
454e2398
DH
294 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
295 mnt);
1da177e4
LT
296}
297
ba89966c 298static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
299
300static struct file_system_type sock_fs_type = {
301 .name = "sockfs",
302 .get_sb = sockfs_get_sb,
303 .kill_sb = kill_anon_super,
304};
89bddce5 305
1da177e4
LT
306static int sockfs_delete_dentry(struct dentry *dentry)
307{
304e61e6
ED
308 /*
309 * At creation time, we pretended this dentry was hashed
310 * (by clearing DCACHE_UNHASHED bit in d_flags)
311 * At delete time, we restore the truth : not hashed.
312 * (so that dput() can proceed correctly)
313 */
314 dentry->d_flags |= DCACHE_UNHASHED;
315 return 0;
1da177e4
LT
316}
317static struct dentry_operations sockfs_dentry_operations = {
89bddce5 318 .d_delete = sockfs_delete_dentry,
1da177e4
LT
319};
320
321/*
322 * Obtains the first available file descriptor and sets it up for use.
323 *
39d8c1b6
DM
324 * These functions create file structures and maps them to fd space
325 * of the current process. On success it returns file descriptor
1da177e4
LT
326 * and file struct implicitly stored in sock->file.
327 * Note that another thread may close file descriptor before we return
328 * from this function. We use the fact that now we do not refer
329 * to socket after mapping. If one day we will need it, this
330 * function will increment ref. count on file by 1.
331 *
332 * In any case returned fd MAY BE not valid!
333 * This race condition is unavoidable
334 * with shared fd spaces, we cannot solve it inside kernel,
335 * but we take care of internal coherence yet.
336 */
337
39d8c1b6 338static int sock_alloc_fd(struct file **filep)
1da177e4
LT
339{
340 int fd;
1da177e4
LT
341
342 fd = get_unused_fd();
39d8c1b6 343 if (likely(fd >= 0)) {
1da177e4
LT
344 struct file *file = get_empty_filp();
345
39d8c1b6
DM
346 *filep = file;
347 if (unlikely(!file)) {
1da177e4 348 put_unused_fd(fd);
39d8c1b6 349 return -ENFILE;
1da177e4 350 }
39d8c1b6
DM
351 } else
352 *filep = NULL;
353 return fd;
354}
1da177e4 355
39d8c1b6
DM
356static int sock_attach_fd(struct socket *sock, struct file *file)
357{
358 struct qstr this;
359 char name[32];
360
361 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
362 this.name = name;
304e61e6 363 this.hash = 0;
39d8c1b6 364
3126a42c
JS
365 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
366 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
367 return -ENOMEM;
368
3126a42c 369 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
370 /*
371 * We dont want to push this dentry into global dentry hash table.
372 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
373 * This permits a working /proc/$pid/fd/XXX on sockets
374 */
3126a42c
JS
375 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
376 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
377 file->f_path.mnt = mntget(sock_mnt);
378 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
379
380 sock->file = file;
381 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
382 file->f_mode = FMODE_READ | FMODE_WRITE;
383 file->f_flags = O_RDWR;
384 file->f_pos = 0;
385 file->private_data = sock;
1da177e4 386
39d8c1b6
DM
387 return 0;
388}
389
/*
 *	Allocate a descriptor and file for @sock, attach them and install
 *	the file in the descriptor table.  Returns the new descriptor, or a
 *	negative value if allocation or attaching failed; on an attach
 *	failure both the file and the descriptor are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int fd = sock_alloc_fd(&newfile);
	int err;

	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
407
6cb153ca
BL
408static struct socket *sock_from_file(struct file *file, int *err)
409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca
BL
415}
416
1da177e4
LT
417/**
418 * sockfd_lookup - Go from a file number to its socket slot
419 * @fd: file handle
420 * @err: pointer to an error code return
421 *
422 * The file handle passed in is locked and the socket it is bound
423 * too is returned. If an error occurs the err pointer is overwritten
424 * with a negative errno code and NULL is returned. The function checks
425 * for both invalid handles and passing a handle which is not a socket.
426 *
427 * On a success the socket object pointer is returned.
428 */
429
430struct socket *sockfd_lookup(int fd, int *err)
431{
432 struct file *file;
1da177e4
LT
433 struct socket *sock;
434
89bddce5
SH
435 file = fget(fd);
436 if (!file) {
1da177e4
LT
437 *err = -EBADF;
438 return NULL;
439 }
89bddce5 440
6cb153ca
BL
441 sock = sock_from_file(file, err);
442 if (!sock)
1da177e4 443 fput(file);
6cb153ca
BL
444 return sock;
445}
1da177e4 446
6cb153ca
BL
447static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
448{
449 struct file *file;
450 struct socket *sock;
451
3672558c 452 *err = -EBADF;
6cb153ca
BL
453 file = fget_light(fd, fput_needed);
454 if (file) {
455 sock = sock_from_file(file, err);
456 if (sock)
457 return sock;
458 fput_light(file, *fput_needed);
1da177e4 459 }
6cb153ca 460 return NULL;
1da177e4
LT
461}
462
463/**
464 * sock_alloc - allocate a socket
89bddce5 465 *
1da177e4
LT
466 * Allocate a new inode and socket object. The two are bound together
467 * and initialised. The socket is then returned. If we are out of inodes
468 * NULL is returned.
469 */
470
471static struct socket *sock_alloc(void)
472{
89bddce5
SH
473 struct inode *inode;
474 struct socket *sock;
1da177e4
LT
475
476 inode = new_inode(sock_mnt->mnt_sb);
477 if (!inode)
478 return NULL;
479
480 sock = SOCKET_I(inode);
481
89bddce5 482 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
483 inode->i_uid = current->fsuid;
484 inode->i_gid = current->fsgid;
485
486 get_cpu_var(sockets_in_use)++;
487 put_cpu_var(sockets_in_use);
488 return sock;
489}
490
491/*
492 * In theory you can't get an open on this inode, but /proc provides
493 * a back door. Remember to keep it shut otherwise you'll let the
494 * creepy crawlies in.
495 */
89bddce5 496
1da177e4
LT
497static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
498{
499 return -ENXIO;
500}
501
4b6f5d20 502const struct file_operations bad_sock_fops = {
1da177e4
LT
503 .owner = THIS_MODULE,
504 .open = sock_no_open,
505};
506
507/**
508 * sock_release - close a socket
509 * @sock: socket to close
510 *
511 * The socket is released from the protocol stack if it has a release
512 * callback, and the inode is then released if the socket is bound to
89bddce5 513 * an inode not a file.
1da177e4 514 */
89bddce5 515
1da177e4
LT
516void sock_release(struct socket *sock)
517{
518 if (sock->ops) {
519 struct module *owner = sock->ops->owner;
520
521 sock->ops->release(sock);
522 sock->ops = NULL;
523 module_put(owner);
524 }
525
526 if (sock->fasync_list)
527 printk(KERN_ERR "sock_release: fasync list not empty!\n");
528
529 get_cpu_var(sockets_in_use)--;
530 put_cpu_var(sockets_in_use);
531 if (!sock->file) {
532 iput(SOCK_INODE(sock));
533 return;
534 }
89bddce5 535 sock->file = NULL;
1da177e4
LT
536}
537
89bddce5 538static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
539 struct msghdr *msg, size_t size)
540{
541 struct sock_iocb *si = kiocb_to_siocb(iocb);
542 int err;
543
544 si->sock = sock;
545 si->scm = NULL;
546 si->msg = msg;
547 si->size = size;
548
549 err = security_socket_sendmsg(sock, msg, size);
550 if (err)
551 return err;
552
553 return sock->ops->sendmsg(iocb, sock, msg, size);
554}
555
556int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
557{
558 struct kiocb iocb;
559 struct sock_iocb siocb;
560 int ret;
561
562 init_sync_kiocb(&iocb, NULL);
563 iocb.private = &siocb;
564 ret = __sock_sendmsg(&iocb, sock, msg, size);
565 if (-EIOCBQUEUED == ret)
566 ret = wait_on_sync_kiocb(&iocb);
567 return ret;
568}
569
570int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
571 struct kvec *vec, size_t num, size_t size)
572{
573 mm_segment_t oldfs = get_fs();
574 int result;
575
576 set_fs(KERNEL_DS);
577 /*
578 * the following is safe, since for compiler definitions of kvec and
579 * iovec are identical, yielding the same in-core layout and alignment
580 */
89bddce5 581 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
582 msg->msg_iovlen = num;
583 result = sock_sendmsg(sock, msg, size);
584 set_fs(oldfs);
585 return result;
586}
587
92f37fd2
ED
588/*
589 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
590 */
591void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
592 struct sk_buff *skb)
593{
594 ktime_t kt = skb->tstamp;
595
596 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
597 struct timeval tv;
598 /* Race occurred between timestamp enabling and packet
599 receiving. Fill in the current time for now. */
600 if (kt.tv64 == 0)
601 kt = ktime_get_real();
602 skb->tstamp = kt;
603 tv = ktime_to_timeval(kt);
604 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
605 } else {
606 struct timespec ts;
607 /* Race occurred between timestamp enabling and packet
608 receiving. Fill in the current time for now. */
609 if (kt.tv64 == 0)
610 kt = ktime_get_real();
611 skb->tstamp = kt;
612 ts = ktime_to_timespec(kt);
613 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
614 }
615}
616
7c81fd8b
ACM
617EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
618
89bddce5 619static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
620 struct msghdr *msg, size_t size, int flags)
621{
622 int err;
623 struct sock_iocb *si = kiocb_to_siocb(iocb);
624
625 si->sock = sock;
626 si->scm = NULL;
627 si->msg = msg;
628 si->size = size;
629 si->flags = flags;
630
631 err = security_socket_recvmsg(sock, msg, size, flags);
632 if (err)
633 return err;
634
635 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
636}
637
89bddce5 638int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
639 size_t size, int flags)
640{
641 struct kiocb iocb;
642 struct sock_iocb siocb;
643 int ret;
644
89bddce5 645 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
646 iocb.private = &siocb;
647 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
648 if (-EIOCBQUEUED == ret)
649 ret = wait_on_sync_kiocb(&iocb);
650 return ret;
651}
652
89bddce5
SH
653int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
654 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
655{
656 mm_segment_t oldfs = get_fs();
657 int result;
658
659 set_fs(KERNEL_DS);
660 /*
661 * the following is safe, since for compiler definitions of kvec and
662 * iovec are identical, yielding the same in-core layout and alignment
663 */
89bddce5 664 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
665 result = sock_recvmsg(sock, msg, size, flags);
666 set_fs(oldfs);
667 return result;
668}
669
670static void sock_aio_dtor(struct kiocb *iocb)
671{
672 kfree(iocb->private);
673}
674
ce1d4d3e
CH
675static ssize_t sock_sendpage(struct file *file, struct page *page,
676 int offset, size_t size, loff_t *ppos, int more)
1da177e4 677{
1da177e4
LT
678 struct socket *sock;
679 int flags;
680
ce1d4d3e
CH
681 sock = file->private_data;
682
683 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
684 if (more)
685 flags |= MSG_MORE;
686
687 return sock->ops->sendpage(sock, page, offset, size, flags);
688}
1da177e4 689
ce1d4d3e 690static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 691 struct sock_iocb *siocb)
ce1d4d3e
CH
692{
693 if (!is_sync_kiocb(iocb)) {
694 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
695 if (!siocb)
696 return NULL;
1da177e4
LT
697 iocb->ki_dtor = sock_aio_dtor;
698 }
1da177e4 699
ce1d4d3e 700 siocb->kiocb = iocb;
ce1d4d3e
CH
701 iocb->private = siocb;
702 return siocb;
1da177e4
LT
703}
704
ce1d4d3e 705static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
706 struct file *file, const struct iovec *iov,
707 unsigned long nr_segs)
ce1d4d3e
CH
708{
709 struct socket *sock = file->private_data;
710 size_t size = 0;
711 int i;
1da177e4 712
89bddce5
SH
713 for (i = 0; i < nr_segs; i++)
714 size += iov[i].iov_len;
1da177e4 715
ce1d4d3e
CH
716 msg->msg_name = NULL;
717 msg->msg_namelen = 0;
718 msg->msg_control = NULL;
719 msg->msg_controllen = 0;
89bddce5 720 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
721 msg->msg_iovlen = nr_segs;
722 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
723
724 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
725}
726
027445c3
BP
727static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
728 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
729{
730 struct sock_iocb siocb, *x;
731
1da177e4
LT
732 if (pos != 0)
733 return -ESPIPE;
027445c3
BP
734
735 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
736 return 0;
737
027445c3
BP
738
739 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
740 if (!x)
741 return -ENOMEM;
027445c3 742 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
743}
744
ce1d4d3e 745static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
746 struct file *file, const struct iovec *iov,
747 unsigned long nr_segs)
1da177e4 748{
ce1d4d3e
CH
749 struct socket *sock = file->private_data;
750 size_t size = 0;
751 int i;
1da177e4 752
89bddce5
SH
753 for (i = 0; i < nr_segs; i++)
754 size += iov[i].iov_len;
1da177e4 755
ce1d4d3e
CH
756 msg->msg_name = NULL;
757 msg->msg_namelen = 0;
758 msg->msg_control = NULL;
759 msg->msg_controllen = 0;
89bddce5 760 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
761 msg->msg_iovlen = nr_segs;
762 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
763 if (sock->type == SOCK_SEQPACKET)
764 msg->msg_flags |= MSG_EOR;
1da177e4 765
ce1d4d3e 766 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
767}
768
027445c3
BP
769static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
770 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
771{
772 struct sock_iocb siocb, *x;
1da177e4 773
ce1d4d3e
CH
774 if (pos != 0)
775 return -ESPIPE;
027445c3
BP
776
777 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 778 return 0;
1da177e4 779
027445c3 780 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
781 if (!x)
782 return -ENOMEM;
1da177e4 783
027445c3 784 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
785}
786
1da177e4
LT
787/*
788 * Atomic setting of ioctl hooks to avoid race
789 * with module unload.
790 */
791
4a3e2f71 792static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 793static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 794
89bddce5 795void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 796{
4a3e2f71 797 mutex_lock(&br_ioctl_mutex);
1da177e4 798 br_ioctl_hook = hook;
4a3e2f71 799 mutex_unlock(&br_ioctl_mutex);
1da177e4 800}
89bddce5 801
1da177e4
LT
802EXPORT_SYMBOL(brioctl_set);
803
4a3e2f71 804static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 805static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 806
89bddce5 807void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 808{
4a3e2f71 809 mutex_lock(&vlan_ioctl_mutex);
1da177e4 810 vlan_ioctl_hook = hook;
4a3e2f71 811 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 812}
89bddce5 813
1da177e4
LT
814EXPORT_SYMBOL(vlan_ioctl_set);
815
4a3e2f71 816static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 817static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 818
89bddce5 819void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 820{
4a3e2f71 821 mutex_lock(&dlci_ioctl_mutex);
1da177e4 822 dlci_ioctl_hook = hook;
4a3e2f71 823 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 824}
89bddce5 825
1da177e4
LT
826EXPORT_SYMBOL(dlci_ioctl_set);
827
828/*
829 * With an ioctl, arg may well be a user mode pointer, but we don't know
830 * what to do with it - that's up to the protocol still.
831 */
832
833static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
834{
835 struct socket *sock;
836 void __user *argp = (void __user *)arg;
837 int pid, err;
838
b69aee04 839 sock = file->private_data;
1da177e4
LT
840 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
841 err = dev_ioctl(cmd, argp);
842 } else
d86b5e0e 843#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
844 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
845 err = dev_ioctl(cmd, argp);
846 } else
89bddce5
SH
847#endif /* CONFIG_WIRELESS_EXT */
848 switch (cmd) {
1da177e4
LT
849 case FIOSETOWN:
850 case SIOCSPGRP:
851 err = -EFAULT;
852 if (get_user(pid, (int __user *)argp))
853 break;
854 err = f_setown(sock->file, pid, 1);
855 break;
856 case FIOGETOWN:
857 case SIOCGPGRP:
609d7fa9 858 err = put_user(f_getown(sock->file),
89bddce5 859 (int __user *)argp);
1da177e4
LT
860 break;
861 case SIOCGIFBR:
862 case SIOCSIFBR:
863 case SIOCBRADDBR:
864 case SIOCBRDELBR:
865 err = -ENOPKG;
866 if (!br_ioctl_hook)
867 request_module("bridge");
868
4a3e2f71 869 mutex_lock(&br_ioctl_mutex);
89bddce5 870 if (br_ioctl_hook)
1da177e4 871 err = br_ioctl_hook(cmd, argp);
4a3e2f71 872 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
873 break;
874 case SIOCGIFVLAN:
875 case SIOCSIFVLAN:
876 err = -ENOPKG;
877 if (!vlan_ioctl_hook)
878 request_module("8021q");
879
4a3e2f71 880 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
881 if (vlan_ioctl_hook)
882 err = vlan_ioctl_hook(argp);
4a3e2f71 883 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 884 break;
1da177e4
LT
885 case SIOCADDDLCI:
886 case SIOCDELDLCI:
887 err = -ENOPKG;
888 if (!dlci_ioctl_hook)
889 request_module("dlci");
890
891 if (dlci_ioctl_hook) {
4a3e2f71 892 mutex_lock(&dlci_ioctl_mutex);
1da177e4 893 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 894 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
895 }
896 break;
897 default:
898 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
899
900 /*
901 * If this ioctl is unknown try to hand it down
902 * to the NIC driver.
903 */
904 if (err == -ENOIOCTLCMD)
905 err = dev_ioctl(cmd, argp);
1da177e4 906 break;
89bddce5 907 }
1da177e4
LT
908 return err;
909}
910
911int sock_create_lite(int family, int type, int protocol, struct socket **res)
912{
913 int err;
914 struct socket *sock = NULL;
89bddce5 915
1da177e4
LT
916 err = security_socket_create(family, type, protocol, 1);
917 if (err)
918 goto out;
919
920 sock = sock_alloc();
921 if (!sock) {
922 err = -ENOMEM;
923 goto out;
924 }
925
1da177e4 926 sock->type = type;
7420ed23
VY
927 err = security_socket_post_create(sock, family, type, protocol, 1);
928 if (err)
929 goto out_release;
930
1da177e4
LT
931out:
932 *res = sock;
933 return err;
7420ed23
VY
934out_release:
935 sock_release(sock);
936 sock = NULL;
937 goto out;
1da177e4
LT
938}
939
940/* No kernel lock held - perfect */
89bddce5 941static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
942{
943 struct socket *sock;
944
945 /*
89bddce5 946 * We can't return errors to poll, so it's either yes or no.
1da177e4 947 */
b69aee04 948 sock = file->private_data;
1da177e4
LT
949 return sock->ops->poll(file, sock, wait);
950}
951
89bddce5 952static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 953{
b69aee04 954 struct socket *sock = file->private_data;
1da177e4
LT
955
956 return sock->ops->mmap(file, sock, vma);
957}
958
20380731 959static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
960{
961 /*
89bddce5
SH
962 * It was possible the inode is NULL we were
963 * closing an unfinished socket.
1da177e4
LT
964 */
965
89bddce5 966 if (!inode) {
1da177e4
LT
967 printk(KERN_DEBUG "sock_close: NULL inode\n");
968 return 0;
969 }
970 sock_fasync(-1, filp, 0);
971 sock_release(SOCKET_I(inode));
972 return 0;
973}
974
975/*
976 * Update the socket async list
977 *
978 * Fasync_list locking strategy.
979 *
980 * 1. fasync_list is modified only under process context socket lock
981 * i.e. under semaphore.
982 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
983 * or under socket lock.
984 * 3. fasync_list can be used from softirq context, so that
985 * modification under socket lock have to be enhanced with
986 * write_lock_bh(&sk->sk_callback_lock).
987 * --ANK (990710)
988 */
989
990static int sock_fasync(int fd, struct file *filp, int on)
991{
89bddce5 992 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
993 struct socket *sock;
994 struct sock *sk;
995
89bddce5 996 if (on) {
8b3a7005 997 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 998 if (fna == NULL)
1da177e4
LT
999 return -ENOMEM;
1000 }
1001
b69aee04 1002 sock = filp->private_data;
1da177e4 1003
89bddce5
SH
1004 sk = sock->sk;
1005 if (sk == NULL) {
1da177e4
LT
1006 kfree(fna);
1007 return -EINVAL;
1008 }
1009
1010 lock_sock(sk);
1011
89bddce5 1012 prev = &(sock->fasync_list);
1da177e4 1013
89bddce5
SH
1014 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1015 if (fa->fa_file == filp)
1da177e4
LT
1016 break;
1017
89bddce5
SH
1018 if (on) {
1019 if (fa != NULL) {
1da177e4 1020 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1021 fa->fa_fd = fd;
1da177e4
LT
1022 write_unlock_bh(&sk->sk_callback_lock);
1023
1024 kfree(fna);
1025 goto out;
1026 }
89bddce5
SH
1027 fna->fa_file = filp;
1028 fna->fa_fd = fd;
1029 fna->magic = FASYNC_MAGIC;
1030 fna->fa_next = sock->fasync_list;
1da177e4 1031 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1032 sock->fasync_list = fna;
1da177e4 1033 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1034 } else {
1035 if (fa != NULL) {
1da177e4 1036 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1037 *prev = fa->fa_next;
1da177e4
LT
1038 write_unlock_bh(&sk->sk_callback_lock);
1039 kfree(fa);
1040 }
1041 }
1042
1043out:
1044 release_sock(sock->sk);
1045 return 0;
1046}
1047
1048/* This function may be called only under socket lock or callback_lock */
1049
1050int sock_wake_async(struct socket *sock, int how, int band)
1051{
1052 if (!sock || !sock->fasync_list)
1053 return -1;
89bddce5 1054 switch (how) {
1da177e4 1055 case 1:
89bddce5 1056
1da177e4
LT
1057 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1058 break;
1059 goto call_kill;
1060 case 2:
1061 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1062 break;
1063 /* fall through */
1064 case 0:
89bddce5 1065call_kill:
1da177e4
LT
1066 __kill_fasync(sock->fasync_list, SIGIO, band);
1067 break;
1068 case 3:
1069 __kill_fasync(sock->fasync_list, SIGURG, band);
1070 }
1071 return 0;
1072}
1073
89bddce5
SH
1074static int __sock_create(int family, int type, int protocol,
1075 struct socket **res, int kern)
1da177e4
LT
1076{
1077 int err;
1078 struct socket *sock;
55737fda 1079 const struct net_proto_family *pf;
1da177e4
LT
1080
1081 /*
89bddce5 1082 * Check protocol is in range
1da177e4
LT
1083 */
1084 if (family < 0 || family >= NPROTO)
1085 return -EAFNOSUPPORT;
1086 if (type < 0 || type >= SOCK_MAX)
1087 return -EINVAL;
1088
1089 /* Compatibility.
1090
1091 This uglymoron is moved from INET layer to here to avoid
1092 deadlock in module load.
1093 */
1094 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1095 static int warned;
1da177e4
LT
1096 if (!warned) {
1097 warned = 1;
89bddce5
SH
1098 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1099 current->comm);
1da177e4
LT
1100 }
1101 family = PF_PACKET;
1102 }
1103
1104 err = security_socket_create(family, type, protocol, kern);
1105 if (err)
1106 return err;
89bddce5 1107
55737fda
SH
1108 /*
1109 * Allocate the socket and allow the family to set things up. if
1110 * the protocol is 0, the family is instructed to select an appropriate
1111 * default.
1112 */
1113 sock = sock_alloc();
1114 if (!sock) {
1115 if (net_ratelimit())
1116 printk(KERN_WARNING "socket: no more sockets\n");
1117 return -ENFILE; /* Not exactly a match, but its the
1118 closest posix thing */
1119 }
1120
1121 sock->type = type;
1122
1da177e4 1123#if defined(CONFIG_KMOD)
89bddce5
SH
1124 /* Attempt to load a protocol module if the find failed.
1125 *
1126 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1127 * requested real, full-featured networking support upon configuration.
1128 * Otherwise module support will break!
1129 */
55737fda 1130 if (net_families[family] == NULL)
89bddce5 1131 request_module("net-pf-%d", family);
1da177e4
LT
1132#endif
1133
55737fda
SH
1134 rcu_read_lock();
1135 pf = rcu_dereference(net_families[family]);
1136 err = -EAFNOSUPPORT;
1137 if (!pf)
1138 goto out_release;
1da177e4
LT
1139
1140 /*
1141 * We will call the ->create function, that possibly is in a loadable
1142 * module, so we have to bump that loadable module refcnt first.
1143 */
55737fda 1144 if (!try_module_get(pf->owner))
1da177e4
LT
1145 goto out_release;
1146
55737fda
SH
1147 /* Now protected by module ref count */
1148 rcu_read_unlock();
1149
1150 err = pf->create(sock, protocol);
1151 if (err < 0)
1da177e4 1152 goto out_module_put;
a79af59e 1153
1da177e4
LT
1154 /*
1155 * Now to bump the refcnt of the [loadable] module that owns this
1156 * socket at sock_release time we decrement its refcnt.
1157 */
55737fda
SH
1158 if (!try_module_get(sock->ops->owner))
1159 goto out_module_busy;
1160
1da177e4
LT
1161 /*
1162 * Now that we're done with the ->create function, the [loadable]
1163 * module can have its refcnt decremented
1164 */
55737fda 1165 module_put(pf->owner);
7420ed23
VY
1166 err = security_socket_post_create(sock, family, type, protocol, kern);
1167 if (err)
1168 goto out_release;
55737fda 1169 *res = sock;
1da177e4 1170
55737fda
SH
1171 return 0;
1172
1173out_module_busy:
1174 err = -EAFNOSUPPORT;
1da177e4 1175out_module_put:
55737fda
SH
1176 sock->ops = NULL;
1177 module_put(pf->owner);
1178out_sock_release:
1da177e4 1179 sock_release(sock);
55737fda
SH
1180 return err;
1181
1182out_release:
1183 rcu_read_unlock();
1184 goto out_sock_release;
1da177e4
LT
1185}
1186
/* Create a socket via __sock_create() with the kern argument set to 0. */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1191
/* Create a socket via __sock_create() with the kern argument set to 1. */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1196
1197asmlinkage long sys_socket(int family, int type, int protocol)
1198{
1199 int retval;
1200 struct socket *sock;
1201
1202 retval = sock_create(family, type, protocol, &sock);
1203 if (retval < 0)
1204 goto out;
1205
1206 retval = sock_map_fd(sock);
1207 if (retval < 0)
1208 goto out_release;
1209
1210out:
1211 /* It may be already another descriptor 8) Not kernel problem. */
1212 return retval;
1213
1214out_release:
1215 sock_release(sock);
1216 return retval;
1217}
1218
1219/*
1220 * Create a pair of connected sockets.
1221 */
1222
89bddce5
SH
1223asmlinkage long sys_socketpair(int family, int type, int protocol,
1224 int __user *usockvec)
1da177e4
LT
1225{
1226 struct socket *sock1, *sock2;
1227 int fd1, fd2, err;
db349509 1228 struct file *newfile1, *newfile2;
1da177e4
LT
1229
1230 /*
1231 * Obtain the first socket and check if the underlying protocol
1232 * supports the socketpair call.
1233 */
1234
1235 err = sock_create(family, type, protocol, &sock1);
1236 if (err < 0)
1237 goto out;
1238
1239 err = sock_create(family, type, protocol, &sock2);
1240 if (err < 0)
1241 goto out_release_1;
1242
1243 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1244 if (err < 0)
1da177e4
LT
1245 goto out_release_both;
1246
db349509
AV
1247 fd1 = sock_alloc_fd(&newfile1);
1248 if (unlikely(fd1 < 0))
1249 goto out_release_both;
1da177e4 1250
db349509
AV
1251 fd2 = sock_alloc_fd(&newfile2);
1252 if (unlikely(fd2 < 0)) {
1253 put_filp(newfile1);
1254 put_unused_fd(fd1);
1da177e4 1255 goto out_release_both;
db349509 1256 }
1da177e4 1257
db349509
AV
1258 err = sock_attach_fd(sock1, newfile1);
1259 if (unlikely(err < 0)) {
1260 goto out_fd2;
1261 }
1262
1263 err = sock_attach_fd(sock2, newfile2);
1264 if (unlikely(err < 0)) {
1265 fput(newfile1);
1266 goto out_fd1;
1267 }
1268
1269 err = audit_fd_pair(fd1, fd2);
1270 if (err < 0) {
1271 fput(newfile1);
1272 fput(newfile2);
1273 goto out_fd;
1274 }
1da177e4 1275
db349509
AV
1276 fd_install(fd1, newfile1);
1277 fd_install(fd2, newfile2);
1da177e4
LT
1278 /* fd1 and fd2 may be already another descriptors.
1279 * Not kernel problem.
1280 */
1281
89bddce5 1282 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1283 if (!err)
1284 err = put_user(fd2, &usockvec[1]);
1285 if (!err)
1286 return 0;
1287
1288 sys_close(fd2);
1289 sys_close(fd1);
1290 return err;
1291
1da177e4 1292out_release_both:
89bddce5 1293 sock_release(sock2);
1da177e4 1294out_release_1:
89bddce5 1295 sock_release(sock1);
1da177e4
LT
1296out:
1297 return err;
db349509
AV
1298
1299out_fd2:
1300 put_filp(newfile1);
1301 sock_release(sock1);
1302out_fd1:
1303 put_filp(newfile2);
1304 sock_release(sock2);
1305out_fd:
1306 put_unused_fd(fd1);
1307 put_unused_fd(fd2);
1308 goto out;
1da177e4
LT
1309}
1310
1da177e4
LT
1311/*
1312 * Bind a name to a socket. Nothing much to do here since it's
1313 * the protocol's responsibility to handle the local address.
1314 *
1315 * We move the socket address to kernel space before we call
1316 * the protocol layer (having also checked the address is ok).
1317 */
1318
1319asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1320{
1321 struct socket *sock;
1322 char address[MAX_SOCK_ADDR];
6cb153ca 1323 int err, fput_needed;
1da177e4 1324
89bddce5 1325 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1326 if (sock) {
89bddce5
SH
1327 err = move_addr_to_kernel(umyaddr, addrlen, address);
1328 if (err >= 0) {
1329 err = security_socket_bind(sock,
1330 (struct sockaddr *)address,
1331 addrlen);
6cb153ca
BL
1332 if (!err)
1333 err = sock->ops->bind(sock,
89bddce5
SH
1334 (struct sockaddr *)
1335 address, addrlen);
1da177e4 1336 }
6cb153ca 1337 fput_light(sock->file, fput_needed);
89bddce5 1338 }
1da177e4
LT
1339 return err;
1340}
1341
1da177e4
LT
1342/*
1343 * Perform a listen. Basically, we allow the protocol to do anything
1344 * necessary for a listen, and if that works, we mark the socket as
1345 * ready for listening.
1346 */
1347
/* Ceiling for the listen() backlog: sys_listen() clamps larger requests to this value. */
7a42c217 1348int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1349
1350asmlinkage long sys_listen(int fd, int backlog)
1351{
1352 struct socket *sock;
6cb153ca 1353 int err, fput_needed;
89bddce5
SH
1354
1355 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1356 if (sock) {
1357 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1358 backlog = sysctl_somaxconn;
1359
1360 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1361 if (!err)
1362 err = sock->ops->listen(sock, backlog);
1da177e4 1363
6cb153ca 1364 fput_light(sock->file, fput_needed);
1da177e4
LT
1365 }
1366 return err;
1367}
1368
1da177e4
LT
1369/*
1370 * For accept, we attempt to create a new socket, set up the link
1371 * with the client, wake up the client, then return the new
1372 * connected fd. We collect the address of the connector in kernel
1373 * space and move it to user at the very end. This is unclean because
1374 * we open the socket then return an error.
1375 *
1376 * 1003.1g adds the ability to recvmsg() to query connection pending
1377 * status to recvmsg. We need to add that support in a way that's
1378 * clean when we restructure accept also.
1379 */
1380
89bddce5
SH
1381asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1382 int __user *upeer_addrlen)
1da177e4
LT
1383{
1384 struct socket *sock, *newsock;
39d8c1b6 1385 struct file *newfile;
6cb153ca 1386 int err, len, newfd, fput_needed;
1da177e4
LT
1387 char address[MAX_SOCK_ADDR];
1388
6cb153ca 1389 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1390 if (!sock)
1391 goto out;
1392
1393 err = -ENFILE;
89bddce5 1394 if (!(newsock = sock_alloc()))
1da177e4
LT
1395 goto out_put;
1396
1397 newsock->type = sock->type;
1398 newsock->ops = sock->ops;
1399
1da177e4
LT
1400 /*
1401 * We don't need try_module_get here, as the listening socket (sock)
1402 * has the protocol module (sock->ops->owner) held.
1403 */
1404 __module_get(newsock->ops->owner);
1405
39d8c1b6
DM
1406 newfd = sock_alloc_fd(&newfile);
1407 if (unlikely(newfd < 0)) {
1408 err = newfd;
9a1875e6
DM
1409 sock_release(newsock);
1410 goto out_put;
39d8c1b6
DM
1411 }
1412
1413 err = sock_attach_fd(newsock, newfile);
1414 if (err < 0)
79f4f642 1415 goto out_fd_simple;
39d8c1b6 1416
a79af59e
FF
1417 err = security_socket_accept(sock, newsock);
1418 if (err)
39d8c1b6 1419 goto out_fd;
a79af59e 1420
1da177e4
LT
1421 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1422 if (err < 0)
39d8c1b6 1423 goto out_fd;
1da177e4
LT
1424
1425 if (upeer_sockaddr) {
89bddce5
SH
1426 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1427 &len, 2) < 0) {
1da177e4 1428 err = -ECONNABORTED;
39d8c1b6 1429 goto out_fd;
1da177e4 1430 }
89bddce5
SH
1431 err = move_addr_to_user(address, len, upeer_sockaddr,
1432 upeer_addrlen);
1da177e4 1433 if (err < 0)
39d8c1b6 1434 goto out_fd;
1da177e4
LT
1435 }
1436
1437 /* File flags are not inherited via accept() unlike another OSes. */
1438
39d8c1b6
DM
1439 fd_install(newfd, newfile);
1440 err = newfd;
1da177e4
LT
1441
1442 security_socket_post_accept(sock, newsock);
1443
1444out_put:
6cb153ca 1445 fput_light(sock->file, fput_needed);
1da177e4
LT
1446out:
1447 return err;
79f4f642
AD
1448out_fd_simple:
1449 sock_release(newsock);
1450 put_filp(newfile);
1451 put_unused_fd(newfd);
1452 goto out_put;
39d8c1b6 1453out_fd:
9606a216 1454 fput(newfile);
39d8c1b6 1455 put_unused_fd(newfd);
1da177e4
LT
1456 goto out_put;
1457}
1458
1da177e4
LT
1459/*
1460 * Attempt to connect to a socket with the server address. The address
1461 * is in user space so we verify it is OK and move it to kernel space.
1462 *
1463 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1464 * break bindings
1465 *
1466 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1467 * other SEQPACKET protocols that take time to connect() as it doesn't
1468 * include the -EINPROGRESS status for such sockets.
1469 */
1470
89bddce5
SH
1471asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1472 int addrlen)
1da177e4
LT
1473{
1474 struct socket *sock;
1475 char address[MAX_SOCK_ADDR];
6cb153ca 1476 int err, fput_needed;
1da177e4 1477
6cb153ca 1478 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1479 if (!sock)
1480 goto out;
1481 err = move_addr_to_kernel(uservaddr, addrlen, address);
1482 if (err < 0)
1483 goto out_put;
1484
89bddce5
SH
1485 err =
1486 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1487 if (err)
1488 goto out_put;
1489
89bddce5 1490 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1491 sock->file->f_flags);
1492out_put:
6cb153ca 1493 fput_light(sock->file, fput_needed);
1da177e4
LT
1494out:
1495 return err;
1496}
1497
1498/*
1499 * Get the local address ('name') of a socket object. Move the obtained
1500 * name to user space.
1501 */
1502
89bddce5
SH
1503asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1504 int __user *usockaddr_len)
1da177e4
LT
1505{
1506 struct socket *sock;
1507 char address[MAX_SOCK_ADDR];
6cb153ca 1508 int len, err, fput_needed;
89bddce5 1509
6cb153ca 1510 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1511 if (!sock)
1512 goto out;
1513
1514 err = security_socket_getsockname(sock);
1515 if (err)
1516 goto out_put;
1517
1518 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1519 if (err)
1520 goto out_put;
1521 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1522
1523out_put:
6cb153ca 1524 fput_light(sock->file, fput_needed);
1da177e4
LT
1525out:
1526 return err;
1527}
1528
1529/*
1530 * Get the remote address ('name') of a socket object. Move the obtained
1531 * name to user space.
1532 */
1533
89bddce5
SH
1534asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1535 int __user *usockaddr_len)
1da177e4
LT
1536{
1537 struct socket *sock;
1538 char address[MAX_SOCK_ADDR];
6cb153ca 1539 int len, err, fput_needed;
1da177e4 1540
89bddce5
SH
1541 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1542 if (sock != NULL) {
1da177e4
LT
1543 err = security_socket_getpeername(sock);
1544 if (err) {
6cb153ca 1545 fput_light(sock->file, fput_needed);
1da177e4
LT
1546 return err;
1547 }
1548
89bddce5
SH
1549 err =
1550 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1551 1);
1da177e4 1552 if (!err)
89bddce5
SH
1553 err = move_addr_to_user(address, len, usockaddr,
1554 usockaddr_len);
6cb153ca 1555 fput_light(sock->file, fput_needed);
1da177e4
LT
1556 }
1557 return err;
1558}
1559
1560/*
1561 * Send a datagram to a given address. We move the address into kernel
1562 * space and check the user space data area is readable before invoking
1563 * the protocol.
1564 */
1565
89bddce5
SH
1566asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1567 unsigned flags, struct sockaddr __user *addr,
1568 int addr_len)
1da177e4
LT
1569{
1570 struct socket *sock;
1571 char address[MAX_SOCK_ADDR];
1572 int err;
1573 struct msghdr msg;
1574 struct iovec iov;
6cb153ca
BL
1575 int fput_needed;
1576 struct file *sock_file;
1577
1578 sock_file = fget_light(fd, &fput_needed);
4387ff75 1579 err = -EBADF;
6cb153ca 1580 if (!sock_file)
4387ff75 1581 goto out;
6cb153ca
BL
1582
1583 sock = sock_from_file(sock_file, &err);
1da177e4 1584 if (!sock)
6cb153ca 1585 goto out_put;
89bddce5
SH
1586 iov.iov_base = buff;
1587 iov.iov_len = len;
1588 msg.msg_name = NULL;
1589 msg.msg_iov = &iov;
1590 msg.msg_iovlen = 1;
1591 msg.msg_control = NULL;
1592 msg.msg_controllen = 0;
1593 msg.msg_namelen = 0;
6cb153ca 1594 if (addr) {
1da177e4
LT
1595 err = move_addr_to_kernel(addr, addr_len, address);
1596 if (err < 0)
1597 goto out_put;
89bddce5
SH
1598 msg.msg_name = address;
1599 msg.msg_namelen = addr_len;
1da177e4
LT
1600 }
1601 if (sock->file->f_flags & O_NONBLOCK)
1602 flags |= MSG_DONTWAIT;
1603 msg.msg_flags = flags;
1604 err = sock_sendmsg(sock, &msg, len);
1605
89bddce5 1606out_put:
6cb153ca 1607 fput_light(sock_file, fput_needed);
4387ff75 1608out:
1da177e4
LT
1609 return err;
1610}
1611
1612/*
89bddce5 1613 * Send a datagram down a socket.
1da177e4
LT
1614 */
1615
89bddce5 1616asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1617{
1618 return sys_sendto(fd, buff, len, flags, NULL, 0);
1619}
1620
1621/*
89bddce5 1622 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1623 * sender. We verify the buffers are writable and if needed move the
1624 * sender address from kernel to user space.
1625 */
1626
89bddce5
SH
1627asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1628 unsigned flags, struct sockaddr __user *addr,
1629 int __user *addr_len)
1da177e4
LT
1630{
1631 struct socket *sock;
1632 struct iovec iov;
1633 struct msghdr msg;
1634 char address[MAX_SOCK_ADDR];
89bddce5 1635 int err, err2;
6cb153ca
BL
1636 struct file *sock_file;
1637 int fput_needed;
1638
1639 sock_file = fget_light(fd, &fput_needed);
4387ff75 1640 err = -EBADF;
6cb153ca 1641 if (!sock_file)
4387ff75 1642 goto out;
1da177e4 1643
6cb153ca 1644 sock = sock_from_file(sock_file, &err);
1da177e4 1645 if (!sock)
4387ff75 1646 goto out_put;
1da177e4 1647
89bddce5
SH
1648 msg.msg_control = NULL;
1649 msg.msg_controllen = 0;
1650 msg.msg_iovlen = 1;
1651 msg.msg_iov = &iov;
1652 iov.iov_len = size;
1653 iov.iov_base = ubuf;
1654 msg.msg_name = address;
1655 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1656 if (sock->file->f_flags & O_NONBLOCK)
1657 flags |= MSG_DONTWAIT;
89bddce5 1658 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1659
89bddce5
SH
1660 if (err >= 0 && addr != NULL) {
1661 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1662 if (err2 < 0)
1663 err = err2;
1da177e4 1664 }
4387ff75 1665out_put:
6cb153ca 1666 fput_light(sock_file, fput_needed);
4387ff75 1667out:
1da177e4
LT
1668 return err;
1669}
1670
1671/*
89bddce5 1672 * Receive a datagram from a socket.
1da177e4
LT
1673 */
1674
89bddce5
SH
1675asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1676 unsigned flags)
1da177e4
LT
1677{
1678 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1679}
1680
1681/*
1682 * Set a socket option. Because we don't know the option lengths we have
1683 * to pass the user mode parameter for the protocols to sort out.
1684 */
1685
89bddce5
SH
1686asmlinkage long sys_setsockopt(int fd, int level, int optname,
1687 char __user *optval, int optlen)
1da177e4 1688{
6cb153ca 1689 int err, fput_needed;
1da177e4
LT
1690 struct socket *sock;
1691
1692 if (optlen < 0)
1693 return -EINVAL;
89bddce5
SH
1694
1695 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1696 if (sock != NULL) {
1697 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1698 if (err)
1699 goto out_put;
1da177e4
LT
1700
1701 if (level == SOL_SOCKET)
89bddce5
SH
1702 err =
1703 sock_setsockopt(sock, level, optname, optval,
1704 optlen);
1da177e4 1705 else
89bddce5
SH
1706 err =
1707 sock->ops->setsockopt(sock, level, optname, optval,
1708 optlen);
6cb153ca
BL
1709out_put:
1710 fput_light(sock->file, fput_needed);
1da177e4
LT
1711 }
1712 return err;
1713}
1714
1715/*
1716 * Get a socket option. Because we don't know the option lengths we have
1717 * to pass a user mode parameter for the protocols to sort out.
1718 */
1719
89bddce5
SH
1720asmlinkage long sys_getsockopt(int fd, int level, int optname,
1721 char __user *optval, int __user *optlen)
1da177e4 1722{
6cb153ca 1723 int err, fput_needed;
1da177e4
LT
1724 struct socket *sock;
1725
89bddce5
SH
1726 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1727 if (sock != NULL) {
6cb153ca
BL
1728 err = security_socket_getsockopt(sock, level, optname);
1729 if (err)
1730 goto out_put;
1da177e4
LT
1731
1732 if (level == SOL_SOCKET)
89bddce5
SH
1733 err =
1734 sock_getsockopt(sock, level, optname, optval,
1735 optlen);
1da177e4 1736 else
89bddce5
SH
1737 err =
1738 sock->ops->getsockopt(sock, level, optname, optval,
1739 optlen);
6cb153ca
BL
1740out_put:
1741 fput_light(sock->file, fput_needed);
1da177e4
LT
1742 }
1743 return err;
1744}
1745
1da177e4
LT
1746/*
1747 * Shutdown a socket.
1748 */
1749
1750asmlinkage long sys_shutdown(int fd, int how)
1751{
6cb153ca 1752 int err, fput_needed;
1da177e4
LT
1753 struct socket *sock;
1754
89bddce5
SH
1755 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1756 if (sock != NULL) {
1da177e4 1757 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1758 if (!err)
1759 err = sock->ops->shutdown(sock, how);
1760 fput_light(sock->file, fput_needed);
1da177e4
LT
1761 }
1762 return err;
1763}
1764
89bddce5 1765/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1766 * fields which are the same type (int / unsigned) on our platforms.
1767 */
/*
 * These expand in the caller's scope: they need a local named "flags" and,
 * for an argument "msg", a sibling pointer named "msg_compat".
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1771
1da177e4
LT
1772/*
1773 * BSD sendmsg interface
1774 */
1775
1776asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1777{
89bddce5
SH
1778 struct compat_msghdr __user *msg_compat =
1779 (struct compat_msghdr __user *)msg;
1da177e4
LT
1780 struct socket *sock;
1781 char address[MAX_SOCK_ADDR];
1782 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1783 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1784 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1785 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1786 unsigned char *ctl_buf = ctl;
1787 struct msghdr msg_sys;
1788 int err, ctl_len, iov_size, total_len;
6cb153ca 1789 int fput_needed;
89bddce5 1790
1da177e4
LT
1791 err = -EFAULT;
1792 if (MSG_CMSG_COMPAT & flags) {
1793 if (get_compat_msghdr(&msg_sys, msg_compat))
1794 return -EFAULT;
89bddce5
SH
1795 }
1796 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1797 return -EFAULT;
1798
6cb153ca 1799 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1800 if (!sock)
1da177e4
LT
1801 goto out;
1802
1803 /* do not move before msg_sys is valid */
1804 err = -EMSGSIZE;
1805 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1806 goto out_put;
1807
89bddce5 1808 /* Check whether to allocate the iovec area */
1da177e4
LT
1809 err = -ENOMEM;
1810 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1811 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1812 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1813 if (!iov)
1814 goto out_put;
1815 }
1816
1817 /* This will also move the address data into kernel space */
1818 if (MSG_CMSG_COMPAT & flags) {
1819 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1820 } else
1821 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1822 if (err < 0)
1da177e4
LT
1823 goto out_freeiov;
1824 total_len = err;
1825
1826 err = -ENOBUFS;
1827
1828 if (msg_sys.msg_controllen > INT_MAX)
1829 goto out_freeiov;
89bddce5 1830 ctl_len = msg_sys.msg_controllen;
1da177e4 1831 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1832 err =
1833 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1834 sizeof(ctl));
1da177e4
LT
1835 if (err)
1836 goto out_freeiov;
1837 ctl_buf = msg_sys.msg_control;
8920e8f9 1838 ctl_len = msg_sys.msg_controllen;
1da177e4 1839 } else if (ctl_len) {
89bddce5 1840 if (ctl_len > sizeof(ctl)) {
1da177e4 1841 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1842 if (ctl_buf == NULL)
1da177e4
LT
1843 goto out_freeiov;
1844 }
1845 err = -EFAULT;
1846 /*
1847 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1848 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1849 * checking falls down on this.
1850 */
89bddce5
SH
1851 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1852 ctl_len))
1da177e4
LT
1853 goto out_freectl;
1854 msg_sys.msg_control = ctl_buf;
1855 }
1856 msg_sys.msg_flags = flags;
1857
1858 if (sock->file->f_flags & O_NONBLOCK)
1859 msg_sys.msg_flags |= MSG_DONTWAIT;
1860 err = sock_sendmsg(sock, &msg_sys, total_len);
1861
1862out_freectl:
89bddce5 1863 if (ctl_buf != ctl)
1da177e4
LT
1864 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1865out_freeiov:
1866 if (iov != iovstack)
1867 sock_kfree_s(sock->sk, iov, iov_size);
1868out_put:
6cb153ca 1869 fput_light(sock->file, fput_needed);
89bddce5 1870out:
1da177e4
LT
1871 return err;
1872}
1873
1874/*
1875 * BSD recvmsg interface
1876 */
1877
89bddce5
SH
1878asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1879 unsigned int flags)
1da177e4 1880{
89bddce5
SH
1881 struct compat_msghdr __user *msg_compat =
1882 (struct compat_msghdr __user *)msg;
1da177e4
LT
1883 struct socket *sock;
1884 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1885 struct iovec *iov = iovstack;
1da177e4
LT
1886 struct msghdr msg_sys;
1887 unsigned long cmsg_ptr;
1888 int err, iov_size, total_len, len;
6cb153ca 1889 int fput_needed;
1da177e4
LT
1890
1891 /* kernel mode address */
1892 char addr[MAX_SOCK_ADDR];
1893
1894 /* user mode address pointers */
1895 struct sockaddr __user *uaddr;
1896 int __user *uaddr_len;
89bddce5 1897
1da177e4
LT
1898 if (MSG_CMSG_COMPAT & flags) {
1899 if (get_compat_msghdr(&msg_sys, msg_compat))
1900 return -EFAULT;
89bddce5
SH
1901 }
1902 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1903 return -EFAULT;
1da177e4 1904
6cb153ca 1905 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1906 if (!sock)
1907 goto out;
1908
1909 err = -EMSGSIZE;
1910 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1911 goto out_put;
89bddce5
SH
1912
1913 /* Check whether to allocate the iovec area */
1da177e4
LT
1914 err = -ENOMEM;
1915 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1916 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1917 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1918 if (!iov)
1919 goto out_put;
1920 }
1921
1922 /*
89bddce5
SH
1923 * Save the user-mode address (verify_iovec will change the
1924 * kernel msghdr to use the kernel address space)
1da177e4 1925 */
89bddce5
SH
1926
1927 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1928 uaddr_len = COMPAT_NAMELEN(msg);
1929 if (MSG_CMSG_COMPAT & flags) {
1930 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1931 } else
1932 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1933 if (err < 0)
1934 goto out_freeiov;
89bddce5 1935 total_len = err;
1da177e4
LT
1936
1937 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1938 msg_sys.msg_flags = 0;
1939 if (MSG_CMSG_COMPAT & flags)
1940 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1941
1da177e4
LT
1942 if (sock->file->f_flags & O_NONBLOCK)
1943 flags |= MSG_DONTWAIT;
1944 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1945 if (err < 0)
1946 goto out_freeiov;
1947 len = err;
1948
1949 if (uaddr != NULL) {
89bddce5
SH
1950 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1951 uaddr_len);
1da177e4
LT
1952 if (err < 0)
1953 goto out_freeiov;
1954 }
37f7f421
DM
1955 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1956 COMPAT_FLAGS(msg));
1da177e4
LT
1957 if (err)
1958 goto out_freeiov;
1959 if (MSG_CMSG_COMPAT & flags)
89bddce5 1960 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1961 &msg_compat->msg_controllen);
1962 else
89bddce5 1963 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1964 &msg->msg_controllen);
1965 if (err)
1966 goto out_freeiov;
1967 err = len;
1968
1969out_freeiov:
1970 if (iov != iovstack)
1971 sock_kfree_s(sock->sk, iov, iov_size);
1972out_put:
6cb153ca 1973 fput_light(sock->file, fput_needed);
1da177e4
LT
1974out:
1975 return err;
1976}
1977
1978#ifdef __ARCH_WANT_SYS_SOCKETCALL
1979
1980/* Argument list sizes for sys_socketcall */
/* AL(n): size in bytes of an argument list of n unsigned longs. */
#define AL(x) ((x) * sizeof(unsigned long))

/* Bytes of arguments to copy from user space, indexed by socketcall number. */
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};

#undef AL
1989
1990/*
89bddce5 1991 * System call vectors.
1da177e4
LT
1992 *
1993 * Argument checking cleaned up. Saved 20% in size.
1994 * This function doesn't need to set the kernel lock because
89bddce5 1995 * it is set by the callees.
1da177e4
LT
1996 */
1997
1998asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1999{
2000 unsigned long a[6];
89bddce5 2001 unsigned long a0, a1;
1da177e4
LT
2002 int err;
2003
89bddce5 2004 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2005 return -EINVAL;
2006
2007 /* copy_from_user should be SMP safe. */
2008 if (copy_from_user(a, args, nargs[call]))
2009 return -EFAULT;
3ec3b2fb 2010
89bddce5 2011 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2012 if (err)
2013 return err;
2014
89bddce5
SH
2015 a0 = a[0];
2016 a1 = a[1];
2017
2018 switch (call) {
2019 case SYS_SOCKET:
2020 err = sys_socket(a0, a1, a[2]);
2021 break;
2022 case SYS_BIND:
2023 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2024 break;
2025 case SYS_CONNECT:
2026 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2027 break;
2028 case SYS_LISTEN:
2029 err = sys_listen(a0, a1);
2030 break;
2031 case SYS_ACCEPT:
2032 err =
2033 sys_accept(a0, (struct sockaddr __user *)a1,
2034 (int __user *)a[2]);
2035 break;
2036 case SYS_GETSOCKNAME:
2037 err =
2038 sys_getsockname(a0, (struct sockaddr __user *)a1,
2039 (int __user *)a[2]);
2040 break;
2041 case SYS_GETPEERNAME:
2042 err =
2043 sys_getpeername(a0, (struct sockaddr __user *)a1,
2044 (int __user *)a[2]);
2045 break;
2046 case SYS_SOCKETPAIR:
2047 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2048 break;
2049 case SYS_SEND:
2050 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2051 break;
2052 case SYS_SENDTO:
2053 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2054 (struct sockaddr __user *)a[4], a[5]);
2055 break;
2056 case SYS_RECV:
2057 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2058 break;
2059 case SYS_RECVFROM:
2060 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2061 (struct sockaddr __user *)a[4],
2062 (int __user *)a[5]);
2063 break;
2064 case SYS_SHUTDOWN:
2065 err = sys_shutdown(a0, a1);
2066 break;
2067 case SYS_SETSOCKOPT:
2068 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2069 break;
2070 case SYS_GETSOCKOPT:
2071 err =
2072 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2073 (int __user *)a[4]);
2074 break;
2075 case SYS_SENDMSG:
2076 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2077 break;
2078 case SYS_RECVMSG:
2079 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2080 break;
2081 default:
2082 err = -EINVAL;
2083 break;
1da177e4
LT
2084 }
2085 return err;
2086}
2087
89bddce5 2088#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2089
55737fda
SH
2090/**
2091 * sock_register - add a socket protocol handler
2092 * @ops: description of protocol
2093 *
1da177e4
LT
2094 * This function is called by a protocol handler that wants to
2095 * advertise its address family, and have it linked into the
55737fda
SH
2096 * socket interface. The value ops->family corresponds to the
2097 * socket system call protocol family.
1da177e4 2098 */
f0fd27d4 2099int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2100{
2101 int err;
2102
2103 if (ops->family >= NPROTO) {
89bddce5
SH
2104 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2105 NPROTO);
1da177e4
LT
2106 return -ENOBUFS;
2107 }
55737fda
SH
2108
2109 spin_lock(&net_family_lock);
2110 if (net_families[ops->family])
2111 err = -EEXIST;
2112 else {
89bddce5 2113 net_families[ops->family] = ops;
1da177e4
LT
2114 err = 0;
2115 }
55737fda
SH
2116 spin_unlock(&net_family_lock);
2117
89bddce5 2118 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2119 return err;
2120}
2121
55737fda
SH
2122/**
2123 * sock_unregister - remove a protocol handler
2124 * @family: protocol family to remove
2125 *
1da177e4
LT
2126 * This function is called by a protocol handler that wants to
2127 * remove its address family, and have it unlinked from the
55737fda
SH
2128 * new socket creation.
2129 *
2130 * If protocol handler is a module, then it can use module reference
2131 * counts to protect against new references. If protocol handler is not
2132 * a module then it needs to provide its own protection in
2133 * the ops->create routine.
1da177e4 2134 */
f0fd27d4 2135void sock_unregister(int family)
1da177e4 2136{
f0fd27d4 2137 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2138
55737fda 2139 spin_lock(&net_family_lock);
89bddce5 2140 net_families[family] = NULL;
55737fda
SH
2141 spin_unlock(&net_family_lock);
2142
2143 synchronize_rcu();
2144
89bddce5 2145 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2146}
2147
77d76ea3 2148static int __init sock_init(void)
1da177e4
LT
2149{
2150 /*
89bddce5 2151 * Initialize sock SLAB cache.
1da177e4 2152 */
89bddce5 2153
1da177e4
LT
2154 sk_init();
2155
1da177e4 2156 /*
89bddce5 2157 * Initialize skbuff SLAB cache
1da177e4
LT
2158 */
2159 skb_init();
1da177e4
LT
2160
2161 /*
89bddce5 2162 * Initialize the protocols module.
1da177e4
LT
2163 */
2164
2165 init_inodecache();
2166 register_filesystem(&sock_fs_type);
2167 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2168
2169 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2170 */
2171
2172#ifdef CONFIG_NETFILTER
2173 netfilter_init();
2174#endif
cbeb321a
DM
2175
2176 return 0;
1da177e4
LT
2177}
2178
77d76ea3
AK
2179core_initcall(sock_init); /* early initcall */
2180
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Write the "sockets: used N" line to @seq, where N is the sum of the
 * per-CPU sockets_in_use counters over every possible CPU, reported as
 * zero if that sum is negative.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; never show that to the reader. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2197
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point for socket files: hands the request to the
 * protocol's compat_ioctl op when one exists, otherwise returns
 * -ENOIOCTLCMD.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	int ret;

	/* Protocols are not required to supply a compat handler. */
	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	ret = sock->ops->compat_ioctl(sock, cmd, arg);
	return ret;
}
#endif
2211
ac5a488e
SS
2212int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2213{
2214 return sock->ops->bind(sock, addr, addrlen);
2215}
2216
2217int kernel_listen(struct socket *sock, int backlog)
2218{
2219 return sock->ops->listen(sock, backlog);
2220}
2221
2222int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2223{
2224 struct sock *sk = sock->sk;
2225 int err;
2226
2227 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2228 newsock);
2229 if (err < 0)
2230 goto done;
2231
2232 err = sock->ops->accept(sock, *newsock, flags);
2233 if (err < 0) {
2234 sock_release(*newsock);
2235 goto done;
2236 }
2237
2238 (*newsock)->ops = sock->ops;
2239
2240done:
2241 return err;
2242}
2243
2244int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2245 int flags)
ac5a488e
SS
2246{
2247 return sock->ops->connect(sock, addr, addrlen, flags);
2248}
2249
2250int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2251 int *addrlen)
2252{
2253 return sock->ops->getname(sock, addr, addrlen, 0);
2254}
2255
2256int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2257 int *addrlen)
2258{
2259 return sock->ops->getname(sock, addr, addrlen, 1);
2260}
2261
2262int kernel_getsockopt(struct socket *sock, int level, int optname,
2263 char *optval, int *optlen)
2264{
2265 mm_segment_t oldfs = get_fs();
2266 int err;
2267
2268 set_fs(KERNEL_DS);
2269 if (level == SOL_SOCKET)
2270 err = sock_getsockopt(sock, level, optname, optval, optlen);
2271 else
2272 err = sock->ops->getsockopt(sock, level, optname, optval,
2273 optlen);
2274 set_fs(oldfs);
2275 return err;
2276}
2277
2278int kernel_setsockopt(struct socket *sock, int level, int optname,
2279 char *optval, int optlen)
2280{
2281 mm_segment_t oldfs = get_fs();
2282 int err;
2283
2284 set_fs(KERNEL_DS);
2285 if (level == SOL_SOCKET)
2286 err = sock_setsockopt(sock, level, optname, optval, optlen);
2287 else
2288 err = sock->ops->setsockopt(sock, level, optname, optval,
2289 optlen);
2290 set_fs(oldfs);
2291 return err;
2292}
2293
2294int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2295 size_t size, int flags)
2296{
2297 if (sock->ops->sendpage)
2298 return sock->ops->sendpage(sock, page, offset, size, flags);
2299
2300 return sock_no_sendpage(sock, page, offset, size, flags);
2301}
2302
2303int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2304{
2305 mm_segment_t oldfs = get_fs();
2306 int err;
2307
2308 set_fs(KERNEL_DS);
2309 err = sock->ops->ioctl(sock, cmd, arg);
2310 set_fs(oldfs);
2311
2312 return err;
2313}
2314
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and message helpers */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_* socket wrappers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
This page took 0.539959 seconds and 5 git commands to generate.