hamradio: Fix bit test correctly.
[deliverable/linux.git] / net / core / sock.c
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic socket support routines. Memory allocators, socket lock/release
7 * handler for protocols to use and generic option handler.
8 *
9 *
02c30a84 10 * Authors: Ross Biro
1da177e4
LT
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Florian La Roche, <flla@stud.uni-sb.de>
13 * Alan Cox, <A.Cox@swansea.ac.uk>
14 *
15 * Fixes:
16 * Alan Cox : Numerous verify_area() problems
17 * Alan Cox : Connecting on a connecting socket
18 * now returns an error for tcp.
19 * Alan Cox : sock->protocol is set correctly.
20 * and is not sometimes left as 0.
21 * Alan Cox : connect handles icmp errors on a
22 * connect properly. Unfortunately there
23 * is a restart syscall nasty there. I
24 * can't match BSD without hacking the C
25 * library. Ideas urgently sought!
26 * Alan Cox : Disallow bind() to addresses that are
27 * not ours - especially broadcast ones!!
28 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
29 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
30 * instead they leave that for the DESTROY timer.
31 * Alan Cox : Clean up error flag in accept
32 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
33 * was buggy. Put a remove_sock() in the handler
34 * for memory when we hit 0. Also altered the timer
4ec93edb 35 * code. The ACK stuff can wait and needs major
1da177e4
LT
36 * TCP layer surgery.
37 * Alan Cox : Fixed TCP ack bug, removed remove sock
38 * and fixed timer/inet_bh race.
39 * Alan Cox : Added zapped flag for TCP
40 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
41 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
43 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
44 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45 * Rick Sladkey : Relaxed UDP rules for matching packets.
46 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
47 * Pauline Middelink : identd support
48 * Alan Cox : Fixed connect() taking signals I think.
49 * Alan Cox : SO_LINGER supported
50 * Alan Cox : Error reporting fixes
51 * Anonymous : inet_create tidied up (sk->reuse setting)
52 * Alan Cox : inet sockets don't set sk->type!
53 * Alan Cox : Split socket option code
54 * Alan Cox : Callbacks
55 * Alan Cox : Nagle flag for Charles & Johannes stuff
56 * Alex : Removed restriction on inet fioctl
57 * Alan Cox : Splitting INET from NET core
58 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
59 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
60 * Alan Cox : Split IP from generic code
61 * Alan Cox : New kfree_skbmem()
62 * Alan Cox : Make SO_DEBUG superuser only.
63 * Alan Cox : Allow anyone to clear SO_DEBUG
64 * (compatibility fix)
65 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
66 * Alan Cox : Allocator for a socket is settable.
67 * Alan Cox : SO_ERROR includes soft errors.
68 * Alan Cox : Allow NULL arguments on some SO_ opts
69 * Alan Cox : Generic socket allocation to make hooks
70 * easier (suggested by Craig Metz).
71 * Michael Pall : SO_ERROR returns positive errno again
72 * Steve Whitehouse: Added default destructor to free
73 * protocol private data.
74 * Steve Whitehouse: Added various other default routines
75 * common to several socket families.
76 * Chris Evans : Call suser() check last on F_SETOWN
77 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
79 * Andi Kleen : Fix write_space callback
80 * Chris Evans : Security fixes - signedness again
81 * Arnaldo C. Melo : cleanups, use skb_queue_purge
82 *
83 * To Fix:
84 *
85 *
86 * This program is free software; you can redistribute it and/or
87 * modify it under the terms of the GNU General Public License
88 * as published by the Free Software Foundation; either version
89 * 2 of the License, or (at your option) any later version.
90 */
91
4fc268d2 92#include <linux/capability.h>
1da177e4
LT
93#include <linux/errno.h>
94#include <linux/types.h>
95#include <linux/socket.h>
96#include <linux/in.h>
97#include <linux/kernel.h>
1da177e4
LT
98#include <linux/module.h>
99#include <linux/proc_fs.h>
100#include <linux/seq_file.h>
101#include <linux/sched.h>
102#include <linux/timer.h>
103#include <linux/string.h>
104#include <linux/sockios.h>
105#include <linux/net.h>
106#include <linux/mm.h>
107#include <linux/slab.h>
108#include <linux/interrupt.h>
109#include <linux/poll.h>
110#include <linux/tcp.h>
111#include <linux/init.h>
a1f8e7f7 112#include <linux/highmem.h>
1da177e4
LT
113
114#include <asm/uaccess.h>
115#include <asm/system.h>
116
117#include <linux/netdevice.h>
118#include <net/protocol.h>
119#include <linux/skbuff.h>
457c4cbc 120#include <net/net_namespace.h>
2e6599cb 121#include <net/request_sock.h>
1da177e4 122#include <net/sock.h>
20d49473 123#include <linux/net_tstamp.h>
1da177e4
LT
124#include <net/xfrm.h>
125#include <linux/ipsec.h>
126
127#include <linux/filter.h>
128
129#ifdef CONFIG_INET
130#include <net/tcp.h>
131#endif
132
da21f24d
IM
133/*
134 * Each address family might have different locking rules, so we have
135 * one slock key per address family:
136 */
a5b5bb9a
IM
137static struct lock_class_key af_family_keys[AF_MAX];
138static struct lock_class_key af_family_slock_keys[AF_MAX];
139
a5b5bb9a
IM
140/*
141 * Make lock validator output more readable. (we pre-construct these
142 * strings build-time, so that runtime initialization of socket
143 * locks is fast):
144 */
36cbd3dc 145static const char *const af_family_key_strings[AF_MAX+1] = {
a5b5bb9a
IM
146 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
147 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
148 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
149 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
150 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
151 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
152 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
cbd151bf 153 "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
a5b5bb9a 154 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
cd05acfe 155 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
17926a79 156 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
bce7b154 157 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
fcb94e42 158 "sk_lock-AF_IEEE802154",
bce7b154 159 "sk_lock-AF_MAX"
a5b5bb9a 160};
36cbd3dc 161static const char *const af_family_slock_key_strings[AF_MAX+1] = {
a5b5bb9a
IM
162 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
163 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
164 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
165 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
166 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
167 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
168 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
cbd151bf 169 "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
a5b5bb9a 170 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
cd05acfe 171 "slock-27" , "slock-28" , "slock-AF_CAN" ,
17926a79 172 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
bce7b154 173 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
fcb94e42 174 "slock-AF_IEEE802154",
bce7b154 175 "slock-AF_MAX"
a5b5bb9a 176};
36cbd3dc 177static const char *const af_family_clock_key_strings[AF_MAX+1] = {
443aef0e
PZ
178 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
179 "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
180 "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
181 "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
182 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
183 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
184 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
cbd151bf 185 "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
443aef0e 186 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
b4942af6 187 "clock-27" , "clock-28" , "clock-AF_CAN" ,
e51f802b 188 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
bce7b154 189 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
fcb94e42 190 "clock-AF_IEEE802154",
bce7b154 191 "clock-AF_MAX"
443aef0e 192};
da21f24d
IM
193
194/*
195 * sk_callback_lock locking rules are per-address-family,
196 * so split the lock classes by using a per-AF key:
197 */
198static struct lock_class_key af_callback_keys[AF_MAX];
199
1da177e4
LT
200/* Take into consideration the size of the struct sk_buff overhead in the
201 * determination of these values, since that is non-constant across
202 * platforms. This makes socket queueing behavior and performance
203 * not depend upon such differences.
204 */
205#define _SK_MEM_PACKETS 256
206#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256)
207#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
208#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
209
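With an illustrative 256-byte struct sk_buff, the sizing above works out to concrete numbers; a minimal userspace model (the sk_buff size is an assumption for illustration, the real value varies by architecture and kernel config):

```c
#include <stdio.h>

/* Model of the default socket buffer sizing above. The 256-byte
 * struct sk_buff is an illustrative assumption only. */
#define SIZEOF_SK_BUFF   256	/* assumed for illustration */
#define _SK_MEM_PACKETS  256
#define _SK_MEM_OVERHEAD (SIZEOF_SK_BUFF + 256)
#define SK_WMEM_MAX      (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

int main(void)
{
	/* 512 bytes of per-packet overhead * 256 packets = 131072 */
	printf("overhead/packet: %d, SK_WMEM_MAX: %d\n",
	       _SK_MEM_OVERHEAD, SK_WMEM_MAX);
	return 0;
}
```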
210/* Run time adjustable parameters. */
ab32ea5d
BH
211__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
212__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
213__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
214__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
1da177e4
LT
215
216/* Maximal space eaten by iovec or ancillary data plus some space */
ab32ea5d 217int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
2a91525c 218EXPORT_SYMBOL(sysctl_optmem_max);
1da177e4
LT
219
220static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
221{
222 struct timeval tv;
223
224 if (optlen < sizeof(tv))
225 return -EINVAL;
226 if (copy_from_user(&tv, optval, sizeof(tv)))
227 return -EFAULT;
ba78073e
VA
228 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
229 return -EDOM;
1da177e4 230
ba78073e 231 if (tv.tv_sec < 0) {
6f11df83
AM
232 static int warned __read_mostly;
233
ba78073e 234 *timeo_p = 0;
50aab54f 235 if (warned < 10 && net_ratelimit()) {
ba78073e
VA
236 warned++;
237 printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
238 "tries to set negative timeout\n",
ba25f9dc 239 current->comm, task_pid_nr(current));
50aab54f 240 }
ba78073e
VA
241 return 0;
242 }
1da177e4
LT
243 *timeo_p = MAX_SCHEDULE_TIMEOUT;
244 if (tv.tv_sec == 0 && tv.tv_usec == 0)
245 return 0;
246 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
247 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
248 return 0;
249}
250
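The final conversion in sock_set_timeout() rounds the microsecond part up to the next jiffy, so any nonzero timeout costs at least one tick. A standalone sketch of that formula, assuming HZ=1000:

```c
#include <stdio.h>

#define HZ 1000	/* assumed; kernels are commonly built with 100-1000 */

/* Userspace model of the jiffies conversion in sock_set_timeout(). */
static long timeval_to_jiffies(long sec, long usec)
{
	return sec * HZ + (usec + (1000000 / HZ - 1)) / (1000000 / HZ);
}

int main(void)
{
	printf("%ld\n", timeval_to_jiffies(2, 500000));	/* 2500 */
	printf("%ld\n", timeval_to_jiffies(0, 1));	/* 1, not 0 */
	return 0;
}
```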
251static void sock_warn_obsolete_bsdism(const char *name)
252{
253 static int warned;
254 static char warncomm[TASK_COMM_LEN];
4ec93edb
YH
255 if (strcmp(warncomm, current->comm) && warned < 5) {
256 strcpy(warncomm, current->comm);
1da177e4
LT
257 printk(KERN_WARNING "process `%s' is using obsolete "
258 "%s SO_BSDCOMPAT\n", warncomm, name);
259 warned++;
260 }
261}
262
20d49473 263static void sock_disable_timestamp(struct sock *sk, int flag)
4ec93edb 264{
20d49473
PO
265 if (sock_flag(sk, flag)) {
266 sock_reset_flag(sk, flag);
267 if (!sock_flag(sk, SOCK_TIMESTAMP) &&
268 !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
269 net_disable_timestamp();
270 }
1da177e4
LT
271 }
272}
273
274
f0088a50
DV
275int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
276{
277 int err = 0;
278 int skb_len;
3b885787
NH
279 unsigned long flags;
280 struct sk_buff_head *list = &sk->sk_receive_queue;
f0088a50 281
9ee6b7f1 282 /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces the
f0088a50
DV
283 number of warnings when compiling with -W --ANK
284 */
285 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
286 (unsigned)sk->sk_rcvbuf) {
287 err = -ENOMEM;
288 goto out;
289 }
290
fda9ef5d 291 err = sk_filter(sk, skb);
f0088a50
DV
292 if (err)
293 goto out;
294
3ab224be
HA
295 if (!sk_rmem_schedule(sk, skb->truesize)) {
296 err = -ENOBUFS;
297 goto out;
298 }
299
f0088a50
DV
300 skb->dev = NULL;
301 skb_set_owner_r(skb, sk);
49ad9599 302
f0088a50
DV
303 /* Cache the SKB length before we tack it onto the receive
304 * queue. Once it is added it no longer belongs to us and
305 * may be freed by other threads of control pulling packets
306 * from the queue.
307 */
308 skb_len = skb->len;
309
3b885787
NH
310 spin_lock_irqsave(&list->lock, flags);
311 skb->dropcount = atomic_read(&sk->sk_drops);
312 __skb_queue_tail(list, skb);
313 spin_unlock_irqrestore(&list->lock, flags);
f0088a50
DV
314
315 if (!sock_flag(sk, SOCK_DEAD))
316 sk->sk_data_ready(sk, skb_len);
317out:
318 return err;
319}
320EXPORT_SYMBOL(sock_queue_rcv_skb);
321
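The skb->dropcount snapshot taken under the queue lock above is what userspace later sees as SO_RXQ_OVFL ancillary data. A hedged receiver sketch (error handling elided; assumes a kernel/libc exposing SO_RXQ_OVFL, which was added alongside this code):

```c
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Sketch: enable SO_RXQ_OVFL and print the drop counter that
 * sock_queue_rcv_skb() recorded in skb->dropcount. */
int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0), on = 1;
	struct sockaddr_in sa = { .sin_family = AF_INET,
				  .sin_port = htons(9999) };
	char data[2048], cbuf[CMSG_SPACE(sizeof(unsigned int))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
			      .msg_control = cbuf,
			      .msg_controllen = sizeof(cbuf) };
	struct cmsghdr *cm;

	setsockopt(fd, SOL_SOCKET, SO_RXQ_OVFL, &on, sizeof(on));
	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
	if (recvmsg(fd, &msg, 0) < 0)
		return 1;
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SO_RXQ_OVFL)
			printf("drops: %u\n",
			       *(unsigned int *)CMSG_DATA(cm));
	return 0;
}
```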
58a5a7b9 322int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
f0088a50
DV
323{
324 int rc = NET_RX_SUCCESS;
325
fda9ef5d 326 if (sk_filter(sk, skb))
f0088a50
DV
327 goto discard_and_relse;
328
329 skb->dev = NULL;
330
58a5a7b9
ACM
331 if (nested)
332 bh_lock_sock_nested(sk);
333 else
334 bh_lock_sock(sk);
a5b5bb9a
IM
335 if (!sock_owned_by_user(sk)) {
336 /*
337 * trylock + unlock semantics:
338 */
339 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
340
c57943a1 341 rc = sk_backlog_rcv(sk, skb);
a5b5bb9a
IM
342
343 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
344 } else
f0088a50
DV
345 sk_add_backlog(sk, skb);
346 bh_unlock_sock(sk);
347out:
348 sock_put(sk);
349 return rc;
350discard_and_relse:
351 kfree_skb(skb);
352 goto out;
353}
354EXPORT_SYMBOL(sk_receive_skb);
355
356struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
357{
358 struct dst_entry *dst = sk->sk_dst_cache;
359
360 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
361 sk->sk_dst_cache = NULL;
362 dst_release(dst);
363 return NULL;
364 }
365
366 return dst;
367}
368EXPORT_SYMBOL(__sk_dst_check);
369
370struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
371{
372 struct dst_entry *dst = sk_dst_get(sk);
373
374 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
375 sk_dst_reset(sk);
376 dst_release(dst);
377 return NULL;
378 }
379
380 return dst;
381}
382EXPORT_SYMBOL(sk_dst_check);
383
4878809f
DM
384static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
385{
386 int ret = -ENOPROTOOPT;
387#ifdef CONFIG_NETDEVICES
3b1e0a65 388 struct net *net = sock_net(sk);
4878809f
DM
389 char devname[IFNAMSIZ];
390 int index;
391
392 /* Sorry... */
393 ret = -EPERM;
394 if (!capable(CAP_NET_RAW))
395 goto out;
396
397 ret = -EINVAL;
398 if (optlen < 0)
399 goto out;
400
401 /* Bind this socket to a particular device like "eth0",
402 * as specified in the passed interface name. If the
403 * name is "" or the option length is zero the socket
404 * is not bound.
405 */
406 if (optlen > IFNAMSIZ - 1)
407 optlen = IFNAMSIZ - 1;
408 memset(devname, 0, sizeof(devname));
409
410 ret = -EFAULT;
411 if (copy_from_user(devname, optval, optlen))
412 goto out;
413
414 if (devname[0] == '\0') {
415 index = 0;
416 } else {
881d966b 417 struct net_device *dev = dev_get_by_name(net, devname);
4878809f
DM
418
419 ret = -ENODEV;
420 if (!dev)
421 goto out;
422
423 index = dev->ifindex;
424 dev_put(dev);
425 }
426
427 lock_sock(sk);
428 sk->sk_bound_dev_if = index;
429 sk_dst_reset(sk);
430 release_sock(sk);
431
432 ret = 0;
433
434out:
435#endif
436
437 return ret;
438}
439
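From userspace this surfaces as SO_BINDTODEVICE; a short hedged sketch ("eth0" is an assumed interface name, and CAP_NET_RAW is required, matching the capable() check above):

```c
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	const char ifname[IFNAMSIZ] = "eth0";	/* assumed device name */

	if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
		       ifname, strlen(ifname)) < 0)
		perror("bind to eth0");	/* EPERM without CAP_NET_RAW */

	/* A zero-length name clears sk->sk_bound_dev_if again. */
	if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "", 0) < 0)
		perror("unbind");
	return 0;
}
```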
c0ef877b
PE
440static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
441{
442 if (valbool)
443 sock_set_flag(sk, bit);
444 else
445 sock_reset_flag(sk, bit);
446}
447
1da177e4
LT
448/*
449 * This is meant for all protocols to use and covers goings on
450 * at the socket level. Everything here is generic.
451 */
452
453int sock_setsockopt(struct socket *sock, int level, int optname,
b7058842 454 char __user *optval, unsigned int optlen)
1da177e4 455{
2a91525c 456 struct sock *sk = sock->sk;
1da177e4
LT
457 int val;
458 int valbool;
459 struct linger ling;
460 int ret = 0;
4ec93edb 461
1da177e4
LT
462 /*
463 * Options without arguments
464 */
465
4878809f
DM
466 if (optname == SO_BINDTODEVICE)
467 return sock_bindtodevice(sk, optval, optlen);
468
e71a4783
SH
469 if (optlen < sizeof(int))
470 return -EINVAL;
4ec93edb 471
1da177e4
LT
472 if (get_user(val, (int __user *)optval))
473 return -EFAULT;
4ec93edb 474
2a91525c 475 valbool = val ? 1 : 0;
1da177e4
LT
476
477 lock_sock(sk);
478
2a91525c 479 switch (optname) {
e71a4783 480 case SO_DEBUG:
2a91525c 481 if (val && !capable(CAP_NET_ADMIN))
e71a4783 482 ret = -EACCES;
2a91525c 483 else
c0ef877b 484 sock_valbool_flag(sk, SOCK_DBG, valbool);
e71a4783
SH
485 break;
486 case SO_REUSEADDR:
487 sk->sk_reuse = valbool;
488 break;
489 case SO_TYPE:
49c794e9 490 case SO_PROTOCOL:
0d6038ee 491 case SO_DOMAIN:
e71a4783
SH
492 case SO_ERROR:
493 ret = -ENOPROTOOPT;
494 break;
495 case SO_DONTROUTE:
c0ef877b 496 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
e71a4783
SH
497 break;
498 case SO_BROADCAST:
499 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
500 break;
501 case SO_SNDBUF:
502 /* Don't error on this. BSD doesn't and if you think
503 about it this is right. Otherwise apps have to
504 play 'guess the biggest size' games. RCVBUF/SNDBUF
505 are treated in BSD as hints */
506
507 if (val > sysctl_wmem_max)
508 val = sysctl_wmem_max;
b0573dea 509set_sndbuf:
e71a4783
SH
510 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
511 if ((val * 2) < SOCK_MIN_SNDBUF)
512 sk->sk_sndbuf = SOCK_MIN_SNDBUF;
513 else
514 sk->sk_sndbuf = val * 2;
1da177e4 515
e71a4783
SH
516 /*
517 * Wake up sending tasks if we
518 * upped the value.
519 */
520 sk->sk_write_space(sk);
521 break;
1da177e4 522
e71a4783
SH
523 case SO_SNDBUFFORCE:
524 if (!capable(CAP_NET_ADMIN)) {
525 ret = -EPERM;
526 break;
527 }
528 goto set_sndbuf;
b0573dea 529
e71a4783
SH
530 case SO_RCVBUF:
531 /* Don't error on this. BSD doesn't and if you think
532 about it this is right. Otherwise apps have to
533 play 'guess the biggest size' games. RCVBUF/SNDBUF
534 are treated in BSD as hints */
4ec93edb 535
e71a4783
SH
536 if (val > sysctl_rmem_max)
537 val = sysctl_rmem_max;
b0573dea 538set_rcvbuf:
e71a4783
SH
539 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
540 /*
541 * We double it on the way in to account for
542 * "struct sk_buff" etc. overhead. Applications
543 * assume that the SO_RCVBUF setting they make will
544 * allow that much actual data to be received on that
545 * socket.
546 *
547 * Applications are unaware that "struct sk_buff" and
548 * other overheads allocate from the receive buffer
549 * during socket buffer allocation.
550 *
551 * And after considering the possible alternatives,
552 * returning the value we actually used in getsockopt
553 * is the most desirable behavior.
554 */
555 if ((val * 2) < SOCK_MIN_RCVBUF)
556 sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
557 else
558 sk->sk_rcvbuf = val * 2;
559 break;
560
561 case SO_RCVBUFFORCE:
562 if (!capable(CAP_NET_ADMIN)) {
563 ret = -EPERM;
1da177e4 564 break;
e71a4783
SH
565 }
566 goto set_rcvbuf;
1da177e4 567
e71a4783 568 case SO_KEEPALIVE:
1da177e4 569#ifdef CONFIG_INET
e71a4783
SH
570 if (sk->sk_protocol == IPPROTO_TCP)
571 tcp_set_keepalive(sk, valbool);
1da177e4 572#endif
e71a4783
SH
573 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
574 break;
575
576 case SO_OOBINLINE:
577 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
578 break;
579
580 case SO_NO_CHECK:
581 sk->sk_no_check = valbool;
582 break;
583
584 case SO_PRIORITY:
585 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
586 sk->sk_priority = val;
587 else
588 ret = -EPERM;
589 break;
590
591 case SO_LINGER:
592 if (optlen < sizeof(ling)) {
593 ret = -EINVAL; /* 1003.1g */
1da177e4 594 break;
e71a4783 595 }
2a91525c 596 if (copy_from_user(&ling, optval, sizeof(ling))) {
e71a4783 597 ret = -EFAULT;
1da177e4 598 break;
e71a4783
SH
599 }
600 if (!ling.l_onoff)
601 sock_reset_flag(sk, SOCK_LINGER);
602 else {
1da177e4 603#if (BITS_PER_LONG == 32)
e71a4783
SH
604 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
605 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
1da177e4 606 else
e71a4783
SH
607#endif
608 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
609 sock_set_flag(sk, SOCK_LINGER);
610 }
611 break;
612
613 case SO_BSDCOMPAT:
614 sock_warn_obsolete_bsdism("setsockopt");
615 break;
616
617 case SO_PASSCRED:
618 if (valbool)
619 set_bit(SOCK_PASSCRED, &sock->flags);
620 else
621 clear_bit(SOCK_PASSCRED, &sock->flags);
622 break;
623
624 case SO_TIMESTAMP:
92f37fd2 625 case SO_TIMESTAMPNS:
e71a4783 626 if (valbool) {
92f37fd2
ED
627 if (optname == SO_TIMESTAMP)
628 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
629 else
630 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
e71a4783 631 sock_set_flag(sk, SOCK_RCVTSTAMP);
20d49473 632 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
92f37fd2 633 } else {
e71a4783 634 sock_reset_flag(sk, SOCK_RCVTSTAMP);
92f37fd2
ED
635 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
636 }
e71a4783
SH
637 break;
638
20d49473
PO
639 case SO_TIMESTAMPING:
640 if (val & ~SOF_TIMESTAMPING_MASK) {
f249fb78 641 ret = -EINVAL;
20d49473
PO
642 break;
643 }
644 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
645 val & SOF_TIMESTAMPING_TX_HARDWARE);
646 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
647 val & SOF_TIMESTAMPING_TX_SOFTWARE);
648 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
649 val & SOF_TIMESTAMPING_RX_HARDWARE);
650 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
651 sock_enable_timestamp(sk,
652 SOCK_TIMESTAMPING_RX_SOFTWARE);
653 else
654 sock_disable_timestamp(sk,
655 SOCK_TIMESTAMPING_RX_SOFTWARE);
656 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
657 val & SOF_TIMESTAMPING_SOFTWARE);
658 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
659 val & SOF_TIMESTAMPING_SYS_HARDWARE);
660 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
661 val & SOF_TIMESTAMPING_RAW_HARDWARE);
662 break;
663
e71a4783
SH
664 case SO_RCVLOWAT:
665 if (val < 0)
666 val = INT_MAX;
667 sk->sk_rcvlowat = val ? : 1;
668 break;
669
670 case SO_RCVTIMEO:
671 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
672 break;
673
674 case SO_SNDTIMEO:
675 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
676 break;
1da177e4 677
e71a4783
SH
678 case SO_ATTACH_FILTER:
679 ret = -EINVAL;
680 if (optlen == sizeof(struct sock_fprog)) {
681 struct sock_fprog fprog;
1da177e4 682
e71a4783
SH
683 ret = -EFAULT;
684 if (copy_from_user(&fprog, optval, sizeof(fprog)))
1da177e4 685 break;
e71a4783
SH
686
687 ret = sk_attach_filter(&fprog, sk);
688 }
689 break;
690
691 case SO_DETACH_FILTER:
55b33325 692 ret = sk_detach_filter(sk);
e71a4783 693 break;
1da177e4 694
e71a4783
SH
695 case SO_PASSSEC:
696 if (valbool)
697 set_bit(SOCK_PASSSEC, &sock->flags);
698 else
699 clear_bit(SOCK_PASSSEC, &sock->flags);
700 break;
4a19ec58
LAT
701 case SO_MARK:
702 if (!capable(CAP_NET_ADMIN))
703 ret = -EPERM;
2a91525c 704 else
4a19ec58 705 sk->sk_mark = val;
4a19ec58 706 break;
877ce7c1 707
1da177e4
LT
708 /* We implement the SO_SNDLOWAT etc to
709 not be settable (1003.1g 5.3) */
3b885787
NH
710 case SO_RXQ_OVFL:
711 if (valbool)
712 sock_set_flag(sk, SOCK_RXQ_OVFL);
713 else
714 sock_reset_flag(sk, SOCK_RXQ_OVFL);
715 break;
e71a4783
SH
716 default:
717 ret = -ENOPROTOOPT;
718 break;
4ec93edb 719 }
1da177e4
LT
720 release_sock(sk);
721 return ret;
722}
2a91525c 723EXPORT_SYMBOL(sock_setsockopt);
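The doubling in the SO_SNDBUF/SO_RCVBUF cases is observable from userspace: getsockopt() reports the value the kernel actually stored, roughly twice the request. A small demonstration (the printed number assumes the request is under sysctl_rmem_max):

```c
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int req = 65536, got;
	socklen_t len = sizeof(got);

	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &req, sizeof(req));
	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &got, &len);
	/* Typically prints 131072: the kernel stored val * 2 to cover
	 * struct sk_buff overhead, unless clamped by sysctl_rmem_max. */
	printf("requested %d, kernel set %d\n", req, got);
	return 0;
}
```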
1da177e4
LT
724
725
726int sock_getsockopt(struct socket *sock, int level, int optname,
727 char __user *optval, int __user *optlen)
728{
729 struct sock *sk = sock->sk;
4ec93edb 730
e71a4783 731 union {
4ec93edb
YH
732 int val;
733 struct linger ling;
1da177e4
LT
734 struct timeval tm;
735 } v;
4ec93edb 736
1da177e4
LT
737 unsigned int lv = sizeof(int);
738 int len;
4ec93edb 739
e71a4783 740 if (get_user(len, optlen))
4ec93edb 741 return -EFAULT;
e71a4783 742 if (len < 0)
1da177e4 743 return -EINVAL;
4ec93edb 744
50fee1de 745 memset(&v, 0, sizeof(v));
df0bca04 746
2a91525c 747 switch (optname) {
e71a4783
SH
748 case SO_DEBUG:
749 v.val = sock_flag(sk, SOCK_DBG);
750 break;
751
752 case SO_DONTROUTE:
753 v.val = sock_flag(sk, SOCK_LOCALROUTE);
754 break;
755
756 case SO_BROADCAST:
757 v.val = !!sock_flag(sk, SOCK_BROADCAST);
758 break;
759
760 case SO_SNDBUF:
761 v.val = sk->sk_sndbuf;
762 break;
763
764 case SO_RCVBUF:
765 v.val = sk->sk_rcvbuf;
766 break;
767
768 case SO_REUSEADDR:
769 v.val = sk->sk_reuse;
770 break;
771
772 case SO_KEEPALIVE:
773 v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
774 break;
775
776 case SO_TYPE:
777 v.val = sk->sk_type;
778 break;
779
49c794e9
JE
780 case SO_PROTOCOL:
781 v.val = sk->sk_protocol;
782 break;
783
0d6038ee
JE
784 case SO_DOMAIN:
785 v.val = sk->sk_family;
786 break;
787
e71a4783
SH
788 case SO_ERROR:
789 v.val = -sock_error(sk);
2a91525c 790 if (v.val == 0)
e71a4783
SH
791 v.val = xchg(&sk->sk_err_soft, 0);
792 break;
793
794 case SO_OOBINLINE:
795 v.val = !!sock_flag(sk, SOCK_URGINLINE);
796 break;
797
798 case SO_NO_CHECK:
799 v.val = sk->sk_no_check;
800 break;
801
802 case SO_PRIORITY:
803 v.val = sk->sk_priority;
804 break;
805
806 case SO_LINGER:
807 lv = sizeof(v.ling);
808 v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
809 v.ling.l_linger = sk->sk_lingertime / HZ;
810 break;
811
812 case SO_BSDCOMPAT:
813 sock_warn_obsolete_bsdism("getsockopt");
814 break;
815
816 case SO_TIMESTAMP:
92f37fd2
ED
817 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
818 !sock_flag(sk, SOCK_RCVTSTAMPNS);
819 break;
820
821 case SO_TIMESTAMPNS:
822 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
e71a4783
SH
823 break;
824
20d49473
PO
825 case SO_TIMESTAMPING:
826 v.val = 0;
827 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
828 v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
829 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
830 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
831 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
832 v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
833 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
834 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
835 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
836 v.val |= SOF_TIMESTAMPING_SOFTWARE;
837 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
838 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
839 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
840 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
841 break;
842
e71a4783 843 case SO_RCVTIMEO:
2a91525c 844 lv = sizeof(struct timeval);
e71a4783
SH
845 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
846 v.tm.tv_sec = 0;
847 v.tm.tv_usec = 0;
848 } else {
849 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
850 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
851 }
852 break;
853
854 case SO_SNDTIMEO:
2a91525c 855 lv = sizeof(struct timeval);
e71a4783
SH
856 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
857 v.tm.tv_sec = 0;
858 v.tm.tv_usec = 0;
859 } else {
860 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
861 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
862 }
863 break;
1da177e4 864
e71a4783
SH
865 case SO_RCVLOWAT:
866 v.val = sk->sk_rcvlowat;
867 break;
1da177e4 868
e71a4783 869 case SO_SNDLOWAT:
2a91525c 870 v.val = 1;
e71a4783 871 break;
1da177e4 872
e71a4783
SH
873 case SO_PASSCRED:
874 v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
875 break;
1da177e4 876
e71a4783
SH
877 case SO_PEERCRED:
878 if (len > sizeof(sk->sk_peercred))
879 len = sizeof(sk->sk_peercred);
880 if (copy_to_user(optval, &sk->sk_peercred, len))
881 return -EFAULT;
882 goto lenout;
1da177e4 883
e71a4783
SH
884 case SO_PEERNAME:
885 {
886 char address[128];
887
888 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
889 return -ENOTCONN;
890 if (lv < len)
891 return -EINVAL;
892 if (copy_to_user(optval, address, len))
893 return -EFAULT;
894 goto lenout;
895 }
1da177e4 896
e71a4783
SH
897 /* Dubious BSD thing... Probably nobody even uses it, but
898 * the UNIX standard wants it for whatever reason... -DaveM
899 */
900 case SO_ACCEPTCONN:
901 v.val = sk->sk_state == TCP_LISTEN;
902 break;
1da177e4 903
e71a4783
SH
904 case SO_PASSSEC:
905 v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
906 break;
877ce7c1 907
e71a4783
SH
908 case SO_PEERSEC:
909 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1da177e4 910
4a19ec58
LAT
911 case SO_MARK:
912 v.val = sk->sk_mark;
913 break;
914
3b885787
NH
915 case SO_RXQ_OVFL:
916 v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
917 break;
918
e71a4783
SH
919 default:
920 return -ENOPROTOOPT;
1da177e4 921 }
e71a4783 922
1da177e4
LT
923 if (len > lv)
924 len = lv;
925 if (copy_to_user(optval, &v, len))
926 return -EFAULT;
927lenout:
4ec93edb
YH
928 if (put_user(len, optlen))
929 return -EFAULT;
930 return 0;
1da177e4
LT
931}
932
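Most of these cases report a flag or counter straight out of struct sock; SO_ACCEPTCONN, for instance, is just the TCP_LISTEN state test above. A quick check from userspace:

```c
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int v;
	socklen_t len = sizeof(v);

	listen(fd, 1);	/* listen on an unbound socket is legal */
	getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &v, &len);
	printf("listening: %d\n", v);	/* 1: sk_state == TCP_LISTEN */
	return 0;
}
```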
a5b5bb9a
IM
933/*
934 * Initialize an sk_lock.
935 *
936 * (We also register the sk_lock with the lock validator.)
937 */
b6f99a21 938static inline void sock_lock_init(struct sock *sk)
a5b5bb9a 939{
ed07536e
PZ
940 sock_lock_init_class_and_name(sk,
941 af_family_slock_key_strings[sk->sk_family],
942 af_family_slock_keys + sk->sk_family,
943 af_family_key_strings[sk->sk_family],
944 af_family_keys + sk->sk_family);
a5b5bb9a
IM
945}
946
4dc6dc71
ED
947/*
948 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
949 * even temporarily, because of RCU lookups. sk_node should also be left as is.
950 */
f1a6c4da
PE
951static void sock_copy(struct sock *nsk, const struct sock *osk)
952{
953#ifdef CONFIG_SECURITY_NETWORK
954 void *sptr = nsk->sk_security;
955#endif
4dc6dc71
ED
956 BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
957 sizeof(osk->sk_node) + sizeof(osk->sk_refcnt));
958 memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
959 osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
f1a6c4da
PE
960#ifdef CONFIG_SECURITY_NETWORK
961 nsk->sk_security = sptr;
962 security_sk_clone(osk, nsk);
963#endif
964}
965
2e4afe7b
PE
966static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
967 int family)
c308c1b2
PE
968{
969 struct sock *sk;
970 struct kmem_cache *slab;
971
972 slab = prot->slab;
e912b114
ED
973 if (slab != NULL) {
974 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
975 if (!sk)
976 return sk;
977 if (priority & __GFP_ZERO) {
978 /*
979 * caches using SLAB_DESTROY_BY_RCU should leave
980 * sk_node.next unmodified. Special care is taken
981 * when initializing the object to zero.
982 */
983 if (offsetof(struct sock, sk_node.next) != 0)
984 memset(sk, 0, offsetof(struct sock, sk_node.next));
985 memset(&sk->sk_node.pprev, 0,
986 prot->obj_size - offsetof(struct sock,
987 sk_node.pprev));
988 }
989 }
c308c1b2
PE
990 else
991 sk = kmalloc(prot->obj_size, priority);
992
2e4afe7b 993 if (sk != NULL) {
a98b65a3
VN
994 kmemcheck_annotate_bitfield(sk, flags);
995
2e4afe7b
PE
996 if (security_sk_alloc(sk, family, priority))
997 goto out_free;
998
999 if (!try_module_get(prot->owner))
1000 goto out_free_sec;
1001 }
1002
c308c1b2 1003 return sk;
2e4afe7b
PE
1004
1005out_free_sec:
1006 security_sk_free(sk);
1007out_free:
1008 if (slab != NULL)
1009 kmem_cache_free(slab, sk);
1010 else
1011 kfree(sk);
1012 return NULL;
c308c1b2
PE
1013}
1014
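The two-step memset in sk_prot_alloc() zeroes everything except sk_node.next, which concurrent RCU lookups may still be chasing. A toy model of the offsetof() arithmetic (the struct is invented for illustration):

```c
#include <stdio.h>
#include <stddef.h>
#include <string.h>

/* Invented stand-in for struct sock: zero all fields except the
 * RCU-visible `next` pointer, mirroring sk_prot_alloc(). */
struct obj {
	struct obj *next;	/* must survive, like sk_node.next */
	int a, b;
};

static void zero_preserving_next(struct obj *o)
{
	/* No-op here since `next` sits at offset 0, exactly the case
	 * the kernel code also guards against. */
	if (offsetof(struct obj, next) != 0)
		memset(o, 0, offsetof(struct obj, next));
	memset((char *)o + offsetof(struct obj, a), 0,
	       sizeof(*o) - offsetof(struct obj, a));
}

int main(void)
{
	struct obj o = { .next = &o, .a = 1, .b = 2 };

	zero_preserving_next(&o);
	printf("next kept: %d, a zeroed: %d\n", o.next == &o, o.a == 0);
	return 0;
}
```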
1015static void sk_prot_free(struct proto *prot, struct sock *sk)
1016{
1017 struct kmem_cache *slab;
2e4afe7b 1018 struct module *owner;
c308c1b2 1019
2e4afe7b 1020 owner = prot->owner;
c308c1b2 1021 slab = prot->slab;
2e4afe7b
PE
1022
1023 security_sk_free(sk);
c308c1b2
PE
1024 if (slab != NULL)
1025 kmem_cache_free(slab, sk);
1026 else
1027 kfree(sk);
2e4afe7b 1028 module_put(owner);
c308c1b2
PE
1029}
1030
1da177e4
LT
1031/**
1032 * sk_alloc - All socket objects are allocated here
c4ea43c5 1033 * @net: the applicable net namespace
4dc3b16b
PP
1034 * @family: protocol family
1035 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1036 * @prot: struct proto associated with this new sock instance
1da177e4 1037 */
1b8d7ae4 1038struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
6257ff21 1039 struct proto *prot)
1da177e4 1040{
c308c1b2 1041 struct sock *sk;
1da177e4 1042
154adbc8 1043 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1da177e4 1044 if (sk) {
154adbc8
PE
1045 sk->sk_family = family;
1046 /*
1047 * See comment in struct sock definition to understand
1048 * why we need sk_prot_creator -acme
1049 */
1050 sk->sk_prot = sk->sk_prot_creator = prot;
1051 sock_lock_init(sk);
3b1e0a65 1052 sock_net_set(sk, get_net(net));
d66ee058 1053 atomic_set(&sk->sk_wmem_alloc, 1);
1da177e4 1054 }
a79af59e 1055
2e4afe7b 1056 return sk;
1da177e4 1057}
2a91525c 1058EXPORT_SYMBOL(sk_alloc);
1da177e4 1059
2b85a34e 1060static void __sk_free(struct sock *sk)
1da177e4
LT
1061{
1062 struct sk_filter *filter;
1da177e4
LT
1063
1064 if (sk->sk_destruct)
1065 sk->sk_destruct(sk);
1066
fda9ef5d 1067 filter = rcu_dereference(sk->sk_filter);
1da177e4 1068 if (filter) {
309dd5fc 1069 sk_filter_uncharge(sk, filter);
fda9ef5d 1070 rcu_assign_pointer(sk->sk_filter, NULL);
1da177e4
LT
1071 }
1072
20d49473
PO
1073 sock_disable_timestamp(sk, SOCK_TIMESTAMP);
1074 sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
1da177e4
LT
1075
1076 if (atomic_read(&sk->sk_omem_alloc))
1077 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
0dc47877 1078 __func__, atomic_read(&sk->sk_omem_alloc));
1da177e4 1079
3b1e0a65 1080 put_net(sock_net(sk));
c308c1b2 1081 sk_prot_free(sk->sk_prot_creator, sk);
1da177e4 1082}
2b85a34e
ED
1083
1084void sk_free(struct sock *sk)
1085{
1086 /*
1087 * We subtract one from sk_wmem_alloc and can tell whether
1088 * some packets are still in some tx queue.
1089 * If not zero, sock_wfree() will call __sk_free(sk) later.
1090 */
1091 if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1092 __sk_free(sk);
1093}
2a91525c 1094EXPORT_SYMBOL(sk_free);
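The comment above describes the bias trick: sk_alloc() starts sk_wmem_alloc at 1, so the counter only reaches zero once sk_free() has run *and* the last in-flight skb was released by sock_wfree(). A toy model of that discipline:

```c
#include <stdio.h>

/* Toy model: wmem_alloc starts at 1 (the "socket alive" bias),
 * queued skbs add their truesize, sock_wfree() subtracts it, and
 * sk_free() drops the bias. Whoever hits zero destroys the sock. */
static long wmem_alloc = 1;

static void sub_and_test(long n, const char *who)
{
	wmem_alloc -= n;
	if (wmem_alloc == 0)
		printf("__sk_free() runs in %s\n", who);
}

int main(void)
{
	wmem_alloc += 1500;		 /* one skb still in a tx queue */
	sub_and_test(1, "sk_free");	 /* nothing: packet in flight */
	sub_and_test(1500, "sock_wfree");/* last skb freed -> destroy */
	return 0;
}
```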
1da177e4 1095
edf02087
DL
1096/*
1097 * The last sock_put should drop the reference to sk->sk_net. It has already
1098 * been dropped in sk_change_net. Taking a reference to the stopping namespace
1099 * is not an option.
1100 * Take a reference to the socket to remove it from the hash _alive_, and after that
1101 * destroy it in the context of init_net.
1102 */
1103void sk_release_kernel(struct sock *sk)
1104{
1105 if (sk == NULL || sk->sk_socket == NULL)
1106 return;
1107
1108 sock_hold(sk);
1109 sock_release(sk->sk_socket);
65a18ec5 1110 release_net(sock_net(sk));
3b1e0a65 1111 sock_net_set(sk, get_net(&init_net));
edf02087
DL
1112 sock_put(sk);
1113}
45af1754 1114EXPORT_SYMBOL(sk_release_kernel);
edf02087 1115
dd0fc66f 1116struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
87d11ceb 1117{
8fd1d178 1118 struct sock *newsk;
87d11ceb 1119
8fd1d178 1120 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
87d11ceb
ACM
1121 if (newsk != NULL) {
1122 struct sk_filter *filter;
1123
892c141e 1124 sock_copy(newsk, sk);
87d11ceb
ACM
1125
1126 /* SANITY */
3b1e0a65 1127 get_net(sock_net(newsk));
87d11ceb
ACM
1128 sk_node_init(&newsk->sk_node);
1129 sock_lock_init(newsk);
1130 bh_lock_sock(newsk);
fa438ccf 1131 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
87d11ceb
ACM
1132
1133 atomic_set(&newsk->sk_rmem_alloc, 0);
2b85a34e
ED
1134 /*
1135 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1136 */
1137 atomic_set(&newsk->sk_wmem_alloc, 1);
87d11ceb
ACM
1138 atomic_set(&newsk->sk_omem_alloc, 0);
1139 skb_queue_head_init(&newsk->sk_receive_queue);
1140 skb_queue_head_init(&newsk->sk_write_queue);
97fc2f08
CL
1141#ifdef CONFIG_NET_DMA
1142 skb_queue_head_init(&newsk->sk_async_wait_queue);
1143#endif
87d11ceb
ACM
1144
1145 rwlock_init(&newsk->sk_dst_lock);
1146 rwlock_init(&newsk->sk_callback_lock);
443aef0e
PZ
1147 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1148 af_callback_keys + newsk->sk_family,
1149 af_family_clock_key_strings[newsk->sk_family]);
87d11ceb
ACM
1150
1151 newsk->sk_dst_cache = NULL;
1152 newsk->sk_wmem_queued = 0;
1153 newsk->sk_forward_alloc = 0;
1154 newsk->sk_send_head = NULL;
87d11ceb
ACM
1155 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1156
1157 sock_reset_flag(newsk, SOCK_DONE);
1158 skb_queue_head_init(&newsk->sk_error_queue);
1159
1160 filter = newsk->sk_filter;
1161 if (filter != NULL)
1162 sk_filter_charge(newsk, filter);
1163
1164 if (unlikely(xfrm_sk_clone_policy(newsk))) {
1165 /* It is still raw copy of parent, so invalidate
1166 * destructor and make plain sk_free() */
1167 newsk->sk_destruct = NULL;
1168 sk_free(newsk);
1169 newsk = NULL;
1170 goto out;
1171 }
1172
1173 newsk->sk_err = 0;
1174 newsk->sk_priority = 0;
4dc6dc71
ED
1175 /*
1176 * Before updating sk_refcnt, we must commit prior changes to memory
1177 * (Documentation/RCU/rculist_nulls.txt for details)
1178 */
1179 smp_wmb();
87d11ceb
ACM
1180 atomic_set(&newsk->sk_refcnt, 2);
1181
1182 /*
1183 * Increment the counter in the same struct proto as the master
1184 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1185 * is the same as sk->sk_prot->socks, as this field was copied
1186 * with memcpy).
1187 *
1188 * This _changes_ the previous behaviour, where
1189 * tcp_create_openreq_child always was incrementing the
1190 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
1191 * to be taken into account in all callers. -acme
1192 */
1193 sk_refcnt_debug_inc(newsk);
972692e0 1194 sk_set_socket(newsk, NULL);
87d11ceb
ACM
1195 newsk->sk_sleep = NULL;
1196
1197 if (newsk->sk_prot->sockets_allocated)
1748376b 1198 percpu_counter_inc(newsk->sk_prot->sockets_allocated);
87d11ceb
ACM
1199 }
1200out:
1201 return newsk;
1202}
87d11ceb
ACM
1203EXPORT_SYMBOL_GPL(sk_clone);
1204
9958089a
AK
1205void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1206{
1207 __sk_dst_set(sk, dst);
1208 sk->sk_route_caps = dst->dev->features;
1209 if (sk->sk_route_caps & NETIF_F_GSO)
4fcd6b99 1210 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
9958089a 1211 if (sk_can_gso(sk)) {
82cc1a7a 1212 if (dst->header_len) {
9958089a 1213 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
82cc1a7a 1214 } else {
9958089a 1215 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
82cc1a7a
PWJ
1216 sk->sk_gso_max_size = dst->dev->gso_max_size;
1217 }
9958089a
AK
1218 }
1219}
1220EXPORT_SYMBOL_GPL(sk_setup_caps);
1221
1da177e4
LT
1222void __init sk_init(void)
1223{
4481374c 1224 if (totalram_pages <= 4096) {
1da177e4
LT
1225 sysctl_wmem_max = 32767;
1226 sysctl_rmem_max = 32767;
1227 sysctl_wmem_default = 32767;
1228 sysctl_rmem_default = 32767;
4481374c 1229 } else if (totalram_pages >= 131072) {
1da177e4
LT
1230 sysctl_wmem_max = 131071;
1231 sysctl_rmem_max = 131071;
1232 }
1233}
1234
1235/*
1236 * Simple resource managers for sockets.
1237 */
1238
1239
4ec93edb
YH
1240/*
1241 * Write buffer destructor automatically called from kfree_skb.
1da177e4
LT
1242 */
1243void sock_wfree(struct sk_buff *skb)
1244{
1245 struct sock *sk = skb->sk;
d99927f4 1246 unsigned int len = skb->truesize;
1da177e4 1247
d99927f4
ED
1248 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1249 /*
1250 * Keep a reference on sk_wmem_alloc, this will be released
1251 * after sk_write_space() call
1252 */
1253 atomic_sub(len - 1, &sk->sk_wmem_alloc);
1da177e4 1254 sk->sk_write_space(sk);
d99927f4
ED
1255 len = 1;
1256 }
2b85a34e 1257 /*
d99927f4
ED
1258 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
1259 * could not do because of in-flight packets
2b85a34e 1260 */
d99927f4 1261 if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
2b85a34e 1262 __sk_free(sk);
1da177e4 1263}
2a91525c 1264EXPORT_SYMBOL(sock_wfree);
1da177e4 1265
4ec93edb
YH
1266/*
1267 * Read buffer destructor automatically called from kfree_skb.
1da177e4
LT
1268 */
1269void sock_rfree(struct sk_buff *skb)
1270{
1271 struct sock *sk = skb->sk;
1272
1273 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
3ab224be 1274 sk_mem_uncharge(skb->sk, skb->truesize);
1da177e4 1275}
2a91525c 1276EXPORT_SYMBOL(sock_rfree);
1da177e4
LT
1277
1278
1279int sock_i_uid(struct sock *sk)
1280{
1281 int uid;
1282
1283 read_lock(&sk->sk_callback_lock);
1284 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1285 read_unlock(&sk->sk_callback_lock);
1286 return uid;
1287}
2a91525c 1288EXPORT_SYMBOL(sock_i_uid);
1da177e4
LT
1289
1290unsigned long sock_i_ino(struct sock *sk)
1291{
1292 unsigned long ino;
1293
1294 read_lock(&sk->sk_callback_lock);
1295 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1296 read_unlock(&sk->sk_callback_lock);
1297 return ino;
1298}
2a91525c 1299EXPORT_SYMBOL(sock_i_ino);
1da177e4
LT
1300
1301/*
1302 * Allocate a skb from the socket's send buffer.
1303 */
86a76caf 1304struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
dd0fc66f 1305 gfp_t priority)
1da177e4
LT
1306{
1307 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
2a91525c 1308 struct sk_buff *skb = alloc_skb(size, priority);
1da177e4
LT
1309 if (skb) {
1310 skb_set_owner_w(skb, sk);
1311 return skb;
1312 }
1313 }
1314 return NULL;
1315}
2a91525c 1316EXPORT_SYMBOL(sock_wmalloc);
1da177e4
LT
1317
1318/*
1319 * Allocate a skb from the socket's receive buffer.
4ec93edb 1320 */
86a76caf 1321struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
dd0fc66f 1322 gfp_t priority)
1da177e4
LT
1323{
1324 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1325 struct sk_buff *skb = alloc_skb(size, priority);
1326 if (skb) {
1327 skb_set_owner_r(skb, sk);
1328 return skb;
1329 }
1330 }
1331 return NULL;
1332}
1333
4ec93edb 1334/*
1da177e4 1335 * Allocate a memory block from the socket's option memory buffer.
4ec93edb 1336 */
dd0fc66f 1337void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1da177e4
LT
1338{
1339 if ((unsigned)size <= sysctl_optmem_max &&
1340 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1341 void *mem;
1342 /* First do the add, to avoid the race where another
4ec93edb 1343 * allocation could slip past the limit check while kmalloc sleeps.
1da177e4
LT
1344 */
1345 atomic_add(size, &sk->sk_omem_alloc);
1346 mem = kmalloc(size, priority);
1347 if (mem)
1348 return mem;
1349 atomic_sub(size, &sk->sk_omem_alloc);
1350 }
1351 return NULL;
1352}
2a91525c 1353EXPORT_SYMBOL(sock_kmalloc);
1da177e4
LT
1354
1355/*
1356 * Free an option memory block.
1357 */
1358void sock_kfree_s(struct sock *sk, void *mem, int size)
1359{
1360 kfree(mem);
1361 atomic_sub(size, &sk->sk_omem_alloc);
1362}
2a91525c 1363EXPORT_SYMBOL(sock_kfree_s);
1da177e4
LT
1364
1365/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1366 I think these locks should be removed for datagram sockets.
1367 */
2a91525c 1368static long sock_wait_for_wmem(struct sock *sk, long timeo)
1da177e4
LT
1369{
1370 DEFINE_WAIT(wait);
1371
1372 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1373 for (;;) {
1374 if (!timeo)
1375 break;
1376 if (signal_pending(current))
1377 break;
1378 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1379 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1380 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1381 break;
1382 if (sk->sk_shutdown & SEND_SHUTDOWN)
1383 break;
1384 if (sk->sk_err)
1385 break;
1386 timeo = schedule_timeout(timeo);
1387 }
1388 finish_wait(sk->sk_sleep, &wait);
1389 return timeo;
1390}
1391
1392
1393/*
1394 * Generic send/receive buffer handlers
1395 */
1396
4cc7f68d
HX
1397struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1398 unsigned long data_len, int noblock,
1399 int *errcode)
1da177e4
LT
1400{
1401 struct sk_buff *skb;
7d877f3b 1402 gfp_t gfp_mask;
1da177e4
LT
1403 long timeo;
1404 int err;
1405
1406 gfp_mask = sk->sk_allocation;
1407 if (gfp_mask & __GFP_WAIT)
1408 gfp_mask |= __GFP_REPEAT;
1409
1410 timeo = sock_sndtimeo(sk, noblock);
1411 while (1) {
1412 err = sock_error(sk);
1413 if (err != 0)
1414 goto failure;
1415
1416 err = -EPIPE;
1417 if (sk->sk_shutdown & SEND_SHUTDOWN)
1418 goto failure;
1419
1420 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
db38c179 1421 skb = alloc_skb(header_len, gfp_mask);
1da177e4
LT
1422 if (skb) {
1423 int npages;
1424 int i;
1425
1426 /* No pages, we're done... */
1427 if (!data_len)
1428 break;
1429
1430 npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1431 skb->truesize += data_len;
1432 skb_shinfo(skb)->nr_frags = npages;
1433 for (i = 0; i < npages; i++) {
1434 struct page *page;
1435 skb_frag_t *frag;
1436
1437 page = alloc_pages(sk->sk_allocation, 0);
1438 if (!page) {
1439 err = -ENOBUFS;
1440 skb_shinfo(skb)->nr_frags = i;
1441 kfree_skb(skb);
1442 goto failure;
1443 }
1444
1445 frag = &skb_shinfo(skb)->frags[i];
1446 frag->page = page;
1447 frag->page_offset = 0;
1448 frag->size = (data_len >= PAGE_SIZE ?
1449 PAGE_SIZE :
1450 data_len);
1451 data_len -= PAGE_SIZE;
1452 }
1453
1454 /* Full success... */
1455 break;
1456 }
1457 err = -ENOBUFS;
1458 goto failure;
1459 }
1460 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1461 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1462 err = -EAGAIN;
1463 if (!timeo)
1464 goto failure;
1465 if (signal_pending(current))
1466 goto interrupted;
1467 timeo = sock_wait_for_wmem(sk, timeo);
1468 }
1469
1470 skb_set_owner_w(skb, sk);
1471 return skb;
1472
1473interrupted:
1474 err = sock_intr_errno(timeo);
1475failure:
1476 *errcode = err;
1477 return NULL;
1478}
4cc7f68d 1479EXPORT_SYMBOL(sock_alloc_send_pskb);
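The paged allocation splits data_len into page-sized fragments, truncating the tail. A worked model of the sizing loop, assuming 4 KiB pages:

```c
#include <stdio.h>

#define PAGE_SIZE  4096UL	/* assumed page size */
#define PAGE_SHIFT 12

/* Model of the fragment sizing in sock_alloc_send_pskb(). */
int main(void)
{
	unsigned long data_len = 10000;	/* example payload */
	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
	int i;

	printf("npages = %d\n", npages);	/* 3 */
	for (i = 0; i < npages; i++) {
		unsigned long sz = data_len >= PAGE_SIZE ?
				   PAGE_SIZE : data_len;
		printf("frag[%d].size = %lu\n", i, sz); /* 4096 4096 1808 */
		data_len -= PAGE_SIZE;	/* may wrap on the last pass,
					   as in the kernel loop; unused */
	}
	return 0;
}
```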
1da177e4 1480
4ec93edb 1481struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1da177e4
LT
1482 int noblock, int *errcode)
1483{
1484 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1485}
2a91525c 1486EXPORT_SYMBOL(sock_alloc_send_skb);
1da177e4
LT
1487
1488static void __lock_sock(struct sock *sk)
1489{
1490 DEFINE_WAIT(wait);
1491
e71a4783 1492 for (;;) {
1da177e4
LT
1493 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1494 TASK_UNINTERRUPTIBLE);
1495 spin_unlock_bh(&sk->sk_lock.slock);
1496 schedule();
1497 spin_lock_bh(&sk->sk_lock.slock);
e71a4783 1498 if (!sock_owned_by_user(sk))
1da177e4
LT
1499 break;
1500 }
1501 finish_wait(&sk->sk_lock.wq, &wait);
1502}
1503
1504static void __release_sock(struct sock *sk)
1505{
1506 struct sk_buff *skb = sk->sk_backlog.head;
1507
1508 do {
1509 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1510 bh_unlock_sock(sk);
1511
1512 do {
1513 struct sk_buff *next = skb->next;
1514
1515 skb->next = NULL;
c57943a1 1516 sk_backlog_rcv(sk, skb);
1da177e4
LT
1517
1518 /*
1519 * We are in process context here with softirqs
1520 * disabled, use cond_resched_softirq() to preempt.
1521 * This is safe to do because we've taken the backlog
1522 * queue private:
1523 */
1524 cond_resched_softirq();
1525
1526 skb = next;
1527 } while (skb != NULL);
1528
1529 bh_lock_sock(sk);
e71a4783 1530 } while ((skb = sk->sk_backlog.head) != NULL);
1da177e4
LT
1531}
1532
1533/**
1534 * sk_wait_data - wait for data to arrive at sk_receive_queue
4dc3b16b
PP
1535 * @sk: sock to wait on
1536 * @timeo: for how long
1da177e4
LT
1537 *
1538 * Now socket state including sk->sk_err is changed only under lock,
1539 * hence we may omit checks after joining wait queue.
1540 * We check receive queue before schedule() only as optimization;
1541 * it is very likely that release_sock() added new data.
1542 */
1543int sk_wait_data(struct sock *sk, long *timeo)
1544{
1545 int rc;
1546 DEFINE_WAIT(wait);
1547
1548 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1549 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1550 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1551 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1552 finish_wait(sk->sk_sleep, &wait);
1553 return rc;
1554}
1da177e4
LT
1555EXPORT_SYMBOL(sk_wait_data);
1556
3ab224be
HA
1557/**
1558 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
1559 * @sk: socket
1560 * @size: memory size to allocate
1561 * @kind: allocation type
1562 *
1563 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
1564 * rmem allocation. This function assumes that protocols which have
1565 * memory_pressure use sk_wmem_queued as write buffer accounting.
1566 */
1567int __sk_mem_schedule(struct sock *sk, int size, int kind)
1568{
1569 struct proto *prot = sk->sk_prot;
1570 int amt = sk_mem_pages(size);
1571 int allocated;
1572
1573 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
1574 allocated = atomic_add_return(amt, prot->memory_allocated);
1575
1576 /* Under limit. */
1577 if (allocated <= prot->sysctl_mem[0]) {
1578 if (prot->memory_pressure && *prot->memory_pressure)
1579 *prot->memory_pressure = 0;
1580 return 1;
1581 }
1582
1583 /* Under pressure. */
1584 if (allocated > prot->sysctl_mem[1])
1585 if (prot->enter_memory_pressure)
5c52ba17 1586 prot->enter_memory_pressure(sk);
3ab224be
HA
1587
1588 /* Over hard limit. */
1589 if (allocated > prot->sysctl_mem[2])
1590 goto suppress_allocation;
1591
1592 /* guarantee minimum buffer size under pressure */
1593 if (kind == SK_MEM_RECV) {
1594 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
1595 return 1;
1596 } else { /* SK_MEM_SEND */
1597 if (sk->sk_type == SOCK_STREAM) {
1598 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
1599 return 1;
1600 } else if (atomic_read(&sk->sk_wmem_alloc) <
1601 prot->sysctl_wmem[0])
1602 return 1;
1603 }
1604
1605 if (prot->memory_pressure) {
1748376b
ED
1606 int alloc;
1607
1608 if (!*prot->memory_pressure)
1609 return 1;
1610 alloc = percpu_counter_read_positive(prot->sockets_allocated);
1611 if (prot->sysctl_mem[2] > alloc *
3ab224be
HA
1612 sk_mem_pages(sk->sk_wmem_queued +
1613 atomic_read(&sk->sk_rmem_alloc) +
1614 sk->sk_forward_alloc))
1615 return 1;
1616 }
1617
1618suppress_allocation:
1619
1620 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
1621 sk_stream_moderate_sndbuf(sk);
1622
1623 /* Fail only if socket is _under_ its sndbuf.
1624 * In this case we cannot block, so that we have to fail.
1625 */
1626 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
1627 return 1;
1628 }
1629
1630 /* Alas. Undo changes. */
1631 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
1632 atomic_sub(amt, prot->memory_allocated);
1633 return 0;
1634}
3ab224be
HA
1635EXPORT_SYMBOL(__sk_mem_schedule);
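Charging happens in whole SK_MEM_QUANTUM units; a worked model of the bookkeeping, assuming a 4096-byte quantum (it is PAGE_SIZE in the kernel):

```c
#include <stdio.h>

#define SK_MEM_QUANTUM 4096	/* assumed: PAGE_SIZE on most builds */

/* Model of the charge in __sk_mem_schedule(): round the request up
 * to whole quanta, charge the protocol-wide page counter, credit
 * the per-socket forward_alloc pool in bytes. */
static int sk_mem_pages(int size)
{
	return (size + SK_MEM_QUANTUM - 1) / SK_MEM_QUANTUM;
}

int main(void)
{
	int memory_allocated = 0;	/* protocol-wide, in quanta */
	int forward_alloc = 0;		/* per socket, in bytes */
	int amt = sk_mem_pages(1500);	/* one MTU-sized skb */

	forward_alloc += amt * SK_MEM_QUANTUM;
	memory_allocated += amt;
	printf("amt=%d quanta, forward_alloc=%d bytes, allocated=%d\n",
	       amt, forward_alloc, memory_allocated); /* 1, 4096, 1 */
	return 0;
}
```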
1636
1637/**
1638 * __sk_mem_reclaim - reclaim memory_allocated
1639 * @sk: socket
1640 */
1641void __sk_mem_reclaim(struct sock *sk)
1642{
1643 struct proto *prot = sk->sk_prot;
1644
680a5a50 1645 atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
3ab224be
HA
1646 prot->memory_allocated);
1647 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
1648
1649 if (prot->memory_pressure && *prot->memory_pressure &&
1650 (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
1651 *prot->memory_pressure = 0;
1652}
3ab224be
HA
1653EXPORT_SYMBOL(__sk_mem_reclaim);
1654
1655
1da177e4
LT
1656/*
1657 * Set of default routines for initialising struct proto_ops when
1658 * the protocol does not support a particular function. In certain
1659 * cases where it makes no sense for a protocol to have a "do nothing"
1660 * function, some default processing is provided.
1661 */
1662
1663int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1664{
1665 return -EOPNOTSUPP;
1666}
2a91525c 1667EXPORT_SYMBOL(sock_no_bind);
1da177e4 1668
4ec93edb 1669int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1da177e4
LT
1670 int len, int flags)
1671{
1672 return -EOPNOTSUPP;
1673}
2a91525c 1674EXPORT_SYMBOL(sock_no_connect);
1da177e4
LT
1675
1676int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1677{
1678 return -EOPNOTSUPP;
1679}
2a91525c 1680EXPORT_SYMBOL(sock_no_socketpair);
1da177e4
LT
1681
1682int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1683{
1684 return -EOPNOTSUPP;
1685}
2a91525c 1686EXPORT_SYMBOL(sock_no_accept);
1da177e4 1687
4ec93edb 1688int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1da177e4
LT
1689 int *len, int peer)
1690{
1691 return -EOPNOTSUPP;
1692}
2a91525c 1693EXPORT_SYMBOL(sock_no_getname);
1da177e4 1694
2a91525c 1695unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
1da177e4
LT
1696{
1697 return 0;
1698}
2a91525c 1699EXPORT_SYMBOL(sock_no_poll);
1da177e4
LT
1700
1701int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1702{
1703 return -EOPNOTSUPP;
1704}
2a91525c 1705EXPORT_SYMBOL(sock_no_ioctl);
1da177e4
LT
1706
1707int sock_no_listen(struct socket *sock, int backlog)
1708{
1709 return -EOPNOTSUPP;
1710}
2a91525c 1711EXPORT_SYMBOL(sock_no_listen);
1da177e4
LT
1712
1713int sock_no_shutdown(struct socket *sock, int how)
1714{
1715 return -EOPNOTSUPP;
1716}
2a91525c 1717EXPORT_SYMBOL(sock_no_shutdown);
1da177e4
LT
1718
1719int sock_no_setsockopt(struct socket *sock, int level, int optname,
b7058842 1720 char __user *optval, unsigned int optlen)
1da177e4
LT
1721{
1722 return -EOPNOTSUPP;
1723}
2a91525c 1724EXPORT_SYMBOL(sock_no_setsockopt);
1da177e4
LT
1725
1726int sock_no_getsockopt(struct socket *sock, int level, int optname,
1727 char __user *optval, int __user *optlen)
1728{
1729 return -EOPNOTSUPP;
1730}
2a91525c 1731EXPORT_SYMBOL(sock_no_getsockopt);
1da177e4
LT
1732
1733int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1734 size_t len)
1735{
1736 return -EOPNOTSUPP;
1737}
2a91525c 1738EXPORT_SYMBOL(sock_no_sendmsg);
1da177e4
LT
1739
1740int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1741 size_t len, int flags)
1742{
1743 return -EOPNOTSUPP;
1744}
2a91525c 1745EXPORT_SYMBOL(sock_no_recvmsg);
1da177e4
LT
1746
1747int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1748{
1749 /* Mirror missing mmap method error code */
1750 return -ENODEV;
1751}
2a91525c 1752EXPORT_SYMBOL(sock_no_mmap);
1da177e4
LT
1753
1754ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1755{
1756 ssize_t res;
1757 struct msghdr msg = {.msg_flags = flags};
1758 struct kvec iov;
1759 char *kaddr = kmap(page);
1760 iov.iov_base = kaddr + offset;
1761 iov.iov_len = size;
1762 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1763 kunmap(page);
1764 return res;
1765}
2a91525c 1766EXPORT_SYMBOL(sock_no_sendpage);
1da177e4
LT
1767
1768/*
1769 * Default Socket Callbacks
1770 */
1771
1772static void sock_def_wakeup(struct sock *sk)
1773{
1774 read_lock(&sk->sk_callback_lock);
a57de0b4 1775 if (sk_has_sleeper(sk))
1da177e4
LT
1776 wake_up_interruptible_all(sk->sk_sleep);
1777 read_unlock(&sk->sk_callback_lock);
1778}
1779
1780static void sock_def_error_report(struct sock *sk)
1781{
1782 read_lock(&sk->sk_callback_lock);
a57de0b4 1783 if (sk_has_sleeper(sk))
37e5540b 1784 wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
8d8ad9d7 1785 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
1da177e4
LT
1786 read_unlock(&sk->sk_callback_lock);
1787}
1788
1789static void sock_def_readable(struct sock *sk, int len)
1790{
1791 read_lock(&sk->sk_callback_lock);
a57de0b4 1792 if (sk_has_sleeper(sk))
37e5540b
DL
1793 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
1794 POLLRDNORM | POLLRDBAND);
8d8ad9d7 1795 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
1da177e4
LT
1796 read_unlock(&sk->sk_callback_lock);
1797}
1798
1799static void sock_def_write_space(struct sock *sk)
1800{
1801 read_lock(&sk->sk_callback_lock);
1802
1803 /* Do not wake up a writer until he can make "significant"
1804 * progress. --DaveM
1805 */
e71a4783 1806 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
a57de0b4 1807 if (sk_has_sleeper(sk))
37e5540b
DL
1808 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
1809 POLLWRNORM | POLLWRBAND);
1da177e4
LT
1810
1811 /* Should agree with poll, otherwise some programs break */
1812 if (sock_writeable(sk))
8d8ad9d7 1813 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4
LT
1814 }
1815
1816 read_unlock(&sk->sk_callback_lock);
1817}
1818
1819static void sock_def_destruct(struct sock *sk)
1820{
a51482bd 1821 kfree(sk->sk_protinfo);
1da177e4
LT
1822}
1823
1824void sk_send_sigurg(struct sock *sk)
1825{
1826 if (sk->sk_socket && sk->sk_socket->file)
1827 if (send_sigurg(&sk->sk_socket->file->f_owner))
8d8ad9d7 1828 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
1da177e4 1829}
2a91525c 1830EXPORT_SYMBOL(sk_send_sigurg);
1da177e4
LT
1831
1832void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1833 unsigned long expires)
1834{
1835 if (!mod_timer(timer, expires))
1836 sock_hold(sk);
1837}
1da177e4
LT
1838EXPORT_SYMBOL(sk_reset_timer);
1839
1840void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1841{
1842 if (timer_pending(timer) && del_timer(timer))
1843 __sock_put(sk);
1844}
1da177e4
LT
1845EXPORT_SYMBOL(sk_stop_timer);

void sock_init_data(struct socket *sock, struct sock *sk)
{
        skb_queue_head_init(&sk->sk_receive_queue);
        skb_queue_head_init(&sk->sk_write_queue);
        skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
        skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

        sk->sk_send_head = NULL;

        init_timer(&sk->sk_timer);

        sk->sk_allocation = GFP_KERNEL;
        sk->sk_rcvbuf = sysctl_rmem_default;
        sk->sk_sndbuf = sysctl_wmem_default;
        sk->sk_state = TCP_CLOSE;
        sk_set_socket(sk, sock);

        sock_set_flag(sk, SOCK_ZAPPED);

        if (sock) {
                sk->sk_type = sock->type;
                sk->sk_sleep = &sock->wait;
                sock->sk = sk;
        } else
                sk->sk_sleep = NULL;

        rwlock_init(&sk->sk_dst_lock);
        rwlock_init(&sk->sk_callback_lock);
        lockdep_set_class_and_name(&sk->sk_callback_lock,
                        af_callback_keys + sk->sk_family,
                        af_family_clock_key_strings[sk->sk_family]);

        sk->sk_state_change = sock_def_wakeup;
        sk->sk_data_ready = sock_def_readable;
        sk->sk_write_space = sock_def_write_space;
        sk->sk_error_report = sock_def_error_report;
        sk->sk_destruct = sock_def_destruct;

        sk->sk_sndmsg_page = NULL;
        sk->sk_sndmsg_off = 0;

        sk->sk_peercred.pid = 0;
        sk->sk_peercred.uid = -1;
        sk->sk_peercred.gid = -1;
        sk->sk_write_pending = 0;
        sk->sk_rcvlowat = 1;
        sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
        sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

        sk->sk_stamp = ktime_set(-1L, 0);

        /*
         * Before updating sk_refcnt, we must commit prior changes to memory
         * (Documentation/RCU/rculist_nulls.txt for details)
         */
        smp_wmb();
        atomic_set(&sk->sk_refcnt, 1);
        atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);
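
/*
 * Example (editorial sketch): a protocol's create/init path typically
 * calls sock_init_data() and then overrides the default callbacks it
 * cares about. The callback names below are hypothetical placeholders.
 *
 *      sock_init_data(sock, sk);
 *      sk->sk_data_ready = example_data_ready;
 *      sk->sk_destruct   = example_destruct;
 */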

void lock_sock_nested(struct sock *sk, int subclass)
{
        might_sleep();
        spin_lock_bh(&sk->sk_lock.slock);
        if (sk->sk_lock.owned)
                __lock_sock(sk);
        sk->sk_lock.owned = 1;
        spin_unlock(&sk->sk_lock.slock);
        /*
         * The sk_lock has mutex_lock() semantics here:
         */
        mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
        local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);

void release_sock(struct sock *sk)
{
        /*
         * The sk_lock has mutex_unlock() semantics:
         */
        mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

        spin_lock_bh(&sk->sk_lock.slock);
        if (sk->sk_backlog.tail)
                __release_sock(sk);
        sk->sk_lock.owned = 0;
        if (waitqueue_active(&sk->sk_lock.wq))
                wake_up(&sk->sk_lock.wq);
        spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
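
/*
 * Example (editorial sketch): process context takes the socket lock
 * around state changes with mutex-like semantics; packets arriving in
 * softirq context meanwhile land on the backlog and are flushed by
 * __release_sock() on the way out.
 *
 *      lock_sock(sk);
 *      ... modify socket state ...
 *      release_sock(sk);
 */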

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
        struct timeval tv;
        if (!sock_flag(sk, SOCK_TIMESTAMP))
                sock_enable_timestamp(sk, SOCK_TIMESTAMP);
        tv = ktime_to_timeval(sk->sk_stamp);
        if (tv.tv_sec == -1)
                return -ENOENT;
        if (tv.tv_sec == 0) {
                sk->sk_stamp = ktime_get_real();
                tv = ktime_to_timeval(sk->sk_stamp);
        }
        return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
        struct timespec ts;
        if (!sock_flag(sk, SOCK_TIMESTAMP))
                sock_enable_timestamp(sk, SOCK_TIMESTAMP);
        ts = ktime_to_timespec(sk->sk_stamp);
        if (ts.tv_sec == -1)
                return -ENOENT;
        if (ts.tv_sec == 0) {
                sk->sk_stamp = ktime_get_real();
                ts = ktime_to_timespec(sk->sk_stamp);
        }
        return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);
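
/*
 * Editorial note: these two helpers back the SIOCGSTAMP and
 * SIOCGSTAMPNS ioctls; protocols usually delegate straight to them
 * from their ->ioctl() handler, e.g. (sketch, "arg" being the user
 * pointer passed to the ioctl):
 *
 *      case SIOCGSTAMP:
 *              return sock_get_timestamp(sk, (struct timeval __user *)arg);
 */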

void sock_enable_timestamp(struct sock *sk, int flag)
{
        if (!sock_flag(sk, flag)) {
                sock_set_flag(sk, flag);
                /*
                 * we just set one of the two flags which require net
                 * time stamping, but time stamping might have been on
                 * already because of the other one
                 */
                if (!sock_flag(sk,
                                flag == SOCK_TIMESTAMP ?
                                SOCK_TIMESTAMPING_RX_SOFTWARE :
                                SOCK_TIMESTAMP))
                        net_enable_timestamp();
        }
}
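
/*
 * Editorial note: net_enable_timestamp() maintains a global count of
 * timestamping users. The check above bumps it only when the first of
 * the two flags is set on this socket, so setting SOCK_TIMESTAMP while
 * SOCK_TIMESTAMPING_RX_SOFTWARE is already set (or vice versa) does
 * not count the same socket twice.
 */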

/*
 *      Get a socket option on a socket.
 *
 *      FIX: POSIX 1003.1g is very ambiguous here. It states that
 *      asynchronous errors should be reported by getsockopt. We assume
 *      this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        if (sk->sk_prot->compat_getsockopt != NULL)
                return sk->sk_prot->compat_getsockopt(sk, level, optname,
                                                      optval, optlen);
        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
                        struct msghdr *msg, size_t size, int flags)
{
        struct sock *sk = sock->sk;
        int addr_len = 0;
        int err;

        err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
                                   flags & ~MSG_DONTWAIT, &addr_len);
        if (err >= 0)
                msg->msg_namelen = addr_len;
        return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *      Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
                           char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;

        return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;

        if (sk->sk_prot->compat_setsockopt != NULL)
                return sk->sk_prot->compat_setsockopt(sk, level, optname,
                                                      optval, optlen);
        return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif
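
/*
 * Example (editorial sketch): address families whose struct proto
 * implements {get,set}sockopt/recvmsg can reuse the common wrappers
 * above in their proto_ops table instead of open-coding the
 * delegation. "example_stream_ops" is a hypothetical name.
 *
 *      static const struct proto_ops example_stream_ops = {
 *              .setsockopt = sock_common_setsockopt,
 *              .getsockopt = sock_common_getsockopt,
 *              .recvmsg    = sock_common_recvmsg,
 *              ...
 *      };
 */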

void sk_common_release(struct sock *sk)
{
        if (sk->sk_prot->destroy)
                sk->sk_prot->destroy(sk);

        /*
         * Observation: when sk_common_release is called, processes have
         * no access to the socket, but the network still does.
         * Step one, detach it from networking:
         *
         * A. Remove from hash tables.
         */

        sk->sk_prot->unhash(sk);

        /*
         * At this point the socket cannot receive new packets, but it is
         * possible that some packets are in flight because some CPU runs
         * the receiver and did the hash table lookup before we unhashed
         * the socket. They will reach the receive queue and be purged by
         * the socket destructor.
         *
         * Also we still have packets pending on the receive queue and
         * probably our own packets waiting in device queues. sock_destroy
         * will drain the receive queue, but transmitted packets will
         * delay socket destruction until the last reference is released.
         */

        sock_orphan(sk);

        xfrm_sk_free_policy(sk);

        sk_refcnt_debug_release(sk);
        sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
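
/*
 * Example (editorial sketch): a protocol's close/destroy path can
 * funnel into this helper once its own teardown is done:
 *
 *      static void example_close(struct sock *sk, long timeout)
 *      {
 *              ... protocol-private teardown ...
 *              sk_common_release(sk);
 *      }
 *
 * "example_close" is a hypothetical handler name.
 */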

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR  64      /* should be enough for the first time */
struct prot_inuse {
        int val[PROTO_INUSE_NR];
};

static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);

#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
        int cpu = smp_processor_id();
        per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
        int cpu, idx = prot->inuse_idx;
        int res = 0;

        for_each_possible_cpu(cpu)
                res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];

        return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);

static int sock_inuse_init_net(struct net *net)
{
        net->core.inuse = alloc_percpu(struct prot_inuse);
        return net->core.inuse ? 0 : -ENOMEM;
}

static void sock_inuse_exit_net(struct net *net)
{
        free_percpu(net->core.inuse);
}

static struct pernet_operations net_inuse_ops = {
        .init = sock_inuse_init_net,
        .exit = sock_inuse_exit_net,
};

static __init int net_inuse_init(void)
{
        if (register_pernet_subsys(&net_inuse_ops))
                panic("Cannot initialize net inuse counters");

        return 0;
}

core_initcall(net_inuse_init);
#else
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
        __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
        int cpu, idx = prot->inuse_idx;
        int res = 0;

        for_each_possible_cpu(cpu)
                res += per_cpu(prot_inuse, cpu).val[idx];

        return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
#endif

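/*
 * Example (editorial sketch): protocols bump these per-cpu counters
 * from their ->hash()/->unhash() callbacks so /proc/net/protocols can
 * show a live socket count without a global atomic:
 *
 *      sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);      in ->hash()
 *      sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);     in ->unhash()
 */
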
static void assign_proto_idx(struct proto *prot)
{
        prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

        if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
                printk(KERN_ERR "PROTO_INUSE_NR exhausted\n");
                return;
        }

        set_bit(prot->inuse_idx, proto_inuse_idx);
}

static void release_proto_idx(struct proto *prot)
{
        if (prot->inuse_idx != PROTO_INUSE_NR - 1)
                clear_bit(prot->inuse_idx, proto_inuse_idx);
}
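
/*
 * Editorial note: inuse_idx slots are handed out from a bitmap, with
 * find_first_zero_bit() returning the lowest free index. The last slot
 * (PROTO_INUSE_NR - 1) acts as an overflow sentinel: it is never
 * marked used, so any protocol that fails to get a real slot ends up
 * sharing it, and release_proto_idx() skips freeing it.
 */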
#else
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
#endif

int proto_register(struct proto *prot, int alloc_slab)
{
        if (alloc_slab) {
                prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
                                        SLAB_HWCACHE_ALIGN | prot->slab_flags,
                                        NULL);

                if (prot->slab == NULL) {
                        printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
                               prot->name);
                        goto out;
                }

                if (prot->rsk_prot != NULL) {
                        static const char mask[] = "request_sock_%s";

                        prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
                        if (prot->rsk_prot->slab_name == NULL)
                                goto out_free_sock_slab;

                        sprintf(prot->rsk_prot->slab_name, mask, prot->name);
                        prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
                                                prot->rsk_prot->obj_size, 0,
                                                SLAB_HWCACHE_ALIGN, NULL);

                        if (prot->rsk_prot->slab == NULL) {
                                printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
                                       prot->name);
                                goto out_free_request_sock_slab_name;
                        }
                }

                if (prot->twsk_prot != NULL) {
                        static const char mask[] = "tw_sock_%s";

                        prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);

                        if (prot->twsk_prot->twsk_slab_name == NULL)
                                goto out_free_request_sock_slab;

                        sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name);
                        prot->twsk_prot->twsk_slab =
                                kmem_cache_create(prot->twsk_prot->twsk_slab_name,
                                                  prot->twsk_prot->twsk_obj_size,
                                                  0,
                                                  SLAB_HWCACHE_ALIGN |
                                                        prot->slab_flags,
                                                  NULL);
                        if (prot->twsk_prot->twsk_slab == NULL)
                                goto out_free_timewait_sock_slab_name;
                }
        }

        write_lock(&proto_list_lock);
        list_add(&prot->node, &proto_list);
        assign_proto_idx(prot);
        write_unlock(&proto_list_lock);
        return 0;

out_free_timewait_sock_slab_name:
        kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
        if (prot->rsk_prot && prot->rsk_prot->slab) {
                kmem_cache_destroy(prot->rsk_prot->slab);
                prot->rsk_prot->slab = NULL;
        }
out_free_request_sock_slab_name:
        kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
        kmem_cache_destroy(prot->slab);
        prot->slab = NULL;
out:
        return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);
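
/*
 * Example (editorial sketch): a protocol module registers its struct
 * proto at init time and unregisters on exit; the names and the sock
 * type below are hypothetical placeholders.
 *
 *      static struct proto example_proto = {
 *              .name     = "EXAMPLE",
 *              .owner    = THIS_MODULE,
 *              .obj_size = sizeof(struct example_sock),
 *      };
 *
 *      err = proto_register(&example_proto, 1);        module init
 *      proto_unregister(&example_proto);               module exit
 */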

void proto_unregister(struct proto *prot)
{
        write_lock(&proto_list_lock);
        release_proto_idx(prot);
        list_del(&prot->node);
        write_unlock(&proto_list_lock);

        if (prot->slab != NULL) {
                kmem_cache_destroy(prot->slab);
                prot->slab = NULL;
        }

        if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
                kmem_cache_destroy(prot->rsk_prot->slab);
                kfree(prot->rsk_prot->slab_name);
                prot->rsk_prot->slab = NULL;
        }

        if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
                kmem_cache_destroy(prot->twsk_prot->twsk_slab);
                kfree(prot->twsk_prot->twsk_slab_name);
                prot->twsk_prot->twsk_slab = NULL;
        }
}
EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(proto_list_lock)
{
        read_lock(&proto_list_lock);
        return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
        __releases(proto_list_lock)
{
        read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
        return method == NULL ? 'n' : 'y';
}
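
/*
 * Editorial note: each 'y'/'n' produced here fills one of the
 * two-letter columns in the /proc/net/protocols header printed below
 * ("cl co di ac ..."), flagging which struct proto methods a protocol
 * actually implements.
 */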

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
        seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
                        "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
                   proto->name,
                   proto->obj_size,
                   sock_prot_inuse_get(seq_file_net(seq), proto),
                   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
                   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
                   proto->max_header,
                   proto->slab == NULL ? "no" : "yes",
                   module_name(proto->owner),
                   proto_method_implemented(proto->close),
                   proto_method_implemented(proto->connect),
                   proto_method_implemented(proto->disconnect),
                   proto_method_implemented(proto->accept),
                   proto_method_implemented(proto->ioctl),
                   proto_method_implemented(proto->init),
                   proto_method_implemented(proto->destroy),
                   proto_method_implemented(proto->shutdown),
                   proto_method_implemented(proto->setsockopt),
                   proto_method_implemented(proto->getsockopt),
                   proto_method_implemented(proto->sendmsg),
                   proto_method_implemented(proto->recvmsg),
                   proto_method_implemented(proto->sendpage),
                   proto_method_implemented(proto->bind),
                   proto_method_implemented(proto->backlog_rcv),
                   proto_method_implemented(proto->hash),
                   proto_method_implemented(proto->unhash),
                   proto_method_implemented(proto->get_port),
                   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
        if (v == &proto_list)
                seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
                           "protocol",
                           "size",
                           "sockets",
                           "memory",
                           "press",
                           "maxhdr",
                           "slab",
                           "module",
                           "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
        else
                proto_seq_printf(seq, list_entry(v, struct proto, node));
        return 0;
}

static const struct seq_operations proto_seq_ops = {
        .start  = proto_seq_start,
        .next   = proto_seq_next,
        .stop   = proto_seq_stop,
        .show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &proto_seq_ops,
                            sizeof(struct seq_net_private));
}

static const struct file_operations proto_seq_fops = {
        .owner          = THIS_MODULE,
        .open           = proto_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_net,
};

static __net_init int proto_init_net(struct net *net)
{
        if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
                return -ENOMEM;

        return 0;
}

static __net_exit void proto_exit_net(struct net *net)
{
        proc_net_remove(net, "protocols");
}


static __net_initdata struct pernet_operations proto_net_ops = {
        .init = proto_init_net,
        .exit = proto_exit_net,
};

static int __init proto_init(void)
{
        return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);

#endif /* PROC_FS */