net: Avoid compiler warning for mmsghdr when CONFIG_COMPAT is not selected
net/core/sock.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
  "sk_lock-AF_IEEE802154",
  "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
  "slock-AF_IEEE802154",
  "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
  "clock-AF_IEEE802154",
  "clock-AF_MAX"
};

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

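/*
 * Worked example (illustrative; exact numbers depend on the build): with
 * struct sk_buff around 240 bytes on a 64-bit configuration,
 * _SK_MEM_OVERHEAD comes to roughly 496 bytes, so the default limit is
 * about 496 * 256 = 126976 bytes (~124 KB) of true memory per socket,
 * i.e. headroom for 256 packets of metadata plus payload.
 */
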
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
			       "tries to set negative timeout\n",
			       current->comm, task_pid_nr(current));
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}

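/*
 * Example (illustrative): the timeout written via SO_RCVTIMEO/SO_SNDTIMEO
 * lands here, and the microseconds are rounded up to whole jiffies, so
 * with HZ=250 a request of 1.5 ms becomes 1 jiffy (4 ms). A hypothetical
 * userspace caller:
 *
 *	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 */
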
static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk, int flag)
{
	if (sock_flag(sk, flag)) {
		sock_reset_flag(sk, flag);
		if (!sock_flag(sk, SOCK_TIMESTAMP) &&
		    !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
			net_disable_timestamp();
		}
	}
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;
	int skb_len;
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
	   number of warnings when compiling with -W --ANK
	 */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		return -ENOMEM;
	}

	err = sk_filter(sk, skb);
	if (err)
		return err;

	if (!sk_rmem_schedule(sk, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue.  Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	spin_lock_irqsave(&list->lock, flags);
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
	return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk->sk_dst_cache;

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk->sk_dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!capable(CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	if (devname[0] == '\0') {
		index = 0;
	} else {
		struct net_device *dev = dev_get_by_name(net, devname);

		ret = -ENODEV;
		if (!dev)
			goto out;

		index = dev->ifindex;
		dev_put(dev);
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}

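/*
 * Example (illustrative, userspace side): binding a socket to a single
 * interface; the caller needs CAP_NET_RAW, and passing "" unbinds again:
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
 *		       "eth0", strlen("eth0")) < 0)
 *		perror("SO_BINDTODEVICE");
 */
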
static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (valbool)
		sock_set_flag(sk, bit);
	else
		sock_reset_flag(sk, bit);
}

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

	if (optname == SO_BINDTODEVICE)
		return sock_bindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = valbool;
		break;
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this. BSD doesn't, and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints */

		if (val > sysctl_wmem_max)
			val = sysctl_wmem_max;
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		if ((val * 2) < SOCK_MIN_SNDBUF)
			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
		else
			sk->sk_sndbuf = val * 2;

		/*
		 *	Wake up sending tasks if we
		 *	upped the value.
		 */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this. BSD doesn't, and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints */

		if (val > sysctl_rmem_max)
			val = sysctl_rmem_max;
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead. Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		if ((val * 2) < SOCK_MIN_RCVBUF)
			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
		else
			sk->sk_rcvbuf = val * 2;
		break;

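	/*
	 * Example (illustrative): because of the doubling above, a
	 * hypothetical application requesting a 64 KB receive buffer will
	 * read back 128 KB, and roughly half of that headroom goes to
	 * struct sk_buff overhead rather than payload:
	 *
	 *	int val = 65536, out;
	 *	socklen_t len = sizeof(out);
	 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
	 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
	 *	// out is now 131072
	 */
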
	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool) {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_TIMESTAMPING:
		if (val & ~SOF_TIMESTAMPING_MASK) {
			ret = -EINVAL;
			break;
		}
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
				  val & SOF_TIMESTAMPING_TX_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
				  val & SOF_TIMESTAMPING_TX_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
				  val & SOF_TIMESTAMPING_RX_HARDWARE);
		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
			sock_enable_timestamp(sk,
					      SOCK_TIMESTAMPING_RX_SOFTWARE);
		else
			sock_disable_timestamp(sk,
					       SOCK_TIMESTAMPING_RX_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
				  val & SOF_TIMESTAMPING_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
				  val & SOF_TIMESTAMPING_SYS_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
				  val & SOF_TIMESTAMPING_RAW_HARDWARE);
		break;

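	/*
	 * Example (illustrative): a hypothetical application asking for
	 * software transmit timestamps plus their delivery combines the
	 * flags in a single call; bits outside SOF_TIMESTAMPING_MASK are
	 * rejected above:
	 *
	 *	int val = SOF_TIMESTAMPING_TX_SOFTWARE |
	 *		  SOF_TIMESTAMPING_SOFTWARE;
	 *	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
	 *		   &val, sizeof(val));
	 */
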
	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;
	case SO_MARK:
		if (!capable(CAP_NET_ADMIN))
			ret = -EPERM;
		else
			sk->sk_mark = val;
		break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
	case SO_RXQ_OVFL:
		if (valbool)
			sock_set_flag(sk, SOCK_RXQ_OVFL);
		else
			sock_reset_flag(sk, SOCK_RXQ_OVFL);
		break;
	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
EXPORT_SYMBOL(sock_setsockopt);


int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = !!sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_KEEPALIVE:
		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_PROTOCOL:
		v.val = sk->sk_protocol;
		break;

	case SO_DOMAIN:
		v.val = sk->sk_family;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = !!sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv		= sizeof(v.ling);
		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger	= sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPING:
		v.val = 0;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
			v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERCRED:
		if (len > sizeof(sk->sk_peercred))
			len = sizeof(sk->sk_peercred);
		if (copy_to_user(optval, &sk->sk_peercred, len))
			return -EFAULT;
		goto lenout;

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	case SO_MARK:
		v.val = sk->sk_mark;
		break;

	case SO_RXQ_OVFL:
		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
		break;

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

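/*
 * Example (illustrative, userspace side): SO_ERROR is read-and-clear
 * (sock_error() above uses xchg), so a hypothetical poll()-driven
 * non-blocking connect collects the pending error exactly once:
 *
 *	int err = 0;
 *	socklen_t len = sizeof(err);
 *	getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *	if (err)
 *		fprintf(stderr, "connect: %s\n", strerror(err));
 */
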
/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}

/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarily, because of RCU lookups. sk_node should also be left as is.
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
	void *sptr = nsk->sk_security;
#endif
	BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
		     sizeof(osk->sk_node) + sizeof(osk->sk_refcnt));
	memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
	       osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
#ifdef CONFIG_SECURITY_NETWORK
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}

static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
		int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (priority & __GFP_ZERO) {
			/*
			 * caches using SLAB_DESTROY_BY_RCU should leave
			 * sk_node.next unmodified. Special care is taken
			 * when initializing object to zero.
			 */
			if (offsetof(struct sock, sk_node.next) != 0)
				memset(sk, 0, offsetof(struct sock, sk_node.next));
			memset(&sk->sk_node.pprev, 0,
			       prot->obj_size - offsetof(struct sock,
							 sk_node.pprev));
		}
	}
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk != NULL) {
		kmemcheck_annotate_bitfield(sk, flags);

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

static void sk_prot_free(struct proto *prot, struct sock *sk)
{
	struct kmem_cache *slab;
	struct module *owner;

	owner = prot->owner;
	slab = prot->slab;

	security_sk_free(sk);
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

/**
 *	sk_alloc - All socket objects are allocated here
 *	@net: the applicable net namespace
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sock_lock_init(sk);
		sock_net_set(sk, get_net(net));
		atomic_set(&sk->sk_wmem_alloc, 1);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);

static void __sk_free(struct sock *sk)
{
	struct sk_filter *filter;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		rcu_assign_pointer(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SOCK_TIMESTAMP);
	sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __func__, atomic_read(&sk->sk_omem_alloc));

	put_net(sock_net(sk));
	sk_prot_free(sk->sk_prot_creator, sk);
}

void sk_free(struct sock *sk)
{
	/*
	 * We subtract one from sk_wmem_alloc and can know if
	 * some packets are still in some tx queue.
	 * If not null, sock_wfree() will call __sk_free(sk) later
	 */
	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sk_free);

/*
 * Last sock_put should drop the reference to sk->sk_net. It has already
 * been dropped in sk_change_net. Taking a reference to the stopping
 * namespace is not an option.
 * Take a reference to the socket to remove it from the hash _alive_ and
 * after that destroy it in the context of init_net.
 */
void sk_release_kernel(struct sock *sk)
{
	if (sk == NULL || sk->sk_socket == NULL)
		return;

	sock_hold(sk);
	sock_release(sk->sk_socket);
	release_net(sock_net(sk));
	sock_net_set(sk, get_net(&init_net));
	sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);

struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk;

	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		/* SANITY */
		get_net(sock_net(newsk));
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		/*
		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
		 */
		atomic_set(&newsk->sk_wmem_alloc, 1);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		rwlock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
				af_callback_keys + newsk->sk_family,
				af_family_clock_key_strings[newsk->sk_family]);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head	= NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = newsk->sk_filter;
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still raw copy of parent, so invalidate
			 * destructor and make plain sk_free() */
			newsk->sk_destruct = NULL;
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err	   = 0;
		newsk->sk_priority = 0;
		/*
		 * Before updating sk_refcnt, we must commit prior changes to memory
		 * (Documentation/RCU/rculist_nulls.txt for details)
		 */
		smp_wmb();
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		sk_set_socket(newsk, NULL);
		newsk->sk_sleep	 = NULL;

		if (newsk->sk_prot->sockets_allocated)
			percpu_counter_inc(newsk->sk_prot->sockets_allocated);
	}
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	__sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	if (sk_can_gso(sk)) {
		if (dst->header_len) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = dst->dev->gso_max_size;
		}
	}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

void __init sk_init(void)
{
	if (totalram_pages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (totalram_pages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

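/*
 * Worked example (illustrative; assumes 4 KB pages): 4096 pages is 16 MB
 * of RAM and 131072 pages is 512 MB, so the tuning above shrinks the
 * buffer limits to 32 KB - 1 on very small machines, raises them to
 * 128 KB - 1 on large ones, and leaves mid-sized systems at the
 * SK_{W,R}MEM_MAX defaults.
 */
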
/*
 * Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		atomic_sub(len - 1, &sk->sk_wmem_alloc);
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
	sk_mem_uncharge(skb->sk, skb->truesize);
}
EXPORT_SYMBOL(sock_rfree);


int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}
EXPORT_SYMBOL(sock_i_uid);

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(sock_wmalloc);

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
EXPORT_SYMBOL(sock_kmalloc);

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
EXPORT_SYMBOL(sock_kfree_s);

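/*
 * Example (illustrative): sock_kmalloc()/sock_kfree_s() must be paired
 * with the same size so sk_omem_alloc balances back to zero; a
 * hypothetical protocol keeping a small per-socket option blob:
 *
 *	struct opt_blob *b = sock_kmalloc(sk, sizeof(*b), GFP_KERNEL);
 *	if (!b)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, b, sizeof(*b));
 */
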
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think, these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode)
{
	struct sk_buff *skb;
	gfp_t gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, gfp_mask);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
EXPORT_SYMBOL(sock_alloc_send_skb);

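/*
 * Example (illustrative, era-specific APIs assumed): a hypothetical
 * datagram sendmsg() path would reserve link-layer headroom, copy the
 * payload, and rely on the helper above for send-buffer accounting,
 * blocking, and signal handling:
 *
 *	skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		return err;
 *	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 *	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
 */
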
static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk: sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);

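/*
 * Example (illustrative): a hypothetical blocking recvmsg() loop; the
 * caller holds the socket lock, and sk_wait_data() sleeps until either
 * data is queued or the timeout derived from SO_RCVTIMEO expires:
 *
 *	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *	while (!(skb = skb_dequeue(&sk->sk_receive_queue))) {
 *		if (!timeo)
 *			return -EAGAIN;
 *		if (signal_pending(current))
 *			return sock_intr_errno(timeo);
 *		sk_wait_data(sk, &timeo);
 *	}
 */
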
/**
 *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@kind: allocation type
 *
 *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 *	rmem allocation. This function assumes that protocols which have
 *	memory_pressure use sk_wmem_queued as write buffer accounting.
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	struct proto *prot = sk->sk_prot;
	int amt = sk_mem_pages(size);
	int allocated;

	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
	allocated = atomic_add_return(amt, prot->memory_allocated);

	/* Under limit. */
	if (allocated <= prot->sysctl_mem[0]) {
		if (prot->memory_pressure && *prot->memory_pressure)
			*prot->memory_pressure = 0;
		return 1;
	}

	/* Under pressure. */
	if (allocated > prot->sysctl_mem[1])
		if (prot->enter_memory_pressure)
			prot->enter_memory_pressure(sk);

	/* Over hard limit. */
	if (allocated > prot->sysctl_mem[2])
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
			return 1;
	} else { /* SK_MEM_SEND */
		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
				return 1;
		} else if (atomic_read(&sk->sk_wmem_alloc) <
			   prot->sysctl_wmem[0])
				return 1;
	}

	if (prot->memory_pressure) {
		int alloc;

		if (!*prot->memory_pressure)
			return 1;
		alloc = percpu_counter_read_positive(prot->sockets_allocated);
		if (prot->sysctl_mem[2] > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	/* Alas. Undo changes. */
	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
	atomic_sub(amt, prot->memory_allocated);
	return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);

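/*
 * Worked example (illustrative; SK_MEM_QUANTUM is PAGE_SIZE): charging a
 * 6000-byte skb on a 4 KB-page system gives sk_mem_pages(6000) = 2, so
 * sk_forward_alloc grows by 8192 bytes while memory_allocated grows by
 * two quanta; the 2192 unused bytes stay in sk_forward_alloc and are
 * handed back to the protocol's global pool by __sk_mem_reclaim() below.
 */
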
/**
 *	__sk_mem_reclaim - reclaim memory_allocated
 *	@sk: socket
 */
void __sk_mem_reclaim(struct sock *sk)
{
	struct proto *prot = sk->sk_prot;

	atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
		   prot->memory_allocated);
	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;

	if (prot->memory_pressure && *prot->memory_pressure &&
	    (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
		*prot->memory_pressure = 0;
}
EXPORT_SYMBOL(__sk_mem_reclaim);


/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}
EXPORT_SYMBOL(sock_no_poll);

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, unsigned int optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_setsockopt);

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getsockopt);

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
EXPORT_SYMBOL(sock_no_sendpage);

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk_has_sleeper(sk))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk_has_sleeper(sk))
		wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk_has_sleeper(sk))
		wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
						POLLRDNORM | POLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk_has_sleeper(sk))
			wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
						POLLWRNORM | POLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);

1832 unsigned long expires)
1833{
1834 if (!mod_timer(timer, expires))
1835 sock_hold(sk);
1836}
1da177e4
LT
1837EXPORT_SYMBOL(sk_reset_timer);
1838
1839void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1840{
1841 if (timer_pending(timer) && del_timer(timer))
1842 __sock_put(sk);
1843}
1da177e4
LT
1844EXPORT_SYMBOL(sk_stop_timer);
1845
1846void sock_init_data(struct socket *sock, struct sock *sk)
1847{
1848 skb_queue_head_init(&sk->sk_receive_queue);
1849 skb_queue_head_init(&sk->sk_write_queue);
1850 skb_queue_head_init(&sk->sk_error_queue);
97fc2f08
CL
1851#ifdef CONFIG_NET_DMA
1852 skb_queue_head_init(&sk->sk_async_wait_queue);
1853#endif
1da177e4
LT
1854
1855 sk->sk_send_head = NULL;
1856
1857 init_timer(&sk->sk_timer);
4ec93edb 1858
1da177e4
LT
1859 sk->sk_allocation = GFP_KERNEL;
1860 sk->sk_rcvbuf = sysctl_rmem_default;
1861 sk->sk_sndbuf = sysctl_wmem_default;
1862 sk->sk_state = TCP_CLOSE;
972692e0 1863 sk_set_socket(sk, sock);
1da177e4
LT
1864
1865 sock_set_flag(sk, SOCK_ZAPPED);
1866
e71a4783 1867 if (sock) {
1da177e4
LT
1868 sk->sk_type = sock->type;
1869 sk->sk_sleep = &sock->wait;
1870 sock->sk = sk;
1871 } else
1872 sk->sk_sleep = NULL;
1873
1874 rwlock_init(&sk->sk_dst_lock);
1875 rwlock_init(&sk->sk_callback_lock);
443aef0e
PZ
1876 lockdep_set_class_and_name(&sk->sk_callback_lock,
1877 af_callback_keys + sk->sk_family,
1878 af_family_clock_key_strings[sk->sk_family]);
1da177e4
LT
1879
1880 sk->sk_state_change = sock_def_wakeup;
1881 sk->sk_data_ready = sock_def_readable;
1882 sk->sk_write_space = sock_def_write_space;
1883 sk->sk_error_report = sock_def_error_report;
1884 sk->sk_destruct = sock_def_destruct;
1885
1886 sk->sk_sndmsg_page = NULL;
1887 sk->sk_sndmsg_off = 0;
1888
1889 sk->sk_peercred.pid = 0;
1890 sk->sk_peercred.uid = -1;
1891 sk->sk_peercred.gid = -1;
1892 sk->sk_write_pending = 0;
1893 sk->sk_rcvlowat = 1;
1894 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
1895 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
1896
f37f0afb 1897 sk->sk_stamp = ktime_set(-1L, 0);
1da177e4 1898
4dc6dc71
ED
1899 /*
1900 * Before updating sk_refcnt, we must commit prior changes to memory
1901 * (Documentation/RCU/rculist_nulls.txt for details)
1902 */
1903 smp_wmb();
1da177e4 1904 atomic_set(&sk->sk_refcnt, 1);
33c732c3 1905 atomic_set(&sk->sk_drops, 0);
1da177e4 1906}
2a91525c 1907EXPORT_SYMBOL(sock_init_data);
1da177e4 1908
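/*
 * Illustrative sketch (hypothetical): a protocol's create hook typically
 * allocates the sock with sk_alloc() and then hands it to
 * sock_init_data() to pick up the queues, defaults and callbacks set up
 * above. "example_create" is made up; "example_proto" is a hypothetical
 * struct proto (see the proto_register() sketch further below), and
 * PF_UNSPEC is only a placeholder family.
 */
static struct proto example_proto;	/* defined in the registration sketch below */

static int example_create(struct net *net, struct socket *sock, int protocol)
{
	struct sock *sk;

	sk = sk_alloc(net, PF_UNSPEC, GFP_KERNEL, &example_proto);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);	/* links sk and sock, installs defaults */
	sk->sk_protocol = protocol;
	return 0;
}
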
void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);

void release_sock(struct sock *sk)
{
	/*
	 * The sk_lock has mutex_unlock() semantics:
	 */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owned = 0;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);

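/*
 * Illustrative sketch (hypothetical helper): the usual calling pattern.
 * Between lock_sock() and release_sock() the owner may sleep; packets
 * arriving meanwhile are queued on the backlog and drained by
 * __release_sock() on the way out.
 */
static int example_do_locked(struct sock *sk)
{
	int err = 0;

	lock_sock(sk);
	if (sk->sk_state == TCP_CLOSE)	/* e.g. inspect/modify state safely */
		err = -ENOTCONN;
	release_sock(sk);
	return err;
}
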
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;

	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	if (tv.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;

	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

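/*
 * Illustrative sketch (hypothetical dispatcher): these two helpers back
 * the SIOCGSTAMP/SIOCGSTAMPNS ioctls; a family's ioctl handler usually
 * just forwards to them, the way inet_ioctl() does. "example_ioctl" is
 * a made-up name.
 */
static int example_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);
	default:
		return -ENOIOCTLCMD;
	}
}
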
void sock_enable_timestamp(struct sock *sk, int flag)
{
	if (!sock_flag(sk, flag)) {
		sock_set_flag(sk, flag);
		/*
		 * We just set one of the two flags that require net
		 * timestamping, but timestamping might already have been
		 * enabled because of the other one.
		 */
		if (!sock_flag(sk,
				flag == SOCK_TIMESTAMP ?
				SOCK_TIMESTAMPING_RX_SOFTWARE :
				SOCK_TIMESTAMP))
			net_enable_timestamp();
	}
}

/*
 * Get a socket option on a socket.
 *
 * FIX: POSIX 1003.1g is very ambiguous here. It states that
 * asynchronous errors should be reported by getsockopt. We assume
 * this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);

/*
 * Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network stack still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove it from the hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but some
	 * packets may still be in flight because another CPU ran the
	 * receiver and did its hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and be purged by the
	 * socket destructor.
	 *
	 * We may also still have packets pending on the receive queue
	 * and, probably, our own packets waiting in device queues.
	 * sock_destroy will drain the receive queue, but transmitted
	 * packets will delay socket destruction until the last reference
	 * is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);
	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);

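/*
 * Illustrative sketch (hypothetical): simple protocols with no special
 * connection teardown point their struct proto .close at a thin wrapper
 * around sk_common_release(), much like the raw sockets do.
 */
static void example_close(struct sock *sk, long timeout)
{
	sk_common_release(sk);
}
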
static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR	64	/* should be enough for the first time */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);

#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	int cpu = smp_processor_id();
	per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);

static int sock_inuse_init_net(struct net *net)
{
	net->core.inuse = alloc_percpu(struct prot_inuse);
	return net->core.inuse ? 0 : -ENOMEM;
}

static void sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.inuse);
}

static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};

static __init int net_inuse_init(void)
{
	if (register_pernet_subsys(&net_inuse_ops))
		panic("Cannot initialize net inuse counters");

	return 0;
}

core_initcall(net_inuse_init);
#else
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu(prot_inuse, cpu).val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
#endif

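/*
 * Illustrative sketch (hypothetical callbacks): protocols bump the
 * counter from their hash/unhash hooks, +1 when a socket is hashed and
 * -1 when it is unhashed; /proc/net/protocols then sums the per-cpu
 * values via sock_prot_inuse_get(). The add path touches a local-cpu
 * counter, so it runs with bottom halves disabled here.
 */
static void example_hash(struct sock *sk)
{
	/* ... insert sk into the protocol's lookup table ... */
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	local_bh_enable();
}

static void example_unhash(struct sock *sk)
{
	/* ... remove sk from the protocol's lookup table ... */
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
}
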
static void assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		printk(KERN_ERR "PROTO_INUSE_NR exhausted\n");
		return;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
}

static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
#endif

int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
			       prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			static const char mask[] = "request_sock_%s";

			prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
			if (prot->rsk_prot->slab_name == NULL)
				goto out_free_sock_slab;

			sprintf(prot->rsk_prot->slab_name, mask, prot->name);
			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL);

			if (prot->rsk_prot->slab == NULL) {
				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
				       prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_prot != NULL) {
			static const char mask[] = "tw_sock_%s";

			prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);

			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name);
			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  SLAB_HWCACHE_ALIGN |
							prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	write_lock(&proto_list_lock);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	write_unlock(&proto_list_lock);
	return 0;

out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);

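/*
 * Illustrative sketch (hypothetical module): a protocol fills in a
 * struct proto and registers it at init time; alloc_slab=1 asks
 * proto_register() to create the sock slab from .obj_size. This
 * completes the tentative "example_proto" declared earlier; all names
 * are made up.
 */
static struct proto example_proto = {
	.name		= "EXAMPLE",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct sock),	/* normally a larger example_sock */
	.close		= example_close,
	.hash		= example_hash,
	.unhash		= example_unhash,
};

static int __init example_init(void)
{
	return proto_register(&example_proto, 1);
}

static void __exit example_exit(void)
{
	proto_unregister(&example_proto);
}
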
void proto_unregister(struct proto *prot)
{
	write_lock(&proto_list_lock);
	release_proto_idx(prot);
	list_del(&prot->node);
	write_unlock(&proto_list_lock);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(prot->rsk_prot->slab_name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(prot->twsk_prot->twsk_slab_name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}
EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_lock)
{
	read_lock(&proto_list_lock);
	return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_lock)
{
	read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}

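/*
 * For orientation only, the resulting /proc/net/protocols lines look
 * roughly like this (the values below are illustrative, not real output):
 *
 * protocol  size sockets memory press maxhdr slab module
 *           cl co di ac io in de sh ss gs se re sp bi br ha uh gp em
 * TCP       1424      5      1 no       320  yes  kernel
 *            y  y  y  y  y  y  y  y  y  y  y  y  y  y  y  y  y  y  y
 */
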
static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &proto_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

static __net_init int proto_init_net(struct net *net)
{
	if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
		return -ENOMEM;

	return 0;
}

static __net_exit void proto_exit_net(struct net *net)
{
	proc_net_remove(net, "protocols");
}

static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};

static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);

#endif /* PROC_FS */