net/core/sock.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox : Numerous verify_area() problems
 *		Alan Cox : Connecting on a connecting socket
 *			now returns an error for tcp.
 *		Alan Cox : sock->protocol is set correctly.
 *			and is not sometimes left as 0.
 *		Alan Cox : connect handles icmp errors on a
 *			connect properly. Unfortunately there
 *			is a restart syscall nasty there. I
 *			can't match BSD without hacking the C
 *			library. Ideas urgently sought!
 *		Alan Cox : Disallow bind() to addresses that are
 *			not ours - especially broadcast ones!!
 *		Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
 *			instead they leave that for the DESTROY timer.
 *		Alan Cox : Clean up error flag in accept
 *		Alan Cox : TCP ack handling is buggy, the DESTROY timer
 *			was buggy. Put a remove_sock() in the handler
 *			for memory when we hit 0. Also altered the timer
 *			code. The ACK stuff can wait and needs major
 *			TCP layer surgery.
 *		Alan Cox : Fixed TCP ack bug, removed remove sock
 *			and fixed timer/inet_bh race.
 *		Alan Cox : Added zapped flag for TCP
 *		Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey : Relaxed UDP rules for matching packets.
 *		C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
 *		Pauline Middelink : identd support
 *		Alan Cox : Fixed connect() taking signals I think.
 *		Alan Cox : SO_LINGER supported
 *		Alan Cox : Error reporting fixes
 *		Anonymous : inet_create tidied up (sk->reuse setting)
 *		Alan Cox : inet sockets don't set sk->type!
 *		Alan Cox : Split socket option code
 *		Alan Cox : Callbacks
 *		Alan Cox : Nagle flag for Charles & Johannes stuff
 *		Alex : Removed restriction on inet fioctl
 *		Alan Cox : Splitting INET from NET core
 *		Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox : Split IP from generic code
 *		Alan Cox : New kfree_skbmem()
 *		Alan Cox : Make SO_DEBUG superuser only.
 *		Alan Cox : Allow anyone to clear SO_DEBUG
 *			(compatibility fix)
 *		Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox : Allocator for a socket is settable.
 *		Alan Cox : SO_ERROR includes soft errors.
 *		Alan Cox : Allow NULL arguments on some SO_ opts
 *		Alan Cox : Generic socket allocation to make hooks
 *			easier (suggested by Craig Metz).
 *		Michael Pall : SO_ERROR returns positive errno again
 *		Steve Whitehouse: Added default destructor to free
 *			protocol private data.
 *		Steve Whitehouse: Added various other default routines
 *			common to several socket families.
 *		Chris Evans : Call suser() check last on F_SETOWN
 *		Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen : Fix write_space callback
 *		Chris Evans : Security fixes - signedness again
 *		Arnaldo C. Melo : cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>

#include <asm/uaccess.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>

#include <linux/filter.h>

#include <trace/events/sock.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

#include <net/busy_poll.h>

36b77a52 145static DEFINE_MUTEX(proto_list_mutex);
d1a4c0b3
GC
146static LIST_HEAD(proto_list);
147
c255a458 148#ifdef CONFIG_MEMCG_KMEM
1d62e436 149int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
d1a4c0b3
GC
150{
151 struct proto *proto;
152 int ret = 0;
153
36b77a52 154 mutex_lock(&proto_list_mutex);
d1a4c0b3
GC
155 list_for_each_entry(proto, &proto_list, node) {
156 if (proto->init_cgroup) {
1d62e436 157 ret = proto->init_cgroup(memcg, ss);
d1a4c0b3
GC
158 if (ret)
159 goto out;
160 }
161 }
162
36b77a52 163 mutex_unlock(&proto_list_mutex);
d1a4c0b3
GC
164 return ret;
165out:
166 list_for_each_entry_continue_reverse(proto, &proto_list, node)
167 if (proto->destroy_cgroup)
1d62e436 168 proto->destroy_cgroup(memcg);
36b77a52 169 mutex_unlock(&proto_list_mutex);
d1a4c0b3
GC
170 return ret;
171}
172
1d62e436 173void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
d1a4c0b3
GC
174{
175 struct proto *proto;
176
36b77a52 177 mutex_lock(&proto_list_mutex);
d1a4c0b3
GC
178 list_for_each_entry_reverse(proto, &proto_list, node)
179 if (proto->destroy_cgroup)
1d62e436 180 proto->destroy_cgroup(memcg);
36b77a52 181 mutex_unlock(&proto_list_mutex);
d1a4c0b3
GC
182}
183#endif
184
da21f24d
IM
185/*
186 * Each address family might have different locking rules, so we have
187 * one slock key per address family:
188 */
a5b5bb9a
IM
189static struct lock_class_key af_family_keys[AF_MAX];
190static struct lock_class_key af_family_slock_keys[AF_MAX];
191
cbda4eaf 192#if defined(CONFIG_MEMCG_KMEM)
c5905afb 193struct static_key memcg_socket_limit_enabled;
e1aab161 194EXPORT_SYMBOL(memcg_socket_limit_enabled);
cbda4eaf 195#endif
e1aab161 196
a5b5bb9a
IM
197/*
198 * Make lock validator output more readable. (we pre-construct these
199 * strings build-time, so that runtime initialization of socket
200 * locks is fast):
201 */
36cbd3dc 202static const char *const af_family_key_strings[AF_MAX+1] = {
a5b5bb9a
IM
203 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
204 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
205 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
206 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
207 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
208 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
209 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
cbd151bf 210 "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
a5b5bb9a 211 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
cd05acfe 212 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
17926a79 213 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
bce7b154 214 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
6f107b58 215 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
456db6a4 216 "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX"
a5b5bb9a 217};
36cbd3dc 218static const char *const af_family_slock_key_strings[AF_MAX+1] = {
a5b5bb9a
IM
219 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
220 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
221 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
222 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
223 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
224 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
225 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
cbd151bf 226 "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
a5b5bb9a 227 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
cd05acfe 228 "slock-27" , "slock-28" , "slock-AF_CAN" ,
17926a79 229 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
bce7b154 230 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
6f107b58 231 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
456db6a4 232 "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
a5b5bb9a 233};
36cbd3dc 234static const char *const af_family_clock_key_strings[AF_MAX+1] = {
443aef0e
PZ
235 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
236 "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
237 "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
238 "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
239 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
240 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
241 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
cbd151bf 242 "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
443aef0e 243 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
b4942af6 244 "clock-27" , "clock-28" , "clock-AF_CAN" ,
e51f802b 245 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
bce7b154 246 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
6f107b58 247 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
456db6a4 248 "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX"
443aef0e 249};
da21f24d
IM
250
251/*
252 * sk_callback_lock locking rules are per-address-family,
253 * so split the lock classes by using a per-AF key:
254 */
255static struct lock_class_key af_callback_keys[AF_MAX];
256
1da177e4
LT
257/* Take into consideration the size of the struct sk_buff overhead in the
258 * determination of these values, since that is non-constant across
259 * platforms. This makes socket queueing behavior and performance
260 * not depend upon such differences.
261 */
262#define _SK_MEM_PACKETS 256
87fb4b7b 263#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
1da177e4
LT
264#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
265#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
266
267/* Run time adjustable parameters. */
ab32ea5d 268__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
6d8ebc8a 269EXPORT_SYMBOL(sysctl_wmem_max);
ab32ea5d 270__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
6d8ebc8a 271EXPORT_SYMBOL(sysctl_rmem_max);
ab32ea5d
BH
272__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
273__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
1da177e4 274
25985edc 275/* Maximal space eaten by iovec or ancillary data plus some space */
ab32ea5d 276int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
2a91525c 277EXPORT_SYMBOL(sysctl_optmem_max);
1da177e4 278
c93bdd0e
MG
279struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
280EXPORT_SYMBOL_GPL(memalloc_socks);
281
7cb02404
MG
282/**
283 * sk_set_memalloc - sets %SOCK_MEMALLOC
284 * @sk: socket to set it on
285 *
286 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
287 * It's the responsibility of the admin to adjust min_free_kbytes
288 * to meet the requirements
289 */
290void sk_set_memalloc(struct sock *sk)
291{
292 sock_set_flag(sk, SOCK_MEMALLOC);
293 sk->sk_allocation |= __GFP_MEMALLOC;
c93bdd0e 294 static_key_slow_inc(&memalloc_socks);
7cb02404
MG
295}
296EXPORT_SYMBOL_GPL(sk_set_memalloc);
297
298void sk_clear_memalloc(struct sock *sk)
299{
300 sock_reset_flag(sk, SOCK_MEMALLOC);
301 sk->sk_allocation &= ~__GFP_MEMALLOC;
c93bdd0e 302 static_key_slow_dec(&memalloc_socks);
c76562b6
MG
303
304 /*
305 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
306 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
307 * it has rmem allocations there is a risk that the user of the
308 * socket cannot make forward progress due to exceeding the rmem
309 * limits. By rights, sk_clear_memalloc() should only be called
310 * on sockets being torn down but warn and reset the accounting if
311 * that assumption breaks.
312 */
313 if (WARN_ON(sk->sk_forward_alloc))
314 sk_mem_reclaim(sk);
7cb02404
MG
315}
316EXPORT_SYMBOL_GPL(sk_clear_memalloc);
317
b4b9e355
MG
318int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
319{
320 int ret;
321 unsigned long pflags = current->flags;
322
323 /* these should have been dropped before queueing */
324 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
325
326 current->flags |= PF_MEMALLOC;
327 ret = sk->sk_backlog_rcv(sk, skb);
328 tsk_restore_flags(current, pflags, PF_MEMALLOC);
329
330 return ret;
331}
332EXPORT_SYMBOL(__sk_backlog_rcv);
333
1da177e4
LT
334static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
335{
336 struct timeval tv;
337
338 if (optlen < sizeof(tv))
339 return -EINVAL;
340 if (copy_from_user(&tv, optval, sizeof(tv)))
341 return -EFAULT;
ba78073e
VA
342 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
343 return -EDOM;
1da177e4 344
ba78073e 345 if (tv.tv_sec < 0) {
6f11df83
AM
346 static int warned __read_mostly;
347
ba78073e 348 *timeo_p = 0;
50aab54f 349 if (warned < 10 && net_ratelimit()) {
ba78073e 350 warned++;
e005d193
JP
351 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
352 __func__, current->comm, task_pid_nr(current));
50aab54f 353 }
ba78073e
VA
354 return 0;
355 }
1da177e4
LT
356 *timeo_p = MAX_SCHEDULE_TIMEOUT;
357 if (tv.tv_sec == 0 && tv.tv_usec == 0)
358 return 0;
359 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
360 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
361 return 0;
362}
363
364static void sock_warn_obsolete_bsdism(const char *name)
365{
366 static int warned;
367 static char warncomm[TASK_COMM_LEN];
4ec93edb
YH
368 if (strcmp(warncomm, current->comm) && warned < 5) {
369 strcpy(warncomm, current->comm);
e005d193
JP
370 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
371 warncomm, name);
1da177e4
LT
372 warned++;
373 }
374}
375
08e29af3
ED
376#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
377
378static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
4ec93edb 379{
08e29af3
ED
380 if (sk->sk_flags & flags) {
381 sk->sk_flags &= ~flags;
382 if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
20d49473 383 net_disable_timestamp();
1da177e4
LT
384 }
385}
386
387
f0088a50
DV
388int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
389{
766e9037 390 int err;
f0088a50 391 int skb_len;
3b885787
NH
392 unsigned long flags;
393 struct sk_buff_head *list = &sk->sk_receive_queue;
f0088a50 394
0fd7bac6 395 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
766e9037 396 atomic_inc(&sk->sk_drops);
3847ce32 397 trace_sock_rcvqueue_full(sk, skb);
766e9037 398 return -ENOMEM;
f0088a50
DV
399 }
400
fda9ef5d 401 err = sk_filter(sk, skb);
f0088a50 402 if (err)
766e9037 403 return err;
f0088a50 404
c76562b6 405 if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
766e9037
ED
406 atomic_inc(&sk->sk_drops);
407 return -ENOBUFS;
3ab224be
HA
408 }
409
f0088a50
DV
410 skb->dev = NULL;
411 skb_set_owner_r(skb, sk);
49ad9599 412
f0088a50
DV
413 /* Cache the SKB length before we tack it onto the receive
414 * queue. Once it is added it no longer belongs to us and
415 * may be freed by other threads of control pulling packets
416 * from the queue.
417 */
418 skb_len = skb->len;
419
7fee226a
ED
 420 /* we escape from the rcu protected region, make sure we don't leak
421 * a norefcounted dst
422 */
423 skb_dst_force(skb);
424
3b885787
NH
425 spin_lock_irqsave(&list->lock, flags);
426 skb->dropcount = atomic_read(&sk->sk_drops);
427 __skb_queue_tail(list, skb);
428 spin_unlock_irqrestore(&list->lock, flags);
f0088a50
DV
429
430 if (!sock_flag(sk, SOCK_DEAD))
431 sk->sk_data_ready(sk, skb_len);
766e9037 432 return 0;
f0088a50
DV
433}
434EXPORT_SYMBOL(sock_queue_rcv_skb);
435
58a5a7b9 436int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
f0088a50
DV
437{
438 int rc = NET_RX_SUCCESS;
439
fda9ef5d 440 if (sk_filter(sk, skb))
f0088a50
DV
441 goto discard_and_relse;
442
443 skb->dev = NULL;
444
f545a38f 445 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
c377411f
ED
446 atomic_inc(&sk->sk_drops);
447 goto discard_and_relse;
448 }
58a5a7b9
ACM
449 if (nested)
450 bh_lock_sock_nested(sk);
451 else
452 bh_lock_sock(sk);
a5b5bb9a
IM
453 if (!sock_owned_by_user(sk)) {
454 /*
455 * trylock + unlock semantics:
456 */
457 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
458
c57943a1 459 rc = sk_backlog_rcv(sk, skb);
a5b5bb9a
IM
460
461 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
f545a38f 462 } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
8eae939f
ZY
463 bh_unlock_sock(sk);
464 atomic_inc(&sk->sk_drops);
465 goto discard_and_relse;
466 }
467
f0088a50
DV
468 bh_unlock_sock(sk);
469out:
470 sock_put(sk);
471 return rc;
472discard_and_relse:
473 kfree_skb(skb);
474 goto out;
475}
476EXPORT_SYMBOL(sk_receive_skb);
477
ea94ff3b
KK
478void sk_reset_txq(struct sock *sk)
479{
480 sk_tx_queue_clear(sk);
481}
482EXPORT_SYMBOL(sk_reset_txq);
483
f0088a50
DV
484struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
485{
b6c6712a 486 struct dst_entry *dst = __sk_dst_get(sk);
f0088a50
DV
487
488 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
e022f0b4 489 sk_tx_queue_clear(sk);
a9b3cd7f 490 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
f0088a50
DV
491 dst_release(dst);
492 return NULL;
493 }
494
495 return dst;
496}
497EXPORT_SYMBOL(__sk_dst_check);
498
499struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
500{
501 struct dst_entry *dst = sk_dst_get(sk);
502
503 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
504 sk_dst_reset(sk);
505 dst_release(dst);
506 return NULL;
507 }
508
509 return dst;
510}
511EXPORT_SYMBOL(sk_dst_check);
512
c91f6df2
BH
513static int sock_setbindtodevice(struct sock *sk, char __user *optval,
514 int optlen)
4878809f
DM
515{
516 int ret = -ENOPROTOOPT;
517#ifdef CONFIG_NETDEVICES
3b1e0a65 518 struct net *net = sock_net(sk);
4878809f
DM
519 char devname[IFNAMSIZ];
520 int index;
521
522 /* Sorry... */
523 ret = -EPERM;
5e1fccc0 524 if (!ns_capable(net->user_ns, CAP_NET_RAW))
4878809f
DM
525 goto out;
526
527 ret = -EINVAL;
528 if (optlen < 0)
529 goto out;
530
531 /* Bind this socket to a particular device like "eth0",
532 * as specified in the passed interface name. If the
533 * name is "" or the option length is zero the socket
534 * is not bound.
535 */
536 if (optlen > IFNAMSIZ - 1)
537 optlen = IFNAMSIZ - 1;
538 memset(devname, 0, sizeof(devname));
539
540 ret = -EFAULT;
541 if (copy_from_user(devname, optval, optlen))
542 goto out;
543
000ba2e4
DM
544 index = 0;
545 if (devname[0] != '\0') {
bf8e56bf 546 struct net_device *dev;
4878809f 547
bf8e56bf
ED
548 rcu_read_lock();
549 dev = dev_get_by_name_rcu(net, devname);
550 if (dev)
551 index = dev->ifindex;
552 rcu_read_unlock();
4878809f
DM
553 ret = -ENODEV;
554 if (!dev)
555 goto out;
4878809f
DM
556 }
557
558 lock_sock(sk);
559 sk->sk_bound_dev_if = index;
560 sk_dst_reset(sk);
561 release_sock(sk);
562
563 ret = 0;
564
565out:
566#endif
567
568 return ret;
569}
570
c91f6df2
BH
571static int sock_getbindtodevice(struct sock *sk, char __user *optval,
572 int __user *optlen, int len)
573{
574 int ret = -ENOPROTOOPT;
575#ifdef CONFIG_NETDEVICES
576 struct net *net = sock_net(sk);
c91f6df2 577 char devname[IFNAMSIZ];
c91f6df2
BH
578
579 if (sk->sk_bound_dev_if == 0) {
580 len = 0;
581 goto zero;
582 }
583
584 ret = -EINVAL;
585 if (len < IFNAMSIZ)
586 goto out;
587
5dbe7c17
NS
588 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
589 if (ret)
c91f6df2 590 goto out;
c91f6df2
BH
591
592 len = strlen(devname) + 1;
593
594 ret = -EFAULT;
595 if (copy_to_user(optval, devname, len))
596 goto out;
597
598zero:
599 ret = -EFAULT;
600 if (put_user(len, optlen))
601 goto out;
602
603 ret = 0;
604
605out:
606#endif
607
608 return ret;
609}
610
c0ef877b
PE
611static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
612{
613 if (valbool)
614 sock_set_flag(sk, bit);
615 else
616 sock_reset_flag(sk, bit);
617}
618
1da177e4
LT
619/*
620 * This is meant for all protocols to use and covers goings on
621 * at the socket level. Everything here is generic.
622 */
623
624int sock_setsockopt(struct socket *sock, int level, int optname,
b7058842 625 char __user *optval, unsigned int optlen)
1da177e4 626{
2a91525c 627 struct sock *sk = sock->sk;
1da177e4
LT
628 int val;
629 int valbool;
630 struct linger ling;
631 int ret = 0;
4ec93edb 632
1da177e4
LT
633 /*
634 * Options without arguments
635 */
636
4878809f 637 if (optname == SO_BINDTODEVICE)
c91f6df2 638 return sock_setbindtodevice(sk, optval, optlen);
4878809f 639
e71a4783
SH
640 if (optlen < sizeof(int))
641 return -EINVAL;
4ec93edb 642
1da177e4
LT
643 if (get_user(val, (int __user *)optval))
644 return -EFAULT;
4ec93edb 645
2a91525c 646 valbool = val ? 1 : 0;
1da177e4
LT
647
648 lock_sock(sk);
649
2a91525c 650 switch (optname) {
e71a4783 651 case SO_DEBUG:
2a91525c 652 if (val && !capable(CAP_NET_ADMIN))
e71a4783 653 ret = -EACCES;
2a91525c 654 else
c0ef877b 655 sock_valbool_flag(sk, SOCK_DBG, valbool);
e71a4783
SH
656 break;
657 case SO_REUSEADDR:
4a17fd52 658 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
e71a4783 659 break;
055dc21a
TH
660 case SO_REUSEPORT:
661 sk->sk_reuseport = valbool;
662 break;
e71a4783 663 case SO_TYPE:
49c794e9 664 case SO_PROTOCOL:
0d6038ee 665 case SO_DOMAIN:
e71a4783
SH
666 case SO_ERROR:
667 ret = -ENOPROTOOPT;
668 break;
669 case SO_DONTROUTE:
c0ef877b 670 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
e71a4783
SH
671 break;
672 case SO_BROADCAST:
673 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
674 break;
675 case SO_SNDBUF:
 676 /* Don't error on this; BSD doesn't, and if you think
82981930
ED
677 * about it this is right. Otherwise apps have to
678 * play 'guess the biggest size' games. RCVBUF/SNDBUF
679 * are treated in BSD as hints
680 */
681 val = min_t(u32, val, sysctl_wmem_max);
b0573dea 682set_sndbuf:
e71a4783 683 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
82981930
ED
684 sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
685 /* Wake up sending tasks if we upped the value. */
e71a4783
SH
686 sk->sk_write_space(sk);
687 break;
1da177e4 688
e71a4783
SH
689 case SO_SNDBUFFORCE:
690 if (!capable(CAP_NET_ADMIN)) {
691 ret = -EPERM;
692 break;
693 }
694 goto set_sndbuf;
b0573dea 695
e71a4783
SH
696 case SO_RCVBUF:
 697 /* Don't error on this; BSD doesn't, and if you think
82981930
ED
698 * about it this is right. Otherwise apps have to
699 * play 'guess the biggest size' games. RCVBUF/SNDBUF
700 * are treated in BSD as hints
701 */
702 val = min_t(u32, val, sysctl_rmem_max);
b0573dea 703set_rcvbuf:
e71a4783
SH
704 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
705 /*
706 * We double it on the way in to account for
707 * "struct sk_buff" etc. overhead. Applications
708 * assume that the SO_RCVBUF setting they make will
709 * allow that much actual data to be received on that
710 * socket.
711 *
712 * Applications are unaware that "struct sk_buff" and
713 * other overheads allocate from the receive buffer
714 * during socket buffer allocation.
715 *
716 * And after considering the possible alternatives,
717 * returning the value we actually used in getsockopt
718 * is the most desirable behavior.
719 */
82981930 720 sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
e71a4783
SH
721 break;
722
723 case SO_RCVBUFFORCE:
724 if (!capable(CAP_NET_ADMIN)) {
725 ret = -EPERM;
1da177e4 726 break;
e71a4783
SH
727 }
728 goto set_rcvbuf;
1da177e4 729
e71a4783 730 case SO_KEEPALIVE:
1da177e4 731#ifdef CONFIG_INET
3e10986d
ED
732 if (sk->sk_protocol == IPPROTO_TCP &&
733 sk->sk_type == SOCK_STREAM)
e71a4783 734 tcp_set_keepalive(sk, valbool);
1da177e4 735#endif
e71a4783
SH
736 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
737 break;
738
739 case SO_OOBINLINE:
740 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
741 break;
742
743 case SO_NO_CHECK:
744 sk->sk_no_check = valbool;
745 break;
746
747 case SO_PRIORITY:
5e1fccc0
EB
748 if ((val >= 0 && val <= 6) ||
749 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
e71a4783
SH
750 sk->sk_priority = val;
751 else
752 ret = -EPERM;
753 break;
754
755 case SO_LINGER:
756 if (optlen < sizeof(ling)) {
757 ret = -EINVAL; /* 1003.1g */
1da177e4 758 break;
e71a4783 759 }
2a91525c 760 if (copy_from_user(&ling, optval, sizeof(ling))) {
e71a4783 761 ret = -EFAULT;
1da177e4 762 break;
e71a4783
SH
763 }
764 if (!ling.l_onoff)
765 sock_reset_flag(sk, SOCK_LINGER);
766 else {
1da177e4 767#if (BITS_PER_LONG == 32)
e71a4783
SH
768 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
769 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
1da177e4 770 else
e71a4783
SH
771#endif
772 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
773 sock_set_flag(sk, SOCK_LINGER);
774 }
775 break;
776
777 case SO_BSDCOMPAT:
778 sock_warn_obsolete_bsdism("setsockopt");
779 break;
780
781 case SO_PASSCRED:
782 if (valbool)
783 set_bit(SOCK_PASSCRED, &sock->flags);
784 else
785 clear_bit(SOCK_PASSCRED, &sock->flags);
786 break;
787
788 case SO_TIMESTAMP:
92f37fd2 789 case SO_TIMESTAMPNS:
e71a4783 790 if (valbool) {
92f37fd2
ED
791 if (optname == SO_TIMESTAMP)
792 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
793 else
794 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
e71a4783 795 sock_set_flag(sk, SOCK_RCVTSTAMP);
20d49473 796 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
92f37fd2 797 } else {
e71a4783 798 sock_reset_flag(sk, SOCK_RCVTSTAMP);
92f37fd2
ED
799 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
800 }
e71a4783
SH
801 break;
802
20d49473
PO
803 case SO_TIMESTAMPING:
804 if (val & ~SOF_TIMESTAMPING_MASK) {
f249fb78 805 ret = -EINVAL;
20d49473
PO
806 break;
807 }
808 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
809 val & SOF_TIMESTAMPING_TX_HARDWARE);
810 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
811 val & SOF_TIMESTAMPING_TX_SOFTWARE);
812 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
813 val & SOF_TIMESTAMPING_RX_HARDWARE);
814 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
815 sock_enable_timestamp(sk,
816 SOCK_TIMESTAMPING_RX_SOFTWARE);
817 else
818 sock_disable_timestamp(sk,
08e29af3 819 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
20d49473
PO
820 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
821 val & SOF_TIMESTAMPING_SOFTWARE);
822 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
823 val & SOF_TIMESTAMPING_SYS_HARDWARE);
824 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
825 val & SOF_TIMESTAMPING_RAW_HARDWARE);
826 break;
827
e71a4783
SH
828 case SO_RCVLOWAT:
829 if (val < 0)
830 val = INT_MAX;
831 sk->sk_rcvlowat = val ? : 1;
832 break;
833
834 case SO_RCVTIMEO:
835 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
836 break;
837
838 case SO_SNDTIMEO:
839 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
840 break;
1da177e4 841
e71a4783
SH
842 case SO_ATTACH_FILTER:
843 ret = -EINVAL;
844 if (optlen == sizeof(struct sock_fprog)) {
845 struct sock_fprog fprog;
1da177e4 846
e71a4783
SH
847 ret = -EFAULT;
848 if (copy_from_user(&fprog, optval, sizeof(fprog)))
1da177e4 849 break;
e71a4783
SH
850
851 ret = sk_attach_filter(&fprog, sk);
852 }
853 break;
854
855 case SO_DETACH_FILTER:
55b33325 856 ret = sk_detach_filter(sk);
e71a4783 857 break;
1da177e4 858
d59577b6
VB
859 case SO_LOCK_FILTER:
860 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
861 ret = -EPERM;
862 else
863 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
864 break;
865
e71a4783
SH
866 case SO_PASSSEC:
867 if (valbool)
868 set_bit(SOCK_PASSSEC, &sock->flags);
869 else
870 clear_bit(SOCK_PASSSEC, &sock->flags);
871 break;
4a19ec58 872 case SO_MARK:
5e1fccc0 873 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
4a19ec58 874 ret = -EPERM;
2a91525c 875 else
4a19ec58 876 sk->sk_mark = val;
4a19ec58 877 break;
877ce7c1 878
1da177e4
LT
879 /* We implement the SO_SNDLOWAT etc to
880 not be settable (1003.1g 5.3) */
3b885787 881 case SO_RXQ_OVFL:
8083f0fc 882 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
3b885787 883 break;
6e3e939f
JB
884
885 case SO_WIFI_STATUS:
886 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
887 break;
888
ef64a54f
PE
889 case SO_PEEK_OFF:
890 if (sock->ops->set_peek_off)
891 sock->ops->set_peek_off(sk, val);
892 else
893 ret = -EOPNOTSUPP;
894 break;
3bdc0eba
BG
895
896 case SO_NOFCS:
897 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
898 break;
899
7d4c04fc
KJ
900 case SO_SELECT_ERR_QUEUE:
901 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
902 break;
903
e0d1095a 904#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51 905 case SO_BUSY_POLL:
dafcc438
ET
906 /* allow unprivileged users to decrease the value */
907 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
908 ret = -EPERM;
909 else {
910 if (val < 0)
911 ret = -EINVAL;
912 else
913 sk->sk_ll_usec = val;
914 }
915 break;
916#endif
62748f32
ED
917
918 case SO_MAX_PACING_RATE:
919 sk->sk_max_pacing_rate = val;
920 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
921 sk->sk_max_pacing_rate);
922 break;
923
e71a4783
SH
924 default:
925 ret = -ENOPROTOOPT;
926 break;
4ec93edb 927 }
1da177e4
LT
928 release_sock(sk);
929 return ret;
930}
2a91525c 931EXPORT_SYMBOL(sock_setsockopt);
1da177e4
LT
932
933
3f551f94
EB
934void cred_to_ucred(struct pid *pid, const struct cred *cred,
935 struct ucred *ucred)
936{
937 ucred->pid = pid_vnr(pid);
938 ucred->uid = ucred->gid = -1;
939 if (cred) {
940 struct user_namespace *current_ns = current_user_ns();
941
b2e4f544
EB
942 ucred->uid = from_kuid_munged(current_ns, cred->euid);
943 ucred->gid = from_kgid_munged(current_ns, cred->egid);
3f551f94
EB
944 }
945}
3924773a 946EXPORT_SYMBOL_GPL(cred_to_ucred);
3f551f94 947
1da177e4
LT
948int sock_getsockopt(struct socket *sock, int level, int optname,
949 char __user *optval, int __user *optlen)
950{
951 struct sock *sk = sock->sk;
4ec93edb 952
e71a4783 953 union {
4ec93edb
YH
954 int val;
955 struct linger ling;
1da177e4
LT
956 struct timeval tm;
957 } v;
4ec93edb 958
4d0392be 959 int lv = sizeof(int);
1da177e4 960 int len;
4ec93edb 961
e71a4783 962 if (get_user(len, optlen))
4ec93edb 963 return -EFAULT;
e71a4783 964 if (len < 0)
1da177e4 965 return -EINVAL;
4ec93edb 966
50fee1de 967 memset(&v, 0, sizeof(v));
df0bca04 968
2a91525c 969 switch (optname) {
e71a4783
SH
970 case SO_DEBUG:
971 v.val = sock_flag(sk, SOCK_DBG);
972 break;
973
974 case SO_DONTROUTE:
975 v.val = sock_flag(sk, SOCK_LOCALROUTE);
976 break;
977
978 case SO_BROADCAST:
1b23a5df 979 v.val = sock_flag(sk, SOCK_BROADCAST);
e71a4783
SH
980 break;
981
982 case SO_SNDBUF:
983 v.val = sk->sk_sndbuf;
984 break;
985
986 case SO_RCVBUF:
987 v.val = sk->sk_rcvbuf;
988 break;
989
990 case SO_REUSEADDR:
991 v.val = sk->sk_reuse;
992 break;
993
055dc21a
TH
994 case SO_REUSEPORT:
995 v.val = sk->sk_reuseport;
996 break;
997
e71a4783 998 case SO_KEEPALIVE:
1b23a5df 999 v.val = sock_flag(sk, SOCK_KEEPOPEN);
e71a4783
SH
1000 break;
1001
1002 case SO_TYPE:
1003 v.val = sk->sk_type;
1004 break;
1005
49c794e9
JE
1006 case SO_PROTOCOL:
1007 v.val = sk->sk_protocol;
1008 break;
1009
0d6038ee
JE
1010 case SO_DOMAIN:
1011 v.val = sk->sk_family;
1012 break;
1013
e71a4783
SH
1014 case SO_ERROR:
1015 v.val = -sock_error(sk);
2a91525c 1016 if (v.val == 0)
e71a4783
SH
1017 v.val = xchg(&sk->sk_err_soft, 0);
1018 break;
1019
1020 case SO_OOBINLINE:
1b23a5df 1021 v.val = sock_flag(sk, SOCK_URGINLINE);
e71a4783
SH
1022 break;
1023
1024 case SO_NO_CHECK:
1025 v.val = sk->sk_no_check;
1026 break;
1027
1028 case SO_PRIORITY:
1029 v.val = sk->sk_priority;
1030 break;
1031
1032 case SO_LINGER:
1033 lv = sizeof(v.ling);
1b23a5df 1034 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
e71a4783
SH
1035 v.ling.l_linger = sk->sk_lingertime / HZ;
1036 break;
1037
1038 case SO_BSDCOMPAT:
1039 sock_warn_obsolete_bsdism("getsockopt");
1040 break;
1041
1042 case SO_TIMESTAMP:
92f37fd2
ED
1043 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1044 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1045 break;
1046
1047 case SO_TIMESTAMPNS:
1048 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
e71a4783
SH
1049 break;
1050
20d49473
PO
1051 case SO_TIMESTAMPING:
1052 v.val = 0;
1053 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
1054 v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
1055 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
1056 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
1057 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
1058 v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
1059 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
1060 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
1061 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
1062 v.val |= SOF_TIMESTAMPING_SOFTWARE;
1063 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
1064 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
1065 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
1066 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
1067 break;
1068
e71a4783 1069 case SO_RCVTIMEO:
2a91525c 1070 lv = sizeof(struct timeval);
e71a4783
SH
1071 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1072 v.tm.tv_sec = 0;
1073 v.tm.tv_usec = 0;
1074 } else {
1075 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1076 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
1077 }
1078 break;
1079
1080 case SO_SNDTIMEO:
2a91525c 1081 lv = sizeof(struct timeval);
e71a4783
SH
1082 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1083 v.tm.tv_sec = 0;
1084 v.tm.tv_usec = 0;
1085 } else {
1086 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1087 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
1088 }
1089 break;
1da177e4 1090
e71a4783
SH
1091 case SO_RCVLOWAT:
1092 v.val = sk->sk_rcvlowat;
1093 break;
1da177e4 1094
e71a4783 1095 case SO_SNDLOWAT:
2a91525c 1096 v.val = 1;
e71a4783 1097 break;
1da177e4 1098
e71a4783 1099 case SO_PASSCRED:
82981930 1100 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
e71a4783 1101 break;
1da177e4 1102
e71a4783 1103 case SO_PEERCRED:
109f6e39
EB
1104 {
1105 struct ucred peercred;
1106 if (len > sizeof(peercred))
1107 len = sizeof(peercred);
1108 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1109 if (copy_to_user(optval, &peercred, len))
e71a4783
SH
1110 return -EFAULT;
1111 goto lenout;
109f6e39 1112 }
1da177e4 1113
e71a4783
SH
1114 case SO_PEERNAME:
1115 {
1116 char address[128];
1117
1118 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1119 return -ENOTCONN;
1120 if (lv < len)
1121 return -EINVAL;
1122 if (copy_to_user(optval, address, len))
1123 return -EFAULT;
1124 goto lenout;
1125 }
1da177e4 1126
e71a4783
SH
1127 /* Dubious BSD thing... Probably nobody even uses it, but
1128 * the UNIX standard wants it for whatever reason... -DaveM
1129 */
1130 case SO_ACCEPTCONN:
1131 v.val = sk->sk_state == TCP_LISTEN;
1132 break;
1da177e4 1133
e71a4783 1134 case SO_PASSSEC:
82981930 1135 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
e71a4783 1136 break;
877ce7c1 1137
e71a4783
SH
1138 case SO_PEERSEC:
1139 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1da177e4 1140
4a19ec58
LAT
1141 case SO_MARK:
1142 v.val = sk->sk_mark;
1143 break;
1144
3b885787 1145 case SO_RXQ_OVFL:
1b23a5df 1146 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
3b885787
NH
1147 break;
1148
6e3e939f 1149 case SO_WIFI_STATUS:
1b23a5df 1150 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
6e3e939f
JB
1151 break;
1152
ef64a54f
PE
1153 case SO_PEEK_OFF:
1154 if (!sock->ops->set_peek_off)
1155 return -EOPNOTSUPP;
1156
1157 v.val = sk->sk_peek_off;
1158 break;
bc2f7996 1159 case SO_NOFCS:
1b23a5df 1160 v.val = sock_flag(sk, SOCK_NOFCS);
bc2f7996 1161 break;
c91f6df2 1162
f7b86bfe 1163 case SO_BINDTODEVICE:
c91f6df2
BH
1164 return sock_getbindtodevice(sk, optval, optlen, len);
1165
a8fc9277
PE
1166 case SO_GET_FILTER:
1167 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1168 if (len < 0)
1169 return len;
1170
1171 goto lenout;
c91f6df2 1172
d59577b6
VB
1173 case SO_LOCK_FILTER:
1174 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1175 break;
1176
7d4c04fc
KJ
1177 case SO_SELECT_ERR_QUEUE:
1178 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1179 break;
1180
e0d1095a 1181#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51 1182 case SO_BUSY_POLL:
dafcc438
ET
1183 v.val = sk->sk_ll_usec;
1184 break;
1185#endif
1186
62748f32
ED
1187 case SO_MAX_PACING_RATE:
1188 v.val = sk->sk_max_pacing_rate;
1189 break;
1190
e71a4783
SH
1191 default:
1192 return -ENOPROTOOPT;
1da177e4 1193 }
e71a4783 1194
1da177e4
LT
1195 if (len > lv)
1196 len = lv;
1197 if (copy_to_user(optval, &v, len))
1198 return -EFAULT;
1199lenout:
4ec93edb
YH
1200 if (put_user(len, optlen))
1201 return -EFAULT;
1202 return 0;
1da177e4
LT
1203}
1204
a5b5bb9a
IM
1205/*
1206 * Initialize an sk_lock.
1207 *
1208 * (We also register the sk_lock with the lock validator.)
1209 */
b6f99a21 1210static inline void sock_lock_init(struct sock *sk)
a5b5bb9a 1211{
ed07536e
PZ
1212 sock_lock_init_class_and_name(sk,
1213 af_family_slock_key_strings[sk->sk_family],
1214 af_family_slock_keys + sk->sk_family,
1215 af_family_key_strings[sk->sk_family],
1216 af_family_keys + sk->sk_family);
a5b5bb9a
IM
1217}
1218
4dc6dc71
ED
1219/*
1220 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
1221 * even temporarily, because of RCU lookups. sk_node should also be left as is.
68835aba 1222 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
4dc6dc71 1223 */
f1a6c4da
PE
1224static void sock_copy(struct sock *nsk, const struct sock *osk)
1225{
1226#ifdef CONFIG_SECURITY_NETWORK
1227 void *sptr = nsk->sk_security;
1228#endif
68835aba
ED
1229 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1230
1231 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1232 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1233
f1a6c4da
PE
1234#ifdef CONFIG_SECURITY_NETWORK
1235 nsk->sk_security = sptr;
1236 security_sk_clone(osk, nsk);
1237#endif
1238}
1239
fcbdf09d
OP
1240void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1241{
1242 unsigned long nulls1, nulls2;
1243
1244 nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
1245 nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
1246 if (nulls1 > nulls2)
1247 swap(nulls1, nulls2);
1248
1249 if (nulls1 != 0)
1250 memset((char *)sk, 0, nulls1);
1251 memset((char *)sk + nulls1 + sizeof(void *), 0,
1252 nulls2 - nulls1 - sizeof(void *));
1253 memset((char *)sk + nulls2 + sizeof(void *), 0,
1254 size - nulls2 - sizeof(void *));
1255}
1256EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
1257
2e4afe7b
PE
1258static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1259 int family)
c308c1b2
PE
1260{
1261 struct sock *sk;
1262 struct kmem_cache *slab;
1263
1264 slab = prot->slab;
e912b114
ED
1265 if (slab != NULL) {
1266 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1267 if (!sk)
1268 return sk;
1269 if (priority & __GFP_ZERO) {
fcbdf09d
OP
1270 if (prot->clear_sk)
1271 prot->clear_sk(sk, prot->obj_size);
1272 else
1273 sk_prot_clear_nulls(sk, prot->obj_size);
e912b114 1274 }
fcbdf09d 1275 } else
c308c1b2
PE
1276 sk = kmalloc(prot->obj_size, priority);
1277
2e4afe7b 1278 if (sk != NULL) {
a98b65a3
VN
1279 kmemcheck_annotate_bitfield(sk, flags);
1280
2e4afe7b
PE
1281 if (security_sk_alloc(sk, family, priority))
1282 goto out_free;
1283
1284 if (!try_module_get(prot->owner))
1285 goto out_free_sec;
e022f0b4 1286 sk_tx_queue_clear(sk);
2e4afe7b
PE
1287 }
1288
c308c1b2 1289 return sk;
2e4afe7b
PE
1290
1291out_free_sec:
1292 security_sk_free(sk);
1293out_free:
1294 if (slab != NULL)
1295 kmem_cache_free(slab, sk);
1296 else
1297 kfree(sk);
1298 return NULL;
c308c1b2
PE
1299}
1300
1301static void sk_prot_free(struct proto *prot, struct sock *sk)
1302{
1303 struct kmem_cache *slab;
2e4afe7b 1304 struct module *owner;
c308c1b2 1305
2e4afe7b 1306 owner = prot->owner;
c308c1b2 1307 slab = prot->slab;
2e4afe7b
PE
1308
1309 security_sk_free(sk);
c308c1b2
PE
1310 if (slab != NULL)
1311 kmem_cache_free(slab, sk);
1312 else
1313 kfree(sk);
2e4afe7b 1314 module_put(owner);
c308c1b2
PE
1315}
1316
8fb974c9 1317#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
211d2f97 1318void sock_update_classid(struct sock *sk)
f8451725 1319{
1144182a 1320 u32 classid;
f8451725 1321
211d2f97 1322 classid = task_cls_classid(current);
3afa6d00 1323 if (classid != sk->sk_classid)
f8451725
HX
1324 sk->sk_classid = classid;
1325}
82862742 1326EXPORT_SYMBOL(sock_update_classid);
8fb974c9 1327#endif
5bc1421e 1328
51e4e7fa 1329#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
6ffd4641 1330void sock_update_netprioidx(struct sock *sk)
5bc1421e 1331{
5bc1421e
NH
1332 if (in_interrupt())
1333 return;
2b73bc65 1334
6ffd4641 1335 sk->sk_cgrp_prioidx = task_netprioidx(current);
5bc1421e
NH
1336}
1337EXPORT_SYMBOL_GPL(sock_update_netprioidx);
f8451725
HX
1338#endif
1339
1da177e4
LT
1340/**
1341 * sk_alloc - All socket objects are allocated here
c4ea43c5 1342 * @net: the applicable net namespace
4dc3b16b
PP
1343 * @family: protocol family
1344 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1345 * @prot: struct proto associated with this new sock instance
1da177e4 1346 */
1b8d7ae4 1347struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
6257ff21 1348 struct proto *prot)
1da177e4 1349{
c308c1b2 1350 struct sock *sk;
1da177e4 1351
154adbc8 1352 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1da177e4 1353 if (sk) {
154adbc8
PE
1354 sk->sk_family = family;
1355 /*
1356 * See comment in struct sock definition to understand
1357 * why we need sk_prot_creator -acme
1358 */
1359 sk->sk_prot = sk->sk_prot_creator = prot;
1360 sock_lock_init(sk);
3b1e0a65 1361 sock_net_set(sk, get_net(net));
d66ee058 1362 atomic_set(&sk->sk_wmem_alloc, 1);
f8451725 1363
211d2f97 1364 sock_update_classid(sk);
6ffd4641 1365 sock_update_netprioidx(sk);
1da177e4 1366 }
a79af59e 1367
2e4afe7b 1368 return sk;
1da177e4 1369}
2a91525c 1370EXPORT_SYMBOL(sk_alloc);
1da177e4 1371
2b85a34e 1372static void __sk_free(struct sock *sk)
1da177e4
LT
1373{
1374 struct sk_filter *filter;
1da177e4
LT
1375
1376 if (sk->sk_destruct)
1377 sk->sk_destruct(sk);
1378
a898def2
PM
1379 filter = rcu_dereference_check(sk->sk_filter,
1380 atomic_read(&sk->sk_wmem_alloc) == 0);
1da177e4 1381 if (filter) {
309dd5fc 1382 sk_filter_uncharge(sk, filter);
a9b3cd7f 1383 RCU_INIT_POINTER(sk->sk_filter, NULL);
1da177e4
LT
1384 }
1385
08e29af3 1386 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1da177e4
LT
1387
1388 if (atomic_read(&sk->sk_omem_alloc))
e005d193
JP
1389 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1390 __func__, atomic_read(&sk->sk_omem_alloc));
1da177e4 1391
109f6e39
EB
1392 if (sk->sk_peer_cred)
1393 put_cred(sk->sk_peer_cred);
1394 put_pid(sk->sk_peer_pid);
3b1e0a65 1395 put_net(sock_net(sk));
c308c1b2 1396 sk_prot_free(sk->sk_prot_creator, sk);
1da177e4 1397}
2b85a34e
ED
1398
1399void sk_free(struct sock *sk)
1400{
1401 /*
25985edc 1402 * We subtract one from sk_wmem_alloc and can know if
2b85a34e
ED
1403 * some packets are still in some tx queue.
1404 * If not null, sock_wfree() will call __sk_free(sk) later
1405 */
1406 if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1407 __sk_free(sk);
1408}
2a91525c 1409EXPORT_SYMBOL(sk_free);
1da177e4 1410
edf02087 1411/*
25985edc
LDM
1412 * Last sock_put should drop reference to sk->sk_net. It has already
1413 * been dropped in sk_change_net. Taking reference to stopping namespace
edf02087 1414 * is not an option.
25985edc 1415 * Take reference to a socket to remove it from hash _alive_ and after that
edf02087
DL
1416 * destroy it in the context of init_net.
1417 */
1418void sk_release_kernel(struct sock *sk)
1419{
1420 if (sk == NULL || sk->sk_socket == NULL)
1421 return;
1422
1423 sock_hold(sk);
1424 sock_release(sk->sk_socket);
65a18ec5 1425 release_net(sock_net(sk));
3b1e0a65 1426 sock_net_set(sk, get_net(&init_net));
edf02087
DL
1427 sock_put(sk);
1428}
45af1754 1429EXPORT_SYMBOL(sk_release_kernel);
edf02087 1430
475f1b52
SR
1431static void sk_update_clone(const struct sock *sk, struct sock *newsk)
1432{
1433 if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1434 sock_update_memcg(newsk);
1435}
1436
e56c57d0
ED
1437/**
1438 * sk_clone_lock - clone a socket, and lock its clone
1439 * @sk: the socket to clone
1440 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1441 *
1442 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1443 */
1444struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
87d11ceb 1445{
8fd1d178 1446 struct sock *newsk;
87d11ceb 1447
8fd1d178 1448 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
87d11ceb
ACM
1449 if (newsk != NULL) {
1450 struct sk_filter *filter;
1451
892c141e 1452 sock_copy(newsk, sk);
87d11ceb
ACM
1453
1454 /* SANITY */
3b1e0a65 1455 get_net(sock_net(newsk));
87d11ceb
ACM
1456 sk_node_init(&newsk->sk_node);
1457 sock_lock_init(newsk);
1458 bh_lock_sock(newsk);
fa438ccf 1459 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
8eae939f 1460 newsk->sk_backlog.len = 0;
87d11ceb
ACM
1461
1462 atomic_set(&newsk->sk_rmem_alloc, 0);
2b85a34e
ED
1463 /*
1464 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1465 */
1466 atomic_set(&newsk->sk_wmem_alloc, 1);
87d11ceb
ACM
1467 atomic_set(&newsk->sk_omem_alloc, 0);
1468 skb_queue_head_init(&newsk->sk_receive_queue);
1469 skb_queue_head_init(&newsk->sk_write_queue);
97fc2f08
CL
1470#ifdef CONFIG_NET_DMA
1471 skb_queue_head_init(&newsk->sk_async_wait_queue);
1472#endif
87d11ceb 1473
b6c6712a 1474 spin_lock_init(&newsk->sk_dst_lock);
87d11ceb 1475 rwlock_init(&newsk->sk_callback_lock);
443aef0e
PZ
1476 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1477 af_callback_keys + newsk->sk_family,
1478 af_family_clock_key_strings[newsk->sk_family]);
87d11ceb
ACM
1479
1480 newsk->sk_dst_cache = NULL;
1481 newsk->sk_wmem_queued = 0;
1482 newsk->sk_forward_alloc = 0;
1483 newsk->sk_send_head = NULL;
87d11ceb
ACM
1484 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1485
1486 sock_reset_flag(newsk, SOCK_DONE);
1487 skb_queue_head_init(&newsk->sk_error_queue);
1488
0d7da9dd 1489 filter = rcu_dereference_protected(newsk->sk_filter, 1);
87d11ceb
ACM
1490 if (filter != NULL)
1491 sk_filter_charge(newsk, filter);
1492
1493 if (unlikely(xfrm_sk_clone_policy(newsk))) {
1494 /* It is still raw copy of parent, so invalidate
1495 * destructor and make plain sk_free() */
1496 newsk->sk_destruct = NULL;
b0691c8e 1497 bh_unlock_sock(newsk);
87d11ceb
ACM
1498 sk_free(newsk);
1499 newsk = NULL;
1500 goto out;
1501 }
1502
1503 newsk->sk_err = 0;
1504 newsk->sk_priority = 0;
4dc6dc71
ED
1505 /*
1506 * Before updating sk_refcnt, we must commit prior changes to memory
1507 * (Documentation/RCU/rculist_nulls.txt for details)
1508 */
1509 smp_wmb();
87d11ceb
ACM
1510 atomic_set(&newsk->sk_refcnt, 2);
1511
1512 /*
1513 * Increment the counter in the same struct proto as the master
1514 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1515 * is the same as sk->sk_prot->socks, as this field was copied
1516 * with memcpy).
1517 *
1518 * This _changes_ the previous behaviour, where
1519 * tcp_create_openreq_child always was incrementing the
1520 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
1521 * to be taken into account in all callers. -acme
1522 */
1523 sk_refcnt_debug_inc(newsk);
972692e0 1524 sk_set_socket(newsk, NULL);
43815482 1525 newsk->sk_wq = NULL;
87d11ceb 1526
f3f511e1
GC
1527 sk_update_clone(sk, newsk);
1528
87d11ceb 1529 if (newsk->sk_prot->sockets_allocated)
180d8cd9 1530 sk_sockets_allocated_inc(newsk);
704da560 1531
08e29af3 1532 if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
704da560 1533 net_enable_timestamp();
87d11ceb
ACM
1534 }
1535out:
1536 return newsk;
1537}
e56c57d0 1538EXPORT_SYMBOL_GPL(sk_clone_lock);
87d11ceb 1539
9958089a
AK
1540void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1541{
1542 __sk_dst_set(sk, dst);
1543 sk->sk_route_caps = dst->dev->features;
1544 if (sk->sk_route_caps & NETIF_F_GSO)
4fcd6b99 1545 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
a465419b 1546 sk->sk_route_caps &= ~sk->sk_route_nocaps;
9958089a 1547 if (sk_can_gso(sk)) {
82cc1a7a 1548 if (dst->header_len) {
9958089a 1549 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
82cc1a7a 1550 } else {
9958089a 1551 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
82cc1a7a 1552 sk->sk_gso_max_size = dst->dev->gso_max_size;
1485348d 1553 sk->sk_gso_max_segs = dst->dev->gso_max_segs;
82cc1a7a 1554 }
9958089a
AK
1555 }
1556}
1557EXPORT_SYMBOL_GPL(sk_setup_caps);
1558
1da177e4
LT
1559/*
1560 * Simple resource managers for sockets.
1561 */
1562
1563
4ec93edb
YH
1564/*
1565 * Write buffer destructor automatically called from kfree_skb.
1da177e4
LT
1566 */
1567void sock_wfree(struct sk_buff *skb)
1568{
1569 struct sock *sk = skb->sk;
d99927f4 1570 unsigned int len = skb->truesize;
1da177e4 1571
d99927f4
ED
1572 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1573 /*
1574 * Keep a reference on sk_wmem_alloc, this will be released
1575 * after sk_write_space() call
1576 */
1577 atomic_sub(len - 1, &sk->sk_wmem_alloc);
1da177e4 1578 sk->sk_write_space(sk);
d99927f4
ED
1579 len = 1;
1580 }
2b85a34e 1581 /*
d99927f4
ED
1582 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
1583 * could not do because of in-flight packets
2b85a34e 1584 */
d99927f4 1585 if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
2b85a34e 1586 __sk_free(sk);
1da177e4 1587}
2a91525c 1588EXPORT_SYMBOL(sock_wfree);
1da177e4 1589
f2f872f9
ED
1590void skb_orphan_partial(struct sk_buff *skb)
1591{
1592 /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
1593 * so we do not completely orphan skb, but transfer all
1594 * accounted bytes but one, to avoid unexpected reorders.
1595 */
1596 if (skb->destructor == sock_wfree
1597#ifdef CONFIG_INET
1598 || skb->destructor == tcp_wfree
1599#endif
1600 ) {
1601 atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
1602 skb->truesize = 1;
1603 } else {
1604 skb_orphan(skb);
1605 }
1606}
1607EXPORT_SYMBOL(skb_orphan_partial);
1608
4ec93edb
YH
1609/*
1610 * Read buffer destructor automatically called from kfree_skb.
1da177e4
LT
1611 */
1612void sock_rfree(struct sk_buff *skb)
1613{
1614 struct sock *sk = skb->sk;
d361fd59 1615 unsigned int len = skb->truesize;
1da177e4 1616
d361fd59
ED
1617 atomic_sub(len, &sk->sk_rmem_alloc);
1618 sk_mem_uncharge(sk, len);
1da177e4 1619}
2a91525c 1620EXPORT_SYMBOL(sock_rfree);
1da177e4 1621
41063e9d
DM
1622void sock_edemux(struct sk_buff *skb)
1623{
e812347c
ED
1624 struct sock *sk = skb->sk;
1625
1c463e57 1626#ifdef CONFIG_INET
e812347c
ED
1627 if (sk->sk_state == TCP_TIME_WAIT)
1628 inet_twsk_put(inet_twsk(sk));
1629 else
1c463e57 1630#endif
e812347c 1631 sock_put(sk);
41063e9d
DM
1632}
1633EXPORT_SYMBOL(sock_edemux);
1da177e4 1634
976d0201 1635kuid_t sock_i_uid(struct sock *sk)
1da177e4 1636{
976d0201 1637 kuid_t uid;
1da177e4 1638
f064af1e 1639 read_lock_bh(&sk->sk_callback_lock);
976d0201 1640 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
f064af1e 1641 read_unlock_bh(&sk->sk_callback_lock);
1da177e4
LT
1642 return uid;
1643}
2a91525c 1644EXPORT_SYMBOL(sock_i_uid);
1da177e4
LT
1645
1646unsigned long sock_i_ino(struct sock *sk)
1647{
1648 unsigned long ino;
1649
f064af1e 1650 read_lock_bh(&sk->sk_callback_lock);
1da177e4 1651 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
f064af1e 1652 read_unlock_bh(&sk->sk_callback_lock);
1da177e4
LT
1653 return ino;
1654}
2a91525c 1655EXPORT_SYMBOL(sock_i_ino);
1da177e4
LT
1656
1657/*
1658 * Allocate a skb from the socket's send buffer.
1659 */
86a76caf 1660struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
dd0fc66f 1661 gfp_t priority)
1da177e4
LT
1662{
1663 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
2a91525c 1664 struct sk_buff *skb = alloc_skb(size, priority);
1da177e4
LT
1665 if (skb) {
1666 skb_set_owner_w(skb, sk);
1667 return skb;
1668 }
1669 }
1670 return NULL;
1671}
2a91525c 1672EXPORT_SYMBOL(sock_wmalloc);
1da177e4
LT
1673
1674/*
1675 * Allocate a skb from the socket's receive buffer.
4ec93edb 1676 */
86a76caf 1677struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
dd0fc66f 1678 gfp_t priority)
1da177e4
LT
1679{
1680 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1681 struct sk_buff *skb = alloc_skb(size, priority);
1682 if (skb) {
1683 skb_set_owner_r(skb, sk);
1684 return skb;
1685 }
1686 }
1687 return NULL;
1688}
1689
4ec93edb 1690/*
1da177e4 1691 * Allocate a memory block from the socket's option memory buffer.
4ec93edb 1692 */
dd0fc66f 1693void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1da177e4 1694{
95c96174 1695 if ((unsigned int)size <= sysctl_optmem_max &&
1da177e4
LT
1696 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1697 void *mem;
1698 /* First do the add, to avoid the race if kmalloc
4ec93edb 1699 * might sleep.
1da177e4
LT
1700 */
1701 atomic_add(size, &sk->sk_omem_alloc);
1702 mem = kmalloc(size, priority);
1703 if (mem)
1704 return mem;
1705 atomic_sub(size, &sk->sk_omem_alloc);
1706 }
1707 return NULL;
1708}
2a91525c 1709EXPORT_SYMBOL(sock_kmalloc);
1da177e4
LT
1710
1711/*
1712 * Free an option memory block.
1713 */
1714void sock_kfree_s(struct sock *sk, void *mem, int size)
1715{
1716 kfree(mem);
1717 atomic_sub(size, &sk->sk_omem_alloc);
1718}
2a91525c 1719EXPORT_SYMBOL(sock_kfree_s);
1da177e4
LT
1720
1721/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1722 I think, these locks should be removed for datagram sockets.
1723 */
2a91525c 1724static long sock_wait_for_wmem(struct sock *sk, long timeo)
1da177e4
LT
1725{
1726 DEFINE_WAIT(wait);
1727
1728 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1729 for (;;) {
1730 if (!timeo)
1731 break;
1732 if (signal_pending(current))
1733 break;
1734 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
aa395145 1735 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4
LT
1736 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1737 break;
1738 if (sk->sk_shutdown & SEND_SHUTDOWN)
1739 break;
1740 if (sk->sk_err)
1741 break;
1742 timeo = schedule_timeout(timeo);
1743 }
aa395145 1744 finish_wait(sk_sleep(sk), &wait);
1da177e4
LT
1745 return timeo;
1746}
1747
1748
1749/*
1750 * Generic send/receive buffer handlers
1751 */
1752
4cc7f68d
HX
1753struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1754 unsigned long data_len, int noblock,
28d64271 1755 int *errcode, int max_page_order)
1da177e4 1756{
28d64271
ED
1757 struct sk_buff *skb = NULL;
1758 unsigned long chunk;
7d877f3b 1759 gfp_t gfp_mask;
1da177e4
LT
1760 long timeo;
1761 int err;
cc9b17ad 1762 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
28d64271
ED
1763 struct page *page;
1764 int i;
cc9b17ad
JW
1765
1766 err = -EMSGSIZE;
1767 if (npages > MAX_SKB_FRAGS)
1768 goto failure;
1da177e4 1769
1da177e4 1770 timeo = sock_sndtimeo(sk, noblock);
28d64271 1771 while (!skb) {
1da177e4
LT
1772 err = sock_error(sk);
1773 if (err != 0)
1774 goto failure;
1775
1776 err = -EPIPE;
1777 if (sk->sk_shutdown & SEND_SHUTDOWN)
1778 goto failure;
1779
28d64271
ED
1780 if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
1781 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1782 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1783 err = -EAGAIN;
1784 if (!timeo)
1785 goto failure;
1786 if (signal_pending(current))
1787 goto interrupted;
1788 timeo = sock_wait_for_wmem(sk, timeo);
1789 continue;
1790 }
1da177e4 1791
28d64271
ED
1792 err = -ENOBUFS;
1793 gfp_mask = sk->sk_allocation;
1794 if (gfp_mask & __GFP_WAIT)
1795 gfp_mask |= __GFP_REPEAT;
1796
1797 skb = alloc_skb(header_len, gfp_mask);
1798 if (!skb)
1da177e4 1799 goto failure;
28d64271
ED
1800
1801 skb->truesize += data_len;
1802
1803 for (i = 0; npages > 0; i++) {
1804 int order = max_page_order;
1805
1806 while (order) {
1807 if (npages >= 1 << order) {
1808 page = alloc_pages(sk->sk_allocation |
1809 __GFP_COMP | __GFP_NOWARN,
1810 order);
1811 if (page)
1812 goto fill_page;
1813 }
1814 order--;
1815 }
1816 page = alloc_page(sk->sk_allocation);
1817 if (!page)
1818 goto failure;
1819fill_page:
1820 chunk = min_t(unsigned long, data_len,
1821 PAGE_SIZE << order);
1822 skb_fill_page_desc(skb, i, page, 0, chunk);
1823 data_len -= chunk;
1824 npages -= 1 << order;
1da177e4 1825 }
1da177e4
LT
1826 }
1827
1828 skb_set_owner_w(skb, sk);
1829 return skb;
1830
1831interrupted:
1832 err = sock_intr_errno(timeo);
1833failure:
28d64271 1834 kfree_skb(skb);
1da177e4
LT
1835 *errcode = err;
1836 return NULL;
1837}
4cc7f68d 1838EXPORT_SYMBOL(sock_alloc_send_pskb);
1da177e4 1839
4ec93edb 1840struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1da177e4 1841 int noblock, int *errcode)
1842{
28d64271 1843 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
1da177e4 1844}
2a91525c 1845EXPORT_SYMBOL(sock_alloc_send_skb);
1da177e4 1846
5640f768
ED
1847/* On 32bit arches, an skb frag is limited to 2^15 */
1848#define SKB_FRAG_PAGE_ORDER get_order(32768)
1849
1850bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1851{
1852 int order;
1853
1854 if (pfrag->page) {
1855 if (atomic_read(&pfrag->page->_count) == 1) {
1856 pfrag->offset = 0;
1857 return true;
1858 }
1859 if (pfrag->offset < pfrag->size)
1860 return true;
1861 put_page(pfrag->page);
1862 }
1863
1864 /* We restrict high order allocations to users that can afford to wait */
1865 order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
1866
1867 do {
1868 gfp_t gfp = sk->sk_allocation;
1869
1870 if (order)
1871 gfp |= __GFP_COMP | __GFP_NOWARN;
1872 pfrag->page = alloc_pages(gfp, order);
1873 if (likely(pfrag->page)) {
1874 pfrag->offset = 0;
1875 pfrag->size = PAGE_SIZE << order;
1876 return true;
1877 }
1878 } while (--order >= 0);
1879
1880 sk_enter_memory_pressure(sk);
1881 sk_stream_moderate_sndbuf(sk);
1882 return false;
1883}
1884EXPORT_SYMBOL(sk_page_frag_refill);
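/*
 * Example (editorial sketch): how a transmit path typically consumes the
 * per-socket page fragment that sk_page_frag_refill() manages. Attaching
 * the fragment to an skb is elided; "foo" is hypothetical.
 */
static int foo_append_data(struct sock *sk, char __user *from, int len)
{
        struct page_frag *pfrag = sk_page_frag(sk);
        int copy;

        if (!sk_page_frag_refill(sk, pfrag))
                return -ENOBUFS;                        /* under memory pressure */

        copy = min_t(int, len, pfrag->size - pfrag->offset);
        if (copy_from_user(page_address(pfrag->page) + pfrag->offset, from, copy))
                return -EFAULT;

        /* ... skb_fill_page_desc(skb, i, pfrag->page, pfrag->offset, copy) ... */
        get_page(pfrag->page);                          /* the skb frag keeps its own ref */
        pfrag->offset += copy;
        return copy;
}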
1885
1da177e4 1886static void __lock_sock(struct sock *sk)
f39234d6 1887 __releases(&sk->sk_lock.slock)
 1888 __acquires(&sk->sk_lock.slock)
1da177e4 1889{
1890 DEFINE_WAIT(wait);
1891
e71a4783 1892 for (;;) {
1da177e4
LT
1893 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1894 TASK_UNINTERRUPTIBLE);
1895 spin_unlock_bh(&sk->sk_lock.slock);
1896 schedule();
1897 spin_lock_bh(&sk->sk_lock.slock);
e71a4783 1898 if (!sock_owned_by_user(sk))
1da177e4
LT
1899 break;
1900 }
1901 finish_wait(&sk->sk_lock.wq, &wait);
1902}
1903
 1904static void __release_sock(struct sock *sk)
f39234d6 1905 __releases(&sk->sk_lock.slock)
 1906 __acquires(&sk->sk_lock.slock)
1da177e4 1907{
1908 struct sk_buff *skb = sk->sk_backlog.head;
1909
1910 do {
1911 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1912 bh_unlock_sock(sk);
1913
1914 do {
1915 struct sk_buff *next = skb->next;
1916
e4cbb02a 1917 prefetch(next);
7fee226a 1918 WARN_ON_ONCE(skb_dst_is_noref(skb));
1da177e4 1919 skb->next = NULL;
c57943a1 1920 sk_backlog_rcv(sk, skb);
1da177e4
LT
1921
1922 /*
1923 * We are in process context here with softirqs
1924 * disabled, use cond_resched_softirq() to preempt.
1925 * This is safe to do because we've taken the backlog
1926 * queue private:
1927 */
1928 cond_resched_softirq();
1929
1930 skb = next;
1931 } while (skb != NULL);
1932
1933 bh_lock_sock(sk);
e71a4783 1934 } while ((skb = sk->sk_backlog.head) != NULL);
8eae939f
ZY
1935
1936 /*
 1937 * Doing the zeroing here guarantees we cannot loop forever
1938 * while a wild producer attempts to flood us.
1939 */
1940 sk->sk_backlog.len = 0;
1da177e4
LT
1941}
1942
1943/**
 1944 * sk_wait_data - wait for data to arrive at sk_receive_queue
4dc3b16b 1945 * @sk: sock to wait on
 1946 * @timeo: for how long
1da177e4 1947 *
1948 * Now socket state including sk->sk_err is changed only under lock,
1949 * hence we may omit checks after joining wait queue.
1950 * We check receive queue before schedule() only as optimization;
1951 * it is very likely that release_sock() added new data.
1952 */
1953int sk_wait_data(struct sock *sk, long *timeo)
1954{
1955 int rc;
1956 DEFINE_WAIT(wait);
1957
aa395145 1958 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4
LT
1959 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1960 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1961 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
aa395145 1962 finish_wait(sk_sleep(sk), &wait);
1da177e4
LT
1963 return rc;
1964}
1da177e4
LT
1965EXPORT_SYMBOL(sk_wait_data);
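/*
 * Example (editorial sketch): the canonical blocking receive loop around
 * sk_wait_data(), for a hypothetical datagram protocol. sk_wait_event(),
 * used internally, drops and re-takes the socket lock while sleeping.
 */
static struct sk_buff *foo_wait_for_packet(struct sock *sk, int flags, int *err)
{
        long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
        struct sk_buff *skb;

        lock_sock(sk);
        while (!(skb = skb_dequeue(&sk->sk_receive_queue))) {
                *err = sock_error(sk);
                if (*err)
                        break;
                *err = -EAGAIN;
                if (!timeo)
                        break;
                sk_wait_data(sk, &timeo);       /* sleeps until data or timeout */
        }
        release_sock(sk);
        return skb;
}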
1966
3ab224be
HA
1967/**
1968 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
1969 * @sk: socket
1970 * @size: memory size to allocate
1971 * @kind: allocation type
1972 *
1973 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
1974 * rmem allocation. This function assumes that protocols which have
1975 * memory_pressure use sk_wmem_queued as write buffer accounting.
1976 */
1977int __sk_mem_schedule(struct sock *sk, int size, int kind)
1978{
1979 struct proto *prot = sk->sk_prot;
1980 int amt = sk_mem_pages(size);
8d987e5c 1981 long allocated;
e1aab161 1982 int parent_status = UNDER_LIMIT;
3ab224be
HA
1983
1984 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
180d8cd9 1985
e1aab161 1986 allocated = sk_memory_allocated_add(sk, amt, &parent_status);
3ab224be
HA
1987
1988 /* Under limit. */
e1aab161
GC
1989 if (parent_status == UNDER_LIMIT &&
1990 allocated <= sk_prot_mem_limits(sk, 0)) {
180d8cd9 1991 sk_leave_memory_pressure(sk);
3ab224be
HA
1992 return 1;
1993 }
1994
e1aab161
GC
1995 /* Under pressure. (we or our parents) */
1996 if ((parent_status > SOFT_LIMIT) ||
1997 allocated > sk_prot_mem_limits(sk, 1))
180d8cd9 1998 sk_enter_memory_pressure(sk);
3ab224be 1999
e1aab161
GC
2000 /* Over hard limit (we or our parents) */
2001 if ((parent_status == OVER_LIMIT) ||
2002 (allocated > sk_prot_mem_limits(sk, 2)))
3ab224be
HA
2003 goto suppress_allocation;
2004
2005 /* guarantee minimum buffer size under pressure */
2006 if (kind == SK_MEM_RECV) {
2007 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2008 return 1;
180d8cd9 2009
3ab224be
HA
2010 } else { /* SK_MEM_SEND */
2011 if (sk->sk_type == SOCK_STREAM) {
2012 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2013 return 1;
2014 } else if (atomic_read(&sk->sk_wmem_alloc) <
2015 prot->sysctl_wmem[0])
2016 return 1;
2017 }
2018
180d8cd9 2019 if (sk_has_memory_pressure(sk)) {
1748376b
ED
2020 int alloc;
2021
180d8cd9 2022 if (!sk_under_memory_pressure(sk))
1748376b 2023 return 1;
180d8cd9
GC
2024 alloc = sk_sockets_allocated_read_positive(sk);
 2025 if (sk_prot_mem_limits(sk, 2) > alloc *
3ab224be 2026 sk_mem_pages(sk->sk_wmem_queued +
2027 atomic_read(&sk->sk_rmem_alloc) +
2028 sk->sk_forward_alloc))
2029 return 1;
2030 }
2031
2032suppress_allocation:
2033
2034 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2035 sk_stream_moderate_sndbuf(sk);
2036
2037 /* Fail only if socket is _under_ its sndbuf.
 2038 * In this case we cannot block, so we have to fail.
2039 */
2040 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2041 return 1;
2042 }
2043
3847ce32
SM
2044 trace_sock_exceed_buf_limit(sk, prot, allocated);
2045
3ab224be
HA
2046 /* Alas. Undo changes. */
2047 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
180d8cd9 2048
0e90b31f 2049 sk_memory_allocated_sub(sk, amt);
180d8cd9 2050
3ab224be
HA
2051 return 0;
2052}
3ab224be
HA
2053EXPORT_SYMBOL(__sk_mem_schedule);
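/*
 * Example (editorial sketch): protocols rarely call __sk_mem_schedule()
 * directly; the inline helpers in <net/sock.h> do. A sketch of charging a
 * new transmit skb under the usual pattern, assuming the socket uses
 * memory accounting.
 */
static int foo_account_tx_skb(struct sock *sk, struct sk_buff *skb)
{
        if (!sk_wmem_schedule(sk, skb->truesize))       /* -> __sk_mem_schedule(SK_MEM_SEND) */
                return -ENOBUFS;                        /* over the protocol limits */

        sk_mem_charge(sk, skb->truesize);               /* consume sk_forward_alloc */
        return 0;
}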
2054
2055/**
 2056 * __sk_mem_reclaim - reclaim memory_allocated
2057 * @sk: socket
2058 */
2059void __sk_mem_reclaim(struct sock *sk)
2060{
180d8cd9 2061 sk_memory_allocated_sub(sk,
0e90b31f 2062 sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
3ab224be
HA
2063 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
2064
180d8cd9
GC
2065 if (sk_under_memory_pressure(sk) &&
2066 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2067 sk_leave_memory_pressure(sk);
3ab224be 2068}
3ab224be
HA
2069EXPORT_SYMBOL(__sk_mem_reclaim);
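/*
 * Example (editorial sketch): the usual counterpart. Protocols call the
 * sk_mem_reclaim*() inlines from <net/sock.h>, e.g. after purging queues in
 * a destroy callback; those call __sk_mem_reclaim() once sk_forward_alloc
 * holds at least one spare SK_MEM_QUANTUM.
 */
static void foo_destroy(struct sock *sk)
{
        __skb_queue_purge(&sk->sk_receive_queue);       /* skb destructors uncharge rmem */
        sk_mem_reclaim(sk);                             /* return whole quanta to the proto */
}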
2070
2071
1da177e4
LT
2072/*
2073 * Set of default routines for initialising struct proto_ops when
2074 * the protocol does not support a particular function. In certain
2075 * cases where it makes no sense for a protocol to have a "do nothing"
2076 * function, some default processing is provided.
2077 */
2078
2079int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2080{
2081 return -EOPNOTSUPP;
2082}
2a91525c 2083EXPORT_SYMBOL(sock_no_bind);
1da177e4 2084
4ec93edb 2085int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1da177e4 2086 int len, int flags)
2087{
2088 return -EOPNOTSUPP;
2089}
2a91525c 2090EXPORT_SYMBOL(sock_no_connect);
1da177e4
LT
2091
2092int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2093{
2094 return -EOPNOTSUPP;
2095}
2a91525c 2096EXPORT_SYMBOL(sock_no_socketpair);
1da177e4
LT
2097
2098int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
2099{
2100 return -EOPNOTSUPP;
2101}
2a91525c 2102EXPORT_SYMBOL(sock_no_accept);
1da177e4 2103
4ec93edb 2104int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1da177e4 2105 int *len, int peer)
2106{
2107 return -EOPNOTSUPP;
2108}
2a91525c 2109EXPORT_SYMBOL(sock_no_getname);
1da177e4 2110
2a91525c 2111unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
1da177e4
LT
2112{
2113 return 0;
2114}
2a91525c 2115EXPORT_SYMBOL(sock_no_poll);
1da177e4
LT
2116
2117int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2118{
2119 return -EOPNOTSUPP;
2120}
2a91525c 2121EXPORT_SYMBOL(sock_no_ioctl);
1da177e4
LT
2122
2123int sock_no_listen(struct socket *sock, int backlog)
2124{
2125 return -EOPNOTSUPP;
2126}
2a91525c 2127EXPORT_SYMBOL(sock_no_listen);
1da177e4
LT
2128
2129int sock_no_shutdown(struct socket *sock, int how)
2130{
2131 return -EOPNOTSUPP;
2132}
2a91525c 2133EXPORT_SYMBOL(sock_no_shutdown);
1da177e4
LT
2134
2135int sock_no_setsockopt(struct socket *sock, int level, int optname,
b7058842 2136 char __user *optval, unsigned int optlen)
1da177e4
LT
2137{
2138 return -EOPNOTSUPP;
2139}
2a91525c 2140EXPORT_SYMBOL(sock_no_setsockopt);
1da177e4
LT
2141
2142int sock_no_getsockopt(struct socket *sock, int level, int optname,
2143 char __user *optval, int __user *optlen)
2144{
2145 return -EOPNOTSUPP;
2146}
2a91525c 2147EXPORT_SYMBOL(sock_no_getsockopt);
1da177e4
LT
2148
2149int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2150 size_t len)
2151{
2152 return -EOPNOTSUPP;
2153}
2a91525c 2154EXPORT_SYMBOL(sock_no_sendmsg);
1da177e4
LT
2155
2156int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2157 size_t len, int flags)
2158{
2159 return -EOPNOTSUPP;
2160}
2a91525c 2161EXPORT_SYMBOL(sock_no_recvmsg);
1da177e4
LT
2162
2163int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2164{
2165 /* Mirror missing mmap method error code */
2166 return -ENODEV;
2167}
2a91525c 2168EXPORT_SYMBOL(sock_no_mmap);
1da177e4
LT
2169
2170ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2171{
2172 ssize_t res;
2173 struct msghdr msg = {.msg_flags = flags};
2174 struct kvec iov;
2175 char *kaddr = kmap(page);
2176 iov.iov_base = kaddr + offset;
2177 iov.iov_len = size;
2178 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2179 kunmap(page);
2180 return res;
2181}
2a91525c 2182EXPORT_SYMBOL(sock_no_sendpage);
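/*
 * Example (editorial sketch): how a family typically wires these defaults
 * into its proto_ops for operations it does not implement. PF_FOO and the
 * omitted family-specific slots are hypothetical.
 */
#define PF_FOO AF_UNSPEC        /* placeholder family number for this sketch */

static const struct proto_ops foo_dgram_ops = {
        .family         = PF_FOO,
        .owner          = THIS_MODULE,
        /* .release, .bind, .getname, .sendmsg, .recvmsg: family specific */
        .connect        = sock_no_connect,
        .socketpair     = sock_no_socketpair,
        .accept         = sock_no_accept,
        .poll           = datagram_poll,
        .ioctl          = sock_no_ioctl,
        .listen         = sock_no_listen,
        .shutdown       = sock_no_shutdown,
        .setsockopt     = sock_no_setsockopt,
        .getsockopt     = sock_no_getsockopt,
        .mmap           = sock_no_mmap,
        .sendpage       = sock_no_sendpage,
};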
1da177e4
LT
2183
2184/*
2185 * Default Socket Callbacks
2186 */
2187
2188static void sock_def_wakeup(struct sock *sk)
2189{
43815482
ED
2190 struct socket_wq *wq;
2191
2192 rcu_read_lock();
2193 wq = rcu_dereference(sk->sk_wq);
2194 if (wq_has_sleeper(wq))
2195 wake_up_interruptible_all(&wq->wait);
2196 rcu_read_unlock();
1da177e4
LT
2197}
2198
2199static void sock_def_error_report(struct sock *sk)
2200{
43815482
ED
2201 struct socket_wq *wq;
2202
2203 rcu_read_lock();
2204 wq = rcu_dereference(sk->sk_wq);
2205 if (wq_has_sleeper(wq))
2206 wake_up_interruptible_poll(&wq->wait, POLLERR);
8d8ad9d7 2207 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
43815482 2208 rcu_read_unlock();
1da177e4
LT
2209}
2210
2211static void sock_def_readable(struct sock *sk, int len)
2212{
43815482
ED
2213 struct socket_wq *wq;
2214
2215 rcu_read_lock();
2216 wq = rcu_dereference(sk->sk_wq);
2217 if (wq_has_sleeper(wq))
2c6607c6 2218 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
37e5540b 2219 POLLRDNORM | POLLRDBAND);
8d8ad9d7 2220 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
43815482 2221 rcu_read_unlock();
1da177e4
LT
2222}
2223
2224static void sock_def_write_space(struct sock *sk)
2225{
43815482
ED
2226 struct socket_wq *wq;
2227
2228 rcu_read_lock();
1da177e4
LT
2229
2230 /* Do not wake up a writer until he can make "significant"
2231 * progress. --DaveM
2232 */
e71a4783 2233 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
43815482
ED
2234 wq = rcu_dereference(sk->sk_wq);
2235 if (wq_has_sleeper(wq))
2236 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
37e5540b 2237 POLLWRNORM | POLLWRBAND);
1da177e4
LT
2238
2239 /* Should agree with poll, otherwise some programs break */
2240 if (sock_writeable(sk))
8d8ad9d7 2241 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4
LT
2242 }
2243
43815482 2244 rcu_read_unlock();
1da177e4
LT
2245}
2246
2247static void sock_def_destruct(struct sock *sk)
2248{
a51482bd 2249 kfree(sk->sk_protinfo);
1da177e4
LT
2250}
2251
2252void sk_send_sigurg(struct sock *sk)
2253{
2254 if (sk->sk_socket && sk->sk_socket->file)
2255 if (send_sigurg(&sk->sk_socket->file->f_owner))
8d8ad9d7 2256 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
1da177e4 2257}
2a91525c 2258EXPORT_SYMBOL(sk_send_sigurg);
1da177e4
LT
2259
2260void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2261 unsigned long expires)
2262{
2263 if (!mod_timer(timer, expires))
2264 sock_hold(sk);
2265}
1da177e4
LT
2266EXPORT_SYMBOL(sk_reset_timer);
2267
2268void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2269{
25cc4ae9 2270 if (del_timer(timer))
1da177e4
LT
2271 __sock_put(sk);
2272}
1da177e4
LT
2273EXPORT_SYMBOL(sk_stop_timer);
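/*
 * Example (editorial sketch): the reference-counting contract of these
 * helpers, shown as a hypothetical retransmit timer. sk_reset_timer() takes
 * a sock_hold() when it arms an idle timer; the handler (or sk_stop_timer())
 * is responsible for the matching sock_put().
 */
static void foo_retrans_timer(unsigned long data)
{
        struct sock *sk = (struct sock *)data;

        bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
                /* ... retransmit, possibly re-arm with sk_reset_timer() ... */
        }
        bh_unlock_sock(sk);
        sock_put(sk);           /* pairs with the hold taken when the timer was armed */
}

/*
 * Arming from the transmit path:
 *      sk_reset_timer(sk, &foo->timer, jiffies + foo->rto);
 * Cancelling on close:
 *      sk_stop_timer(sk, &foo->timer);
 */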
2274
2275void sock_init_data(struct socket *sock, struct sock *sk)
2276{
2277 skb_queue_head_init(&sk->sk_receive_queue);
2278 skb_queue_head_init(&sk->sk_write_queue);
2279 skb_queue_head_init(&sk->sk_error_queue);
97fc2f08
CL
2280#ifdef CONFIG_NET_DMA
2281 skb_queue_head_init(&sk->sk_async_wait_queue);
2282#endif
1da177e4
LT
2283
2284 sk->sk_send_head = NULL;
2285
2286 init_timer(&sk->sk_timer);
4ec93edb 2287
1da177e4
LT
2288 sk->sk_allocation = GFP_KERNEL;
2289 sk->sk_rcvbuf = sysctl_rmem_default;
2290 sk->sk_sndbuf = sysctl_wmem_default;
2291 sk->sk_state = TCP_CLOSE;
972692e0 2292 sk_set_socket(sk, sock);
1da177e4
LT
2293
2294 sock_set_flag(sk, SOCK_ZAPPED);
2295
e71a4783 2296 if (sock) {
1da177e4 2297 sk->sk_type = sock->type;
43815482 2298 sk->sk_wq = sock->wq;
1da177e4
LT
2299 sock->sk = sk;
2300 } else
43815482 2301 sk->sk_wq = NULL;
1da177e4 2302
b6c6712a 2303 spin_lock_init(&sk->sk_dst_lock);
1da177e4 2304 rwlock_init(&sk->sk_callback_lock);
443aef0e
PZ
2305 lockdep_set_class_and_name(&sk->sk_callback_lock,
2306 af_callback_keys + sk->sk_family,
2307 af_family_clock_key_strings[sk->sk_family]);
1da177e4
LT
2308
2309 sk->sk_state_change = sock_def_wakeup;
2310 sk->sk_data_ready = sock_def_readable;
2311 sk->sk_write_space = sock_def_write_space;
2312 sk->sk_error_report = sock_def_error_report;
2313 sk->sk_destruct = sock_def_destruct;
2314
5640f768
ED
2315 sk->sk_frag.page = NULL;
2316 sk->sk_frag.offset = 0;
ef64a54f 2317 sk->sk_peek_off = -1;
1da177e4 2318
109f6e39
EB
2319 sk->sk_peer_pid = NULL;
2320 sk->sk_peer_cred = NULL;
1da177e4
LT
2321 sk->sk_write_pending = 0;
2322 sk->sk_rcvlowat = 1;
2323 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
2324 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
2325
f37f0afb 2326 sk->sk_stamp = ktime_set(-1L, 0);
1da177e4 2327
e0d1095a 2328#ifdef CONFIG_NET_RX_BUSY_POLL
06021292 2329 sk->sk_napi_id = 0;
64b0dc51 2330 sk->sk_ll_usec = sysctl_net_busy_read;
06021292
ET
2331#endif
2332
62748f32 2333 sk->sk_max_pacing_rate = ~0U;
7eec4174 2334 sk->sk_pacing_rate = ~0U;
4dc6dc71
ED
2335 /*
2336 * Before updating sk_refcnt, we must commit prior changes to memory
2337 * (Documentation/RCU/rculist_nulls.txt for details)
2338 */
2339 smp_wmb();
1da177e4 2340 atomic_set(&sk->sk_refcnt, 1);
33c732c3 2341 atomic_set(&sk->sk_drops, 0);
1da177e4 2342}
2a91525c 2343EXPORT_SYMBOL(sock_init_data);
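/*
 * Example (editorial sketch): sock_init_data() is normally called right
 * after sk_alloc() in a family's create() callback. The "foo" family,
 * foo_proto and foo_dgram_ops are hypothetical and assumed defined elsewhere.
 */
static int foo_create(struct net *net, struct socket *sock, int protocol,
                      int kern)
{
        struct sock *sk;

        if (sock->type != SOCK_DGRAM)
                return -ESOCKTNOSUPPORT;

        sock->ops = &foo_dgram_ops;
        sk = sk_alloc(net, PF_FOO, GFP_KERNEL, &foo_proto);
        if (!sk)
                return -ENOMEM;

        sock_init_data(sock, sk);       /* queues, timer, default callbacks, refcount */
        sk->sk_protocol = protocol;
        return 0;
}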
1da177e4 2344
b5606c2d 2345void lock_sock_nested(struct sock *sk, int subclass)
1da177e4
LT
2346{
2347 might_sleep();
a5b5bb9a 2348 spin_lock_bh(&sk->sk_lock.slock);
d2e9117c 2349 if (sk->sk_lock.owned)
1da177e4 2350 __lock_sock(sk);
d2e9117c 2351 sk->sk_lock.owned = 1;
a5b5bb9a
IM
2352 spin_unlock(&sk->sk_lock.slock);
2353 /*
2354 * The sk_lock has mutex_lock() semantics here:
2355 */
fcc70d5f 2356 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
a5b5bb9a 2357 local_bh_enable();
1da177e4 2358}
fcc70d5f 2359EXPORT_SYMBOL(lock_sock_nested);
1da177e4 2360
b5606c2d 2361void release_sock(struct sock *sk)
1da177e4 2362{
a5b5bb9a
IM
2363 /*
2364 * The sk_lock has mutex_unlock() semantics:
2365 */
2366 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
2367
2368 spin_lock_bh(&sk->sk_lock.slock);
1da177e4
LT
2369 if (sk->sk_backlog.tail)
2370 __release_sock(sk);
46d3ceab
ED
2371
2372 if (sk->sk_prot->release_cb)
2373 sk->sk_prot->release_cb(sk);
2374
d2e9117c 2375 sk->sk_lock.owned = 0;
a5b5bb9a
IM
2376 if (waitqueue_active(&sk->sk_lock.wq))
2377 wake_up(&sk->sk_lock.wq);
2378 spin_unlock_bh(&sk->sk_lock.slock);
1da177e4
LT
2379}
2380EXPORT_SYMBOL(release_sock);
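/*
 * Example (editorial sketch): the standard process-context bracket. While
 * the lock is owned, softirq input lands on the backlog and is replayed by
 * release_sock() via __release_sock(). Sketch of a helper changing state
 * that must only move under the lock.
 */
static void foo_set_rcvlowat(struct sock *sk, int val)
{
        lock_sock(sk);
        sk->sk_rcvlowat = val ? val : 1;
        release_sock(sk);
}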
2381
8a74ad60
ED
2382/**
2383 * lock_sock_fast - fast version of lock_sock
2384 * @sk: socket
2385 *
 2386 * This version should be used for very small sections, where the process won't block.
 2387 * It returns false if the fast path is taken:
 2388 * sk_lock.slock locked, owned = 0, BH disabled
 2389 * It returns true if the slow path is taken:
 2390 * sk_lock.slock unlocked, owned = 1, BH enabled
2391 */
2392bool lock_sock_fast(struct sock *sk)
2393{
2394 might_sleep();
2395 spin_lock_bh(&sk->sk_lock.slock);
2396
2397 if (!sk->sk_lock.owned)
2398 /*
2399 * Note : We must disable BH
2400 */
2401 return false;
2402
2403 __lock_sock(sk);
2404 sk->sk_lock.owned = 1;
2405 spin_unlock(&sk->sk_lock.slock);
2406 /*
2407 * The sk_lock has mutex_lock() semantics here:
2408 */
2409 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2410 local_bh_enable();
2411 return true;
2412}
2413EXPORT_SYMBOL(lock_sock_fast);
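/*
 * Example (editorial sketch): the fast-lock pattern for a short,
 * non-blocking critical section. The bool returned by lock_sock_fast()
 * must be passed back to unlock_sock_fast() so the matching unlock path runs.
 */
static void foo_flush_receive_queue(struct sock *sk)
{
        bool slow = lock_sock_fast(sk);

        skb_queue_purge(&sk->sk_receive_queue);
        unlock_sock_fast(sk, slow);
}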
2414
1da177e4 2415int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
4ec93edb 2416{
b7aa0bf7 2417 struct timeval tv;
1da177e4 2418 if (!sock_flag(sk, SOCK_TIMESTAMP))
20d49473 2419 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
b7aa0bf7
ED
2420 tv = ktime_to_timeval(sk->sk_stamp);
2421 if (tv.tv_sec == -1)
1da177e4 2422 return -ENOENT;
b7aa0bf7
ED
2423 if (tv.tv_sec == 0) {
2424 sk->sk_stamp = ktime_get_real();
2425 tv = ktime_to_timeval(sk->sk_stamp);
2426 }
2427 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
4ec93edb 2428}
1da177e4
LT
2429EXPORT_SYMBOL(sock_get_timestamp);
2430
ae40eb1e
ED
2431int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2432{
2433 struct timespec ts;
2434 if (!sock_flag(sk, SOCK_TIMESTAMP))
20d49473 2435 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
ae40eb1e
ED
2436 ts = ktime_to_timespec(sk->sk_stamp);
2437 if (ts.tv_sec == -1)
2438 return -ENOENT;
2439 if (ts.tv_sec == 0) {
2440 sk->sk_stamp = ktime_get_real();
2441 ts = ktime_to_timespec(sk->sk_stamp);
2442 }
2443 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2444}
2445EXPORT_SYMBOL(sock_get_timestampns);
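/*
 * Example (editorial sketch): these helpers are usually exposed through a
 * family's ioctl handler for SIOCGSTAMP/SIOCGSTAMPNS; the default case is
 * the family's own business. "foo" is hypothetical.
 */
static int foo_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        struct sock *sk = sock->sk;

        switch (cmd) {
        case SIOCGSTAMP:
                return sock_get_timestamp(sk, (struct timeval __user *)arg);
        case SIOCGSTAMPNS:
                return sock_get_timestampns(sk, (struct timespec __user *)arg);
        default:
                return -ENOIOCTLCMD;
        }
}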
2446
20d49473 2447void sock_enable_timestamp(struct sock *sk, int flag)
4ec93edb 2448{
20d49473 2449 if (!sock_flag(sk, flag)) {
08e29af3
ED
2450 unsigned long previous_flags = sk->sk_flags;
2451
20d49473
PO
2452 sock_set_flag(sk, flag);
2453 /*
2454 * we just set one of the two flags which require net
2455 * time stamping, but time stamping might have been on
2456 * already because of the other one
2457 */
08e29af3 2458 if (!(previous_flags & SK_FLAGS_TIMESTAMP))
20d49473 2459 net_enable_timestamp();
1da177e4
LT
2460 }
2461}
1da177e4 2462
cb820f8e
RC
2463int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
2464 int level, int type)
2465{
2466 struct sock_exterr_skb *serr;
2467 struct sk_buff *skb, *skb2;
2468 int copied, err;
2469
2470 err = -EAGAIN;
2471 skb = skb_dequeue(&sk->sk_error_queue);
2472 if (skb == NULL)
2473 goto out;
2474
2475 copied = skb->len;
2476 if (copied > len) {
2477 msg->msg_flags |= MSG_TRUNC;
2478 copied = len;
2479 }
2480 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
2481 if (err)
2482 goto out_free_skb;
2483
2484 sock_recv_timestamp(msg, sk, skb);
2485
2486 serr = SKB_EXT_ERR(skb);
2487 put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
2488
2489 msg->msg_flags |= MSG_ERRQUEUE;
2490 err = copied;
2491
2492 /* Reset and regenerate socket error */
2493 spin_lock_bh(&sk->sk_error_queue.lock);
2494 sk->sk_err = 0;
2495 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
2496 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
2497 spin_unlock_bh(&sk->sk_error_queue.lock);
2498 sk->sk_error_report(sk);
2499 } else
2500 spin_unlock_bh(&sk->sk_error_queue.lock);
2501
2502out_free_skb:
2503 kfree_skb(skb);
2504out:
2505 return err;
2506}
2507EXPORT_SYMBOL(sock_recv_errqueue);
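/*
 * Example (editorial sketch): a protocol recvmsg() typically diverts
 * MSG_ERRQUEUE reads to this helper before touching the normal receive
 * queue. The level/type values below are placeholders; each family passes
 * its own cmsg level and type.
 */
static int foo_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                       size_t len, int noblock, int flags, int *addr_len)
{
        if (flags & MSG_ERRQUEUE)
                return sock_recv_errqueue(sk, msg, len, SOL_SOCKET, 0);

        /* ... normal receive path ... */
        return -EAGAIN;
}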
2508
1da177e4
LT
2509/*
 2510 * Get a socket option on a socket.
2511 *
2512 * FIX: POSIX 1003.1g is very ambiguous here. It states that
2513 * asynchronous errors should be reported by getsockopt. We assume
 2514 * this means if you specify SO_ERROR (otherwise what's the point of it).
2515 */
2516int sock_common_getsockopt(struct socket *sock, int level, int optname,
2517 char __user *optval, int __user *optlen)
2518{
2519 struct sock *sk = sock->sk;
2520
2521 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2522}
1da177e4
LT
2523EXPORT_SYMBOL(sock_common_getsockopt);
2524
3fdadf7d 2525#ifdef CONFIG_COMPAT
543d9cfe
ACM
2526int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2527 char __user *optval, int __user *optlen)
3fdadf7d
DM
2528{
2529 struct sock *sk = sock->sk;
2530
1e51f951 2531 if (sk->sk_prot->compat_getsockopt != NULL)
543d9cfe 2532 return sk->sk_prot->compat_getsockopt(sk, level, optname,
 2533 optval, optlen);
3fdadf7d
DM
2534 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2535}
2536EXPORT_SYMBOL(compat_sock_common_getsockopt);
2537#endif
2538
1da177e4
LT
2539int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
2540 struct msghdr *msg, size_t size, int flags)
2541{
2542 struct sock *sk = sock->sk;
2543 int addr_len = 0;
2544 int err;
2545
2546 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
2547 flags & ~MSG_DONTWAIT, &addr_len);
2548 if (err >= 0)
2549 msg->msg_namelen = addr_len;
2550 return err;
2551}
1da177e4
LT
2552EXPORT_SYMBOL(sock_common_recvmsg);
2553
2554/*
2555 * Set socket options on an inet socket.
2556 */
2557int sock_common_setsockopt(struct socket *sock, int level, int optname,
b7058842 2558 char __user *optval, unsigned int optlen)
1da177e4
LT
2559{
2560 struct sock *sk = sock->sk;
2561
2562 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2563}
1da177e4
LT
2564EXPORT_SYMBOL(sock_common_setsockopt);
2565
3fdadf7d 2566#ifdef CONFIG_COMPAT
543d9cfe 2567int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
b7058842 2568 char __user *optval, unsigned int optlen)
3fdadf7d
DM
2569{
2570 struct sock *sk = sock->sk;
2571
543d9cfe
ACM
2572 if (sk->sk_prot->compat_setsockopt != NULL)
2573 return sk->sk_prot->compat_setsockopt(sk, level, optname,
2574 optval, optlen);
3fdadf7d
DM
2575 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2576}
2577EXPORT_SYMBOL(compat_sock_common_setsockopt);
2578#endif
2579
1da177e4
LT
2580void sk_common_release(struct sock *sk)
2581{
2582 if (sk->sk_prot->destroy)
2583 sk->sk_prot->destroy(sk);
2584
2585 /*
 2586 * Observation: when sk_common_release() is called, processes no longer
 2587 * have access to the socket, but the network stack still does.
2588 * Step one, detach it from networking:
2589 *
2590 * A. Remove from hash tables.
2591 */
2592
2593 sk->sk_prot->unhash(sk);
2594
2595 /*
 2596 * At this point the socket cannot receive new packets, but it is possible
 2597 * that some packets are in flight, because some CPU runs the receiver and
 2598 * did a hash table lookup before we unhashed the socket. They will reach
 2599 * the receive queue and be purged by the socket destructor.
 2600 *
 2601 * Also we still have packets pending on the receive queue and probably
 2602 * our own packets waiting in device queues. sock_destroy will drain the
 2603 * receive queue, but transmitted packets will delay socket destruction
 2604 * until the last reference is released.
2605 */
2606
2607 sock_orphan(sk);
2608
2609 xfrm_sk_free_policy(sk);
2610
e6848976 2611 sk_refcnt_debug_release(sk);
5640f768
ED
2612
2613 if (sk->sk_frag.page) {
2614 put_page(sk->sk_frag.page);
2615 sk->sk_frag.page = NULL;
2616 }
2617
1da177e4
LT
2618 sock_put(sk);
2619}
1da177e4
LT
2620EXPORT_SYMBOL(sk_common_release);
2621
13ff3d6f
PE
2622#ifdef CONFIG_PROC_FS
2623#define PROTO_INUSE_NR 64 /* should be enough for the first time */
1338d466
PE
2624struct prot_inuse {
2625 int val[PROTO_INUSE_NR];
2626};
13ff3d6f
PE
2627
2628static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
70ee1159
PE
2629
2630#ifdef CONFIG_NET_NS
2631void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2632{
d6d9ca0f 2633 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
70ee1159
PE
2634}
2635EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2636
2637int sock_prot_inuse_get(struct net *net, struct proto *prot)
2638{
2639 int cpu, idx = prot->inuse_idx;
2640 int res = 0;
2641
2642 for_each_possible_cpu(cpu)
2643 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2644
2645 return res >= 0 ? res : 0;
2646}
2647EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2648
2c8c1e72 2649static int __net_init sock_inuse_init_net(struct net *net)
70ee1159
PE
2650{
2651 net->core.inuse = alloc_percpu(struct prot_inuse);
2652 return net->core.inuse ? 0 : -ENOMEM;
2653}
2654
2c8c1e72 2655static void __net_exit sock_inuse_exit_net(struct net *net)
70ee1159
PE
2656{
2657 free_percpu(net->core.inuse);
2658}
2659
2660static struct pernet_operations net_inuse_ops = {
2661 .init = sock_inuse_init_net,
2662 .exit = sock_inuse_exit_net,
2663};
2664
2665static __init int net_inuse_init(void)
2666{
2667 if (register_pernet_subsys(&net_inuse_ops))
2668 panic("Cannot initialize net inuse counters");
2669
2670 return 0;
2671}
2672
2673core_initcall(net_inuse_init);
2674#else
1338d466
PE
2675static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2676
c29a0bc4 2677void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
1338d466 2678{
d6d9ca0f 2679 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
1338d466
PE
2680}
2681EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2682
c29a0bc4 2683int sock_prot_inuse_get(struct net *net, struct proto *prot)
1338d466
PE
2684{
2685 int cpu, idx = prot->inuse_idx;
2686 int res = 0;
2687
2688 for_each_possible_cpu(cpu)
2689 res += per_cpu(prot_inuse, cpu).val[idx];
2690
2691 return res >= 0 ? res : 0;
2692}
2693EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
70ee1159 2694#endif
13ff3d6f
PE
2695
2696static void assign_proto_idx(struct proto *prot)
2697{
2698 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2699
2700 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
e005d193 2701 pr_err("PROTO_INUSE_NR exhausted\n");
13ff3d6f
PE
2702 return;
2703 }
2704
2705 set_bit(prot->inuse_idx, proto_inuse_idx);
2706}
2707
2708static void release_proto_idx(struct proto *prot)
2709{
2710 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
2711 clear_bit(prot->inuse_idx, proto_inuse_idx);
2712}
2713#else
2714static inline void assign_proto_idx(struct proto *prot)
2715{
2716}
2717
2718static inline void release_proto_idx(struct proto *prot)
2719{
2720}
2721#endif
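/*
 * Example (editorial sketch): protocols keep the per-cpu inuse counter
 * (reported through /proc/net/protocols) in step with their lookup tables
 * from their hash() and unhash() callbacks; sock_prot_inuse_add() is a
 * no-op stub when CONFIG_PROC_FS is off. "foo" is hypothetical.
 */
static void foo_hash(struct sock *sk)
{
        /* ... insert sk into the family's lookup structure ... */
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}

static void foo_unhash(struct sock *sk)
{
        /* ... remove sk from the lookup structure ... */
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
}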
2722
b733c007
PE
2723int proto_register(struct proto *prot, int alloc_slab)
2724{
1da177e4
LT
2725 if (alloc_slab) {
 2726 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
271b72c7 2727 SLAB_HWCACHE_ALIGN | prot->slab_flags,
2728 NULL);
1da177e4
LT
2729
 2730 if (prot->slab == NULL) {
e005d193 2731 pr_crit("%s: Can't create sock SLAB cache!\n",
2732 prot->name);
60e7663d 2733 goto out;
1da177e4 2734 }
2e6599cb
ACM
2735
2736 if (prot->rsk_prot != NULL) {
faf23422 2737 prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
7e56b5d6 2738 if (prot->rsk_prot->slab_name == NULL)
2e6599cb 2739 goto out_free_sock_slab;
2740
7e56b5d6 2741 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
2e6599cb 2742 prot->rsk_prot->obj_size, 0,
20c2df83 2743 SLAB_HWCACHE_ALIGN, NULL);
2e6599cb
ACM
2744
 2745 if (prot->rsk_prot->slab == NULL) {
e005d193 2746 pr_crit("%s: Can't create request sock SLAB cache!\n",
2747 prot->name);
2e6599cb
ACM
2748 goto out_free_request_sock_slab_name;
2749 }
2750 }
8feaf0c0 2751
6d6ee43e 2752 if (prot->twsk_prot != NULL) {
faf23422 2753 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
8feaf0c0 2754
7e56b5d6 2755 if (prot->twsk_prot->twsk_slab_name == NULL)
8feaf0c0 2756 goto out_free_request_sock_slab;
2757
6d6ee43e 2758 prot->twsk_prot->twsk_slab =
7e56b5d6 2759 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
6d6ee43e 2760 prot->twsk_prot->twsk_obj_size,
3ab5aee7 2761 0,
2762 SLAB_HWCACHE_ALIGN |
2763 prot->slab_flags,
20c2df83 2764 NULL);
6d6ee43e 2765 if (prot->twsk_prot->twsk_slab == NULL)
8feaf0c0
ACM
2766 goto out_free_timewait_sock_slab_name;
2767 }
1da177e4
LT
2768 }
2769
36b77a52 2770 mutex_lock(&proto_list_mutex);
1da177e4 2771 list_add(&prot->node, &proto_list);
13ff3d6f 2772 assign_proto_idx(prot);
36b77a52 2773 mutex_unlock(&proto_list_mutex);
b733c007
PE
2774 return 0;
2775
8feaf0c0 2776out_free_timewait_sock_slab_name:
7e56b5d6 2777 kfree(prot->twsk_prot->twsk_slab_name);
8feaf0c0
ACM
2778out_free_request_sock_slab:
2779 if (prot->rsk_prot && prot->rsk_prot->slab) {
2780 kmem_cache_destroy(prot->rsk_prot->slab);
2781 prot->rsk_prot->slab = NULL;
2782 }
2e6599cb 2783out_free_request_sock_slab_name:
72150e9b
DC
2784 if (prot->rsk_prot)
2785 kfree(prot->rsk_prot->slab_name);
2e6599cb
ACM
2786out_free_sock_slab:
2787 kmem_cache_destroy(prot->slab);
2788 prot->slab = NULL;
b733c007
PE
2789out:
2790 return -ENOBUFS;
1da177e4 2791}
1da177e4
LT
2792EXPORT_SYMBOL(proto_register);
2793
2794void proto_unregister(struct proto *prot)
2795{
36b77a52 2796 mutex_lock(&proto_list_mutex);
13ff3d6f 2797 release_proto_idx(prot);
0a3f4358 2798 list_del(&prot->node);
36b77a52 2799 mutex_unlock(&proto_list_mutex);
1da177e4
LT
2800
2801 if (prot->slab != NULL) {
2802 kmem_cache_destroy(prot->slab);
2803 prot->slab = NULL;
2804 }
2805
2e6599cb 2806 if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
2e6599cb 2807 kmem_cache_destroy(prot->rsk_prot->slab);
7e56b5d6 2808 kfree(prot->rsk_prot->slab_name);
2e6599cb
ACM
2809 prot->rsk_prot->slab = NULL;
2810 }
2811
6d6ee43e 2812 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
6d6ee43e 2813 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
7e56b5d6 2814 kfree(prot->twsk_prot->twsk_slab_name);
6d6ee43e 2815 prot->twsk_prot->twsk_slab = NULL;
8feaf0c0 2816 }
1da177e4 2817}
1da177e4
LT
2818EXPORT_SYMBOL(proto_unregister);
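/*
 * Example (editorial sketch): registration boilerplate for a protocol
 * module, with a hypothetical "foo" protocol. Passing 1 as alloc_slab asks
 * proto_register() to create the kmem cache that sk_alloc() will use for
 * this proto.
 */
struct foo_sock {
        struct sock sk;         /* struct sock must come first */
        /* protocol-private state follows */
};

static struct proto foo_proto = {
        .name           = "FOO",
        .owner          = THIS_MODULE,
        .obj_size       = sizeof(struct foo_sock),
};

static int __init foo_module_init(void)
{
        return proto_register(&foo_proto, 1);
}

static void __exit foo_module_exit(void)
{
        proto_unregister(&foo_proto);
}

module_init(foo_module_init);
module_exit(foo_module_exit);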
2819
2820#ifdef CONFIG_PROC_FS
1da177e4 2821static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
36b77a52 2822 __acquires(proto_list_mutex)
1da177e4 2823{
36b77a52 2824 mutex_lock(&proto_list_mutex);
60f0438a 2825 return seq_list_start_head(&proto_list, *pos);
1da177e4
LT
2826}
2827
2828static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2829{
60f0438a 2830 return seq_list_next(v, &proto_list, pos);
1da177e4
LT
2831}
2832
2833static void proto_seq_stop(struct seq_file *seq, void *v)
36b77a52 2834 __releases(proto_list_mutex)
1da177e4 2835{
36b77a52 2836 mutex_unlock(&proto_list_mutex);
1da177e4
LT
2837}
2838
2839static char proto_method_implemented(const void *method)
2840{
2841 return method == NULL ? 'n' : 'y';
2842}
180d8cd9
GC
2843static long sock_prot_memory_allocated(struct proto *proto)
2844{
cb75a36c 2845 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
180d8cd9
GC
2846}
2847
2848static char *sock_prot_memory_pressure(struct proto *proto)
2849{
2850 return proto->memory_pressure != NULL ?
2851 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
2852}
1da177e4
LT
2853
2854static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2855{
180d8cd9 2856
8d987e5c 2857 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
1da177e4 2858 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2859 proto->name,
2860 proto->obj_size,
14e943db 2861 sock_prot_inuse_get(seq_file_net(seq), proto),
180d8cd9
GC
2862 sock_prot_memory_allocated(proto),
2863 sock_prot_memory_pressure(proto),
1da177e4
LT
2864 proto->max_header,
2865 proto->slab == NULL ? "no" : "yes",
2866 module_name(proto->owner),
2867 proto_method_implemented(proto->close),
2868 proto_method_implemented(proto->connect),
2869 proto_method_implemented(proto->disconnect),
2870 proto_method_implemented(proto->accept),
2871 proto_method_implemented(proto->ioctl),
2872 proto_method_implemented(proto->init),
2873 proto_method_implemented(proto->destroy),
2874 proto_method_implemented(proto->shutdown),
2875 proto_method_implemented(proto->setsockopt),
2876 proto_method_implemented(proto->getsockopt),
2877 proto_method_implemented(proto->sendmsg),
2878 proto_method_implemented(proto->recvmsg),
2879 proto_method_implemented(proto->sendpage),
2880 proto_method_implemented(proto->bind),
2881 proto_method_implemented(proto->backlog_rcv),
2882 proto_method_implemented(proto->hash),
2883 proto_method_implemented(proto->unhash),
2884 proto_method_implemented(proto->get_port),
2885 proto_method_implemented(proto->enter_memory_pressure));
2886}
2887
2888static int proto_seq_show(struct seq_file *seq, void *v)
2889{
60f0438a 2890 if (v == &proto_list)
1da177e4
LT
2891 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2892 "protocol",
2893 "size",
2894 "sockets",
2895 "memory",
2896 "press",
2897 "maxhdr",
2898 "slab",
2899 "module",
2900 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2901 else
60f0438a 2902 proto_seq_printf(seq, list_entry(v, struct proto, node));
1da177e4
LT
2903 return 0;
2904}
2905
f690808e 2906static const struct seq_operations proto_seq_ops = {
1da177e4
LT
2907 .start = proto_seq_start,
2908 .next = proto_seq_next,
2909 .stop = proto_seq_stop,
2910 .show = proto_seq_show,
2911};
2912
2913static int proto_seq_open(struct inode *inode, struct file *file)
2914{
14e943db
ED
2915 return seq_open_net(inode, file, &proto_seq_ops,
2916 sizeof(struct seq_net_private));
1da177e4
LT
2917}
2918
9a32144e 2919static const struct file_operations proto_seq_fops = {
1da177e4
LT
2920 .owner = THIS_MODULE,
2921 .open = proto_seq_open,
2922 .read = seq_read,
2923 .llseek = seq_lseek,
14e943db
ED
2924 .release = seq_release_net,
2925};
2926
2927static __net_init int proto_init_net(struct net *net)
2928{
d4beaa66 2929 if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
14e943db
ED
2930 return -ENOMEM;
2931
2932 return 0;
2933}
2934
2935static __net_exit void proto_exit_net(struct net *net)
2936{
ece31ffd 2937 remove_proc_entry("protocols", net->proc_net);
14e943db
ED
2938}
2939
2940
2941static __net_initdata struct pernet_operations proto_net_ops = {
2942 .init = proto_init_net,
2943 .exit = proto_exit_net,
1da177e4
LT
2944};
2945
2946static int __init proto_init(void)
2947{
14e943db 2948 return register_pernet_subsys(&proto_net_ops);
1da177e4
LT
2949}
2950
2951subsys_initcall(proto_init);
2952
2953#endif /* PROC_FS */