net/core/sock.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *		Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>

#include <asm/uaccess.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>

#include <linux/filter.h>

#include <trace/events/sock.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

#include <net/busy_poll.h>

static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

/**
 * sk_ns_capable - General socket capability test
 * @sk: Socket to use a capability on or through
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap in the
 * user namespace @user_ns when the socket was created, and that the
 * current process has it as well.
 */
bool sk_ns_capable(const struct sock *sk,
		   struct user_namespace *user_ns, int cap)
{
	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
		ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(sk_ns_capable);

/**
 * sk_capable - Socket global capability test
 * @sk: Socket to use a capability on or through
 * @cap: The global capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created, and that the current process has it in the
 * initial user namespace (i.e. in all user namespaces).
 */
bool sk_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, &init_user_ns, cap);
}
EXPORT_SYMBOL(sk_capable);

/**
 * sk_net_capable - Network namespace socket capability test
 * @sk: Socket to use a capability on or through
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created, and that the current process has it over the
 * network namespace the socket is a member of.
 */
bool sk_net_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
}
EXPORT_SYMBOL(sk_net_capable);


#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	struct proto *proto;
	int ret = 0;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry(proto, &proto_list, node) {
		if (proto->init_cgroup) {
			ret = proto->init_cgroup(memcg, ss);
			if (ret)
				goto out;
		}
	}

	mutex_unlock(&proto_list_mutex);
	return ret;
out:
	list_for_each_entry_continue_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
	return ret;
}

void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
{
	struct proto *proto;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
}
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#if defined(CONFIG_MEMCG_KMEM)
struct static_key memcg_socket_limit_enabled;
EXPORT_SYMBOL(memcg_socket_limit_enabled);
#endif

/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
  "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
  "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
  "slock-AF_NFC"   , "slock-AF_VSOCK"    , "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
  "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
  "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_MAX"
};

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

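/*
 * A rough worked example of the sizing above (illustrative figures,
 * not from the original file): SKB_TRUESIZE(256) is 256 bytes of
 * payload plus the aligned sizes of struct sk_buff and struct
 * skb_shared_info, typically several hundred bytes on a 64-bit build,
 * so SK_WMEM_MAX/SK_RMEM_MAX (256 such packets) come out in the low
 * hundreds of kilobytes per socket direction.
 */
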
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

int sysctl_tstamp_allow_data __read_mostly = 1;

struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);

/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements.
 */
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation |= __GFP_MEMALLOC;
	static_key_slow_inc(&memalloc_socks);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);

void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_key_slow_dec(&memalloc_socks);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
	 * it has rmem allocations there is a risk that the user of the
	 * socket cannot make forward progress due to exceeding the rmem
	 * limits. By rights, sk_clear_memalloc() should only be called
	 * on sockets being torn down, but warn and reset the accounting if
	 * that assumption breaks.
	 */
	if (WARN_ON(sk->sk_forward_alloc))
		sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);

int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned long pflags = current->flags;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	current->flags |= PF_MEMALLOC;
	ret = sk->sk_backlog_rcv(sk, skb);
	tsk_restore_flags(current, pflags, PF_MEMALLOC);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
				__func__, current->comm, task_pid_nr(current));
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}

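/*
 * Userspace view of the helper above (illustrative sketch, not part of
 * the original file): an application setting a 5 second receive timeout,
 *
 *	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 *
 * reaches sock_set_timeout() via sock_setsockopt() below, which
 * converts the timeval into a jiffies timeout in sk->sk_rcvtimeo,
 * rounding sub-tick microseconds up to the next tick.
 */
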
static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
			warncomm, name);
		warned++;
	}
}

#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))

static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
	if (sk->sk_flags & flags) {
		sk->sk_flags &= ~flags;
		if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
			net_disable_timestamp();
	}
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	err = sk_filter(sk, skb);
	if (err)
		return err;

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* we escape from the rcu protected region, make sure we don't leak
	 * a non-refcounted dst
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);
	return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

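/*
 * Note on the error returns above (summary, not from the original
 * file): -ENOMEM means the receive queue already holds sk_rcvbuf
 * bytes, while -ENOBUFS means the memory-accounting charge failed.
 * Both paths bump sk->sk_drops, which SO_RXQ_OVFL can expose to
 * userspace via the dropcount recorded on each queued skb.
 */
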
int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

static int sock_setbindtodevice(struct sock *sk, char __user *optval,
				int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}

static int sock_getbindtodevice(struct sock *sk, char __user *optval,
				int __user *optlen, int len)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];

	if (sk->sk_bound_dev_if == 0) {
		len = 0;
		goto zero;
	}

	ret = -EINVAL;
	if (len < IFNAMSIZ)
		goto out;

	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
	if (ret)
		goto out;

	len = strlen(devname) + 1;

	ret = -EFAULT;
	if (copy_to_user(optval, devname, len))
		goto out;

zero:
	ret = -EFAULT;
	if (put_user(len, optlen))
		goto out;

	ret = 0;

out:
#endif

	return ret;
}

static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (valbool)
		sock_set_flag(sk, bit);
	else
		sock_reset_flag(sk, bit);
}

bool sk_mc_loop(struct sock *sk)
{
	if (dev_recursion_level())
		return false;
	if (!sk)
		return true;
	switch (sk->sk_family) {
	case AF_INET:
		return inet_sk(sk)->mc_loop;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		return inet6_sk(sk)->mc_loop;
#endif
	}
	WARN_ON(1);
	return true;
}
EXPORT_SYMBOL(sk_mc_loop);

LT
675/*
676 * This is meant for all protocols to use and covers goings on
677 * at the socket level. Everything here is generic.
678 */
679
680int sock_setsockopt(struct socket *sock, int level, int optname,
b7058842 681 char __user *optval, unsigned int optlen)
1da177e4 682{
2a91525c 683 struct sock *sk = sock->sk;
1da177e4
LT
684 int val;
685 int valbool;
686 struct linger ling;
687 int ret = 0;
4ec93edb 688
1da177e4
LT
689 /*
690 * Options without arguments
691 */
692
4878809f 693 if (optname == SO_BINDTODEVICE)
c91f6df2 694 return sock_setbindtodevice(sk, optval, optlen);
4878809f 695
e71a4783
SH
696 if (optlen < sizeof(int))
697 return -EINVAL;
4ec93edb 698
1da177e4
LT
699 if (get_user(val, (int __user *)optval))
700 return -EFAULT;
4ec93edb 701
2a91525c 702 valbool = val ? 1 : 0;
1da177e4
LT
703
704 lock_sock(sk);
705
2a91525c 706 switch (optname) {
e71a4783 707 case SO_DEBUG:
2a91525c 708 if (val && !capable(CAP_NET_ADMIN))
e71a4783 709 ret = -EACCES;
2a91525c 710 else
c0ef877b 711 sock_valbool_flag(sk, SOCK_DBG, valbool);
e71a4783
SH
712 break;
713 case SO_REUSEADDR:
4a17fd52 714 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
e71a4783 715 break;
055dc21a
TH
716 case SO_REUSEPORT:
717 sk->sk_reuseport = valbool;
718 break;
e71a4783 719 case SO_TYPE:
49c794e9 720 case SO_PROTOCOL:
0d6038ee 721 case SO_DOMAIN:
e71a4783
SH
722 case SO_ERROR:
723 ret = -ENOPROTOOPT;
724 break;
725 case SO_DONTROUTE:
c0ef877b 726 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
e71a4783
SH
727 break;
728 case SO_BROADCAST:
729 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
730 break;
731 case SO_SNDBUF:
732 /* Don't error on this BSD doesn't and if you think
82981930
ED
733 * about it this is right. Otherwise apps have to
734 * play 'guess the biggest size' games. RCVBUF/SNDBUF
735 * are treated in BSD as hints
736 */
737 val = min_t(u32, val, sysctl_wmem_max);
b0573dea 738set_sndbuf:
e71a4783 739 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
82981930
ED
740 sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
741 /* Wake up sending tasks if we upped the value. */
e71a4783
SH
742 sk->sk_write_space(sk);
743 break;
1da177e4 744
e71a4783
SH
745 case SO_SNDBUFFORCE:
746 if (!capable(CAP_NET_ADMIN)) {
747 ret = -EPERM;
748 break;
749 }
750 goto set_sndbuf;
b0573dea 751
e71a4783
SH
752 case SO_RCVBUF:
753 /* Don't error on this BSD doesn't and if you think
82981930
ED
754 * about it this is right. Otherwise apps have to
755 * play 'guess the biggest size' games. RCVBUF/SNDBUF
756 * are treated in BSD as hints
757 */
758 val = min_t(u32, val, sysctl_rmem_max);
b0573dea 759set_rcvbuf:
e71a4783
SH
760 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
761 /*
762 * We double it on the way in to account for
763 * "struct sk_buff" etc. overhead. Applications
764 * assume that the SO_RCVBUF setting they make will
765 * allow that much actual data to be received on that
766 * socket.
767 *
768 * Applications are unaware that "struct sk_buff" and
769 * other overheads allocate from the receive buffer
770 * during socket buffer allocation.
771 *
772 * And after considering the possible alternatives,
773 * returning the value we actually used in getsockopt
774 * is the most desirable behavior.
775 */
82981930 776 sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
e71a4783
SH
777 break;
778
779 case SO_RCVBUFFORCE:
780 if (!capable(CAP_NET_ADMIN)) {
781 ret = -EPERM;
1da177e4 782 break;
e71a4783
SH
783 }
784 goto set_rcvbuf;
1da177e4 785
e71a4783 786 case SO_KEEPALIVE:
1da177e4 787#ifdef CONFIG_INET
3e10986d
ED
788 if (sk->sk_protocol == IPPROTO_TCP &&
789 sk->sk_type == SOCK_STREAM)
e71a4783 790 tcp_set_keepalive(sk, valbool);
1da177e4 791#endif
e71a4783
SH
792 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
793 break;
794
795 case SO_OOBINLINE:
796 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
797 break;
798
799 case SO_NO_CHECK:
28448b80 800 sk->sk_no_check_tx = valbool;
e71a4783
SH
801 break;
802
803 case SO_PRIORITY:
5e1fccc0
EB
804 if ((val >= 0 && val <= 6) ||
805 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
e71a4783
SH
806 sk->sk_priority = val;
807 else
808 ret = -EPERM;
809 break;
810
811 case SO_LINGER:
812 if (optlen < sizeof(ling)) {
813 ret = -EINVAL; /* 1003.1g */
1da177e4 814 break;
e71a4783 815 }
2a91525c 816 if (copy_from_user(&ling, optval, sizeof(ling))) {
e71a4783 817 ret = -EFAULT;
1da177e4 818 break;
e71a4783
SH
819 }
820 if (!ling.l_onoff)
821 sock_reset_flag(sk, SOCK_LINGER);
822 else {
1da177e4 823#if (BITS_PER_LONG == 32)
e71a4783
SH
824 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
825 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
1da177e4 826 else
e71a4783
SH
827#endif
828 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
829 sock_set_flag(sk, SOCK_LINGER);
830 }
831 break;
832
833 case SO_BSDCOMPAT:
834 sock_warn_obsolete_bsdism("setsockopt");
835 break;
836
837 case SO_PASSCRED:
838 if (valbool)
839 set_bit(SOCK_PASSCRED, &sock->flags);
840 else
841 clear_bit(SOCK_PASSCRED, &sock->flags);
842 break;
843
844 case SO_TIMESTAMP:
92f37fd2 845 case SO_TIMESTAMPNS:
e71a4783 846 if (valbool) {
92f37fd2
ED
847 if (optname == SO_TIMESTAMP)
848 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
849 else
850 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
e71a4783 851 sock_set_flag(sk, SOCK_RCVTSTAMP);
20d49473 852 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
92f37fd2 853 } else {
e71a4783 854 sock_reset_flag(sk, SOCK_RCVTSTAMP);
92f37fd2
ED
855 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
856 }
e71a4783
SH
857 break;
858
20d49473
PO
859 case SO_TIMESTAMPING:
860 if (val & ~SOF_TIMESTAMPING_MASK) {
f249fb78 861 ret = -EINVAL;
20d49473
PO
862 break;
863 }
b245be1f 864
09c2d251 865 if (val & SOF_TIMESTAMPING_OPT_ID &&
4ed2d765
WB
866 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
867 if (sk->sk_protocol == IPPROTO_TCP) {
868 if (sk->sk_state != TCP_ESTABLISHED) {
869 ret = -EINVAL;
870 break;
871 }
872 sk->sk_tskey = tcp_sk(sk)->snd_una;
873 } else {
874 sk->sk_tskey = 0;
875 }
876 }
b9f40e21 877 sk->sk_tsflags = val;
20d49473
PO
878 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
879 sock_enable_timestamp(sk,
880 SOCK_TIMESTAMPING_RX_SOFTWARE);
881 else
882 sock_disable_timestamp(sk,
08e29af3 883 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
20d49473
PO
884 break;
885
e71a4783
SH
886 case SO_RCVLOWAT:
887 if (val < 0)
888 val = INT_MAX;
889 sk->sk_rcvlowat = val ? : 1;
890 break;
891
892 case SO_RCVTIMEO:
893 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
894 break;
895
896 case SO_SNDTIMEO:
897 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
898 break;
1da177e4 899
e71a4783
SH
900 case SO_ATTACH_FILTER:
901 ret = -EINVAL;
902 if (optlen == sizeof(struct sock_fprog)) {
903 struct sock_fprog fprog;
1da177e4 904
e71a4783
SH
905 ret = -EFAULT;
906 if (copy_from_user(&fprog, optval, sizeof(fprog)))
1da177e4 907 break;
e71a4783
SH
908
909 ret = sk_attach_filter(&fprog, sk);
910 }
911 break;
912
89aa0758
AS
913 case SO_ATTACH_BPF:
914 ret = -EINVAL;
915 if (optlen == sizeof(u32)) {
916 u32 ufd;
917
918 ret = -EFAULT;
919 if (copy_from_user(&ufd, optval, sizeof(ufd)))
920 break;
921
922 ret = sk_attach_bpf(ufd, sk);
923 }
924 break;
925
e71a4783 926 case SO_DETACH_FILTER:
55b33325 927 ret = sk_detach_filter(sk);
e71a4783 928 break;
1da177e4 929
d59577b6
VB
930 case SO_LOCK_FILTER:
931 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
932 ret = -EPERM;
933 else
934 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
935 break;
936
e71a4783
SH
937 case SO_PASSSEC:
938 if (valbool)
939 set_bit(SOCK_PASSSEC, &sock->flags);
940 else
941 clear_bit(SOCK_PASSSEC, &sock->flags);
942 break;
4a19ec58 943 case SO_MARK:
5e1fccc0 944 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
4a19ec58 945 ret = -EPERM;
2a91525c 946 else
4a19ec58 947 sk->sk_mark = val;
4a19ec58 948 break;
877ce7c1 949
1da177e4
LT
950 /* We implement the SO_SNDLOWAT etc to
951 not be settable (1003.1g 5.3) */
3b885787 952 case SO_RXQ_OVFL:
8083f0fc 953 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
3b885787 954 break;
6e3e939f
JB
955
956 case SO_WIFI_STATUS:
957 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
958 break;
959
ef64a54f
PE
960 case SO_PEEK_OFF:
961 if (sock->ops->set_peek_off)
12663bfc 962 ret = sock->ops->set_peek_off(sk, val);
ef64a54f
PE
963 else
964 ret = -EOPNOTSUPP;
965 break;
3bdc0eba
BG
966
967 case SO_NOFCS:
968 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
969 break;
970
7d4c04fc
KJ
971 case SO_SELECT_ERR_QUEUE:
972 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
973 break;
974
e0d1095a 975#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51 976 case SO_BUSY_POLL:
dafcc438
ET
977 /* allow unprivileged users to decrease the value */
978 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
979 ret = -EPERM;
980 else {
981 if (val < 0)
982 ret = -EINVAL;
983 else
984 sk->sk_ll_usec = val;
985 }
986 break;
987#endif
62748f32
ED
988
989 case SO_MAX_PACING_RATE:
990 sk->sk_max_pacing_rate = val;
991 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
992 sk->sk_max_pacing_rate);
993 break;
994
e71a4783
SH
995 default:
996 ret = -ENOPROTOOPT;
997 break;
4ec93edb 998 }
1da177e4
LT
999 release_sock(sk);
1000 return ret;
1001}
2a91525c 1002EXPORT_SYMBOL(sock_setsockopt);
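/*
 * Userspace view of the SO_RCVBUF handling above (illustrative sketch,
 * not from the original file):
 *
 *	int val = 65536;
 *	socklen_t len = sizeof(val);
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, &len);
 *
 * val reads back as 131072: the kernel stores val * 2 in sk_rcvbuf to
 * cover struct sk_buff and related overhead, and getsockopt reports
 * the value actually in use.
 */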


static void cred_to_ucred(struct pid *pid, const struct cred *cred,
			  struct ucred *ucred)
{
	ucred->pid = pid_vnr(pid);
	ucred->uid = ucred->gid = -1;
	if (cred) {
		struct user_namespace *current_ns = current_user_ns();

		ucred->uid = from_kuid_munged(current_ns, cred->euid);
		ucred->gid = from_kgid_munged(current_ns, cred->egid);
	}
}

int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_REUSEPORT:
		v.val = sk->sk_reuseport;
		break;

	case SO_KEEPALIVE:
		v.val = sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_PROTOCOL:
		v.val = sk->sk_protocol;
		break;

	case SO_DOMAIN:
		v.val = sk->sk_family;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check_tx;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv		= sizeof(v.ling);
		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger	= sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPING:
		v.val = sk->sk_tsflags;
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_PEERCRED:
	{
		struct ucred peercred;
		if (len > sizeof(peercred))
			len = sizeof(peercred);
		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
		if (copy_to_user(optval, &peercred, len))
			return -EFAULT;
		goto lenout;
	}

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	case SO_MARK:
		v.val = sk->sk_mark;
		break;

	case SO_RXQ_OVFL:
		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
		break;

	case SO_WIFI_STATUS:
		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
		break;

	case SO_PEEK_OFF:
		if (!sock->ops->set_peek_off)
			return -EOPNOTSUPP;

		v.val = sk->sk_peek_off;
		break;
	case SO_NOFCS:
		v.val = sock_flag(sk, SOCK_NOFCS);
		break;

	case SO_BINDTODEVICE:
		return sock_getbindtodevice(sk, optval, optlen, len);

	case SO_GET_FILTER:
		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
		if (len < 0)
			return len;

		goto lenout;

	case SO_LOCK_FILTER:
		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
		break;

	case SO_BPF_EXTENSIONS:
		v.val = bpf_tell_extensions();
		break;

	case SO_SELECT_ERR_QUEUE:
		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
		break;

#ifdef CONFIG_NET_RX_BUSY_POLL
	case SO_BUSY_POLL:
		v.val = sk->sk_ll_usec;
		break;
#endif

	case SO_MAX_PACING_RATE:
		v.val = sk->sk_max_pacing_rate;
		break;

	case SO_INCOMING_CPU:
		v.val = sk->sk_incoming_cpu;
		break;

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}

/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarily, because of RCU lookups. sk_node should also be left as is.
 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
	void *sptr = nsk->sk_security;
#endif
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}

void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
{
	unsigned long nulls1, nulls2;

	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
	if (nulls1 > nulls2)
		swap(nulls1, nulls2);

	if (nulls1 != 0)
		memset((char *)sk, 0, nulls1);
	memset((char *)sk + nulls1 + sizeof(void *), 0,
	       nulls2 - nulls1 - sizeof(void *));
	memset((char *)sk + nulls2 + sizeof(void *), 0,
	       size - nulls2 - sizeof(void *));
}
EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
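/*
 * Background note (explanatory addition, not from the original file):
 * the two pointer-sized words skipped above are skc_node.next and
 * skc_portaddr_node.next.  On SLAB_DESTROY_BY_RCU socket caches these
 * may still be traversed by concurrent RCU lookups as hlist_nulls
 * links, so clearing on allocation must zero around them rather than
 * wiping the whole object.
 */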

static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
		int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (priority & __GFP_ZERO) {
			if (prot->clear_sk)
				prot->clear_sk(sk, prot->obj_size);
			else
				sk_prot_clear_nulls(sk, prot->obj_size);
		}
	} else
		sk = kmalloc(prot->obj_size, priority);

	if (sk != NULL) {
		kmemcheck_annotate_bitfield(sk, flags);

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
		sk_tx_queue_clear(sk);
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

static void sk_prot_free(struct proto *prot, struct sock *sk)
{
	struct kmem_cache *slab;
	struct module *owner;

	owner = prot->owner;
	slab = prot->slab;

	security_sk_free(sk);
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
void sock_update_netprioidx(struct sock *sk)
{
	if (in_interrupt())
		return;

	sk->sk_cgrp_prioidx = task_netprioidx(current);
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif

/**
 *	sk_alloc - All socket objects are allocated here
 *	@net: the applicable net namespace
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sock_lock_init(sk);
		sock_net_set(sk, get_net(net));
		atomic_set(&sk->sk_wmem_alloc, 1);

		sock_update_classid(sk);
		sock_update_netprioidx(sk);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);

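/*
 * Note (explanatory addition, not from the original file): the
 * atomic_set(&sk->sk_wmem_alloc, 1) in sk_alloc() is the "socket is
 * alive" reference; sk_free() below drops it, and the socket is only
 * destroyed once in-flight packets have released their write-memory
 * references as well (see sock_wfree()).
 */
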
static void __sk_free(struct sock *sk)
{
	struct sk_filter *filter;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference_check(sk->sk_filter,
				       atomic_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);
	put_net(sock_net(sk));
	sk_prot_free(sk->sk_prot_creator, sk);
}

void sk_free(struct sock *sk)
{
	/*
	 * We subtract one from sk_wmem_alloc and can tell whether
	 * some packets are still in some tx queue.
	 * If not zero, sock_wfree() will call __sk_free(sk) later
	 */
	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sk_free);

/*
 * The last sock_put should drop a reference to sk->sk_net. It has already
 * been dropped in sk_change_net. Taking a reference to a stopping namespace
 * is not an option.
 * Take a reference to the socket to remove it from the hash _alive_ and
 * after that destroy it in the context of init_net.
 */
void sk_release_kernel(struct sock *sk)
{
	if (sk == NULL || sk->sk_socket == NULL)
		return;

	sock_hold(sk);
	sock_release(sk->sk_socket);
	release_net(sock_net(sk));
	sock_net_set(sk, get_net(&init_net));
	sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);

static void sk_update_clone(const struct sock *sk, struct sock *newsk)
{
	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
		sock_update_memcg(newsk);
}

/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk;
	bool is_charged = true;

	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		/* SANITY */
		get_net(sock_net(newsk));
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
		newsk->sk_backlog.len = 0;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		/*
		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
		 */
		atomic_set(&newsk->sk_wmem_alloc, 1);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);

		spin_lock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
				af_callback_keys + newsk->sk_family,
				af_family_clock_key_strings[newsk->sk_family]);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head	= NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = rcu_dereference_protected(newsk->sk_filter, 1);
		if (filter != NULL)
			/* though it's an empty new sock, the charging may fail
			 * if sysctl_optmem_max was changed between creation of
			 * original socket and cloning
			 */
			is_charged = sk_filter_charge(newsk, filter);

		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) {
			/* It is still a raw copy of the parent, so invalidate
			 * the destructor and do a plain sk_free() */
			newsk->sk_destruct = NULL;
			bh_unlock_sock(newsk);
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err	   = 0;
		newsk->sk_priority = 0;
		newsk->sk_incoming_cpu = raw_smp_processor_id();
		/*
		 * Before updating sk_refcnt, we must commit prior changes to memory
		 * (Documentation/RCU/rculist_nulls.txt for details)
		 */
		smp_wmb();
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		sk_set_socket(newsk, NULL);
		newsk->sk_wq = NULL;

		sk_update_clone(sk, newsk);

		if (newsk->sk_prot->sockets_allocated)
			sk_sockets_allocated_inc(newsk);

		if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
			net_enable_timestamp();
	}
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	__sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	sk->sk_route_caps &= ~sk->sk_route_nocaps;
	if (sk_can_gso(sk)) {
		if (dst->header_len) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = dst->dev->gso_max_size;
			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
		}
	}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		atomic_sub(len - 1, &sk->sk_wmem_alloc);
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);

void skb_orphan_partial(struct sk_buff *skb)
{
	/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
	 * so we do not completely orphan the skb, but transfer all
	 * accounted bytes but one, to avoid unexpected reorders.
	 */
	if (skb->destructor == sock_wfree
#ifdef CONFIG_INET
	    || skb->destructor == tcp_wfree
#endif
	    ) {
		atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
		skb->truesize = 1;
	} else {
		skb_orphan(skb);
	}
}
EXPORT_SYMBOL(skb_orphan_partial);

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	atomic_sub(len, &sk->sk_rmem_alloc);
	sk_mem_uncharge(sk, len);
}
EXPORT_SYMBOL(sock_rfree);

/*
 * Buffer destructor for skbs that are not used directly in read or write
 * path, e.g. for error handler skbs. Automatically called from kfree_skb.
 */
void sock_efree(struct sk_buff *skb)
{
	sock_put(skb->sk);
}
EXPORT_SYMBOL(sock_efree);

#ifdef CONFIG_INET
void sock_edemux(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_put(inet_twsk(sk));
	else
		sock_put(sk);
}
EXPORT_SYMBOL(sock_edemux);
#endif

kuid_t sock_i_uid(struct sock *sk)
{
	kuid_t uid;

	read_lock_bh(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
	read_unlock_bh(&sk->sk_callback_lock);
	return uid;
}
EXPORT_SYMBOL(sock_i_uid);

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock_bh(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock_bh(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(sock_wmalloc);

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned int)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
EXPORT_SYMBOL(sock_kmalloc);

/* Free an option memory block. Note, we actually want the inline
 * here as this allows gcc to detect the nullify and fold away the
 * condition entirely.
 */
static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
				  const bool nullify)
{
	if (WARN_ON_ONCE(!mem))
		return;
	if (nullify)
		kzfree(mem);
	else
		kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}

void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, false);
}
EXPORT_SYMBOL(sock_kfree_s);

void sock_kzfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, true);
}
EXPORT_SYMBOL(sock_kzfree_s);

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
 * I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}

/*
 *	Generic send/receive buffer handlers
 */

struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode, int max_page_order)
{
	struct sk_buff *skb;
	long timeo;
	int err;

	timeo = sock_sndtimeo(sk, noblock);
	for (;;) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
			break;

		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}
	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
				   errcode, sk->sk_allocation);
	if (skb)
		skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
}
EXPORT_SYMBOL(sock_alloc_send_skb);

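/*
 * Illustrative sketch (not part of this file): typical use of
 * sock_alloc_send_skb() in a datagram sendmsg() path. The header length
 * hlen and the reserve/copy steps are hypothetical simplifications.
 *
 *	skb = sock_alloc_send_skb(sk, hlen + len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		return err;	// -EAGAIN, -EPIPE, or sock_intr_errno()
 *	skb_reserve(skb, hlen);
 *	err = memcpy_from_msg(skb_put(skb, len), msg, len);
 */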
/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER	get_order(32768)

/**
 * skb_page_frag_refill - check that a page_frag contains enough room
 * @sz: minimum size of the fragment we want to get
 * @pfrag: pointer to page_frag
 * @gfp: priority for memory allocation
 *
 * Note: While this allocator tries to use high order pages, there is
 * no guarantee that allocations succeed. Therefore, @sz MUST be
 * less than or equal to PAGE_SIZE.
 */
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
{
	if (pfrag->page) {
		if (atomic_read(&pfrag->page->_count) == 1) {
			pfrag->offset = 0;
			return true;
		}
		if (pfrag->offset + sz <= pfrag->size)
			return true;
		put_page(pfrag->page);
	}

	pfrag->offset = 0;
	if (SKB_FRAG_PAGE_ORDER) {
		pfrag->page = alloc_pages(gfp | __GFP_COMP |
					  __GFP_NOWARN | __GFP_NORETRY,
					  SKB_FRAG_PAGE_ORDER);
		if (likely(pfrag->page)) {
			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
			return true;
		}
	}
	pfrag->page = alloc_page(gfp);
	if (likely(pfrag->page)) {
		pfrag->size = PAGE_SIZE;
		return true;
	}
	return false;
}
EXPORT_SYMBOL(skb_page_frag_refill);

bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
{
	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
		return true;

	sk_enter_memory_pressure(sk);
	sk_stream_moderate_sndbuf(sk);
	return false;
}
EXPORT_SYMBOL(sk_page_frag_refill);

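/*
 * Illustrative sketch (not part of this file): how a stream sendmsg()
 * implementation might consume the per-socket page_frag, loosely in the
 * style of tcp_sendmsg(). The copy helper call and the surrounding loop
 * are simplified assumptions.
 *
 *	struct page_frag *pfrag = sk_page_frag(sk);
 *
 *	if (!sk_page_frag_refill(sk, pfrag))
 *		goto wait_for_memory;
 *	copy = min_t(int, copy, pfrag->size - pfrag->offset);
 *	err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
 *				       pfrag->page, pfrag->offset, copy);
 *	if (!err) {
 *		get_page(pfrag->page);
 *		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
 *				   pfrag->page, pfrag->offset, copy);
 *		pfrag->offset += copy;
 *	}
 */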
static void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					  TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			prefetch(next);
			WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb->next = NULL;
			sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);

	/*
	 * Zeroing the backlog length here guarantees we cannot loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);

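/*
 * Illustrative sketch (not part of this file): a simplified blocking
 * receive loop built on sk_wait_data(). Error, shutdown, and signal
 * handling are elided; the structure is a hypothetical reduction of what
 * protocol recvmsg() implementations do.
 *
 *	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *
 *	lock_sock(sk);
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		if (!timeo)
 *			break;		// would block: give up with -EAGAIN
 *		sk_wait_data(sk, &timeo);
 *	}
 *	skb = skb_dequeue(&sk->sk_receive_queue);
 *	release_sock(sk);
 */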
/**
 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 * @sk: socket
 * @size: memory size to allocate
 * @kind: allocation type
 *
 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 * rmem allocation. This function assumes that protocols which have
 * memory_pressure use sk_wmem_queued as write buffer accounting.
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	struct proto *prot = sk->sk_prot;
	int amt = sk_mem_pages(size);
	long allocated;
	int parent_status = UNDER_LIMIT;

	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;

	allocated = sk_memory_allocated_add(sk, amt, &parent_status);

	/* Under limit. */
	if (parent_status == UNDER_LIMIT &&
	    allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. (we or our parents) */
	if ((parent_status > SOFT_LIMIT) ||
	    allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit (we or our parents) */
	if ((parent_status == OVER_LIMIT) ||
	    (allocated > sk_prot_mem_limits(sk, 2)))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
			return 1;

	} else { /* SK_MEM_SEND */
		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
				return 1;
		} else if (atomic_read(&sk->sk_wmem_alloc) <
			   prot->sysctl_wmem[0])
			return 1;
	}

	if (sk_has_memory_pressure(sk)) {
		int alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	trace_sock_exceed_buf_limit(sk, prot, allocated);

	/* Alas. Undo changes. */
	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;

	sk_memory_allocated_sub(sk, amt);

	return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);

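/*
 * Illustrative sketch (not part of this file): protocols normally reach
 * this accounting through the sk_rmem_schedule()/sk_wmem_schedule()
 * wrappers before queueing data, e.g. on the receive side:
 *
 *	if (!sk_rmem_schedule(sk, skb, skb->truesize))
 *		return -ENOBUFS;	// accounting refused the charge
 *	skb_set_owner_r(skb, sk);
 *	__skb_queue_tail(&sk->sk_receive_queue, skb);
 */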
/**
 * __sk_mem_reclaim - reclaim memory_allocated
 * @sk: socket
 */
void __sk_mem_reclaim(struct sock *sk)
{
	sk_memory_allocated_sub(sk,
				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;

	if (sk_under_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
EXPORT_SYMBOL(__sk_mem_reclaim);

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}
EXPORT_SYMBOL(sock_no_poll);

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, unsigned int optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_setsockopt);

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getsockopt);

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);

	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
EXPORT_SYMBOL(sock_no_sendpage);
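/*
 * Illustrative sketch (not part of this file): a hypothetical datagram
 * protocol wiring the sock_no_*() stubs into its proto_ops for every
 * operation it does not implement. The family and example_* handlers are
 * made-up names.
 *
 *	static const struct proto_ops example_dgram_ops = {
 *		.family		= PF_EXAMPLE,		// hypothetical
 *		.owner		= THIS_MODULE,
 *		.bind		= example_bind,		// hypothetical
 *		.sendmsg	= example_sendmsg,	// hypothetical
 *		.recvmsg	= example_recvmsg,	// hypothetical
 *		.connect	= sock_no_connect,
 *		.socketpair	= sock_no_socketpair,
 *		.accept		= sock_no_accept,
 *		.listen		= sock_no_listen,
 *		.shutdown	= sock_no_shutdown,
 *		.mmap		= sock_no_mmap,
 *		.sendpage	= sock_no_sendpage,
 *	};
 */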

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, POLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}

static void sock_def_readable(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
						POLLRDNORM | POLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}

static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}

static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);

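/*
 * Illustrative sketch (not part of this file): sk_reset_timer() takes a
 * socket reference only when the timer was not already pending, so a
 * protocol's handler must drop exactly one reference when it fires. The
 * handler below is hypothetical.
 *
 *	static void example_timer_handler(unsigned long data)
 *	{
 *		struct sock *sk = (struct sock *)data;
 *
 *		bh_lock_sock(sk);
 *		// ... protocol timer work ...
 *		bh_unlock_sock(sk);
 *		sock_put(sk);	// pairs with sock_hold() in sk_reset_timer()
 *	}
 */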
void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	sk->sk_send_head = NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation = GFP_KERNEL;
	sk->sk_rcvbuf = sysctl_rmem_default;
	sk->sk_sndbuf = sysctl_wmem_default;
	sk->sk_state = TCP_CLOSE;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type = sock->type;
		sk->sk_wq = sock->wq;
		sock->sk = sk;
	} else
		sk->sk_wq = NULL;

	spin_lock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_callback_lock,
				   af_callback_keys + sk->sk_family,
				   af_family_clock_key_strings[sk->sk_family]);

	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;

	sk->sk_frag.page = NULL;
	sk->sk_frag.offset = 0;
	sk->sk_peek_off = -1;

	sk->sk_peer_pid = NULL;
	sk->sk_peer_cred = NULL;
	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = ktime_set(-1L, 0);

#ifdef CONFIG_NET_RX_BUSY_POLL
	sk->sk_napi_id = 0;
	sk->sk_ll_usec = sysctl_net_busy_read;
#endif

	sk->sk_max_pacing_rate = ~0U;
	sk->sk_pacing_rate = ~0U;
	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.txt for details)
	 */
	smp_wmb();
	atomic_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);

void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);

void release_sock(struct sock *sk)
{
	/*
	 * The sk_lock has mutex_unlock() semantics:
	 */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	/* Warning : release_cb() might need to release sk ownership,
	 * ie call sock_release_ownership(sk) before us.
	 */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sock_release_ownership(sk);
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);

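/*
 * Illustrative sketch (not part of this file): the standard pattern for
 * process-context socket code is to bracket work with lock_sock() and
 * release_sock(); packets arriving meanwhile land on the backlog and are
 * processed by __release_sock() above.
 *
 *	lock_sock(sk);
 *	// ... modify socket state, walk queues, etc. ...
 *	release_sock(sk);	// also flushes the backlog
 */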
/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * This version should be used for very small sections, where the process
 * won't block.
 * Returns false if the fast path is taken:
 *   sk_lock.slock locked, owned = 0, BH disabled
 * Returns true if the slow path is taken:
 *   sk_lock.slock unlocked, owned = 1, BH enabled
 */
bool lock_sock_fast(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sk->sk_lock.owned)
		/*
		 * Note : We must disable BH
		 */
		return false;

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	local_bh_enable();
	return true;
}
EXPORT_SYMBOL(lock_sock_fast);

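/*
 * Illustrative sketch (not part of this file): lock_sock_fast() must be
 * paired with unlock_sock_fast(), passing along the slow-path indicator
 * so the matching unlock form is chosen:
 *
 *	bool slow = lock_sock_fast(sk);
 *
 *	// ... short non-blocking critical section ...
 *	unlock_sock_fast(sk, slow);
 */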
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;

	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	if (tv.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;

	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

void sock_enable_timestamp(struct sock *sk, int flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * we just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one
		 */
		if (!(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}

int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
		       int level, int type)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb;
	int copied, err;

	err = -EAGAIN;
	skb = sock_dequeue_err_skb(sk);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
EXPORT_SYMBOL(sock_recv_errqueue);

/*
 * Get a socket option on a socket.
 *
 * FIX: POSIX 1003.1g is very ambiguous here. It states that
 * asynchronous errors should be reported by getsockopt. We assume
 * this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);

/*
 * Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network still has.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did a hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and be purged by the
	 * socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and,
	 * probably, our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);

#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR	64	/* should be enough for the first time */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);

#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);

static int __net_init sock_inuse_init_net(struct net *net)
{
	net->core.inuse = alloc_percpu(struct prot_inuse);
	return net->core.inuse ? 0 : -ENOMEM;
}

static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.inuse);
}

static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};

static __init int net_inuse_init(void)
{
	if (register_pernet_subsys(&net_inuse_ops))
		panic("Cannot initialize net inuse counters");

	return 0;
}

core_initcall(net_inuse_init);
#else
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu(prot_inuse, cpu).val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
#endif

static void assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		pr_err("PROTO_INUSE_NR exhausted\n");
		return;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
}

static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
#endif

int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
			if (prot->rsk_prot->slab_name == NULL)
				goto out_free_sock_slab;

			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL);

			if (prot->rsk_prot->slab == NULL) {
				pr_crit("%s: Can't create request sock SLAB cache!\n",
					prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_prot != NULL) {
			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  SLAB_HWCACHE_ALIGN |
							prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;

out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	if (prot->rsk_prot)
		kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);

void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(prot->rsk_prot->slab_name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(prot->twsk_prot->twsk_slab_name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}
EXPORT_SYMBOL(proto_unregister);

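/*
 * Illustrative sketch (not part of this file): a protocol module pairing
 * proto_register() at init with proto_unregister() at exit. The proto
 * definition and names are hypothetical.
 *
 *	static struct proto example_proto = {
 *		.name	  = "EXAMPLE",			// hypothetical
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct example_sock),// hypothetical
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return proto_register(&example_proto, 1); // 1 = alloc a slab
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		proto_unregister(&example_proto);
 *	}
 */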
#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}
static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}

static char *sock_prot_memory_pressure(struct proto *proto)
{
	return proto->memory_pressure != NULL ?
	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}

static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &proto_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

static __net_init int proto_init_net(struct net *net)
{
	if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
		return -ENOMEM;

	return 0;
}

static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}


static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};

static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);

#endif /* PROC_FS */