net: Use a percpu_counter for orphan_count
[deliverable/linux.git] / net / dccp / proto.c
1 /*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/*
 * SNMP-style statistics storage for DCCP (struct dccp_mib). Set up by
 * dccp_mib_init() and released by dccp_mib_exit() in this file.
 */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/*
 * Per-CPU counter initialised in dccp_init(). Presumably this is the object
 * reached through sk->sk_prot->orphan_count in dccp_close() -- the wiring
 * lives outside this file, so confirm against the proto definitions.
 */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Established/bind hash tables for DCCP; sized and allocated in dccp_init(). */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * The maximum queue length for tx in packets. 0 is no limit.
 * dccp_sendmsg() compares it against sk->sk_write_queue.qlen and returns
 * -EAGAIN once the queue has reached this length.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
51
52 void dccp_set_state(struct sock *sk, const int state)
53 {
54 const int oldstate = sk->sk_state;
55
56 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
57 dccp_state_name(oldstate), dccp_state_name(state));
58 WARN_ON(state == oldstate);
59
60 switch (state) {
61 case DCCP_OPEN:
62 if (oldstate != DCCP_OPEN)
63 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
64 break;
65
66 case DCCP_CLOSED:
67 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
68 oldstate == DCCP_CLOSING)
69 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
70
71 sk->sk_prot->unhash(sk);
72 if (inet_csk(sk)->icsk_bind_hash != NULL &&
73 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
74 inet_put_port(sk);
75 /* fall through */
76 default:
77 if (oldstate == DCCP_OPEN)
78 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
79 }
80
81 /* Change state AFTER socket is unhashed to avoid closed
82 * socket sitting in hash tables.
83 */
84 sk->sk_state = state;
85 }
86
87 EXPORT_SYMBOL_GPL(dccp_set_state);
88
89 static void dccp_finish_passive_close(struct sock *sk)
90 {
91 switch (sk->sk_state) {
92 case DCCP_PASSIVE_CLOSE:
93 /* Node (client or server) has received Close packet. */
94 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
95 dccp_set_state(sk, DCCP_CLOSED);
96 break;
97 case DCCP_PASSIVE_CLOSEREQ:
98 /*
99 * Client received CloseReq. We set the `active' flag so that
100 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
101 */
102 dccp_send_close(sk, 1);
103 dccp_set_state(sk, DCCP_CLOSING);
104 }
105 }
106
107 void dccp_done(struct sock *sk)
108 {
109 dccp_set_state(sk, DCCP_CLOSED);
110 dccp_clear_xmit_timers(sk);
111
112 sk->sk_shutdown = SHUTDOWN_MASK;
113
114 if (!sock_flag(sk, SOCK_DEAD))
115 sk->sk_state_change(sk);
116 else
117 inet_csk_destroy_sock(sk);
118 }
119
120 EXPORT_SYMBOL_GPL(dccp_done);
121
122 const char *dccp_packet_name(const int type)
123 {
124 static const char *dccp_packet_names[] = {
125 [DCCP_PKT_REQUEST] = "REQUEST",
126 [DCCP_PKT_RESPONSE] = "RESPONSE",
127 [DCCP_PKT_DATA] = "DATA",
128 [DCCP_PKT_ACK] = "ACK",
129 [DCCP_PKT_DATAACK] = "DATAACK",
130 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
131 [DCCP_PKT_CLOSE] = "CLOSE",
132 [DCCP_PKT_RESET] = "RESET",
133 [DCCP_PKT_SYNC] = "SYNC",
134 [DCCP_PKT_SYNCACK] = "SYNCACK",
135 };
136
137 if (type >= DCCP_NR_PKT_TYPES)
138 return "INVALID";
139 else
140 return dccp_packet_names[type];
141 }
142
143 EXPORT_SYMBOL_GPL(dccp_packet_name);
144
145 const char *dccp_state_name(const int state)
146 {
147 static char *dccp_state_names[] = {
148 [DCCP_OPEN] = "OPEN",
149 [DCCP_REQUESTING] = "REQUESTING",
150 [DCCP_PARTOPEN] = "PARTOPEN",
151 [DCCP_LISTEN] = "LISTEN",
152 [DCCP_RESPOND] = "RESPOND",
153 [DCCP_CLOSING] = "CLOSING",
154 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
155 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
156 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
157 [DCCP_TIME_WAIT] = "TIME_WAIT",
158 [DCCP_CLOSED] = "CLOSED",
159 };
160
161 if (state >= DCCP_MAX_STATES)
162 return "INVALID STATE!";
163 else
164 return dccp_state_names[state];
165 }
166
167 EXPORT_SYMBOL_GPL(dccp_state_name);
168
169 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
170 {
171 struct dccp_sock *dp = dccp_sk(sk);
172 struct dccp_minisock *dmsk = dccp_msk(sk);
173 struct inet_connection_sock *icsk = inet_csk(sk);
174
175 dccp_minisock_init(&dp->dccps_minisock);
176
177 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
178 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
179 sk->sk_state = DCCP_CLOSED;
180 sk->sk_write_space = dccp_write_space;
181 icsk->icsk_sync_mss = dccp_sync_mss;
182 dp->dccps_mss_cache = 536;
183 dp->dccps_rate_last = jiffies;
184 dp->dccps_role = DCCP_ROLE_UNDEFINED;
185 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
186 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
187
188 dccp_init_xmit_timers(sk);
189
190 INIT_LIST_HEAD(&dp->dccps_featneg);
191 /*
192 * FIXME: We're hardcoding the CCID, and doing this at this point makes
193 * the listening (master) sock get CCID control blocks, which is not
194 * necessary, but for now, to not mess with the test userspace apps,
195 * lets leave it here, later the real solution is to do this in a
196 * setsockopt(CCIDs-I-want/accept). -acme
197 */
198 if (likely(ctl_sock_initialized)) {
199 int rc = dccp_feat_init(sk);
200
201 if (rc)
202 return rc;
203
204 if (dmsk->dccpms_send_ack_vector) {
205 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
206 if (dp->dccps_hc_rx_ackvec == NULL)
207 return -ENOMEM;
208 }
209 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
210 sk, GFP_KERNEL);
211 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
212 sk, GFP_KERNEL);
213 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
214 dp->dccps_hc_tx_ccid == NULL)) {
215 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
216 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
217 if (dmsk->dccpms_send_ack_vector) {
218 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
219 dp->dccps_hc_rx_ackvec = NULL;
220 }
221 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
222 return -ENOMEM;
223 }
224 } else {
225 /* control socket doesn't need feat nego */
226 INIT_LIST_HEAD(&dmsk->dccpms_pending);
227 INIT_LIST_HEAD(&dmsk->dccpms_conf);
228 }
229
230 return 0;
231 }
232
233 EXPORT_SYMBOL_GPL(dccp_init_sock);
234
235 void dccp_destroy_sock(struct sock *sk)
236 {
237 struct dccp_sock *dp = dccp_sk(sk);
238 struct dccp_minisock *dmsk = dccp_msk(sk);
239
240 /*
241 * DCCP doesn't use sk_write_queue, just sk_send_head
242 * for retransmissions
243 */
244 if (sk->sk_send_head != NULL) {
245 kfree_skb(sk->sk_send_head);
246 sk->sk_send_head = NULL;
247 }
248
249 /* Clean up a referenced DCCP bind bucket. */
250 if (inet_csk(sk)->icsk_bind_hash != NULL)
251 inet_put_port(sk);
252
253 kfree(dp->dccps_service_list);
254 dp->dccps_service_list = NULL;
255
256 if (dmsk->dccpms_send_ack_vector) {
257 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
258 dp->dccps_hc_rx_ackvec = NULL;
259 }
260 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
261 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
262 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
263
264 /* clean up feature negotiation state */
265 dccp_feat_list_purge(&dp->dccps_featneg);
266 }
267
268 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
269
270 static inline int dccp_listen_start(struct sock *sk, int backlog)
271 {
272 struct dccp_sock *dp = dccp_sk(sk);
273
274 dp->dccps_role = DCCP_ROLE_LISTEN;
275 /* do not start to listen if feature negotiation setup fails */
276 if (dccp_feat_finalise_settings(dp))
277 return -EPROTO;
278 return inet_csk_listen_start(sk, backlog);
279 }
280
281 static inline int dccp_need_reset(int state)
282 {
283 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
284 state != DCCP_REQUESTING;
285 }
286
287 int dccp_disconnect(struct sock *sk, int flags)
288 {
289 struct inet_connection_sock *icsk = inet_csk(sk);
290 struct inet_sock *inet = inet_sk(sk);
291 int err = 0;
292 const int old_state = sk->sk_state;
293
294 if (old_state != DCCP_CLOSED)
295 dccp_set_state(sk, DCCP_CLOSED);
296
297 /*
298 * This corresponds to the ABORT function of RFC793, sec. 3.8
299 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
300 */
301 if (old_state == DCCP_LISTEN) {
302 inet_csk_listen_stop(sk);
303 } else if (dccp_need_reset(old_state)) {
304 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
305 sk->sk_err = ECONNRESET;
306 } else if (old_state == DCCP_REQUESTING)
307 sk->sk_err = ECONNRESET;
308
309 dccp_clear_xmit_timers(sk);
310
311 __skb_queue_purge(&sk->sk_receive_queue);
312 __skb_queue_purge(&sk->sk_write_queue);
313 if (sk->sk_send_head != NULL) {
314 __kfree_skb(sk->sk_send_head);
315 sk->sk_send_head = NULL;
316 }
317
318 inet->dport = 0;
319
320 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
321 inet_reset_saddr(sk);
322
323 sk->sk_shutdown = 0;
324 sock_reset_flag(sk, SOCK_DONE);
325
326 icsk->icsk_backoff = 0;
327 inet_csk_delack_init(sk);
328 __sk_dst_reset(sk);
329
330 WARN_ON(inet->num && !icsk->icsk_bind_hash);
331
332 sk->sk_error_report(sk);
333 return err;
334 }
335
336 EXPORT_SYMBOL_GPL(dccp_disconnect);
337
338 /*
339 * Wait for a DCCP event.
340 *
341 * Note that we don't need to lock the socket, as the upper poll layers
342 * take care of normal races (between the test and the event) and we don't
343 * go look at any of the socket buffers directly.
344 */
345 unsigned int dccp_poll(struct file *file, struct socket *sock,
346 poll_table *wait)
347 {
348 unsigned int mask;
349 struct sock *sk = sock->sk;
350
351 poll_wait(file, sk->sk_sleep, wait);
352 if (sk->sk_state == DCCP_LISTEN)
353 return inet_csk_listen_poll(sk);
354
355 /* Socket is not locked. We are protected from async events
356 by poll logic and correct handling of state changes
357 made by another threads is impossible in any case.
358 */
359
360 mask = 0;
361 if (sk->sk_err)
362 mask = POLLERR;
363
364 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
365 mask |= POLLHUP;
366 if (sk->sk_shutdown & RCV_SHUTDOWN)
367 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
368
369 /* Connected? */
370 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
371 if (atomic_read(&sk->sk_rmem_alloc) > 0)
372 mask |= POLLIN | POLLRDNORM;
373
374 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
375 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
376 mask |= POLLOUT | POLLWRNORM;
377 } else { /* send SIGIO later */
378 set_bit(SOCK_ASYNC_NOSPACE,
379 &sk->sk_socket->flags);
380 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
381
382 /* Race breaker. If space is freed after
383 * wspace test but before the flags are set,
384 * IO signal will be lost.
385 */
386 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
387 mask |= POLLOUT | POLLWRNORM;
388 }
389 }
390 }
391 return mask;
392 }
393
394 EXPORT_SYMBOL_GPL(dccp_poll);
395
396 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
397 {
398 int rc = -ENOTCONN;
399
400 lock_sock(sk);
401
402 if (sk->sk_state == DCCP_LISTEN)
403 goto out;
404
405 switch (cmd) {
406 case SIOCINQ: {
407 struct sk_buff *skb;
408 unsigned long amount = 0;
409
410 skb = skb_peek(&sk->sk_receive_queue);
411 if (skb != NULL) {
412 /*
413 * We will only return the amount of this packet since
414 * that is all that will be read.
415 */
416 amount = skb->len;
417 }
418 rc = put_user(amount, (int __user *)arg);
419 }
420 break;
421 default:
422 rc = -ENOIOCTLCMD;
423 break;
424 }
425 out:
426 release_sock(sk);
427 return rc;
428 }
429
430 EXPORT_SYMBOL_GPL(dccp_ioctl);
431
432 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
433 char __user *optval, int optlen)
434 {
435 struct dccp_sock *dp = dccp_sk(sk);
436 struct dccp_service_list *sl = NULL;
437
438 if (service == DCCP_SERVICE_INVALID_VALUE ||
439 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
440 return -EINVAL;
441
442 if (optlen > sizeof(service)) {
443 sl = kmalloc(optlen, GFP_KERNEL);
444 if (sl == NULL)
445 return -ENOMEM;
446
447 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
448 if (copy_from_user(sl->dccpsl_list,
449 optval + sizeof(service),
450 optlen - sizeof(service)) ||
451 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
452 kfree(sl);
453 return -EFAULT;
454 }
455 }
456
457 lock_sock(sk);
458 dp->dccps_service = service;
459
460 kfree(dp->dccps_service_list);
461
462 dp->dccps_service_list = sl;
463 release_sock(sk);
464 return 0;
465 }
466
467 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
468 {
469 u8 *list, len;
470 int i, rc;
471
472 if (cscov < 0 || cscov > 15)
473 return -EINVAL;
474 /*
475 * Populate a list of permissible values, in the range cscov...15. This
476 * is necessary since feature negotiation of single values only works if
477 * both sides incidentally choose the same value. Since the list starts
478 * lowest-value first, negotiation will pick the smallest shared value.
479 */
480 if (cscov == 0)
481 return 0;
482 len = 16 - cscov;
483
484 list = kmalloc(len, GFP_KERNEL);
485 if (list == NULL)
486 return -ENOBUFS;
487
488 for (i = 0; i < len; i++)
489 list[i] = cscov++;
490
491 rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
492
493 if (rc == 0) {
494 if (rx)
495 dccp_sk(sk)->dccps_pcrlen = cscov;
496 else
497 dccp_sk(sk)->dccps_pcslen = cscov;
498 }
499 kfree(list);
500 return rc;
501 }
502
503 static int dccp_setsockopt_ccid(struct sock *sk, int type,
504 char __user *optval, int optlen)
505 {
506 u8 *val;
507 int rc = 0;
508
509 if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
510 return -EINVAL;
511
512 val = kmalloc(optlen, GFP_KERNEL);
513 if (val == NULL)
514 return -ENOMEM;
515
516 if (copy_from_user(val, optval, optlen)) {
517 kfree(val);
518 return -EFAULT;
519 }
520
521 lock_sock(sk);
522 if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
523 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
524
525 if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
526 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
527 release_sock(sk);
528
529 kfree(val);
530 return rc;
531 }
532
533 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
534 char __user *optval, int optlen)
535 {
536 struct dccp_sock *dp = dccp_sk(sk);
537 int val, err = 0;
538
539 switch (optname) {
540 case DCCP_SOCKOPT_PACKET_SIZE:
541 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
542 return 0;
543 case DCCP_SOCKOPT_CHANGE_L:
544 case DCCP_SOCKOPT_CHANGE_R:
545 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
546 return 0;
547 case DCCP_SOCKOPT_CCID:
548 case DCCP_SOCKOPT_RX_CCID:
549 case DCCP_SOCKOPT_TX_CCID:
550 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
551 }
552
553 if (optlen < (int)sizeof(int))
554 return -EINVAL;
555
556 if (get_user(val, (int __user *)optval))
557 return -EFAULT;
558
559 if (optname == DCCP_SOCKOPT_SERVICE)
560 return dccp_setsockopt_service(sk, val, optval, optlen);
561
562 lock_sock(sk);
563 switch (optname) {
564 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
565 if (dp->dccps_role != DCCP_ROLE_SERVER)
566 err = -EOPNOTSUPP;
567 else
568 dp->dccps_server_timewait = (val != 0);
569 break;
570 case DCCP_SOCKOPT_SEND_CSCOV:
571 err = dccp_setsockopt_cscov(sk, val, false);
572 break;
573 case DCCP_SOCKOPT_RECV_CSCOV:
574 err = dccp_setsockopt_cscov(sk, val, true);
575 break;
576 default:
577 err = -ENOPROTOOPT;
578 break;
579 }
580 release_sock(sk);
581
582 return err;
583 }
584
585 int dccp_setsockopt(struct sock *sk, int level, int optname,
586 char __user *optval, int optlen)
587 {
588 if (level != SOL_DCCP)
589 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
590 optname, optval,
591 optlen);
592 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
593 }
594
595 EXPORT_SYMBOL_GPL(dccp_setsockopt);
596
597 #ifdef CONFIG_COMPAT
598 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
599 char __user *optval, int optlen)
600 {
601 if (level != SOL_DCCP)
602 return inet_csk_compat_setsockopt(sk, level, optname,
603 optval, optlen);
604 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
605 }
606
607 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
608 #endif
609
610 static int dccp_getsockopt_service(struct sock *sk, int len,
611 __be32 __user *optval,
612 int __user *optlen)
613 {
614 const struct dccp_sock *dp = dccp_sk(sk);
615 const struct dccp_service_list *sl;
616 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
617
618 lock_sock(sk);
619 if ((sl = dp->dccps_service_list) != NULL) {
620 slen = sl->dccpsl_nr * sizeof(u32);
621 total_len += slen;
622 }
623
624 err = -EINVAL;
625 if (total_len > len)
626 goto out;
627
628 err = 0;
629 if (put_user(total_len, optlen) ||
630 put_user(dp->dccps_service, optval) ||
631 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
632 err = -EFAULT;
633 out:
634 release_sock(sk);
635 return err;
636 }
637
638 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
639 char __user *optval, int __user *optlen)
640 {
641 struct dccp_sock *dp;
642 int val, len;
643
644 if (get_user(len, optlen))
645 return -EFAULT;
646
647 if (len < (int)sizeof(int))
648 return -EINVAL;
649
650 dp = dccp_sk(sk);
651
652 switch (optname) {
653 case DCCP_SOCKOPT_PACKET_SIZE:
654 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
655 return 0;
656 case DCCP_SOCKOPT_SERVICE:
657 return dccp_getsockopt_service(sk, len,
658 (__be32 __user *)optval, optlen);
659 case DCCP_SOCKOPT_GET_CUR_MPS:
660 val = dp->dccps_mss_cache;
661 break;
662 case DCCP_SOCKOPT_AVAILABLE_CCIDS:
663 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
664 case DCCP_SOCKOPT_TX_CCID:
665 val = ccid_get_current_tx_ccid(dp);
666 if (val < 0)
667 return -ENOPROTOOPT;
668 break;
669 case DCCP_SOCKOPT_RX_CCID:
670 val = ccid_get_current_rx_ccid(dp);
671 if (val < 0)
672 return -ENOPROTOOPT;
673 break;
674 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
675 val = dp->dccps_server_timewait;
676 break;
677 case DCCP_SOCKOPT_SEND_CSCOV:
678 val = dp->dccps_pcslen;
679 break;
680 case DCCP_SOCKOPT_RECV_CSCOV:
681 val = dp->dccps_pcrlen;
682 break;
683 case 128 ... 191:
684 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
685 len, (u32 __user *)optval, optlen);
686 case 192 ... 255:
687 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
688 len, (u32 __user *)optval, optlen);
689 default:
690 return -ENOPROTOOPT;
691 }
692
693 len = sizeof(val);
694 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
695 return -EFAULT;
696
697 return 0;
698 }
699
700 int dccp_getsockopt(struct sock *sk, int level, int optname,
701 char __user *optval, int __user *optlen)
702 {
703 if (level != SOL_DCCP)
704 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
705 optname, optval,
706 optlen);
707 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
708 }
709
710 EXPORT_SYMBOL_GPL(dccp_getsockopt);
711
712 #ifdef CONFIG_COMPAT
713 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
714 char __user *optval, int __user *optlen)
715 {
716 if (level != SOL_DCCP)
717 return inet_csk_compat_getsockopt(sk, level, optname,
718 optval, optlen);
719 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
720 }
721
722 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
723 #endif
724
725 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
726 size_t len)
727 {
728 const struct dccp_sock *dp = dccp_sk(sk);
729 const int flags = msg->msg_flags;
730 const int noblock = flags & MSG_DONTWAIT;
731 struct sk_buff *skb;
732 int rc, size;
733 long timeo;
734
735 if (len > dp->dccps_mss_cache)
736 return -EMSGSIZE;
737
738 lock_sock(sk);
739
740 if (sysctl_dccp_tx_qlen &&
741 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
742 rc = -EAGAIN;
743 goto out_release;
744 }
745
746 timeo = sock_sndtimeo(sk, noblock);
747
748 /*
749 * We have to use sk_stream_wait_connect here to set sk_write_pending,
750 * so that the trick in dccp_rcv_request_sent_state_process.
751 */
752 /* Wait for a connection to finish. */
753 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
754 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
755 goto out_release;
756
757 size = sk->sk_prot->max_header + len;
758 release_sock(sk);
759 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
760 lock_sock(sk);
761 if (skb == NULL)
762 goto out_release;
763
764 skb_reserve(skb, sk->sk_prot->max_header);
765 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
766 if (rc != 0)
767 goto out_discard;
768
769 skb_queue_tail(&sk->sk_write_queue, skb);
770 dccp_write_xmit(sk,0);
771 out_release:
772 release_sock(sk);
773 return rc ? : len;
774 out_discard:
775 kfree_skb(skb);
776 goto out_release;
777 }
778
779 EXPORT_SYMBOL_GPL(dccp_sendmsg);
780
781 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
782 size_t len, int nonblock, int flags, int *addr_len)
783 {
784 const struct dccp_hdr *dh;
785 long timeo;
786
787 lock_sock(sk);
788
789 if (sk->sk_state == DCCP_LISTEN) {
790 len = -ENOTCONN;
791 goto out;
792 }
793
794 timeo = sock_rcvtimeo(sk, nonblock);
795
796 do {
797 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
798
799 if (skb == NULL)
800 goto verify_sock_status;
801
802 dh = dccp_hdr(skb);
803
804 switch (dh->dccph_type) {
805 case DCCP_PKT_DATA:
806 case DCCP_PKT_DATAACK:
807 goto found_ok_skb;
808
809 case DCCP_PKT_CLOSE:
810 case DCCP_PKT_CLOSEREQ:
811 if (!(flags & MSG_PEEK))
812 dccp_finish_passive_close(sk);
813 /* fall through */
814 case DCCP_PKT_RESET:
815 dccp_pr_debug("found fin (%s) ok!\n",
816 dccp_packet_name(dh->dccph_type));
817 len = 0;
818 goto found_fin_ok;
819 default:
820 dccp_pr_debug("packet_type=%s\n",
821 dccp_packet_name(dh->dccph_type));
822 sk_eat_skb(sk, skb, 0);
823 }
824 verify_sock_status:
825 if (sock_flag(sk, SOCK_DONE)) {
826 len = 0;
827 break;
828 }
829
830 if (sk->sk_err) {
831 len = sock_error(sk);
832 break;
833 }
834
835 if (sk->sk_shutdown & RCV_SHUTDOWN) {
836 len = 0;
837 break;
838 }
839
840 if (sk->sk_state == DCCP_CLOSED) {
841 if (!sock_flag(sk, SOCK_DONE)) {
842 /* This occurs when user tries to read
843 * from never connected socket.
844 */
845 len = -ENOTCONN;
846 break;
847 }
848 len = 0;
849 break;
850 }
851
852 if (!timeo) {
853 len = -EAGAIN;
854 break;
855 }
856
857 if (signal_pending(current)) {
858 len = sock_intr_errno(timeo);
859 break;
860 }
861
862 sk_wait_data(sk, &timeo);
863 continue;
864 found_ok_skb:
865 if (len > skb->len)
866 len = skb->len;
867 else if (len < skb->len)
868 msg->msg_flags |= MSG_TRUNC;
869
870 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
871 /* Exception. Bailout! */
872 len = -EFAULT;
873 break;
874 }
875 found_fin_ok:
876 if (!(flags & MSG_PEEK))
877 sk_eat_skb(sk, skb, 0);
878 break;
879 } while (1);
880 out:
881 release_sock(sk);
882 return len;
883 }
884
885 EXPORT_SYMBOL_GPL(dccp_recvmsg);
886
887 int inet_dccp_listen(struct socket *sock, int backlog)
888 {
889 struct sock *sk = sock->sk;
890 unsigned char old_state;
891 int err;
892
893 lock_sock(sk);
894
895 err = -EINVAL;
896 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
897 goto out;
898
899 old_state = sk->sk_state;
900 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
901 goto out;
902
903 /* Really, if the socket is already in listen state
904 * we can only allow the backlog to be adjusted.
905 */
906 if (old_state != DCCP_LISTEN) {
907 /*
908 * FIXME: here it probably should be sk->sk_prot->listen_start
909 * see tcp_listen_start
910 */
911 err = dccp_listen_start(sk, backlog);
912 if (err)
913 goto out;
914 }
915 sk->sk_max_ack_backlog = backlog;
916 err = 0;
917
918 out:
919 release_sock(sk);
920 return err;
921 }
922
923 EXPORT_SYMBOL_GPL(inet_dccp_listen);
924
925 static void dccp_terminate_connection(struct sock *sk)
926 {
927 u8 next_state = DCCP_CLOSED;
928
929 switch (sk->sk_state) {
930 case DCCP_PASSIVE_CLOSE:
931 case DCCP_PASSIVE_CLOSEREQ:
932 dccp_finish_passive_close(sk);
933 break;
934 case DCCP_PARTOPEN:
935 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
936 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
937 /* fall through */
938 case DCCP_OPEN:
939 dccp_send_close(sk, 1);
940
941 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
942 !dccp_sk(sk)->dccps_server_timewait)
943 next_state = DCCP_ACTIVE_CLOSEREQ;
944 else
945 next_state = DCCP_CLOSING;
946 /* fall through */
947 default:
948 dccp_set_state(sk, next_state);
949 }
950 }
951
952 void dccp_close(struct sock *sk, long timeout)
953 {
954 struct dccp_sock *dp = dccp_sk(sk);
955 struct sk_buff *skb;
956 u32 data_was_unread = 0;
957 int state;
958
959 lock_sock(sk);
960
961 sk->sk_shutdown = SHUTDOWN_MASK;
962
963 if (sk->sk_state == DCCP_LISTEN) {
964 dccp_set_state(sk, DCCP_CLOSED);
965
966 /* Special case. */
967 inet_csk_listen_stop(sk);
968
969 goto adjudge_to_death;
970 }
971
972 sk_stop_timer(sk, &dp->dccps_xmit_timer);
973
974 /*
975 * We need to flush the recv. buffs. We do this only on the
976 * descriptor close, not protocol-sourced closes, because the
977 *reader process may not have drained the data yet!
978 */
979 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
980 data_was_unread += skb->len;
981 __kfree_skb(skb);
982 }
983
984 if (data_was_unread) {
985 /* Unread data was tossed, send an appropriate Reset Code */
986 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
987 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
988 dccp_set_state(sk, DCCP_CLOSED);
989 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
990 /* Check zero linger _after_ checking for unread data. */
991 sk->sk_prot->disconnect(sk, 0);
992 } else if (sk->sk_state != DCCP_CLOSED) {
993 dccp_terminate_connection(sk);
994 }
995
996 sk_stream_wait_close(sk, timeout);
997
998 adjudge_to_death:
999 state = sk->sk_state;
1000 sock_hold(sk);
1001 sock_orphan(sk);
1002 percpu_counter_inc(sk->sk_prot->orphan_count);
1003
1004 /*
1005 * It is the last release_sock in its life. It will remove backlog.
1006 */
1007 release_sock(sk);
1008 /*
1009 * Now socket is owned by kernel and we acquire BH lock
1010 * to finish close. No need to check for user refs.
1011 */
1012 local_bh_disable();
1013 bh_lock_sock(sk);
1014 WARN_ON(sock_owned_by_user(sk));
1015
1016 /* Have we already been destroyed by a softirq or backlog? */
1017 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1018 goto out;
1019
1020 if (sk->sk_state == DCCP_CLOSED)
1021 inet_csk_destroy_sock(sk);
1022
1023 /* Otherwise, socket is reprieved until protocol close. */
1024
1025 out:
1026 bh_unlock_sock(sk);
1027 local_bh_enable();
1028 sock_put(sk);
1029 }
1030
1031 EXPORT_SYMBOL_GPL(dccp_close);
1032
/*
 * dccp_shutdown  -  shutdown() handler for DCCP sockets
 * @sk:  socket (not used)
 * @how: shutdown mode requested by the caller
 *
 * Currently a stub: it only emits a debug message and changes no state.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}
1037
1038 EXPORT_SYMBOL_GPL(dccp_shutdown);
1039
/*
 * Set up the storage behind dccp_statistics, sized for struct dccp_mib.
 * Returns whatever snmp_mib_init() returns; dccp_init() treats a non-zero
 * value as failure.
 */
static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
}
1044
/* Release the storage behind dccp_statistics; counterpart of dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void**)dccp_statistics);
}
1049
/*
 * Read-only module parameter. When non-zero, dccp_init() derives the size
 * goal of the established hash table from it instead of from the amount of
 * physical memory.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug switch, writable at runtime through the module parameter interface.
 * Presumably consulted by dccp_pr_debug(); that macro is defined outside
 * this file.
 */
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1061
1062 static int __init dccp_init(void)
1063 {
1064 unsigned long goal;
1065 int ehash_order, bhash_order, i;
1066 int rc;
1067
1068 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1069 FIELD_SIZEOF(struct sk_buff, cb));
1070 rc = percpu_counter_init(&dccp_orphan_count, 0);
1071 if (rc)
1072 goto out;
1073 rc = -ENOBUFS;
1074 inet_hashinfo_init(&dccp_hashinfo);
1075 dccp_hashinfo.bind_bucket_cachep =
1076 kmem_cache_create("dccp_bind_bucket",
1077 sizeof(struct inet_bind_bucket), 0,
1078 SLAB_HWCACHE_ALIGN, NULL);
1079 if (!dccp_hashinfo.bind_bucket_cachep)
1080 goto out_free_percpu;
1081
1082 /*
1083 * Size and allocate the main established and bind bucket
1084 * hash tables.
1085 *
1086 * The methodology is similar to that of the buffer cache.
1087 */
1088 if (num_physpages >= (128 * 1024))
1089 goal = num_physpages >> (21 - PAGE_SHIFT);
1090 else
1091 goal = num_physpages >> (23 - PAGE_SHIFT);
1092
1093 if (thash_entries)
1094 goal = (thash_entries *
1095 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1096 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1097 ;
1098 do {
1099 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1100 sizeof(struct inet_ehash_bucket);
1101 while (dccp_hashinfo.ehash_size &
1102 (dccp_hashinfo.ehash_size - 1))
1103 dccp_hashinfo.ehash_size--;
1104 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1105 __get_free_pages(GFP_ATOMIC, ehash_order);
1106 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1107
1108 if (!dccp_hashinfo.ehash) {
1109 DCCP_CRIT("Failed to allocate DCCP established hash table");
1110 goto out_free_bind_bucket_cachep;
1111 }
1112
1113 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1114 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1115 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
1116 }
1117
1118 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1119 goto out_free_dccp_ehash;
1120
1121 bhash_order = ehash_order;
1122
1123 do {
1124 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1125 sizeof(struct inet_bind_hashbucket);
1126 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1127 bhash_order > 0)
1128 continue;
1129 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1130 __get_free_pages(GFP_ATOMIC, bhash_order);
1131 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1132
1133 if (!dccp_hashinfo.bhash) {
1134 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1135 goto out_free_dccp_locks;
1136 }
1137
1138 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1139 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1140 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1141 }
1142
1143 rc = dccp_mib_init();
1144 if (rc)
1145 goto out_free_dccp_bhash;
1146
1147 rc = dccp_ackvec_init();
1148 if (rc)
1149 goto out_free_dccp_mib;
1150
1151 rc = dccp_sysctl_init();
1152 if (rc)
1153 goto out_ackvec_exit;
1154
1155 dccp_timestamping_init();
1156 out:
1157 return rc;
1158 out_ackvec_exit:
1159 dccp_ackvec_exit();
1160 out_free_dccp_mib:
1161 dccp_mib_exit();
1162 out_free_dccp_bhash:
1163 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1164 dccp_hashinfo.bhash = NULL;
1165 out_free_dccp_locks:
1166 inet_ehash_locks_free(&dccp_hashinfo);
1167 out_free_dccp_ehash:
1168 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1169 dccp_hashinfo.ehash = NULL;
1170 out_free_bind_bucket_cachep:
1171 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1172 dccp_hashinfo.bind_bucket_cachep = NULL;
1173 out_free_percpu:
1174 percpu_counter_destroy(&dccp_orphan_count);
1175 goto out;
1176 }
1177
1178 static void __exit dccp_fini(void)
1179 {
1180 dccp_mib_exit();
1181 free_pages((unsigned long)dccp_hashinfo.bhash,
1182 get_order(dccp_hashinfo.bhash_size *
1183 sizeof(struct inet_bind_hashbucket)));
1184 free_pages((unsigned long)dccp_hashinfo.ehash,
1185 get_order(dccp_hashinfo.ehash_size *
1186 sizeof(struct inet_ehash_bucket)));
1187 inet_ehash_locks_free(&dccp_hashinfo);
1188 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1189 dccp_ackvec_exit();
1190 dccp_sysctl_exit();
1191 }
1192
/* Register dccp_init()/dccp_fini() as the module's load and unload hooks. */
module_init(dccp_init);
module_exit(dccp_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
This page took 0.059066 seconds and 5 git commands to generate.