Commit | Line | Data |
---|---|---|
7c657876 ACM |
1 | /* |
2 | * net/dccp/ccids/ccid3.c | |
3 | * | |
954c2db8 | 4 | * Copyright (c) 2007 The University of Aberdeen, Scotland, UK |
b2f41ff4 IM |
5 | * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. |
6 | * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> | |
7c657876 ACM |
7 | * |
8 | * An implementation of the DCCP protocol | |
9 | * | |
10 | * This code has been developed by the University of Waikato WAND | |
11 | * research group. For further information please see http://www.wand.net.nz/ | |
7c657876 ACM |
12 | * |
13 | * This code also uses code from Lulea University, rereleased as GPL by its | |
14 | * authors: | |
15 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | |
16 | * | |
17 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | |
18 | * and to make it work as a loadable module in the DCCP stack written by | |
19 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | |
20 | * | |
21 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | |
22 | * | |
23 | * This program is free software; you can redistribute it and/or modify | |
24 | * it under the terms of the GNU General Public License as published by | |
25 | * the Free Software Foundation; either version 2 of the License, or | |
26 | * (at your option) any later version. | |
27 | * | |
28 | * This program is distributed in the hope that it will be useful, | |
29 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
30 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
31 | * GNU General Public License for more details. | |
32 | * | |
33 | * You should have received a copy of the GNU General Public License | |
34 | * along with this program; if not, write to the Free Software | |
35 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
36 | */ | |
7c657876 ACM |
37 | #include "../dccp.h" |
38 | #include "ccid3.h" | |
39 | ||
76fd1e87 GR |
40 | #include <asm/unaligned.h> |
41 | ||
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
/* Switch consulted by DCCP_PR_DEBUG() before emitting CCID-3 debug output */
static int ccid3_debug;
#define ccid3_pr_debug(fmt, args...)	DCCP_PR_DEBUG(ccid3_debug, fmt, ##args)
#else
/* Debugging compiled out: the macro expands to nothing */
#define ccid3_pr_debug(fmt, args...)
#endif
48 | ||
9bf17475 GR |
49 | /* |
50 | * Transmitter Half-Connection Routines | |
51 | */ | |
7c657876 | 52 | |
a21f9f96 | 53 | /* |
6c08b2cf GR |
54 | * Compute the initial sending rate X_init in the manner of RFC 3390: |
55 | * | |
56 | * X_init = min(4 * s, max(2 * s, 4380 bytes)) / RTT | |
57 | * | |
58 | * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis | |
59 | * (rev-02) clarifies the use of RFC 3390 with regard to the above formula. | |
a21f9f96 GR |
60 | * For consistency with other parts of the code, X_init is scaled by 2^6. |
61 | */ | |
62 | static inline u64 rfc3390_initial_rate(struct sock *sk) | |
63 | { | |
6c08b2cf | 64 | const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
842d1ef1 | 65 | const __u32 w_init = clamp_t(__u32, 4380U, 2 * hctx->s, 4 * hctx->s); |
a21f9f96 | 66 | |
842d1ef1 | 67 | return scaled_div(w_init << 6, hctx->rtt); |
a21f9f96 GR |
68 | } |
69 | ||
de6f2b59 GR |
70 | /** |
71 | * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst | |
72 | * This respects the granularity of X_inst (64 * bytes/second). | |
17893bc1 | 73 | */ |
c4e18dad | 74 | static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) |
7c657876 | 75 | { |
842d1ef1 | 76 | hctx->t_ipi = scaled_div32(((u64)hctx->s) << 6, hctx->x); |
7c657876 | 77 | |
de6f2b59 GR |
78 | ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hctx->t_ipi, |
79 | hctx->s, (unsigned)(hctx->x >> 6)); | |
7c657876 | 80 | } |
aa97efd9 | 81 | |
a5358fdc GR |
82 | static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now) |
83 | { | |
842d1ef1 | 84 | u32 delta = ktime_us_delta(now, hctx->t_last_win_count); |
a5358fdc | 85 | |
842d1ef1 | 86 | return delta / hctx->rtt; |
a5358fdc GR |
87 | } |
88 | ||
aa97efd9 GR |
89 | /** |
90 | * ccid3_hc_tx_update_x - Update allowed sending rate X | |
91 | * @stamp: most recent time if available - can be left NULL. | |
92 | * This function tracks draft rfc3448bis, check there for latest details. | |
5c3fbb6a | 93 | * |
1a21e49a GR |
94 | * Note: X and X_recv are both stored in units of 64 * bytes/second, to support |
95 | * fine-grained resolution of sending rates. This requires scaling by 2^6 | |
96 | * throughout the code. Only X_calc is unscaled (in bytes/second). | |
97 | * | |
1a21e49a | 98 | */ |
aa97efd9 | 99 | static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) |
7c657876 | 100 | { |
59725dc2 | 101 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
842d1ef1 GR |
102 | u64 min_rate = 2 * hctx->x_recv; |
103 | const u64 old_x = hctx->x; | |
52515e77 | 104 | ktime_t now = stamp ? *stamp : ktime_get_real(); |
7c657876 | 105 | |
0c150efb GR |
106 | /* |
107 | * Handle IDLE periods: do not reduce below RFC3390 initial sending rate | |
a5358fdc GR |
108 | * when idling [RFC 4342, 5.1]. Definition of idling is from rfc3448bis: |
109 | * a sender is idle if it has not sent anything over a 2-RTT-period. | |
0c150efb GR |
110 | * For consistency with X and X_recv, min_rate is also scaled by 2^6. |
111 | */ | |
a5358fdc | 112 | if (ccid3_hc_tx_idle_rtt(hctx, now) >= 2) { |
0c150efb | 113 | min_rate = rfc3390_initial_rate(sk); |
842d1ef1 | 114 | min_rate = max(min_rate, 2 * hctx->x_recv); |
0c150efb GR |
115 | } |
116 | ||
842d1ef1 | 117 | if (hctx->p > 0) { |
1a21e49a | 118 | |
842d1ef1 GR |
119 | hctx->x = min(((u64)hctx->x_calc) << 6, min_rate); |
120 | hctx->x = max(hctx->x, (((u64)hctx->s) << 6) / TFRC_T_MBI); | |
a79ef76f | 121 | |
842d1ef1 | 122 | } else if (ktime_us_delta(now, hctx->t_ld) - (s64)hctx->rtt >= 0) { |
ac198ea8 | 123 | |
842d1ef1 GR |
124 | hctx->x = min(2 * hctx->x, min_rate); |
125 | hctx->x = max(hctx->x, | |
126 | scaled_div(((u64)hctx->s) << 6, hctx->rtt)); | |
127 | hctx->t_ld = now; | |
ff586298 | 128 | } |
b6ee3d4a | 129 | |
842d1ef1 | 130 | if (hctx->x != old_x) { |
1761f7d7 GR |
131 | ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, " |
132 | "X_recv=%u\n", (unsigned)(old_x >> 6), | |
842d1ef1 GR |
133 | (unsigned)(hctx->x >> 6), hctx->x_calc, |
134 | (unsigned)(hctx->x_recv >> 6)); | |
8699be7d | 135 | |
1266adee | 136 | ccid3_update_send_interval(hctx); |
8699be7d | 137 | } |
7c657876 ACM |
138 | } |
139 | ||
78ad713d | 140 | /* |
8109b02b ACM |
141 | * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) |
142 | * @len: DCCP packet payload size in bytes | |
78ad713d GR |
143 | */ |
144 | static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) | |
145 | { | |
842d1ef1 | 146 | const u16 old_s = hctx->s; |
1266adee | 147 | |
842d1ef1 | 148 | hctx->s = tfrc_ewma(hctx->s, len, 9); |
1266adee | 149 | |
842d1ef1 | 150 | if (hctx->s != old_s) |
1266adee | 151 | ccid3_update_send_interval(hctx); |
78ad713d GR |
152 | } |
153 | ||
9f8681db | 154 | /* |
8109b02b | 155 | * Update Window Counter using the algorithm from [RFC 4342, 8.1]. |
825de27d | 156 | * As elsewhere, RTT > 0 is assumed by using dccp_sample_rtt(). |
9f8681db GR |
157 | */ |
158 | static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, | |
8132da4d | 159 | ktime_t now) |
9f8681db | 160 | { |
842d1ef1 GR |
161 | u32 delta = ktime_us_delta(now, hctx->t_last_win_count), |
162 | quarter_rtts = (4 * delta) / hctx->rtt; | |
9f8681db GR |
163 | |
164 | if (quarter_rtts > 0) { | |
842d1ef1 GR |
165 | hctx->t_last_win_count = now; |
166 | hctx->last_win_count += min(quarter_rtts, 5U); | |
167 | hctx->last_win_count &= 0xF; /* mod 16 */ | |
9f8681db GR |
168 | } |
169 | } | |
170 | ||
7c657876 ACM |
171 | static void ccid3_hc_tx_no_feedback_timer(unsigned long data) |
172 | { | |
173 | struct sock *sk = (struct sock *)data; | |
59725dc2 | 174 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
2a1fda6f | 175 | unsigned long t_nfb = USEC_PER_SEC / 5; |
7c657876 ACM |
176 | |
177 | bh_lock_sock(sk); | |
178 | if (sock_owned_by_user(sk)) { | |
179 | /* Try again later. */ | |
180 | /* XXX: set some sensible MIB */ | |
48e03eee | 181 | goto restart_timer; |
7c657876 ACM |
182 | } |
183 | ||
d0c05fe4 GR |
184 | ccid3_pr_debug("%s(%p) entry with%s feedback\n", dccp_role(sk), sk, |
185 | hctx->feedback ? "" : "out"); | |
a9672411 | 186 | |
d0995e6a GR |
187 | /* Ignore and do not restart after leaving the established state */ |
188 | if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) | |
189 | goto out; | |
190 | ||
191 | /* Reset feedback state to "no feedback received" */ | |
d0c05fe4 | 192 | hctx->feedback = false; |
52515e77 GR |
193 | |
194 | /* | |
195 | * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 | |
842d1ef1 | 196 | * RTO is 0 if and only if no feedback has been received yet. |
52515e77 | 197 | */ |
842d1ef1 | 198 | if (hctx->t_rto == 0 || hctx->p == 0) { |
52515e77 GR |
199 | |
200 | /* halve send rate directly */ | |
842d1ef1 | 201 | hctx->x = max(hctx->x / 2, (((u64)hctx->s) << 6) / TFRC_T_MBI); |
1266adee | 202 | ccid3_update_send_interval(hctx); |
52515e77 | 203 | } else { |
1f2333ae | 204 | /* |
52515e77 | 205 | * Modify the cached value of X_recv |
0c150efb | 206 | * |
52515e77 | 207 | * If (X_calc > 2 * X_recv) |
0c150efb GR |
208 | * X_recv = max(X_recv / 2, s / (2 * t_mbi)); |
209 | * Else | |
210 | * X_recv = X_calc / 4; | |
211 | * | |
212 | * Note that X_recv is scaled by 2^6 while X_calc is not | |
1f2333ae | 213 | */ |
842d1ef1 | 214 | BUG_ON(hctx->p && !hctx->x_calc); |
0c150efb | 215 | |
842d1ef1 GR |
216 | if (hctx->x_calc > (hctx->x_recv >> 5)) |
217 | hctx->x_recv = | |
218 | max(hctx->x_recv / 2, | |
219 | (((__u64)hctx->s) << 6) / (2 * TFRC_T_MBI)); | |
52515e77 | 220 | else { |
842d1ef1 GR |
221 | hctx->x_recv = hctx->x_calc; |
222 | hctx->x_recv <<= 4; | |
7c657876 | 223 | } |
aa97efd9 | 224 | ccid3_hc_tx_update_x(sk, NULL); |
7c657876 | 225 | } |
52515e77 | 226 | ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n", |
842d1ef1 | 227 | (unsigned long long)hctx->x); |
52515e77 GR |
228 | |
229 | /* | |
230 | * Set new timeout for the nofeedback timer. | |
231 | * See comments in packet_recv() regarding the value of t_RTO. | |
232 | */ | |
842d1ef1 | 233 | if (unlikely(hctx->t_rto == 0)) /* no feedback received yet */ |
52515e77 GR |
234 | t_nfb = TFRC_INITIAL_TIMEOUT; |
235 | else | |
842d1ef1 | 236 | t_nfb = max(hctx->t_rto, 2 * hctx->t_ipi); |
7c657876 | 237 | |
48e03eee | 238 | restart_timer: |
842d1ef1 | 239 | sk_reset_timer(sk, &hctx->no_feedback_timer, |
c9eaf173 | 240 | jiffies + usecs_to_jiffies(t_nfb)); |
7c657876 ACM |
241 | out: |
242 | bh_unlock_sock(sk); | |
243 | sock_put(sk); | |
244 | } | |
245 | ||
f4a66ca4 GR |
246 | /** |
247 | * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets | |
248 | * @skb: next packet candidate to send on @sk | |
249 | * This function uses the convention of ccid_packet_dequeue_eval() and | |
250 | * returns a millisecond-delay value between 0 and t_mbi = 64000 msec. | |
7da7f456 | 251 | */ |
6b57c93d | 252 | static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) |
7c657876 ACM |
253 | { |
254 | struct dccp_sock *dp = dccp_sk(sk); | |
59725dc2 | 255 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
8132da4d GR |
256 | ktime_t now = ktime_get_real(); |
257 | s64 delay; | |
7c657876 | 258 | |
7c657876 | 259 | /* |
da335baf GR |
260 | * This function is called only for Data and DataAck packets. Sending |
261 | * zero-sized Data(Ack)s is theoretically possible, but for congestion | |
262 | * control this case is pathological - ignore it. | |
7c657876 | 263 | */ |
6b57c93d | 264 | if (unlikely(skb->len == 0)) |
da335baf | 265 | return -EBADMSG; |
7c657876 | 266 | |
d0c05fe4 | 267 | if (hctx->s == 0) { |
842d1ef1 | 268 | sk_reset_timer(sk, &hctx->no_feedback_timer, (jiffies + |
c9eaf173 | 269 | usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); |
842d1ef1 GR |
270 | hctx->last_win_count = 0; |
271 | hctx->t_last_win_count = now; | |
90feeb95 GR |
272 | |
273 | /* Set t_0 for initial packet */ | |
842d1ef1 | 274 | hctx->t_nom = now; |
30833ffe | 275 | |
842d1ef1 | 276 | hctx->s = skb->len; |
30833ffe GR |
277 | |
278 | /* | |
279 | * Use initial RTT sample when available: recommended by erratum | |
280 | * to RFC 4342. This implements the initialisation procedure of | |
281 | * draft rfc3448bis, section 4.2. Remember, X is scaled by 2^6. | |
282 | */ | |
283 | if (dp->dccps_syn_rtt) { | |
284 | ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); | |
842d1ef1 GR |
285 | hctx->rtt = dp->dccps_syn_rtt; |
286 | hctx->x = rfc3390_initial_rate(sk); | |
287 | hctx->t_ld = now; | |
30833ffe | 288 | } else { |
3294f202 GR |
289 | /* |
290 | * Sender does not have RTT sample: | |
291 | * - set fallback RTT (RFC 4340, 3.4) since a RTT value | |
292 | * is needed in several parts (e.g. window counter); | |
293 | * - set sending rate X_pps = 1pps as per RFC 3448, 4.2. | |
294 | */ | |
842d1ef1 GR |
295 | hctx->rtt = DCCP_FALLBACK_RTT; |
296 | hctx->x = hctx->s; | |
297 | hctx->x <<= 6; | |
30833ffe GR |
298 | } |
299 | ccid3_update_send_interval(hctx); | |
300 | ||
d0995e6a | 301 | } else { |
842d1ef1 | 302 | delay = ktime_us_delta(hctx->t_nom, now); |
8699be7d | 303 | ccid3_pr_debug("delay=%ld\n", (long)delay); |
91cf5a17 | 304 | /* |
8109b02b | 305 | * Scheduling of packet transmissions [RFC 3448, 4.6] |
91cf5a17 GR |
306 | * |
307 | * if (t_now > t_nom - delta) | |
308 | * // send the packet now | |
309 | * else | |
310 | * // send the packet in (t_nom - t_now) milliseconds. | |
311 | */ | |
de6f2b59 GR |
312 | if (delay >= TFRC_T_DELTA) |
313 | return (u32)delay / USEC_PER_MSEC; | |
9f8681db | 314 | |
8132da4d | 315 | ccid3_hc_tx_update_win_count(hctx, now); |
7c657876 ACM |
316 | } |
317 | ||
7da7f456 GR |
318 | /* prepare to send now (add options etc.) */ |
319 | dp->dccps_hc_tx_insert_options = 1; | |
842d1ef1 | 320 | DCCP_SKB_CB(skb)->dccpd_ccval = hctx->last_win_count; |
e312d100 GR |
321 | |
322 | /* set the nominal send time for the next following packet */ | |
842d1ef1 | 323 | hctx->t_nom = ktime_add_us(hctx->t_nom, hctx->t_ipi); |
f4a66ca4 | 324 | return CCID_PACKET_SEND_AT_ONCE; |
7c657876 ACM |
325 | } |
326 | ||
c506d91d | 327 | static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len) |
7c657876 | 328 | { |
59725dc2 | 329 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
7c657876 | 330 | |
6b57c93d | 331 | ccid3_hc_tx_update_s(hctx, len); |
7c657876 | 332 | |
842d1ef1 | 333 | if (tfrc_tx_hist_add(&hctx->hist, dccp_sk(sk)->dccps_gss)) |
c5a1ae9a | 334 | DCCP_CRIT("packet history - out of memory!"); |
7c657876 ACM |
335 | } |
336 | ||
337 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |
338 | { | |
59725dc2 | 339 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
63b3a73b | 340 | struct tfrc_tx_hist_entry *acked; |
0740d49c | 341 | ktime_t now; |
2a1fda6f | 342 | unsigned long t_nfb; |
ce177ae2 | 343 | u32 r_sample; |
1f2333ae | 344 | |
7c657876 ACM |
345 | /* we are only interested in ACKs */ |
346 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || | |
347 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) | |
348 | return; | |
63b3a73b GR |
349 | /* |
350 | * Locate the acknowledged packet in the TX history. | |
351 | * | |
352 | * Returning "entry not found" here can for instance happen when | |
353 | * - the host has not sent out anything (e.g. a passive server), | |
354 | * - the Ack is outdated (packet with higher Ack number was received), | |
355 | * - it is a bogus Ack (for a packet not sent on this connection). | |
356 | */ | |
357 | acked = tfrc_tx_hist_find_entry(hctx->hist, dccp_hdr_ack_seq(skb)); | |
358 | if (acked == NULL) | |
5bd370a6 | 359 | return; |
63b3a73b GR |
360 | /* For the sake of RTT sampling, ignore/remove all older entries */ |
361 | tfrc_tx_hist_purge(&acked->next); | |
362 | ||
363 | /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */ | |
364 | now = ktime_get_real(); | |
365 | r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp)); | |
366 | hctx->rtt = tfrc_ewma(hctx->rtt, r_sample, 9); | |
7c657876 | 367 | |
d8d1252f GR |
368 | /* |
369 | * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 | |
370 | */ | |
d0c05fe4 GR |
371 | if (!hctx->feedback) { |
372 | hctx->feedback = true; | |
5c3fbb6a | 373 | |
842d1ef1 | 374 | if (hctx->t_rto == 0) { |
d8d1252f GR |
375 | /* |
376 | * Initial feedback packet: Larger Initial Windows (4.2) | |
377 | */ | |
842d1ef1 GR |
378 | hctx->x = rfc3390_initial_rate(sk); |
379 | hctx->t_ld = now; | |
a79ef76f | 380 | |
d8d1252f | 381 | ccid3_update_send_interval(hctx); |
7c657876 | 382 | |
d8d1252f | 383 | goto done_computing_x; |
842d1ef1 | 384 | } else if (hctx->p == 0) { |
d8d1252f GR |
385 | /* |
386 | * First feedback after nofeedback timer expiry (4.3) | |
387 | */ | |
388 | goto done_computing_x; | |
389 | } | |
390 | } | |
7c657876 | 391 | |
d8d1252f | 392 | /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ |
842d1ef1 GR |
393 | if (hctx->p > 0) |
394 | hctx->x_calc = tfrc_calc_x(hctx->s, hctx->rtt, hctx->p); | |
d8d1252f | 395 | ccid3_hc_tx_update_x(sk, &now); |
7c657876 | 396 | |
d8d1252f GR |
397 | done_computing_x: |
398 | ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, " | |
5bd370a6 | 399 | "p=%u, X_calc=%u, X_recv=%u, X=%u\n", |
842d1ef1 GR |
400 | dccp_role(sk), sk, hctx->rtt, r_sample, |
401 | hctx->s, hctx->p, hctx->x_calc, | |
402 | (unsigned)(hctx->x_recv >> 6), | |
403 | (unsigned)(hctx->x >> 6)); | |
7c657876 | 404 | |
5bd370a6 | 405 | /* unschedule no feedback timer */ |
842d1ef1 | 406 | sk_stop_timer(sk, &hctx->no_feedback_timer); |
7c657876 | 407 | |
5bd370a6 GR |
408 | /* |
409 | * As we have calculated new ipi, delta, t_nom it is possible | |
410 | * that we now can send a packet, so wake up dccp_wait_for_ccid | |
411 | */ | |
412 | sk->sk_write_space(sk); | |
8c60f3fa | 413 | |
5bd370a6 GR |
414 | /* |
415 | * Update timeout interval for the nofeedback timer. | |
416 | * We use a configuration option to increase the lower bound. | |
417 | * This can help avoid triggering the nofeedback timer too | |
418 | * often ('spinning') on LANs with small RTTs. | |
419 | */ | |
842d1ef1 GR |
420 | hctx->t_rto = max_t(u32, 4 * hctx->rtt, (CONFIG_IP_DCCP_CCID3_RTO * |
421 | (USEC_PER_SEC / 1000))); | |
5bd370a6 GR |
422 | /* |
423 | * Schedule no feedback timer to expire in | |
424 | * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) | |
425 | */ | |
842d1ef1 | 426 | t_nfb = max(hctx->t_rto, 2 * hctx->t_ipi); |
7c657876 | 427 | |
5bd370a6 GR |
428 | ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " |
429 | "expire in %lu jiffies (%luus)\n", | |
842d1ef1 | 430 | dccp_role(sk), sk, usecs_to_jiffies(t_nfb), t_nfb); |
a9672411 | 431 | |
842d1ef1 | 432 | sk_reset_timer(sk, &hctx->no_feedback_timer, |
5bd370a6 | 433 | jiffies + usecs_to_jiffies(t_nfb)); |
7c657876 ACM |
434 | } |
435 | ||
3306c781 GR |
436 | static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type, |
437 | u8 option, u8 *optval, u8 optlen) | |
7c657876 | 438 | { |
59725dc2 | 439 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
76fd1e87 | 440 | __be32 opt_val; |
7c657876 | 441 | |
7c657876 | 442 | switch (option) { |
47a61e7b | 443 | case TFRC_OPT_RECEIVE_RATE: |
7c657876 | 444 | case TFRC_OPT_LOSS_EVENT_RATE: |
3306c781 GR |
445 | /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */ |
446 | if (packet_type == DCCP_PKT_DATA) | |
447 | break; | |
448 | if (unlikely(optlen != 4)) { | |
47a61e7b | 449 | DCCP_WARN("%s(%p), invalid len %d for %u\n", |
3306c781 | 450 | dccp_role(sk), sk, optlen, option); |
47a61e7b | 451 | return -EINVAL; |
7c657876 | 452 | } |
3306c781 | 453 | opt_val = ntohl(get_unaligned((__be32 *)optval)); |
47a61e7b GR |
454 | |
455 | if (option == TFRC_OPT_RECEIVE_RATE) { | |
ce177ae2 GR |
456 | /* Receive Rate is kept in units of 64 bytes/second */ |
457 | hctx->x_recv = opt_val; | |
458 | hctx->x_recv <<= 6; | |
459 | ||
a9672411 | 460 | ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", |
47a61e7b GR |
461 | dccp_role(sk), sk, opt_val); |
462 | } else { | |
ce177ae2 GR |
463 | /* Update the fixpoint Loss Event Rate fraction */ |
464 | hctx->p = tfrc_invert_loss_event_rate(opt_val); | |
465 | ||
47a61e7b GR |
466 | ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", |
467 | dccp_role(sk), sk, opt_val); | |
7c657876 | 468 | } |
7c657876 | 469 | } |
47a61e7b | 470 | return 0; |
7c657876 ACM |
471 | } |
472 | ||
91f0ebf7 | 473 | static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) |
7c657876 | 474 | { |
91f0ebf7 | 475 | struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); |
7c657876 | 476 | |
842d1ef1 GR |
477 | hctx->hist = NULL; |
478 | setup_timer(&hctx->no_feedback_timer, | |
479 | ccid3_hc_tx_no_feedback_timer, (unsigned long)sk); | |
7c657876 ACM |
480 | return 0; |
481 | } | |
482 | ||
483 | static void ccid3_hc_tx_exit(struct sock *sk) | |
484 | { | |
59725dc2 | 485 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
7c657876 | 486 | |
842d1ef1 | 487 | sk_stop_timer(sk, &hctx->no_feedback_timer); |
842d1ef1 | 488 | tfrc_tx_hist_purge(&hctx->hist); |
7c657876 ACM |
489 | } |
490 | ||
9bf17475 GR |
491 | static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) |
492 | { | |
b2e317f4 GR |
493 | info->tcpi_rto = ccid3_hc_tx_sk(sk)->t_rto; |
494 | info->tcpi_rtt = ccid3_hc_tx_sk(sk)->rtt; | |
9bf17475 GR |
495 | } |
496 | ||
497 | static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, | |
498 | u32 __user *optval, int __user *optlen) | |
499 | { | |
b2e317f4 | 500 | const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
842d1ef1 | 501 | struct tfrc_tx_info tfrc; |
9bf17475 GR |
502 | const void *val; |
503 | ||
9bf17475 GR |
504 | switch (optname) { |
505 | case DCCP_SOCKOPT_CCID_TX_INFO: | |
842d1ef1 | 506 | if (len < sizeof(tfrc)) |
9bf17475 | 507 | return -EINVAL; |
842d1ef1 GR |
508 | tfrc.tfrctx_x = hctx->x; |
509 | tfrc.tfrctx_x_recv = hctx->x_recv; | |
510 | tfrc.tfrctx_x_calc = hctx->x_calc; | |
511 | tfrc.tfrctx_rtt = hctx->rtt; | |
512 | tfrc.tfrctx_p = hctx->p; | |
513 | tfrc.tfrctx_rto = hctx->t_rto; | |
514 | tfrc.tfrctx_ipi = hctx->t_ipi; | |
515 | len = sizeof(tfrc); | |
516 | val = &tfrc; | |
9bf17475 GR |
517 | break; |
518 | default: | |
519 | return -ENOPROTOOPT; | |
520 | } | |
521 | ||
522 | if (put_user(len, optlen) || copy_to_user(optval, val, len)) | |
523 | return -EFAULT; | |
524 | ||
525 | return 0; | |
526 | } | |
527 | ||
7c657876 | 528 | /* |
9bf17475 | 529 | * Receiver Half-Connection Routines |
7c657876 | 530 | */ |
b84a2189 ACM |
531 | static void ccid3_hc_rx_send_feedback(struct sock *sk, |
532 | const struct sk_buff *skb, | |
533 | enum ccid3_fback_type fbtype) | |
7c657876 | 534 | { |
59725dc2 | 535 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
7c657876 | 536 | |
b84a2189 ACM |
537 | switch (fbtype) { |
538 | case CCID3_FBACK_INITIAL: | |
842d1ef1 GR |
539 | hcrx->x_recv = 0; |
540 | hcrx->p_inverse = ~0U; /* see RFC 4342, 8.5 */ | |
7c657876 | 541 | break; |
b84a2189 | 542 | case CCID3_FBACK_PARAM_CHANGE: |
2f3e3bba | 543 | if (unlikely(hcrx->feedback == CCID3_FBACK_NONE)) { |
d20ed95f GR |
544 | /* |
545 | * rfc3448bis-06, 6.3.1: First packet(s) lost or marked | |
546 | * FIXME: in rfc3448bis the receiver returns X_recv=0 | |
547 | * here as it normally would in the first feedback packet. | |
548 | * However this is not possible yet, since the code still | |
549 | * uses RFC 3448, i.e. | |
550 | * If (p > 0) | |
551 | * Calculate X_calc using the TCP throughput equation. | |
552 | * X = max(min(X_calc, 2*X_recv), s/t_mbi); | |
553 | * would bring X down to s/t_mbi. That is why we return | |
554 | * X_recv according to rfc3448bis-06 for the moment. | |
555 | */ | |
2b81143a GR |
556 | u32 s = tfrc_rx_hist_packet_size(&hcrx->hist), |
557 | rtt = tfrc_rx_hist_rtt(&hcrx->hist); | |
d20ed95f | 558 | |
d20ed95f GR |
559 | hcrx->x_recv = scaled_div32(s, 2 * rtt); |
560 | break; | |
561 | } | |
b84a2189 ACM |
562 | /* |
563 | * When parameters change (new loss or p > p_prev), we do not | |
564 | * have a reliable estimate for R_m of [RFC 3448, 6.2] and so | |
68c89ee5 | 565 | * always check whether at least RTT time units were covered. |
b84a2189 | 566 | */ |
68c89ee5 GR |
567 | hcrx->x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv); |
568 | break; | |
b84a2189 | 569 | case CCID3_FBACK_PERIODIC: |
2b81143a | 570 | /* |
68c89ee5 GR |
571 | * Step (2) of rfc3448bis-06, 6.2: |
572 | * - if no data packets have been received, just restart timer | |
573 | * - if data packets have been received, re-compute X_recv | |
2b81143a | 574 | */ |
68c89ee5 GR |
575 | if (hcrx->hist.bytes_recvd == 0) |
576 | goto prepare_for_next_time; | |
577 | hcrx->x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv); | |
7c657876 | 578 | break; |
b84a2189 | 579 | default: |
7c657876 ACM |
580 | return; |
581 | } | |
582 | ||
68c89ee5 | 583 | ccid3_pr_debug("X_recv=%u, 1/p=%u\n", hcrx->x_recv, hcrx->p_inverse); |
7c657876 | 584 | |
68c89ee5 | 585 | dccp_sk(sk)->dccps_hc_rx_insert_options = 1; |
7c657876 | 586 | dccp_send_ack(sk); |
68c89ee5 GR |
587 | |
588 | prepare_for_next_time: | |
589 | tfrc_rx_hist_restart_byte_counter(&hcrx->hist); | |
590 | hcrx->last_counter = dccp_hdr(skb)->dccph_ccval; | |
591 | hcrx->feedback = fbtype; | |
7c657876 ACM |
592 | } |
593 | ||
2d0817d1 | 594 | static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) |
7c657876 | 595 | { |
b2e317f4 | 596 | const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
60fe62e7 | 597 | __be32 x_recv, pinv; |
7c657876 | 598 | |
59d203f9 | 599 | if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) |
2d0817d1 | 600 | return 0; |
7c657876 | 601 | |
4fded33b | 602 | if (dccp_packet_without_ack(skb)) |
2d0817d1 ACM |
603 | return 0; |
604 | ||
842d1ef1 GR |
605 | x_recv = htonl(hcrx->x_recv); |
606 | pinv = htonl(hcrx->p_inverse); | |
2d0817d1 | 607 | |
385ac2e3 | 608 | if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, |
8109b02b | 609 | &pinv, sizeof(pinv)) || |
2d0817d1 | 610 | dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, |
8109b02b | 611 | &x_recv, sizeof(x_recv))) |
2d0817d1 ACM |
612 | return -1; |
613 | ||
614 | return 0; | |
7c657876 ACM |
615 | } |
616 | ||
954c2db8 GR |
617 | /** ccid3_first_li - Implements [RFC 3448, 6.3.1] |
618 | * | |
619 | * Determine the length of the first loss interval via inverse lookup. | |
620 | * Assume that X_recv can be computed by the throughput equation | |
621 | * s | |
622 | * X_recv = -------- | |
623 | * R * fval | |
624 | * Find some p such that f(p) = fval; return 1/p (scaled). | |
625 | */ | |
626 | static u32 ccid3_first_li(struct sock *sk) | |
627 | { | |
628 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | |
2b81143a | 629 | u32 s = tfrc_rx_hist_packet_size(&hcrx->hist), |
68c89ee5 | 630 | rtt = tfrc_rx_hist_rtt(&hcrx->hist), x_recv, p; |
954c2db8 GR |
631 | u64 fval; |
632 | ||
d20ed95f GR |
633 | /* |
634 | * rfc3448bis-06, 6.3.1: First data packet(s) are marked or lost. Set p | |
635 | * to give the equivalent of X_target = s/(2*R). Thus fval = 2 and so p | |
636 | * is about 20.64%. This yields an interval length of 4.84 (rounded up). | |
637 | */ | |
2f3e3bba | 638 | if (unlikely(hcrx->feedback == CCID3_FBACK_NONE)) |
d20ed95f GR |
639 | return 5; |
640 | ||
68c89ee5 GR |
641 | x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv); |
642 | if (x_recv == 0) | |
643 | goto failed; | |
954c2db8 | 644 | |
2b81143a | 645 | fval = scaled_div32(scaled_div(s, rtt), x_recv); |
954c2db8 GR |
646 | p = tfrc_calc_x_reverse_lookup(fval); |
647 | ||
648 | ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " | |
649 | "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); | |
650 | ||
68c89ee5 GR |
651 | if (p > 0) |
652 | return scaled_div(1, p); | |
653 | failed: | |
654 | return UINT_MAX; | |
954c2db8 GR |
655 | } |
656 | ||
b84a2189 | 657 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) |
7c657876 | 658 | { |
59725dc2 | 659 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
5b5d0e70 | 660 | const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; |
b84a2189 ACM |
661 | const bool is_data_packet = dccp_data_packet(skb); |
662 | ||
d20ed95f GR |
663 | /* |
664 | * Perform loss detection and handle pending losses | |
665 | */ | |
88e97a93 GR |
666 | if (tfrc_rx_congestion_event(&hcrx->hist, &hcrx->li_hist, |
667 | skb, ndp, ccid3_first_li, sk)) | |
668 | ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_PARAM_CHANGE); | |
669 | /* | |
670 | * Feedback for first non-empty data packet (RFC 3448, 6.3) | |
671 | */ | |
672 | else if (unlikely(hcrx->feedback == CCID3_FBACK_NONE && is_data_packet)) | |
673 | ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_INITIAL); | |
b84a2189 ACM |
674 | /* |
675 | * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3 | |
676 | */ | |
88e97a93 GR |
677 | else if (!tfrc_rx_hist_loss_pending(&hcrx->hist) && is_data_packet && |
678 | SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->last_counter) > 3) | |
679 | ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_PERIODIC); | |
7c657876 ACM |
680 | } |
681 | ||
91f0ebf7 | 682 | static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) |
7c657876 | 683 | { |
91f0ebf7 | 684 | struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); |
7c657876 | 685 | |
842d1ef1 | 686 | tfrc_lh_init(&hcrx->li_hist); |
24b8d343 | 687 | return tfrc_rx_hist_init(&hcrx->hist, sk); |
7c657876 ACM |
688 | } |
689 | ||
690 | static void ccid3_hc_rx_exit(struct sock *sk) | |
691 | { | |
59725dc2 | 692 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
7c657876 | 693 | |
842d1ef1 GR |
694 | tfrc_rx_hist_purge(&hcrx->hist); |
695 | tfrc_lh_cleanup(&hcrx->li_hist); | |
7c657876 ACM |
696 | } |
697 | ||
2babe1f6 ACM |
698 | static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) |
699 | { | |
8109b02b | 700 | info->tcpi_options |= TCPI_OPT_TIMESTAMPS; |
2b81143a | 701 | info->tcpi_rcv_rtt = tfrc_rx_hist_rtt(&ccid3_hc_rx_sk(sk)->hist); |
2babe1f6 ACM |
702 | } |
703 | ||
88f964db ACM |
704 | static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, |
705 | u32 __user *optval, int __user *optlen) | |
706 | { | |
b2e317f4 | 707 | const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
8e138e79 | 708 | struct tfrc_rx_info rx_info; |
88f964db | 709 | const void *val; |
c9eaf173 | 710 | |
88f964db ACM |
711 | switch (optname) { |
712 | case DCCP_SOCKOPT_CCID_RX_INFO: | |
8e138e79 | 713 | if (len < sizeof(rx_info)) |
88f964db | 714 | return -EINVAL; |
842d1ef1 | 715 | rx_info.tfrcrx_x_recv = hcrx->x_recv; |
2b81143a | 716 | rx_info.tfrcrx_rtt = tfrc_rx_hist_rtt(&hcrx->hist); |
535c55df | 717 | rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hcrx->p_inverse); |
8e138e79 GR |
718 | len = sizeof(rx_info); |
719 | val = &rx_info; | |
88f964db ACM |
720 | break; |
721 | default: | |
722 | return -ENOPROTOOPT; | |
723 | } | |
724 | ||
725 | if (put_user(len, optlen) || copy_to_user(optval, val, len)) | |
726 | return -EFAULT; | |
727 | ||
728 | return 0; | |
729 | } | |
730 | ||
91f0ebf7 | 731 | static struct ccid_operations ccid3 = { |
3dd9a7c3 | 732 | .ccid_id = DCCPC_CCID3, |
84a97b0a | 733 | .ccid_name = "TCP-Friendly Rate Control", |
7c657876 | 734 | .ccid_owner = THIS_MODULE, |
91f0ebf7 | 735 | .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), |
7c657876 ACM |
736 | .ccid_hc_tx_init = ccid3_hc_tx_init, |
737 | .ccid_hc_tx_exit = ccid3_hc_tx_exit, | |
738 | .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, | |
739 | .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, | |
740 | .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, | |
7c657876 | 741 | .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, |
91f0ebf7 | 742 | .ccid_hc_rx_obj_size = sizeof(struct ccid3_hc_rx_sock), |
7c657876 ACM |
743 | .ccid_hc_rx_init = ccid3_hc_rx_init, |
744 | .ccid_hc_rx_exit = ccid3_hc_rx_exit, | |
745 | .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, | |
746 | .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, | |
2babe1f6 ACM |
747 | .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, |
748 | .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, | |
88f964db ACM |
749 | .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, |
750 | .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, | |
7c657876 | 751 | }; |
8109b02b | 752 | |
56724aa4 | 753 | #ifdef CONFIG_IP_DCCP_CCID3_DEBUG |
43264991 | 754 | module_param(ccid3_debug, bool, 0644); |
7c657876 | 755 | MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); |
56724aa4 | 756 | #endif |
7c657876 ACM |
757 | |
758 | static __init int ccid3_module_init(void) | |
759 | { | |
f76fd327 GR |
760 | struct timespec tp; |
761 | ||
762 | /* | |
763 | * Without a fine-grained clock resolution, RTTs/X_recv are not sampled | |
764 | * correctly and feedback is sent either too early or too late. | |
765 | */ | |
766 | hrtimer_get_res(CLOCK_MONOTONIC, &tp); | |
767 | if (tp.tv_sec || tp.tv_nsec > DCCP_TIME_RESOLUTION * NSEC_PER_USEC) { | |
768 | printk(KERN_ERR "%s: Timer too coarse (%ld usec), need %u-usec" | |
769 | " resolution - check your clocksource.\n", __func__, | |
770 | tp.tv_nsec/NSEC_PER_USEC, DCCP_TIME_RESOLUTION); | |
771 | return -ESOCKTNOSUPPORT; | |
772 | } | |
34a9e7ea | 773 | return ccid_register(&ccid3); |
7c657876 ACM |
774 | } |
775 | module_init(ccid3_module_init); | |
776 | ||
777 | static __exit void ccid3_module_exit(void) | |
778 | { | |
779 | ccid_unregister(&ccid3); | |
7c657876 ACM |
780 | } |
781 | module_exit(ccid3_module_exit); | |
782 | ||
e6bccd35 | 783 | MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, " |
1f2333ae | 784 | "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); |
7c657876 ACM |
785 | MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); |
786 | MODULE_LICENSE("GPL"); | |
787 | MODULE_ALIAS("net-dccp-ccid-3"); |