Commit | Line | Data |
---|---|---|
71c87e0c JBT |
1 | /* |
2 | * linux/net/ipv4/inet_lro.c | |
3 | * | |
4 | * Large Receive Offload (ipv4 / tcp) | |
5 | * | |
6 | * (C) Copyright IBM Corp. 2007 | |
7 | * | |
8 | * Authors: | |
9 | * Jan-Bernd Themann <themann@de.ibm.com> | |
10 | * Christoph Raisch <raisch@de.ibm.com> | |
11 | * | |
12 | * | |
13 | * This program is free software; you can redistribute it and/or modify | |
14 | * it under the terms of the GNU General Public License as published by | |
15 | * the Free Software Foundation; either version 2, or (at your option) | |
16 | * any later version. | |
17 | * | |
18 | * This program is distributed in the hope that it will be useful, | |
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | * GNU General Public License for more details. | |
22 | * | |
23 | * You should have received a copy of the GNU General Public License | |
24 | * along with this program; if not, write to the Free Software | |
25 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
26 | */ | |
27 | ||
28 | ||
29 | #include <linux/module.h> | |
30 | #include <linux/if_vlan.h> | |
31 | #include <linux/inet_lro.h> | |
35353c2b | 32 | #include <net/checksum.h> |
71c87e0c JBT |
33 | |
34 | MODULE_LICENSE("GPL"); | |
35 | MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>"); | |
36 | MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)"); | |
37 | ||
/*
 * Header lengths in bytes. The doff/ihl header fields are shifted left by
 * two, i.e. multiplied by four. Arguments are parenthesised so that any
 * pointer expression (e.g. "hdr + 1" or "&hdr") expands correctly.
 */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)
/* IP total length minus the IP and TCP header lengths */
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* accepted values of the ihl/doff header fields */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

#define LRO_MAX_PG_HLEN 64

/*
 * Bump one counter in lro_mgr->stats. Wrapped in do { } while (0) so the
 * macro behaves like a single statement, also in front of an "else".
 */
#define LRO_INC_STATS(lro_mgr, attr) do { (lro_mgr)->stats.attr++; } while (0)
50 | ||
51 | /* | |
52 | * Basic tcp checks whether packet is suitable for LRO | |
53 | */ | |
54 | ||
b71d1d42 ED |
55 | static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph, |
56 | int len, const struct net_lro_desc *lro_desc) | |
71c87e0c JBT |
57 | { |
58 | /* check ip header: don't aggregate padded frames */ | |
59 | if (ntohs(iph->tot_len) != len) | |
60 | return -1; | |
61 | ||
62 | if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0) | |
63 | return -1; | |
64 | ||
65 | if (iph->ihl != IPH_LEN_WO_OPTIONS) | |
66 | return -1; | |
67 | ||
9d4fb27d JP |
68 | if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack || |
69 | tcph->rst || tcph->syn || tcph->fin) | |
71c87e0c JBT |
70 | return -1; |
71 | ||
72 | if (INET_ECN_is_ce(ipv4_get_dsfield(iph))) | |
73 | return -1; | |
74 | ||
9d4fb27d JP |
75 | if (tcph->doff != TCPH_LEN_WO_OPTIONS && |
76 | tcph->doff != TCPH_LEN_W_TIMESTAMP) | |
71c87e0c JBT |
77 | return -1; |
78 | ||
79 | /* check tcp options (only timestamp allowed) */ | |
80 | if (tcph->doff == TCPH_LEN_W_TIMESTAMP) { | |
9df7c98a | 81 | __be32 *topt = (__be32 *)(tcph + 1); |
71c87e0c JBT |
82 | |
83 | if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | |
84 | | (TCPOPT_TIMESTAMP << 8) | |
85 | | TCPOLEN_TIMESTAMP)) | |
86 | return -1; | |
87 | ||
88 | /* timestamp should be in right order */ | |
89 | topt++; | |
90 | if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval), | |
91 | ntohl(*topt))) | |
92 | return -1; | |
93 | ||
94 | /* timestamp reply should not be zero */ | |
95 | topt++; | |
96 | if (*topt == 0) | |
97 | return -1; | |
98 | } | |
99 | ||
100 | return 0; | |
101 | } | |
102 | ||
103 | static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) | |
104 | { | |
105 | struct iphdr *iph = lro_desc->iph; | |
106 | struct tcphdr *tcph = lro_desc->tcph; | |
9df7c98a | 107 | __be32 *p; |
71c87e0c JBT |
108 | __wsum tcp_hdr_csum; |
109 | ||
110 | tcph->ack_seq = lro_desc->tcp_ack; | |
111 | tcph->window = lro_desc->tcp_window; | |
112 | ||
113 | if (lro_desc->tcp_saw_tstamp) { | |
9df7c98a | 114 | p = (__be32 *)(tcph + 1); |
71c87e0c JBT |
115 | *(p+2) = lro_desc->tcp_rcv_tsecr; |
116 | } | |
117 | ||
35353c2b | 118 | csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len)); |
71c87e0c JBT |
119 | iph->tot_len = htons(lro_desc->ip_tot_len); |
120 | ||
71c87e0c | 121 | tcph->check = 0; |
07f0757a | 122 | tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); |
71c87e0c JBT |
123 | lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); |
124 | tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, | |
125 | lro_desc->ip_tot_len - | |
126 | IP_HDR_LEN(iph), IPPROTO_TCP, | |
127 | lro_desc->data_csum); | |
128 | } | |
129 | ||
130 | static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) | |
131 | { | |
132 | __wsum tcp_csum; | |
133 | __wsum tcp_hdr_csum; | |
134 | __wsum tcp_ps_hdr_csum; | |
135 | ||
136 | tcp_csum = ~csum_unfold(tcph->check); | |
07f0757a | 137 | tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum); |
71c87e0c JBT |
138 | |
139 | tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, | |
140 | len + TCP_HDR_LEN(tcph), | |
141 | IPPROTO_TCP, 0); | |
142 | ||
143 | return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum), | |
144 | tcp_ps_hdr_csum); | |
145 | } | |
146 | ||
147 | static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, | |
9fea0330 | 148 | struct iphdr *iph, struct tcphdr *tcph) |
71c87e0c JBT |
149 | { |
150 | int nr_frags; | |
9df7c98a | 151 | __be32 *ptr; |
71c87e0c JBT |
152 | u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); |
153 | ||
154 | nr_frags = skb_shinfo(skb)->nr_frags; | |
155 | lro_desc->parent = skb; | |
156 | lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]); | |
157 | lro_desc->iph = iph; | |
158 | lro_desc->tcph = tcph; | |
159 | lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len; | |
f53f4137 | 160 | lro_desc->tcp_ack = tcph->ack_seq; |
71c87e0c JBT |
161 | lro_desc->tcp_window = tcph->window; |
162 | ||
163 | lro_desc->pkt_aggr_cnt = 1; | |
164 | lro_desc->ip_tot_len = ntohs(iph->tot_len); | |
165 | ||
166 | if (tcph->doff == 8) { | |
9df7c98a | 167 | ptr = (__be32 *)(tcph+1); |
71c87e0c JBT |
168 | lro_desc->tcp_saw_tstamp = 1; |
169 | lro_desc->tcp_rcv_tsval = *(ptr+1); | |
170 | lro_desc->tcp_rcv_tsecr = *(ptr+2); | |
171 | } | |
172 | ||
173 | lro_desc->mss = tcp_data_len; | |
71c87e0c JBT |
174 | lro_desc->active = 1; |
175 | ||
176 | lro_desc->data_csum = lro_tcp_data_csum(iph, tcph, | |
177 | tcp_data_len); | |
178 | } | |
179 | ||
180 | static inline void lro_clear_desc(struct net_lro_desc *lro_desc) | |
181 | { | |
182 | memset(lro_desc, 0, sizeof(struct net_lro_desc)); | |
183 | } | |
184 | ||
185 | static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph, | |
186 | struct tcphdr *tcph, int tcp_data_len) | |
187 | { | |
188 | struct sk_buff *parent = lro_desc->parent; | |
9df7c98a | 189 | __be32 *topt; |
71c87e0c JBT |
190 | |
191 | lro_desc->pkt_aggr_cnt++; | |
192 | lro_desc->ip_tot_len += tcp_data_len; | |
193 | lro_desc->tcp_next_seq += tcp_data_len; | |
194 | lro_desc->tcp_window = tcph->window; | |
195 | lro_desc->tcp_ack = tcph->ack_seq; | |
196 | ||
197 | /* don't update tcp_rcv_tsval, would not work with PAWS */ | |
198 | if (lro_desc->tcp_saw_tstamp) { | |
9df7c98a | 199 | topt = (__be32 *) (tcph + 1); |
71c87e0c JBT |
200 | lro_desc->tcp_rcv_tsecr = *(topt + 2); |
201 | } | |
202 | ||
203 | lro_desc->data_csum = csum_block_add(lro_desc->data_csum, | |
204 | lro_tcp_data_csum(iph, tcph, | |
205 | tcp_data_len), | |
206 | parent->len); | |
207 | ||
208 | parent->len += tcp_data_len; | |
209 | parent->data_len += tcp_data_len; | |
210 | if (tcp_data_len > lro_desc->mss) | |
211 | lro_desc->mss = tcp_data_len; | |
212 | } | |
213 | ||
214 | static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb, | |
215 | struct iphdr *iph, struct tcphdr *tcph) | |
216 | { | |
217 | struct sk_buff *parent = lro_desc->parent; | |
218 | int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); | |
219 | ||
220 | lro_add_common(lro_desc, iph, tcph, tcp_data_len); | |
221 | ||
222 | skb_pull(skb, (skb->len - tcp_data_len)); | |
223 | parent->truesize += skb->truesize; | |
224 | ||
225 | if (lro_desc->last_skb) | |
226 | lro_desc->last_skb->next = skb; | |
227 | else | |
228 | skb_shinfo(parent)->frag_list = skb; | |
229 | ||
230 | lro_desc->last_skb = skb; | |
231 | } | |
232 | ||
71c87e0c JBT |
233 | |
234 | static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, | |
235 | struct iphdr *iph, | |
236 | struct tcphdr *tcph) | |
237 | { | |
9d4fb27d JP |
238 | if ((lro_desc->iph->saddr != iph->saddr) || |
239 | (lro_desc->iph->daddr != iph->daddr) || | |
240 | (lro_desc->tcph->source != tcph->source) || | |
241 | (lro_desc->tcph->dest != tcph->dest)) | |
71c87e0c JBT |
242 | return -1; |
243 | return 0; | |
244 | } | |
245 | ||
246 | static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr, | |
247 | struct net_lro_desc *lro_arr, | |
248 | struct iphdr *iph, | |
249 | struct tcphdr *tcph) | |
250 | { | |
251 | struct net_lro_desc *lro_desc = NULL; | |
252 | struct net_lro_desc *tmp; | |
253 | int max_desc = lro_mgr->max_desc; | |
254 | int i; | |
255 | ||
256 | for (i = 0; i < max_desc; i++) { | |
257 | tmp = &lro_arr[i]; | |
258 | if (tmp->active) | |
259 | if (!lro_check_tcp_conn(tmp, iph, tcph)) { | |
260 | lro_desc = tmp; | |
261 | goto out; | |
262 | } | |
263 | } | |
264 | ||
265 | for (i = 0; i < max_desc; i++) { | |
266 | if (!lro_arr[i].active) { | |
267 | lro_desc = &lro_arr[i]; | |
268 | goto out; | |
269 | } | |
270 | } | |
271 | ||
272 | LRO_INC_STATS(lro_mgr, no_desc); | |
273 | out: | |
274 | return lro_desc; | |
275 | } | |
276 | ||
277 | static void lro_flush(struct net_lro_mgr *lro_mgr, | |
278 | struct net_lro_desc *lro_desc) | |
279 | { | |
280 | if (lro_desc->pkt_aggr_cnt > 1) | |
281 | lro_update_tcp_ip_header(lro_desc); | |
282 | ||
283 | skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; | |
284 | ||
9fea0330 JP |
285 | if (lro_mgr->features & LRO_F_NAPI) |
286 | netif_receive_skb(lro_desc->parent); | |
287 | else | |
288 | netif_rx(lro_desc->parent); | |
71c87e0c JBT |
289 | |
290 | LRO_INC_STATS(lro_mgr, flushed); | |
291 | lro_clear_desc(lro_desc); | |
292 | } | |
293 | ||
294 | static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, | |
9fea0330 | 295 | void *priv) |
71c87e0c JBT |
296 | { |
297 | struct net_lro_desc *lro_desc; | |
298 | struct iphdr *iph; | |
299 | struct tcphdr *tcph; | |
300 | u64 flags; | |
301 | int vlan_hdr_len = 0; | |
302 | ||
9d4fb27d JP |
303 | if (!lro_mgr->get_skb_header || |
304 | lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, | |
305 | &flags, priv)) | |
71c87e0c JBT |
306 | goto out; |
307 | ||
308 | if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) | |
309 | goto out; | |
310 | ||
311 | lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); | |
312 | if (!lro_desc) | |
313 | goto out; | |
314 | ||
9d4fb27d JP |
315 | if ((skb->protocol == htons(ETH_P_8021Q)) && |
316 | !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) | |
71c87e0c JBT |
317 | vlan_hdr_len = VLAN_HLEN; |
318 | ||
319 | if (!lro_desc->active) { /* start new lro session */ | |
320 | if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL)) | |
321 | goto out; | |
322 | ||
323 | skb->ip_summed = lro_mgr->ip_summed_aggr; | |
9fea0330 | 324 | lro_init_desc(lro_desc, skb, iph, tcph); |
71c87e0c JBT |
325 | LRO_INC_STATS(lro_mgr, aggregated); |
326 | return 0; | |
327 | } | |
328 | ||
329 | if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) | |
330 | goto out2; | |
331 | ||
332 | if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc)) | |
333 | goto out2; | |
334 | ||
335 | lro_add_packet(lro_desc, skb, iph, tcph); | |
336 | LRO_INC_STATS(lro_mgr, aggregated); | |
337 | ||
338 | if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) || | |
339 | lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) | |
340 | lro_flush(lro_mgr, lro_desc); | |
341 | ||
342 | return 0; | |
343 | ||
344 | out2: /* send aggregated SKBs to stack */ | |
345 | lro_flush(lro_mgr, lro_desc); | |
346 | ||
251a4b32 | 347 | out: |
71c87e0c JBT |
348 | return 1; |
349 | } | |
350 | ||
71c87e0c JBT |
351 | void lro_receive_skb(struct net_lro_mgr *lro_mgr, |
352 | struct sk_buff *skb, | |
353 | void *priv) | |
354 | { | |
9fea0330 | 355 | if (__lro_proc_skb(lro_mgr, skb, priv)) { |
877364e6 | 356 | if (lro_mgr->features & LRO_F_NAPI) |
71c87e0c JBT |
357 | netif_receive_skb(skb); |
358 | else | |
359 | netif_rx(skb); | |
360 | } | |
361 | } | |
362 | EXPORT_SYMBOL(lro_receive_skb); | |
363 | ||
71c87e0c JBT |
364 | void lro_flush_all(struct net_lro_mgr *lro_mgr) |
365 | { | |
366 | int i; | |
367 | struct net_lro_desc *lro_desc = lro_mgr->lro_arr; | |
368 | ||
369 | for (i = 0; i < lro_mgr->max_desc; i++) { | |
370 | if (lro_desc[i].active) | |
371 | lro_flush(lro_mgr, &lro_desc[i]); | |
372 | } | |
373 | } | |
374 | EXPORT_SYMBOL(lro_flush_all); |