Merge branch 'master' into for-2.6.35
[deliverable/linux.git] / net / netfilter / ipvs / ip_vs_proto_tcp.c
1 /*
2 * ip_vs_proto_tcp.c: TCP load balancing support for IPVS
3 *
4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Changes:
13 *
14 */
15
16 #define KMSG_COMPONENT "IPVS"
17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
19 #include <linux/kernel.h>
20 #include <linux/ip.h>
21 #include <linux/tcp.h> /* for tcphdr */
22 #include <net/ip.h>
23 #include <net/tcp.h> /* for csum_tcpudp_magic */
24 #include <net/ip6_checksum.h>
25 #include <linux/netfilter.h>
26 #include <linux/netfilter_ipv4.h>
27
28 #include <net/ip_vs.h>
29
30
31 static struct ip_vs_conn *
32 tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
33 const struct ip_vs_iphdr *iph, unsigned int proto_off,
34 int inverse)
35 {
36 __be16 _ports[2], *pptr;
37
38 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
39 if (pptr == NULL)
40 return NULL;
41
42 if (likely(!inverse)) {
43 return ip_vs_conn_in_get(af, iph->protocol,
44 &iph->saddr, pptr[0],
45 &iph->daddr, pptr[1]);
46 } else {
47 return ip_vs_conn_in_get(af, iph->protocol,
48 &iph->daddr, pptr[1],
49 &iph->saddr, pptr[0]);
50 }
51 }
52
53 static struct ip_vs_conn *
54 tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
55 const struct ip_vs_iphdr *iph, unsigned int proto_off,
56 int inverse)
57 {
58 __be16 _ports[2], *pptr;
59
60 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
61 if (pptr == NULL)
62 return NULL;
63
64 if (likely(!inverse)) {
65 return ip_vs_conn_out_get(af, iph->protocol,
66 &iph->saddr, pptr[0],
67 &iph->daddr, pptr[1]);
68 } else {
69 return ip_vs_conn_out_get(af, iph->protocol,
70 &iph->daddr, pptr[1],
71 &iph->saddr, pptr[0]);
72 }
73 }
74
75
76 static int
77 tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
78 int *verdict, struct ip_vs_conn **cpp)
79 {
80 struct ip_vs_service *svc;
81 struct tcphdr _tcph, *th;
82 struct ip_vs_iphdr iph;
83
84 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
85
86 th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
87 if (th == NULL) {
88 *verdict = NF_DROP;
89 return 0;
90 }
91
92 if (th->syn &&
93 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
94 th->dest))) {
95 if (ip_vs_todrop()) {
96 /*
97 * It seems that we are very loaded.
98 * We have to drop this packet :(
99 */
100 ip_vs_service_put(svc);
101 *verdict = NF_DROP;
102 return 0;
103 }
104
105 /*
106 * Let the virtual server select a real server for the
107 * incoming connection, and create a connection entry.
108 */
109 *cpp = ip_vs_schedule(svc, skb);
110 if (!*cpp) {
111 *verdict = ip_vs_leave(svc, skb, pp);
112 return 0;
113 }
114 ip_vs_service_put(svc);
115 }
116 return 1;
117 }
118
119
120 static inline void
121 tcp_fast_csum_update(int af, struct tcphdr *tcph,
122 const union nf_inet_addr *oldip,
123 const union nf_inet_addr *newip,
124 __be16 oldport, __be16 newport)
125 {
126 #ifdef CONFIG_IP_VS_IPV6
127 if (af == AF_INET6)
128 tcph->check =
129 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
130 ip_vs_check_diff2(oldport, newport,
131 ~csum_unfold(tcph->check))));
132 else
133 #endif
134 tcph->check =
135 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
136 ip_vs_check_diff2(oldport, newport,
137 ~csum_unfold(tcph->check))));
138 }
139
140
141 static inline void
142 tcp_partial_csum_update(int af, struct tcphdr *tcph,
143 const union nf_inet_addr *oldip,
144 const union nf_inet_addr *newip,
145 __be16 oldlen, __be16 newlen)
146 {
147 #ifdef CONFIG_IP_VS_IPV6
148 if (af == AF_INET6)
149 tcph->check =
150 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
151 ip_vs_check_diff2(oldlen, newlen,
152 ~csum_unfold(tcph->check))));
153 else
154 #endif
155 tcph->check =
156 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
157 ip_vs_check_diff2(oldlen, newlen,
158 ~csum_unfold(tcph->check))));
159 }
160
161
162 static int
163 tcp_snat_handler(struct sk_buff *skb,
164 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
165 {
166 struct tcphdr *tcph;
167 unsigned int tcphoff;
168 int oldlen;
169
170 #ifdef CONFIG_IP_VS_IPV6
171 if (cp->af == AF_INET6)
172 tcphoff = sizeof(struct ipv6hdr);
173 else
174 #endif
175 tcphoff = ip_hdrlen(skb);
176 oldlen = skb->len - tcphoff;
177
178 /* csum_check requires unshared skb */
179 if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
180 return 0;
181
182 if (unlikely(cp->app != NULL)) {
183 /* Some checks before mangling */
184 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
185 return 0;
186
187 /* Call application helper if needed */
188 if (!ip_vs_app_pkt_out(cp, skb))
189 return 0;
190 }
191
192 tcph = (void *)skb_network_header(skb) + tcphoff;
193 tcph->source = cp->vport;
194
195 /* Adjust TCP checksums */
196 if (skb->ip_summed == CHECKSUM_PARTIAL) {
197 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
198 htons(oldlen),
199 htons(skb->len - tcphoff));
200 } else if (!cp->app) {
201 /* Only port and addr are changed, do fast csum update */
202 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
203 cp->dport, cp->vport);
204 if (skb->ip_summed == CHECKSUM_COMPLETE)
205 skb->ip_summed = CHECKSUM_NONE;
206 } else {
207 /* full checksum calculation */
208 tcph->check = 0;
209 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
210 #ifdef CONFIG_IP_VS_IPV6
211 if (cp->af == AF_INET6)
212 tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
213 &cp->caddr.in6,
214 skb->len - tcphoff,
215 cp->protocol, skb->csum);
216 else
217 #endif
218 tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
219 cp->caddr.ip,
220 skb->len - tcphoff,
221 cp->protocol,
222 skb->csum);
223
224 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
225 pp->name, tcph->check,
226 (char*)&(tcph->check) - (char*)tcph);
227 }
228 return 1;
229 }
230
231
232 static int
233 tcp_dnat_handler(struct sk_buff *skb,
234 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
235 {
236 struct tcphdr *tcph;
237 unsigned int tcphoff;
238 int oldlen;
239
240 #ifdef CONFIG_IP_VS_IPV6
241 if (cp->af == AF_INET6)
242 tcphoff = sizeof(struct ipv6hdr);
243 else
244 #endif
245 tcphoff = ip_hdrlen(skb);
246 oldlen = skb->len - tcphoff;
247
248 /* csum_check requires unshared skb */
249 if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
250 return 0;
251
252 if (unlikely(cp->app != NULL)) {
253 /* Some checks before mangling */
254 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
255 return 0;
256
257 /*
258 * Attempt ip_vs_app call.
259 * It will fix ip_vs_conn and iph ack_seq stuff
260 */
261 if (!ip_vs_app_pkt_in(cp, skb))
262 return 0;
263 }
264
265 tcph = (void *)skb_network_header(skb) + tcphoff;
266 tcph->dest = cp->dport;
267
268 /*
269 * Adjust TCP checksums
270 */
271 if (skb->ip_summed == CHECKSUM_PARTIAL) {
272 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
273 htons(oldlen),
274 htons(skb->len - tcphoff));
275 } else if (!cp->app) {
276 /* Only port and addr are changed, do fast csum update */
277 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
278 cp->vport, cp->dport);
279 if (skb->ip_summed == CHECKSUM_COMPLETE)
280 skb->ip_summed = CHECKSUM_NONE;
281 } else {
282 /* full checksum calculation */
283 tcph->check = 0;
284 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
285 #ifdef CONFIG_IP_VS_IPV6
286 if (cp->af == AF_INET6)
287 tcph->check = csum_ipv6_magic(&cp->caddr.in6,
288 &cp->daddr.in6,
289 skb->len - tcphoff,
290 cp->protocol, skb->csum);
291 else
292 #endif
293 tcph->check = csum_tcpudp_magic(cp->caddr.ip,
294 cp->daddr.ip,
295 skb->len - tcphoff,
296 cp->protocol,
297 skb->csum);
298 skb->ip_summed = CHECKSUM_UNNECESSARY;
299 }
300 return 1;
301 }
302
303
304 static int
305 tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
306 {
307 unsigned int tcphoff;
308
309 #ifdef CONFIG_IP_VS_IPV6
310 if (af == AF_INET6)
311 tcphoff = sizeof(struct ipv6hdr);
312 else
313 #endif
314 tcphoff = ip_hdrlen(skb);
315
316 switch (skb->ip_summed) {
317 case CHECKSUM_NONE:
318 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
319 case CHECKSUM_COMPLETE:
320 #ifdef CONFIG_IP_VS_IPV6
321 if (af == AF_INET6) {
322 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
323 &ipv6_hdr(skb)->daddr,
324 skb->len - tcphoff,
325 ipv6_hdr(skb)->nexthdr,
326 skb->csum)) {
327 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
328 "Failed checksum for");
329 return 0;
330 }
331 } else
332 #endif
333 if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
334 ip_hdr(skb)->daddr,
335 skb->len - tcphoff,
336 ip_hdr(skb)->protocol,
337 skb->csum)) {
338 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
339 "Failed checksum for");
340 return 0;
341 }
342 break;
343 default:
344 /* No need to checksum. */
345 break;
346 }
347
348 return 1;
349 }
350
351
352 #define TCP_DIR_INPUT 0
353 #define TCP_DIR_OUTPUT 4
354 #define TCP_DIR_INPUT_ONLY 8
355
356 static const int tcp_state_off[IP_VS_DIR_LAST] = {
357 [IP_VS_DIR_INPUT] = TCP_DIR_INPUT,
358 [IP_VS_DIR_OUTPUT] = TCP_DIR_OUTPUT,
359 [IP_VS_DIR_INPUT_ONLY] = TCP_DIR_INPUT_ONLY,
360 };
361
362 /*
363 * Timeout table[state]
364 */
365 static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
366 [IP_VS_TCP_S_NONE] = 2*HZ,
367 [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,
368 [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
369 [IP_VS_TCP_S_SYN_RECV] = 1*60*HZ,
370 [IP_VS_TCP_S_FIN_WAIT] = 2*60*HZ,
371 [IP_VS_TCP_S_TIME_WAIT] = 2*60*HZ,
372 [IP_VS_TCP_S_CLOSE] = 10*HZ,
373 [IP_VS_TCP_S_CLOSE_WAIT] = 60*HZ,
374 [IP_VS_TCP_S_LAST_ACK] = 30*HZ,
375 [IP_VS_TCP_S_LISTEN] = 2*60*HZ,
376 [IP_VS_TCP_S_SYNACK] = 120*HZ,
377 [IP_VS_TCP_S_LAST] = 2*HZ,
378 };
379
380 static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
381 [IP_VS_TCP_S_NONE] = "NONE",
382 [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED",
383 [IP_VS_TCP_S_SYN_SENT] = "SYN_SENT",
384 [IP_VS_TCP_S_SYN_RECV] = "SYN_RECV",
385 [IP_VS_TCP_S_FIN_WAIT] = "FIN_WAIT",
386 [IP_VS_TCP_S_TIME_WAIT] = "TIME_WAIT",
387 [IP_VS_TCP_S_CLOSE] = "CLOSE",
388 [IP_VS_TCP_S_CLOSE_WAIT] = "CLOSE_WAIT",
389 [IP_VS_TCP_S_LAST_ACK] = "LAST_ACK",
390 [IP_VS_TCP_S_LISTEN] = "LISTEN",
391 [IP_VS_TCP_S_SYNACK] = "SYNACK",
392 [IP_VS_TCP_S_LAST] = "BUG!",
393 };
394
395 #define sNO IP_VS_TCP_S_NONE
396 #define sES IP_VS_TCP_S_ESTABLISHED
397 #define sSS IP_VS_TCP_S_SYN_SENT
398 #define sSR IP_VS_TCP_S_SYN_RECV
399 #define sFW IP_VS_TCP_S_FIN_WAIT
400 #define sTW IP_VS_TCP_S_TIME_WAIT
401 #define sCL IP_VS_TCP_S_CLOSE
402 #define sCW IP_VS_TCP_S_CLOSE_WAIT
403 #define sLA IP_VS_TCP_S_LAST_ACK
404 #define sLI IP_VS_TCP_S_LISTEN
405 #define sSA IP_VS_TCP_S_SYNACK
406
407 struct tcp_states_t {
408 int next_state[IP_VS_TCP_S_LAST];
409 };
410
411 static const char * tcp_state_name(int state)
412 {
413 if (state >= IP_VS_TCP_S_LAST)
414 return "ERR!";
415 return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
416 }
417
418 static struct tcp_states_t tcp_states [] = {
419 /* INPUT */
420 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
421 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
422 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
423 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
424 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
425
426 /* OUTPUT */
427 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
428 /*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
429 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
430 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
431 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
432
433 /* INPUT-ONLY */
434 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
435 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
436 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
437 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
438 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
439 };
440
441 static struct tcp_states_t tcp_states_dos [] = {
442 /* INPUT */
443 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
444 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
445 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
446 /*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
447 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
448
449 /* OUTPUT */
450 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
451 /*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
452 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
453 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
454 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
455
456 /* INPUT-ONLY */
457 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
458 /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
459 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
460 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
461 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
462 };
463
464 static struct tcp_states_t *tcp_state_table = tcp_states;
465
466
467 static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
468 {
469 int on = (flags & 1); /* secure_tcp */
470
471 /*
472 ** FIXME: change secure_tcp to independent sysctl var
473 ** or make it per-service or per-app because it is valid
474 ** for most if not for all of the applications. Something
475 ** like "capabilities" (flags) for each object.
476 */
477 tcp_state_table = (on? tcp_states_dos : tcp_states);
478 }
479
480 static int
481 tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
482 {
483 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
484 tcp_state_name_table, sname, to);
485 }
486
487 static inline int tcp_state_idx(struct tcphdr *th)
488 {
489 if (th->rst)
490 return 3;
491 if (th->syn)
492 return 0;
493 if (th->fin)
494 return 1;
495 if (th->ack)
496 return 2;
497 return -1;
498 }
499
500 static inline void
501 set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
502 int direction, struct tcphdr *th)
503 {
504 int state_idx;
505 int new_state = IP_VS_TCP_S_CLOSE;
506 int state_off = tcp_state_off[direction];
507
508 /*
509 * Update state offset to INPUT_ONLY if necessary
510 * or delete NO_OUTPUT flag if output packet detected
511 */
512 if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
513 if (state_off == TCP_DIR_OUTPUT)
514 cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
515 else
516 state_off = TCP_DIR_INPUT_ONLY;
517 }
518
519 if ((state_idx = tcp_state_idx(th)) < 0) {
520 IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
521 goto tcp_state_out;
522 }
523
524 new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
525
526 tcp_state_out:
527 if (new_state != cp->state) {
528 struct ip_vs_dest *dest = cp->dest;
529
530 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
531 "%s:%d state: %s->%s conn->refcnt:%d\n",
532 pp->name,
533 ((state_off == TCP_DIR_OUTPUT) ?
534 "output " : "input "),
535 th->syn ? 'S' : '.',
536 th->fin ? 'F' : '.',
537 th->ack ? 'A' : '.',
538 th->rst ? 'R' : '.',
539 IP_VS_DBG_ADDR(cp->af, &cp->daddr),
540 ntohs(cp->dport),
541 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
542 ntohs(cp->cport),
543 tcp_state_name(cp->state),
544 tcp_state_name(new_state),
545 atomic_read(&cp->refcnt));
546
547 if (dest) {
548 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
549 (new_state != IP_VS_TCP_S_ESTABLISHED)) {
550 atomic_dec(&dest->activeconns);
551 atomic_inc(&dest->inactconns);
552 cp->flags |= IP_VS_CONN_F_INACTIVE;
553 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
554 (new_state == IP_VS_TCP_S_ESTABLISHED)) {
555 atomic_inc(&dest->activeconns);
556 atomic_dec(&dest->inactconns);
557 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
558 }
559 }
560 }
561
562 cp->timeout = pp->timeout_table[cp->state = new_state];
563 }
564
565
566 /*
567 * Handle state transitions
568 */
569 static int
570 tcp_state_transition(struct ip_vs_conn *cp, int direction,
571 const struct sk_buff *skb,
572 struct ip_vs_protocol *pp)
573 {
574 struct tcphdr _tcph, *th;
575
576 #ifdef CONFIG_IP_VS_IPV6
577 int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
578 #else
579 int ihl = ip_hdrlen(skb);
580 #endif
581
582 th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
583 if (th == NULL)
584 return 0;
585
586 spin_lock(&cp->lock);
587 set_tcp_state(pp, cp, direction, th);
588 spin_unlock(&cp->lock);
589
590 return 1;
591 }
592
593
594 /*
595 * Hash table for TCP application incarnations
596 */
597 #define TCP_APP_TAB_BITS 4
598 #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
599 #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
600
601 static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
602 static DEFINE_SPINLOCK(tcp_app_lock);
603
604 static inline __u16 tcp_app_hashkey(__be16 port)
605 {
606 return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
607 & TCP_APP_TAB_MASK;
608 }
609
610
611 static int tcp_register_app(struct ip_vs_app *inc)
612 {
613 struct ip_vs_app *i;
614 __u16 hash;
615 __be16 port = inc->port;
616 int ret = 0;
617
618 hash = tcp_app_hashkey(port);
619
620 spin_lock_bh(&tcp_app_lock);
621 list_for_each_entry(i, &tcp_apps[hash], p_list) {
622 if (i->port == port) {
623 ret = -EEXIST;
624 goto out;
625 }
626 }
627 list_add(&inc->p_list, &tcp_apps[hash]);
628 atomic_inc(&ip_vs_protocol_tcp.appcnt);
629
630 out:
631 spin_unlock_bh(&tcp_app_lock);
632 return ret;
633 }
634
635
636 static void
637 tcp_unregister_app(struct ip_vs_app *inc)
638 {
639 spin_lock_bh(&tcp_app_lock);
640 atomic_dec(&ip_vs_protocol_tcp.appcnt);
641 list_del(&inc->p_list);
642 spin_unlock_bh(&tcp_app_lock);
643 }
644
645
646 static int
647 tcp_app_conn_bind(struct ip_vs_conn *cp)
648 {
649 int hash;
650 struct ip_vs_app *inc;
651 int result = 0;
652
653 /* Default binding: bind app only for NAT */
654 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
655 return 0;
656
657 /* Lookup application incarnations and bind the right one */
658 hash = tcp_app_hashkey(cp->vport);
659
660 spin_lock(&tcp_app_lock);
661 list_for_each_entry(inc, &tcp_apps[hash], p_list) {
662 if (inc->port == cp->vport) {
663 if (unlikely(!ip_vs_app_inc_get(inc)))
664 break;
665 spin_unlock(&tcp_app_lock);
666
667 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
668 "%s:%u to app %s on port %u\n",
669 __func__,
670 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
671 ntohs(cp->cport),
672 IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
673 ntohs(cp->vport),
674 inc->name, ntohs(inc->port));
675
676 cp->app = inc;
677 if (inc->init_conn)
678 result = inc->init_conn(inc, cp);
679 goto out;
680 }
681 }
682 spin_unlock(&tcp_app_lock);
683
684 out:
685 return result;
686 }
687
688
689 /*
690 * Set LISTEN timeout. (ip_vs_conn_put will setup timer)
691 */
692 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
693 {
694 spin_lock(&cp->lock);
695 cp->state = IP_VS_TCP_S_LISTEN;
696 cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
697 spin_unlock(&cp->lock);
698 }
699
700
701 static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
702 {
703 IP_VS_INIT_HASH_TABLE(tcp_apps);
704 pp->timeout_table = tcp_timeouts;
705 }
706
707
/* Protocol exit hook: nothing to release for TCP. */
static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
{
	/* intentionally empty */
}
711
712
713 struct ip_vs_protocol ip_vs_protocol_tcp = {
714 .name = "TCP",
715 .protocol = IPPROTO_TCP,
716 .num_states = IP_VS_TCP_S_LAST,
717 .dont_defrag = 0,
718 .appcnt = ATOMIC_INIT(0),
719 .init = ip_vs_tcp_init,
720 .exit = ip_vs_tcp_exit,
721 .register_app = tcp_register_app,
722 .unregister_app = tcp_unregister_app,
723 .conn_schedule = tcp_conn_schedule,
724 .conn_in_get = tcp_conn_in_get,
725 .conn_out_get = tcp_conn_out_get,
726 .snat_handler = tcp_snat_handler,
727 .dnat_handler = tcp_dnat_handler,
728 .csum_check = tcp_csum_check,
729 .state_name = tcp_state_name,
730 .state_transition = tcp_state_transition,
731 .app_conn_bind = tcp_app_conn_bind,
732 .debug_packet = ip_vs_tcpudp_debug_packet,
733 .timeout_change = tcp_timeout_change,
734 .set_state_timeout = tcp_set_state_timeout,
735 };
This page took 0.0601 seconds and 5 git commands to generate.