Commit | Line | Data |
---|---|---|
2906f66a VMR |
1 | #include <linux/kernel.h> |
2 | #include <linux/ip.h> | |
3 | #include <linux/sctp.h> | |
4 | #include <net/ip.h> | |
5 | #include <net/ip6_checksum.h> | |
6 | #include <linux/netfilter.h> | |
7 | #include <linux/netfilter_ipv4.h> | |
8 | #include <net/sctp/checksum.h> | |
9 | #include <net/ip_vs.h> | |
10 | ||
2906f66a | 11 | static int |
9330419d | 12 | sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
d4383f04 JDB |
13 | int *verdict, struct ip_vs_conn **cpp, |
14 | struct ip_vs_iphdr *iph) | |
2906f66a | 15 | { |
fc723250 | 16 | struct net *net; |
2906f66a | 17 | struct ip_vs_service *svc; |
c6c96c18 | 18 | struct netns_ipvs *ipvs; |
2906f66a VMR |
19 | sctp_chunkhdr_t _schunkh, *sch; |
20 | sctp_sctphdr_t *sh, _sctph; | |
5e26b1b3 | 21 | __be16 _ports[2], *ports = NULL; |
2906f66a | 22 | |
1daea8ed EB |
23 | net = skb_net(skb); |
24 | ipvs = net_ipvs(net); | |
25 | ||
5e26b1b3 AG |
26 | if (likely(!ip_vs_iph_icmp(iph))) { |
27 | sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); | |
28 | if (sh) { | |
29 | sch = skb_header_pointer( | |
30 | skb, iph->len + sizeof(sctp_sctphdr_t), | |
31 | sizeof(_schunkh), &_schunkh); | |
32 | if (sch && (sch->type == SCTP_CID_INIT || | |
33 | sysctl_sloppy_sctp(ipvs))) | |
34 | ports = &sh->source; | |
35 | } | |
36 | } else { | |
37 | ports = skb_header_pointer( | |
38 | skb, iph->len, sizeof(_ports), &_ports); | |
6e7cd27c | 39 | } |
2906f66a | 40 | |
5e26b1b3 | 41 | if (!ports) { |
6e7cd27c | 42 | *verdict = NF_DROP; |
2906f66a | 43 | return 0; |
6e7cd27c DB |
44 | } |
45 | ||
ceec4c38 | 46 | rcu_read_lock(); |
5e26b1b3 | 47 | if (likely(!ip_vs_iph_inverse(iph))) |
0a4fd6ce | 48 | svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, |
5e26b1b3 AG |
49 | &iph->daddr, ports[1]); |
50 | else | |
0a4fd6ce | 51 | svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, |
5e26b1b3 AG |
52 | &iph->saddr, ports[0]); |
53 | if (svc) { | |
190ecd27 JA |
54 | int ignored; |
55 | ||
c6c96c18 | 56 | if (ip_vs_todrop(ipvs)) { |
2906f66a VMR |
57 | /* |
58 | * It seems that we are very loaded. | |
59 | * We have to drop this packet :( | |
60 | */ | |
ceec4c38 | 61 | rcu_read_unlock(); |
2906f66a VMR |
62 | *verdict = NF_DROP; |
63 | return 0; | |
64 | } | |
65 | /* | |
66 | * Let the virtual server select a real server for the | |
67 | * incoming connection, and create a connection entry. | |
68 | */ | |
d4383f04 | 69 | *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); |
a5959d53 HS |
70 | if (!*cpp && ignored <= 0) { |
71 | if (!ignored) | |
d4383f04 | 72 | *verdict = ip_vs_leave(svc, skb, pd, iph); |
ceec4c38 | 73 | else |
a5959d53 | 74 | *verdict = NF_DROP; |
ceec4c38 | 75 | rcu_read_unlock(); |
2906f66a VMR |
76 | return 0; |
77 | } | |
2906f66a | 78 | } |
ceec4c38 | 79 | rcu_read_unlock(); |
a5959d53 | 80 | /* NF_ACCEPT */ |
2906f66a VMR |
81 | return 1; |
82 | } | |
83 | ||
4b47bc9a DB |
84 | static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph, |
85 | unsigned int sctphoff) | |
86 | { | |
024ec3de | 87 | sctph->checksum = sctp_compute_cksum(skb, sctphoff); |
4b47bc9a DB |
88 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
89 | } | |
90 | ||
2906f66a | 91 | static int |
d4383f04 JDB |
92 | sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, |
93 | struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) | |
2906f66a VMR |
94 | { |
95 | sctp_sctphdr_t *sctph; | |
d4383f04 | 96 | unsigned int sctphoff = iph->len; |
97203abe | 97 | bool payload_csum = false; |
2906f66a VMR |
98 | |
99 | #ifdef CONFIG_IP_VS_IPV6 | |
d4383f04 | 100 | if (cp->af == AF_INET6 && iph->fragoffs) |
63dca2c0 | 101 | return 1; |
2906f66a | 102 | #endif |
2906f66a VMR |
103 | |
104 | /* csum_check requires unshared skb */ | |
105 | if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) | |
106 | return 0; | |
107 | ||
108 | if (unlikely(cp->app != NULL)) { | |
97203abe DB |
109 | int ret; |
110 | ||
2906f66a VMR |
111 | /* Some checks before mangling */ |
112 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) | |
113 | return 0; | |
114 | ||
115 | /* Call application helper if needed */ | |
97203abe DB |
116 | ret = ip_vs_app_pkt_out(cp, skb); |
117 | if (ret == 0) | |
2906f66a | 118 | return 0; |
97203abe DB |
119 | /* ret=2: csum update is needed after payload mangling */ |
120 | if (ret == 2) | |
121 | payload_csum = true; | |
2906f66a VMR |
122 | } |
123 | ||
124 | sctph = (void *) skb_network_header(skb) + sctphoff; | |
2906f66a | 125 | |
97203abe DB |
126 | /* Only update csum if we really have to */ |
127 | if (sctph->source != cp->vport || payload_csum || | |
128 | skb->ip_summed == CHECKSUM_PARTIAL) { | |
129 | sctph->source = cp->vport; | |
130 | sctp_nat_csum(skb, sctph, sctphoff); | |
131 | } else { | |
132 | skb->ip_summed = CHECKSUM_UNNECESSARY; | |
133 | } | |
2906f66a VMR |
134 | |
135 | return 1; | |
136 | } | |
137 | ||
138 | static int | |
d4383f04 JDB |
139 | sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, |
140 | struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) | |
2906f66a | 141 | { |
2906f66a | 142 | sctp_sctphdr_t *sctph; |
d4383f04 | 143 | unsigned int sctphoff = iph->len; |
97203abe | 144 | bool payload_csum = false; |
2906f66a VMR |
145 | |
146 | #ifdef CONFIG_IP_VS_IPV6 | |
d4383f04 | 147 | if (cp->af == AF_INET6 && iph->fragoffs) |
63dca2c0 | 148 | return 1; |
2906f66a | 149 | #endif |
2906f66a VMR |
150 | |
151 | /* csum_check requires unshared skb */ | |
152 | if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) | |
153 | return 0; | |
154 | ||
155 | if (unlikely(cp->app != NULL)) { | |
97203abe DB |
156 | int ret; |
157 | ||
2906f66a VMR |
158 | /* Some checks before mangling */ |
159 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) | |
160 | return 0; | |
161 | ||
162 | /* Call application helper if needed */ | |
97203abe DB |
163 | ret = ip_vs_app_pkt_in(cp, skb); |
164 | if (ret == 0) | |
2906f66a | 165 | return 0; |
97203abe DB |
166 | /* ret=2: csum update is needed after payload mangling */ |
167 | if (ret == 2) | |
168 | payload_csum = true; | |
2906f66a VMR |
169 | } |
170 | ||
171 | sctph = (void *) skb_network_header(skb) + sctphoff; | |
2906f66a | 172 | |
97203abe DB |
173 | /* Only update csum if we really have to */ |
174 | if (sctph->dest != cp->dport || payload_csum || | |
175 | (skb->ip_summed == CHECKSUM_PARTIAL && | |
176 | !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) { | |
177 | sctph->dest = cp->dport; | |
178 | sctp_nat_csum(skb, sctph, sctphoff); | |
179 | } else if (skb->ip_summed != CHECKSUM_PARTIAL) { | |
180 | skb->ip_summed = CHECKSUM_UNNECESSARY; | |
181 | } | |
2906f66a VMR |
182 | |
183 | return 1; | |
184 | } | |
185 | ||
186 | static int | |
187 | sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) | |
188 | { | |
2906f66a VMR |
189 | unsigned int sctphoff; |
190 | struct sctphdr *sh, _sctph; | |
024ec3de | 191 | __le32 cmp, val; |
2906f66a VMR |
192 | |
193 | #ifdef CONFIG_IP_VS_IPV6 | |
194 | if (af == AF_INET6) | |
195 | sctphoff = sizeof(struct ipv6hdr); | |
196 | else | |
197 | #endif | |
198 | sctphoff = ip_hdrlen(skb); | |
199 | ||
200 | sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph); | |
201 | if (sh == NULL) | |
202 | return 0; | |
203 | ||
204 | cmp = sh->checksum; | |
024ec3de | 205 | val = sctp_compute_cksum(skb, sctphoff); |
2906f66a VMR |
206 | |
207 | if (val != cmp) { | |
208 | /* CRC failure, dump it. */ | |
0d79641a | 209 | IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, |
2906f66a VMR |
210 | "Failed checksum for"); |
211 | return 0; | |
212 | } | |
213 | return 1; | |
214 | } | |
215 | ||
/*
 * Events fed into the SCTP state table.  The values index the second
 * dimension of sctp_states[], so the numbering must stay dense and
 * IP_VS_SCTP_EVENT_LAST must remain the final entry.
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA			= 0,	/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT			= 1,
	IP_VS_SCTP_INIT_ACK		= 2,
	IP_VS_SCTP_COOKIE_ECHO		= 3,
	IP_VS_SCTP_COOKIE_ACK		= 4,
	IP_VS_SCTP_SHUTDOWN		= 5,
	IP_VS_SCTP_SHUTDOWN_ACK		= 6,
	IP_VS_SCTP_SHUTDOWN_COMPLETE	= 7,
	IP_VS_SCTP_ERROR		= 8,
	IP_VS_SCTP_ABORT		= 9,
	IP_VS_SCTP_EVENT_LAST			/* number of events */
};
229 | ||
61e7c420 JA |
230 | /* RFC 2960, 3.2 Chunk Field Descriptions */ |
231 | static __u8 sctp_events[] = { | |
232 | [SCTP_CID_DATA] = IP_VS_SCTP_DATA, | |
233 | [SCTP_CID_INIT] = IP_VS_SCTP_INIT, | |
234 | [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK, | |
235 | [SCTP_CID_SACK] = IP_VS_SCTP_DATA, | |
236 | [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA, | |
237 | [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA, | |
238 | [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT, | |
239 | [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN, | |
240 | [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK, | |
241 | [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR, | |
242 | [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO, | |
243 | [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK, | |
244 | [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA, | |
245 | [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA, | |
246 | [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE, | |
2906f66a VMR |
247 | }; |
248 | ||
61e7c420 JA |
249 | /* SCTP States: |
250 | * See RFC 2960, 4. SCTP Association State Diagram | |
251 | * | |
252 | * New states (not in diagram): | |
253 | * - INIT1 state: use shorter timeout for dropped INIT packets | |
254 | * - REJECTED state: use shorter timeout if INIT is rejected with ABORT | |
255 | * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging | |
256 | * | |
257 | * The states are as seen in real server. In the diagram, INIT1, INIT, | |
258 | * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state. | |
259 | * | |
260 | * States as per packets from client (C) and server (S): | |
261 | * | |
262 | * Setup of client connection: | |
263 | * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK | |
264 | * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK | |
265 | * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO | |
266 | * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK | |
267 | * | |
268 | * Setup of server connection: | |
269 | * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK | |
270 | * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO | |
271 | * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK | |
272 | */ | |
2906f66a | 273 | |
61e7c420 JA |
274 | #define sNO IP_VS_SCTP_S_NONE |
275 | #define sI1 IP_VS_SCTP_S_INIT1 | |
276 | #define sIN IP_VS_SCTP_S_INIT | |
277 | #define sCS IP_VS_SCTP_S_COOKIE_SENT | |
278 | #define sCR IP_VS_SCTP_S_COOKIE_REPLIED | |
279 | #define sCW IP_VS_SCTP_S_COOKIE_WAIT | |
280 | #define sCO IP_VS_SCTP_S_COOKIE | |
281 | #define sCE IP_VS_SCTP_S_COOKIE_ECHOED | |
282 | #define sES IP_VS_SCTP_S_ESTABLISHED | |
283 | #define sSS IP_VS_SCTP_S_SHUTDOWN_SENT | |
284 | #define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED | |
285 | #define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT | |
286 | #define sRJ IP_VS_SCTP_S_REJECTED | |
287 | #define sCL IP_VS_SCTP_S_CLOSED | |
288 | ||
289 | static const __u8 sctp_states | |
290 | [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = { | |
291 | { /* INPUT */ | |
292 | /* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ | |
293 | /* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
294 | /* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, | |
295 | /* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
296 | /* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
297 | /* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL}, | |
298 | /* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, | |
299 | /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL}, | |
300 | /* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL}, | |
301 | /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL}, | |
302 | /* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | |
303 | }, | |
304 | { /* OUTPUT */ | |
305 | /* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ | |
306 | /* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
307 | /* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW}, | |
308 | /* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
309 | /* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
310 | /* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, | |
311 | /* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL}, | |
312 | /* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL}, | |
313 | /* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
314 | /* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
315 | /* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | |
316 | }, | |
317 | { /* INPUT-ONLY */ | |
318 | /* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ | |
319 | /* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
320 | /* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, | |
321 | /* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
322 | /* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
323 | /* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, | |
324 | /* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, | |
325 | /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL}, | |
326 | /* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL}, | |
327 | /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
328 | /* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | |
329 | }, | |
2906f66a VMR |
330 | }; |
331 | ||
61e7c420 JA |
332 | #define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ) |
333 | ||
334 | /* Timeout table[state] */ | |
9d934878 | 335 | static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { |
61e7c420 JA |
336 | [IP_VS_SCTP_S_NONE] = 2 * HZ, |
337 | [IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ, | |
338 | [IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO, | |
339 | [IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO, | |
340 | [IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO, | |
341 | [IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO, | |
342 | [IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO, | |
343 | [IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO, | |
344 | [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, | |
345 | [IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO, | |
346 | [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO, | |
347 | [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO, | |
348 | [IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ, | |
349 | [IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO, | |
350 | [IP_VS_SCTP_S_LAST] = 2 * HZ, | |
2906f66a VMR |
351 | }; |
352 | ||
353 | static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { | |
61e7c420 JA |
354 | [IP_VS_SCTP_S_NONE] = "NONE", |
355 | [IP_VS_SCTP_S_INIT1] = "INIT1", | |
356 | [IP_VS_SCTP_S_INIT] = "INIT", | |
357 | [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT", | |
358 | [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED", | |
359 | [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT", | |
360 | [IP_VS_SCTP_S_COOKIE] = "COOKIE", | |
361 | [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED", | |
362 | [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED", | |
363 | [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT", | |
364 | [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED", | |
365 | [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT", | |
366 | [IP_VS_SCTP_S_REJECTED] = "REJECTED", | |
367 | [IP_VS_SCTP_S_CLOSED] = "CLOSED", | |
368 | [IP_VS_SCTP_S_LAST] = "BUG!", | |
2906f66a VMR |
369 | }; |
370 | ||
371 | ||
372 | static const char *sctp_state_name(int state) | |
373 | { | |
374 | if (state >= IP_VS_SCTP_S_LAST) | |
375 | return "ERR!"; | |
376 | if (sctp_state_name_table[state]) | |
377 | return sctp_state_name_table[state]; | |
378 | return "?"; | |
379 | } | |
380 | ||
4a516f11 | 381 | static inline void |
9330419d | 382 | set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, |
2906f66a VMR |
383 | int direction, const struct sk_buff *skb) |
384 | { | |
385 | sctp_chunkhdr_t _sctpch, *sch; | |
386 | unsigned char chunk_type; | |
387 | int event, next_state; | |
cf2e3942 | 388 | int ihl, cofs; |
2906f66a VMR |
389 | |
390 | #ifdef CONFIG_IP_VS_IPV6 | |
391 | ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); | |
392 | #else | |
393 | ihl = ip_hdrlen(skb); | |
394 | #endif | |
395 | ||
cf2e3942 JA |
396 | cofs = ihl + sizeof(sctp_sctphdr_t); |
397 | sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); | |
2906f66a | 398 | if (sch == NULL) |
4a516f11 | 399 | return; |
2906f66a VMR |
400 | |
401 | chunk_type = sch->type; | |
402 | /* | |
403 | * Section 3: Multiple chunks can be bundled into one SCTP packet | |
404 | * up to the MTU size, except for the INIT, INIT ACK, and | |
405 | * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with | |
406 | * any other chunk in a packet. | |
407 | * | |
408 | * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control | |
409 | * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be | |
410 | * bundled with an ABORT, but they MUST be placed before the ABORT | |
411 | * in the SCTP packet or they will be ignored by the receiver. | |
412 | */ | |
413 | if ((sch->type == SCTP_CID_COOKIE_ECHO) || | |
414 | (sch->type == SCTP_CID_COOKIE_ACK)) { | |
cf2e3942 JA |
415 | int clen = ntohs(sch->length); |
416 | ||
417 | if (clen >= sizeof(sctp_chunkhdr_t)) { | |
418 | sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), | |
419 | sizeof(_sctpch), &_sctpch); | |
420 | if (sch && sch->type == SCTP_CID_ABORT) | |
2906f66a VMR |
421 | chunk_type = sch->type; |
422 | } | |
423 | } | |
424 | ||
61e7c420 JA |
425 | event = (chunk_type < sizeof(sctp_events)) ? |
426 | sctp_events[chunk_type] : IP_VS_SCTP_DATA; | |
2906f66a | 427 | |
61e7c420 JA |
428 | /* Update direction to INPUT_ONLY if necessary |
429 | * or delete NO_OUTPUT flag if output packet detected | |
2906f66a | 430 | */ |
61e7c420 JA |
431 | if (cp->flags & IP_VS_CONN_F_NOOUTPUT) { |
432 | if (direction == IP_VS_DIR_OUTPUT) | |
433 | cp->flags &= ~IP_VS_CONN_F_NOOUTPUT; | |
434 | else | |
435 | direction = IP_VS_DIR_INPUT_ONLY; | |
436 | } | |
437 | ||
438 | next_state = sctp_states[direction][event][cp->state]; | |
2906f66a VMR |
439 | |
440 | if (next_state != cp->state) { | |
441 | struct ip_vs_dest *dest = cp->dest; | |
442 | ||
443 | IP_VS_DBG_BUF(8, "%s %s %s:%d->" | |
444 | "%s:%d state: %s->%s conn->refcnt:%d\n", | |
9330419d | 445 | pd->pp->name, |
2906f66a VMR |
446 | ((direction == IP_VS_DIR_OUTPUT) ? |
447 | "output " : "input "), | |
f18ae720 | 448 | IP_VS_DBG_ADDR(cp->daf, &cp->daddr), |
2906f66a VMR |
449 | ntohs(cp->dport), |
450 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), | |
451 | ntohs(cp->cport), | |
452 | sctp_state_name(cp->state), | |
453 | sctp_state_name(next_state), | |
454 | atomic_read(&cp->refcnt)); | |
455 | if (dest) { | |
456 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | |
457 | (next_state != IP_VS_SCTP_S_ESTABLISHED)) { | |
458 | atomic_dec(&dest->activeconns); | |
459 | atomic_inc(&dest->inactconns); | |
460 | cp->flags |= IP_VS_CONN_F_INACTIVE; | |
461 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | |
462 | (next_state == IP_VS_SCTP_S_ESTABLISHED)) { | |
463 | atomic_inc(&dest->activeconns); | |
464 | atomic_dec(&dest->inactconns); | |
465 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | |
466 | } | |
467 | } | |
468 | } | |
9d934878 HS |
469 | if (likely(pd)) |
470 | cp->timeout = pd->timeout_table[cp->state = next_state]; | |
471 | else /* What to do ? */ | |
472 | cp->timeout = sctp_timeouts[cp->state = next_state]; | |
2906f66a VMR |
473 | } |
474 | ||
4a516f11 | 475 | static void |
2906f66a | 476 | sctp_state_transition(struct ip_vs_conn *cp, int direction, |
9330419d | 477 | const struct sk_buff *skb, struct ip_vs_proto_data *pd) |
2906f66a | 478 | { |
ac69269a | 479 | spin_lock_bh(&cp->lock); |
4a516f11 | 480 | set_sctp_state(pd, cp, direction, skb); |
ac69269a | 481 | spin_unlock_bh(&cp->lock); |
2906f66a VMR |
482 | } |
483 | ||
2906f66a VMR |
484 | static inline __u16 sctp_app_hashkey(__be16 port) |
485 | { | |
486 | return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) | |
487 | & SCTP_APP_TAB_MASK; | |
488 | } | |
489 | ||
ab8a5e84 | 490 | static int sctp_register_app(struct net *net, struct ip_vs_app *inc) |
2906f66a VMR |
491 | { |
492 | struct ip_vs_app *i; | |
493 | __u16 hash; | |
494 | __be16 port = inc->port; | |
495 | int ret = 0; | |
ab8a5e84 | 496 | struct netns_ipvs *ipvs = net_ipvs(net); |
18d6ade6 | 497 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP); |
2906f66a VMR |
498 | |
499 | hash = sctp_app_hashkey(port); | |
500 | ||
9d934878 | 501 | list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { |
2906f66a VMR |
502 | if (i->port == port) { |
503 | ret = -EEXIST; | |
504 | goto out; | |
505 | } | |
506 | } | |
363c97d7 | 507 | list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]); |
9bbac6a9 | 508 | atomic_inc(&pd->appcnt); |
2906f66a | 509 | out: |
2906f66a VMR |
510 | |
511 | return ret; | |
512 | } | |
513 | ||
ab8a5e84 | 514 | static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) |
2906f66a | 515 | { |
18d6ade6 | 516 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net_ipvs(net), IPPROTO_SCTP); |
9d934878 | 517 | |
9bbac6a9 | 518 | atomic_dec(&pd->appcnt); |
363c97d7 | 519 | list_del_rcu(&inc->p_list); |
2906f66a VMR |
520 | } |
521 | ||
522 | static int sctp_app_conn_bind(struct ip_vs_conn *cp) | |
523 | { | |
58dbc6f2 | 524 | struct netns_ipvs *ipvs = cp->ipvs; |
2906f66a VMR |
525 | int hash; |
526 | struct ip_vs_app *inc; | |
527 | int result = 0; | |
528 | ||
529 | /* Default binding: bind app only for NAT */ | |
530 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) | |
531 | return 0; | |
532 | /* Lookup application incarnations and bind the right one */ | |
533 | hash = sctp_app_hashkey(cp->vport); | |
534 | ||
363c97d7 JA |
535 | rcu_read_lock(); |
536 | list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) { | |
2906f66a VMR |
537 | if (inc->port == cp->vport) { |
538 | if (unlikely(!ip_vs_app_inc_get(inc))) | |
539 | break; | |
363c97d7 | 540 | rcu_read_unlock(); |
2906f66a VMR |
541 | |
542 | IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" | |
543 | "%s:%u to app %s on port %u\n", | |
544 | __func__, | |
545 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), | |
546 | ntohs(cp->cport), | |
547 | IP_VS_DBG_ADDR(cp->af, &cp->vaddr), | |
548 | ntohs(cp->vport), | |
549 | inc->name, ntohs(inc->port)); | |
550 | cp->app = inc; | |
551 | if (inc->init_conn) | |
552 | result = inc->init_conn(inc, cp); | |
553 | goto out; | |
554 | } | |
555 | } | |
363c97d7 | 556 | rcu_read_unlock(); |
2906f66a VMR |
557 | out: |
558 | return result; | |
559 | } | |
560 | ||
9d934878 HS |
561 | /* --------------------------------------------- |
562 | * timeouts is netns related now. | |
563 | * --------------------------------------------- | |
564 | */ | |
582b8e3e | 565 | static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) |
2906f66a | 566 | { |
9d934878 | 567 | struct netns_ipvs *ipvs = net_ipvs(net); |
2906f66a | 568 | |
9d934878 | 569 | ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); |
9d934878 HS |
570 | pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, |
571 | sizeof(sctp_timeouts)); | |
582b8e3e HS |
572 | if (!pd->timeout_table) |
573 | return -ENOMEM; | |
574 | return 0; | |
9d934878 | 575 | } |
2906f66a | 576 | |
9d934878 | 577 | static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) |
2906f66a | 578 | { |
9d934878 | 579 | kfree(pd->timeout_table); |
2906f66a VMR |
580 | } |
581 | ||
582 | struct ip_vs_protocol ip_vs_protocol_sctp = { | |
9d934878 HS |
583 | .name = "SCTP", |
584 | .protocol = IPPROTO_SCTP, | |
585 | .num_states = IP_VS_SCTP_S_LAST, | |
586 | .dont_defrag = 0, | |
587 | .init = NULL, | |
588 | .exit = NULL, | |
589 | .init_netns = __ip_vs_sctp_init, | |
590 | .exit_netns = __ip_vs_sctp_exit, | |
591 | .register_app = sctp_register_app, | |
2906f66a | 592 | .unregister_app = sctp_unregister_app, |
9d934878 HS |
593 | .conn_schedule = sctp_conn_schedule, |
594 | .conn_in_get = ip_vs_conn_in_get_proto, | |
595 | .conn_out_get = ip_vs_conn_out_get_proto, | |
596 | .snat_handler = sctp_snat_handler, | |
597 | .dnat_handler = sctp_dnat_handler, | |
598 | .csum_check = sctp_csum_check, | |
599 | .state_name = sctp_state_name, | |
2906f66a | 600 | .state_transition = sctp_state_transition, |
9d934878 HS |
601 | .app_conn_bind = sctp_app_conn_bind, |
602 | .debug_packet = ip_vs_tcpudp_debug_packet, | |
603 | .timeout_change = NULL, | |
2906f66a | 604 | }; |