Commit | Line | Data |
---|---|---|
e905a9ed | 1 | /* |
1da177e4 LT |
2 | * xfrm4_policy.c |
3 | * | |
4 | * Changes: | |
5 | * Kazunori MIYAZAWA @USAGI | |
6 | * YOSHIFUJI Hideaki @USAGI | |
7 | * Split up af-specific portion | |
e905a9ed | 8 | * |
1da177e4 LT |
9 | */ |
10 | ||
66cdb3ca HX |
11 | #include <linux/err.h> |
12 | #include <linux/kernel.h> | |
aabc9761 | 13 | #include <linux/inetdevice.h> |
45ff5a3f | 14 | #include <net/dst.h> |
1da177e4 LT |
15 | #include <net/xfrm.h> |
16 | #include <net/ip.h> | |
17 | ||
1da177e4 LT |
18 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; |
19 | ||
c5b3cf46 AD |
20 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, |
21 | xfrm_address_t *saddr, | |
66cdb3ca | 22 | xfrm_address_t *daddr) |
1da177e4 | 23 | { |
66cdb3ca | 24 | struct flowi fl = { |
a1e59abf PM |
25 | .nl_u = { |
26 | .ip4_u = { | |
66cdb3ca | 27 | .tos = tos, |
a1e59abf PM |
28 | .daddr = daddr->a4, |
29 | }, | |
30 | }, | |
31 | }; | |
66cdb3ca HX |
32 | struct dst_entry *dst; |
33 | struct rtable *rt; | |
34 | int err; | |
a1e59abf | 35 | |
66cdb3ca HX |
36 | if (saddr) |
37 | fl.fl4_src = saddr->a4; | |
38 | ||
c5b3cf46 | 39 | err = __ip_route_output_key(net, &rt, &fl); |
66cdb3ca HX |
40 | dst = &rt->u.dst; |
41 | if (err) | |
42 | dst = ERR_PTR(err); | |
43 | return dst; | |
44 | } | |
45 | ||
fbda33b2 AD |
46 | static int xfrm4_get_saddr(struct net *net, |
47 | xfrm_address_t *saddr, xfrm_address_t *daddr) | |
66cdb3ca HX |
48 | { |
49 | struct dst_entry *dst; | |
50 | struct rtable *rt; | |
51 | ||
fbda33b2 | 52 | dst = xfrm4_dst_lookup(net, 0, NULL, daddr); |
66cdb3ca HX |
53 | if (IS_ERR(dst)) |
54 | return -EHOSTUNREACH; | |
55 | ||
56 | rt = (struct rtable *)dst; | |
57 | saddr->a4 = rt->rt_src; | |
58 | dst_release(dst); | |
59 | return 0; | |
a1e59abf PM |
60 | } |
61 | ||
25ee3286 | 62 | static int xfrm4_get_tos(struct flowi *fl) |
1da177e4 | 63 | { |
25ee3286 HX |
64 | return fl->fl4_tos; |
65 | } | |
1da177e4 | 66 | |
/*
 * afinfo init_path hook for IPv4.
 *
 * None of the arguments are used here; the function only reports
 * success (0).
 */
static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
			   int nfheader_len)
{
	return 0;
}
72 | ||
87c1e12b HX |
73 | static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, |
74 | struct flowi *fl) | |
25ee3286 HX |
75 | { |
76 | struct rtable *rt = (struct rtable *)xdst->route; | |
1da177e4 | 77 | |
87c1e12b | 78 | xdst->u.rt.fl = *fl; |
1da177e4 | 79 | |
25ee3286 HX |
80 | xdst->u.dst.dev = dev; |
81 | dev_hold(dev); | |
43372262 | 82 | |
25ee3286 HX |
83 | xdst->u.rt.idev = in_dev_get(dev); |
84 | if (!xdst->u.rt.idev) | |
85 | return -ENODEV; | |
1da177e4 | 86 | |
25ee3286 HX |
87 | xdst->u.rt.peer = rt->peer; |
88 | if (rt->peer) | |
89 | atomic_inc(&rt->peer->refcnt); | |
66cdb3ca | 90 | |
25ee3286 HX |
91 | /* Sheit... I remember I did this right. Apparently, |
92 | * it was magically lost, so this code needs audit */ | |
93 | xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | | |
94 | RTCF_LOCAL); | |
95 | xdst->u.rt.rt_type = rt->rt_type; | |
96 | xdst->u.rt.rt_src = rt->rt_src; | |
97 | xdst->u.rt.rt_dst = rt->rt_dst; | |
98 | xdst->u.rt.rt_gateway = rt->rt_gateway; | |
99 | xdst->u.rt.rt_spec_dst = rt->rt_spec_dst; | |
1da177e4 | 100 | |
1da177e4 | 101 | return 0; |
1da177e4 LT |
102 | } |
103 | ||
104 | static void | |
d5422efe | 105 | _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) |
1da177e4 | 106 | { |
eddc9ec5 | 107 | struct iphdr *iph = ip_hdr(skb); |
d56f90a7 | 108 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; |
1da177e4 LT |
109 | |
110 | memset(fl, 0, sizeof(struct flowi)); | |
44b451f1 PK |
111 | fl->mark = skb->mark; |
112 | ||
1da177e4 LT |
113 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { |
114 | switch (iph->protocol) { | |
115 | case IPPROTO_UDP: | |
ba4e58ec | 116 | case IPPROTO_UDPLITE: |
1da177e4 LT |
117 | case IPPROTO_TCP: |
118 | case IPPROTO_SCTP: | |
9e999993 | 119 | case IPPROTO_DCCP: |
c615c9f3 WY |
120 | if (xprth + 4 < skb->data || |
121 | pskb_may_pull(skb, xprth + 4 - skb->data)) { | |
8c689a6e | 122 | __be16 *ports = (__be16 *)xprth; |
1da177e4 | 123 | |
d5422efe HX |
124 | fl->fl_ip_sport = ports[!!reverse]; |
125 | fl->fl_ip_dport = ports[!reverse]; | |
1da177e4 LT |
126 | } |
127 | break; | |
128 | ||
129 | case IPPROTO_ICMP: | |
130 | if (pskb_may_pull(skb, xprth + 2 - skb->data)) { | |
131 | u8 *icmp = xprth; | |
132 | ||
133 | fl->fl_icmp_type = icmp[0]; | |
134 | fl->fl_icmp_code = icmp[1]; | |
135 | } | |
136 | break; | |
137 | ||
138 | case IPPROTO_ESP: | |
139 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { | |
4324a174 | 140 | __be32 *ehdr = (__be32 *)xprth; |
1da177e4 LT |
141 | |
142 | fl->fl_ipsec_spi = ehdr[0]; | |
143 | } | |
144 | break; | |
145 | ||
146 | case IPPROTO_AH: | |
147 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { | |
4324a174 | 148 | __be32 *ah_hdr = (__be32*)xprth; |
1da177e4 LT |
149 | |
150 | fl->fl_ipsec_spi = ah_hdr[1]; | |
151 | } | |
152 | break; | |
153 | ||
154 | case IPPROTO_COMP: | |
155 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { | |
4324a174 | 156 | __be16 *ipcomp_hdr = (__be16 *)xprth; |
1da177e4 | 157 | |
4195f814 | 158 | fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); |
1da177e4 LT |
159 | } |
160 | break; | |
161 | default: | |
162 | fl->fl_ipsec_spi = 0; | |
163 | break; | |
3ff50b79 | 164 | } |
1da177e4 LT |
165 | } |
166 | fl->proto = iph->protocol; | |
d5422efe HX |
167 | fl->fl4_dst = reverse ? iph->saddr : iph->daddr; |
168 | fl->fl4_src = reverse ? iph->daddr : iph->saddr; | |
4da3089f | 169 | fl->fl4_tos = iph->tos; |
1da177e4 LT |
170 | } |
171 | ||
569d3645 | 172 | static inline int xfrm4_garbage_collect(struct dst_ops *ops) |
1da177e4 | 173 | { |
d7c7544c AD |
174 | struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); |
175 | ||
176 | xfrm4_policy_afinfo.garbage_collect(net); | |
177 | return (atomic_read(&ops->entries) > ops->gc_thresh * 2); | |
1da177e4 LT |
178 | } |
179 | ||
180 | static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) | |
181 | { | |
182 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; | |
183 | struct dst_entry *path = xdst->route; | |
184 | ||
185 | path->ops->update_pmtu(path, mtu); | |
186 | } | |
187 | ||
aabc9761 HX |
188 | static void xfrm4_dst_destroy(struct dst_entry *dst) |
189 | { | |
190 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; | |
191 | ||
192 | if (likely(xdst->u.rt.idev)) | |
193 | in_dev_put(xdst->u.rt.idev); | |
ed3e37dd | 194 | if (likely(xdst->u.rt.peer)) |
26db1677 | 195 | inet_putpeer(xdst->u.rt.peer); |
aabc9761 HX |
196 | xfrm_dst_destroy(xdst); |
197 | } | |
198 | ||
199 | static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | |
200 | int unregister) | |
201 | { | |
202 | struct xfrm_dst *xdst; | |
203 | ||
204 | if (!unregister) | |
205 | return; | |
206 | ||
207 | xdst = (struct xfrm_dst *)dst; | |
208 | if (xdst->u.rt.idev->dev == dev) { | |
5a3e55d6 | 209 | struct in_device *loopback_idev = |
c346dca1 | 210 | in_dev_get(dev_net(dev)->loopback_dev); |
aabc9761 HX |
211 | BUG_ON(!loopback_idev); |
212 | ||
213 | do { | |
214 | in_dev_put(xdst->u.rt.idev); | |
215 | xdst->u.rt.idev = loopback_idev; | |
216 | in_dev_hold(loopback_idev); | |
217 | xdst = (struct xfrm_dst *)xdst->u.dst.child; | |
218 | } while (xdst->u.dst.xfrm); | |
219 | ||
220 | __in_dev_put(loopback_idev); | |
221 | } | |
222 | ||
223 | xfrm_dst_ifdown(dst, dev); | |
224 | } | |
225 | ||
1da177e4 LT |
226 | static struct dst_ops xfrm4_dst_ops = { |
227 | .family = AF_INET, | |
09640e63 | 228 | .protocol = cpu_to_be16(ETH_P_IP), |
1da177e4 LT |
229 | .gc = xfrm4_garbage_collect, |
230 | .update_pmtu = xfrm4_update_pmtu, | |
aabc9761 HX |
231 | .destroy = xfrm4_dst_destroy, |
232 | .ifdown = xfrm4_dst_ifdown, | |
862b82c6 | 233 | .local_out = __ip_local_out, |
1da177e4 | 234 | .gc_thresh = 1024, |
e2422970 | 235 | .entries = ATOMIC_INIT(0), |
1da177e4 LT |
236 | }; |
237 | ||
238 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | |
239 | .family = AF_INET, | |
1da177e4 LT |
240 | .dst_ops = &xfrm4_dst_ops, |
241 | .dst_lookup = xfrm4_dst_lookup, | |
a1e59abf | 242 | .get_saddr = xfrm4_get_saddr, |
1da177e4 | 243 | .decode_session = _decode_session4, |
25ee3286 | 244 | .get_tos = xfrm4_get_tos, |
a1b05140 | 245 | .init_path = xfrm4_init_path, |
25ee3286 | 246 | .fill_dst = xfrm4_fill_dst, |
1da177e4 LT |
247 | }; |
248 | ||
#ifdef CONFIG_SYSCTL
/*
 * Sysctl table with a single integer knob, "xfrm4_gc_thresh", backed by
 * the gc_thresh of init_net's xfrm4 dst_ops. The empty entry terminates
 * the table.
 */
static struct ctl_table xfrm4_policy_table[] = {
	{
		.procname	= "xfrm4_gc_thresh",
		.mode		= 0644,
		.data		= &init_net.xfrm.xfrm4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.proc_handler	= proc_dointvec,
	},
	{ }
};

/* Handle from register_net_sysctl_table(); checked for NULL on teardown. */
static struct ctl_table_header *sysctl_hdr;
#endif
a44a4a00 | 263 | |
1da177e4 LT |
264 | static void __init xfrm4_policy_init(void) |
265 | { | |
266 | xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); | |
267 | } | |
268 | ||
269 | static void __exit xfrm4_policy_fini(void) | |
270 | { | |
f816700a | 271 | #ifdef CONFIG_SYSCTL |
a44a4a00 NH |
272 | if (sysctl_hdr) |
273 | unregister_net_sysctl_table(sysctl_hdr); | |
f816700a | 274 | #endif |
1da177e4 LT |
275 | xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); |
276 | } | |
277 | ||
a33bc5c1 | 278 | void __init xfrm4_init(int rt_max_size) |
1da177e4 | 279 | { |
a33bc5c1 NH |
280 | /* |
281 | * Select a default value for the gc_thresh based on the main route | |
282 | * table hash size. It seems to me the worst case scenario is when | |
283 | * we have ipsec operating in transport mode, in which we create a | |
284 | * dst_entry per socket. The xfrm gc algorithm starts trying to remove | |
285 | * entries at gc_thresh, and prevents new allocations as 2*gc_thresh | |
286 | * so lets set an initial xfrm gc_thresh value at the rt_max_size/2. | |
287 | * That will let us store an ipsec connection per route table entry, | |
288 | * and start cleaning when were 1/2 full | |
289 | */ | |
290 | xfrm4_dst_ops.gc_thresh = rt_max_size/2; | |
d7c7544c AD |
291 | |
292 | xfrm4_state_init(); | |
293 | xfrm4_policy_init(); | |
f816700a | 294 | #ifdef CONFIG_SYSCTL |
a44a4a00 NH |
295 | sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, |
296 | xfrm4_policy_table); | |
f816700a | 297 | #endif |
1da177e4 LT |
298 | } |
299 |