Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | |
3 | * operating system. INET is implemented using the BSD Socket | |
4 | * interface as the means of communication with the user level. | |
5 | * | |
6 | * The IP forwarding functionality. | |
e905a9ed | 7 | * |
1da177e4 LT |
8 | * Authors: see ip.c |
9 | * | |
10 | * Fixes: | |
e905a9ed | 11 | * Many : Split from ip.c , see ip_input.c for |
1da177e4 | 12 | * history. |
e905a9ed | 13 | * Dave Gregorich : NULL ip_rt_put fix for multicast |
1da177e4 LT |
14 | * routing. |
15 | * Jos Vos : Add call_out_firewall before sending, | |
16 | * use output device for accounting. | |
17 | * Jos Vos : Call forward firewall after routing | |
18 | * (always use output device). | |
19 | * Mike McLagan : Routing by source | |
20 | */ | |
21 | ||
1da177e4 LT |
22 | #include <linux/types.h> |
23 | #include <linux/mm.h> | |
1da177e4 LT |
24 | #include <linux/skbuff.h> |
25 | #include <linux/ip.h> | |
26 | #include <linux/icmp.h> | |
27 | #include <linux/netdevice.h> | |
5a0e3ad6 | 28 | #include <linux/slab.h> |
1da177e4 LT |
29 | #include <net/sock.h> |
30 | #include <net/ip.h> | |
31 | #include <net/tcp.h> | |
32 | #include <net/udp.h> | |
33 | #include <net/icmp.h> | |
34 | #include <linux/tcp.h> | |
35 | #include <linux/udp.h> | |
36 | #include <linux/netfilter_ipv4.h> | |
37 | #include <net/checksum.h> | |
38 | #include <linux/route.h> | |
39 | #include <net/route.h> | |
40 | #include <net/xfrm.h> | |
41 | ||
fe6cc55f FW |
42 | static bool ip_may_fragment(const struct sk_buff *skb) |
43 | { | |
44 | return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || | |
45 | !skb->local_df; | |
46 | } | |
47 | ||
48 | static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) | |
49 | { | |
50 | if (skb->len <= mtu || skb->local_df) | |
51 | return false; | |
52 | ||
53 | if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) | |
54 | return false; | |
55 | ||
56 | return true; | |
57 | } | |
58 | ||
59 | static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb) | |
60 | { | |
61 | unsigned int mtu; | |
62 | ||
63 | if (skb->local_df || !skb_is_gso(skb)) | |
64 | return false; | |
65 | ||
66 | mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true); | |
67 | ||
68 | /* if seglen > mtu, do software segmentation for IP fragmentation on | |
69 | * output. DF bit cannot be set since ip_forward would have sent | |
70 | * icmp error. | |
71 | */ | |
72 | return skb_gso_network_seglen(skb) > mtu; | |
73 | } | |
74 | ||
75 | /* called if GSO skb needs to be fragmented on forward */ | |
76 | static int ip_forward_finish_gso(struct sk_buff *skb) | |
77 | { | |
78 | struct dst_entry *dst = skb_dst(skb); | |
79 | netdev_features_t features; | |
80 | struct sk_buff *segs; | |
81 | int ret = 0; | |
82 | ||
83 | features = netif_skb_dev_features(skb, dst->dev); | |
84 | segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); | |
85 | if (IS_ERR(segs)) { | |
86 | kfree_skb(skb); | |
87 | return -ENOMEM; | |
88 | } | |
89 | ||
90 | consume_skb(skb); | |
91 | ||
92 | do { | |
93 | struct sk_buff *nskb = segs->next; | |
94 | int err; | |
95 | ||
96 | segs->next = NULL; | |
97 | err = dst_output(segs); | |
98 | ||
99 | if (err && ret == 0) | |
100 | ret = err; | |
101 | segs = nskb; | |
102 | } while (segs); | |
103 | ||
104 | return ret; | |
105 | } | |
106 | ||
861d0486 | 107 | static int ip_forward_finish(struct sk_buff *skb) |
1da177e4 | 108 | { |
5e73ea1a | 109 | struct ip_options *opt = &(IPCB(skb)->opt); |
1da177e4 | 110 | |
adf30907 | 111 | IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); |
2d8dbb04 | 112 | IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); |
1da177e4 LT |
113 | |
114 | if (unlikely(opt->optlen)) | |
115 | ip_forward_options(skb); | |
116 | ||
fe6cc55f FW |
117 | if (ip_gso_exceeds_dst_mtu(skb)) |
118 | return ip_forward_finish_gso(skb); | |
119 | ||
1da177e4 LT |
120 | return dst_output(skb); |
121 | } | |
122 | ||
123 | int ip_forward(struct sk_buff *skb) | |
124 | { | |
f87c10a8 | 125 | u32 mtu; |
1da177e4 LT |
126 | struct iphdr *iph; /* Our header */ |
127 | struct rtable *rt; /* Route we use */ | |
5e73ea1a | 128 | struct ip_options *opt = &(IPCB(skb)->opt); |
1da177e4 | 129 | |
d4f2fa6a DK |
130 | /* that should never happen */ |
131 | if (skb->pkt_type != PACKET_HOST) | |
132 | goto drop; | |
133 | ||
4497b076 BH |
134 | if (skb_warn_if_lro(skb)) |
135 | goto drop; | |
136 | ||
1da177e4 LT |
137 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) |
138 | goto drop; | |
139 | ||
140 | if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) | |
141 | return NET_RX_SUCCESS; | |
142 | ||
35fc92a9 | 143 | skb_forward_csum(skb); |
e905a9ed | 144 | |
1da177e4 LT |
145 | /* |
146 | * According to the RFC, we must first decrease the TTL field. If | |
147 | * that reaches zero, we must reply an ICMP control message telling | |
148 | * that the packet's lifetime expired. | |
149 | */ | |
eddc9ec5 | 150 | if (ip_hdr(skb)->ttl <= 1) |
e905a9ed | 151 | goto too_many_hops; |
1da177e4 LT |
152 | |
153 | if (!xfrm4_route_forward(skb)) | |
154 | goto drop; | |
155 | ||
511c3f92 | 156 | rt = skb_rtable(skb); |
1da177e4 | 157 | |
155e8336 | 158 | if (opt->is_strictroute && rt->rt_uses_gateway) |
1da177e4 LT |
159 | goto sr_failed; |
160 | ||
f87c10a8 HFS |
161 | IPCB(skb)->flags |= IPSKB_FORWARDED; |
162 | mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); | |
fe6cc55f | 163 | if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) { |
d8d1f30b | 164 | IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); |
9af3912e | 165 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
f87c10a8 | 166 | htonl(mtu)); |
9af3912e JH |
167 | goto drop; |
168 | } | |
169 | ||
1da177e4 | 170 | /* We are about to mangle packet. Copy it! */ |
d8d1f30b | 171 | if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len)) |
1da177e4 | 172 | goto drop; |
eddc9ec5 | 173 | iph = ip_hdr(skb); |
1da177e4 LT |
174 | |
175 | /* Decrease ttl after skb cow done */ | |
176 | ip_decrease_ttl(iph); | |
177 | ||
178 | /* | |
179 | * We now generate an ICMP HOST REDIRECT giving the route | |
180 | * we calculated. | |
181 | */ | |
def8b4fa | 182 | if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) |
1da177e4 LT |
183 | ip_rt_send_redirect(skb); |
184 | ||
185 | skb->priority = rt_tos2priority(iph->tos); | |
186 | ||
9bbc768a | 187 | return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, |
d8d1f30b | 188 | rt->dst.dev, ip_forward_finish); |
1da177e4 LT |
189 | |
190 | sr_failed: | |
e905a9ed | 191 | /* |
1da177e4 LT |
192 | * Strict routing permits no gatewaying |
193 | */ | |
e905a9ed YH |
194 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0); |
195 | goto drop; | |
1da177e4 LT |
196 | |
197 | too_many_hops: | |
e905a9ed | 198 | /* Tell the sender its packet died... */ |
adf30907 | 199 | IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_INHDRERRORS); |
e905a9ed | 200 | icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); |
1da177e4 LT |
201 | drop: |
202 | kfree_skb(skb); | |
203 | return NET_RX_DROP; | |
204 | } |