/*
 * ip_vs_xmit.c: various packet transmitters for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/tcp.h>                  /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h>                    /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h>                   /* for icmp_send */
#include <net/route.h>                  /* for ip_route_output */
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

#include <net/ip_vs.h>


/*
 *      Destination cache to speed up outgoing route lookup
 */
static inline void
__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
                u32 dst_cookie)
{
        struct dst_entry *old_dst;

        old_dst = dest->dst_cache;
        dest->dst_cache = dst;
        dest->dst_rtos = rtos;
        dest->dst_cookie = dst_cookie;
        dst_release(old_dst);
}

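/*
 *      Return the cached route for this destination if it is still usable.
 *      A dst that is marked obsolete, or that was cached with a different
 *      TOS, is revalidated via dst->ops->check() with the stored cookie;
 *      if that fails, the cache entry is dropped.  A reference is taken on
 *      any dst that is returned.
 */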
static inline struct dst_entry *
__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
{
        struct dst_entry *dst = dest->dst_cache;

        if (!dst)
                return NULL;
        if ((dst->obsolete || rtos != dest->dst_rtos) &&
            dst->ops->check(dst, dest->dst_cookie) == NULL) {
                dest->dst_cache = NULL;
                dst_release(dst);
                return NULL;
        }
        dst_hold(dst);
        return dst;
}

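/*
 *      Get the IPv4 route to the real server.  When the connection has a
 *      struct ip_vs_dest attached, the per-destination route cache is used
 *      under dest->dst_lock and refreshed with ip_route_output_key() on a
 *      miss; otherwise a one-off route lookup is done for cp->daddr.
 */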
static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos)
{
        struct net *net = dev_net(skb->dev);
        struct rtable *rt;                      /* Route to the other host */
        struct ip_vs_dest *dest = cp->dest;

        if (dest) {
                spin_lock(&dest->dst_lock);
                if (!(rt = (struct rtable *)
                      __ip_vs_dst_check(dest, rtos))) {
                        struct flowi fl = {
                                .oif = 0,
                                .nl_u = {
                                        .ip4_u = {
                                                .daddr = dest->addr.ip,
                                                .saddr = 0,
                                                .tos = rtos, } },
                        };

                        if (ip_route_output_key(net, &rt, &fl)) {
                                spin_unlock(&dest->dst_lock);
                                IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
                                             &dest->addr.ip);
                                return NULL;
                        }
                        __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
                        IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
                                  &dest->addr.ip,
                                  atomic_read(&rt->dst.__refcnt), rtos);
                }
                spin_unlock(&dest->dst_lock);
        } else {
                struct flowi fl = {
                        .oif = 0,
                        .nl_u = {
                                .ip4_u = {
                                        .daddr = cp->daddr.ip,
                                        .saddr = 0,
                                        .tos = rtos, } },
                };

                if (ip_route_output_key(net, &rt, &fl)) {
                        IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
                                     &cp->daddr.ip);
                        return NULL;
                }
        }

        return rt;
}

#ifdef CONFIG_IP_VS_IPV6

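/*
 *      Resolve the IPv6 route to *daddr.  When ret_saddr is given, a source
 *      address is selected for the route as well (and an xfrm_lookup() is
 *      done when do_xfrm is set) before the chosen source is copied back to
 *      the caller.  Returns NULL and drops the dst reference on failure.
 */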
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
                        struct in6_addr *ret_saddr, int do_xfrm)
{
        struct dst_entry *dst;
        struct flowi fl = {
                .oif = 0,
                .nl_u = {
                        .ip6_u = {
                                .daddr = *daddr,
                        },
                },
        };

        dst = ip6_route_output(net, NULL, &fl);
        if (dst->error)
                goto out_err;
        if (!ret_saddr)
                return dst;
        if (ipv6_addr_any(&fl.fl6_src) &&
            ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
                               &fl.fl6_dst, 0, &fl.fl6_src) < 0)
                goto out_err;
        if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0)
                goto out_err;
        ipv6_addr_copy(ret_saddr, &fl.fl6_src);
        return dst;

out_err:
        dst_release(dst);
        IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
        return NULL;
}

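/*
 *      IPv6 counterpart of __ip_vs_get_out_rt(): return the route to the
 *      real server, using the per-destination cache (keyed by the fib node
 *      serial number as dst cookie) when cp->dest is set.  If ret_saddr is
 *      non-NULL, it receives the source address chosen for the route.
 */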
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                      struct in6_addr *ret_saddr, int do_xfrm)
{
        struct net *net = dev_net(skb->dev);
        struct rt6_info *rt;                    /* Route to the other host */
        struct ip_vs_dest *dest = cp->dest;
        struct dst_entry *dst;

        if (dest) {
                spin_lock(&dest->dst_lock);
                rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
                if (!rt) {
                        u32 cookie;

                        dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
                                                      &dest->dst_saddr,
                                                      do_xfrm);
                        if (!dst) {
                                spin_unlock(&dest->dst_lock);
                                return NULL;
                        }
                        rt = (struct rt6_info *) dst;
                        cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
                        __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
                        IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
                                  &dest->addr.in6, &dest->dst_saddr,
                                  atomic_read(&rt->dst.__refcnt));
                }
                if (ret_saddr)
                        ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
                spin_unlock(&dest->dst_lock);
        } else {
                dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr,
                                              do_xfrm);
                if (!dst)
                        return NULL;
                rt = (struct rt6_info *) dst;
        }

        return rt;
}
#endif


/*
 *      Release dest->dst_cache before a dest is removed
 */
void
ip_vs_dst_reset(struct ip_vs_dest *dest)
{
        struct dst_entry *old_dst;

        old_dst = dest->dst_cache;
        dest->dst_cache = NULL;
        dst_release(old_dst);
}

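/*
 *      Transmit helpers:
 *
 *      IP_VS_XMIT_TUNNEL - mark the skb as owned by IPVS and, when the
 *              connection uses netfilter conntrack (IP_VS_CONN_F_NFCT),
 *              confirm the conntrack entry before the outer header is added;
 *              on NF_ACCEPT the skb's conntrack reference is dropped
 *              (nf_reset) and ip_summed is set to CHECKSUM_NONE.  Evaluates
 *              to the netfilter verdict so the caller can decide whether to
 *              send or drop the skb.
 *
 *      IP_VS_XMIT_NAT - mark the skb as owned by IPVS, either mark it as
 *              untracked or update the existing conntrack entry, and
 *              re-inject it at the LOCAL_OUT hook towards dst_output().
 *
 *      IP_VS_XMIT - mark the skb as owned by IPVS, mark it as untracked
 *              unless IP_VS_CONN_F_NFCT is set, and re-inject it at the
 *              LOCAL_OUT hook towards dst_output().
 */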
#define IP_VS_XMIT_TUNNEL(skb, cp)                              \
({                                                              \
        int __ret = NF_ACCEPT;                                  \
                                                                \
        (skb)->ipvs_property = 1;                               \
        if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))          \
                __ret = ip_vs_confirm_conntrack(skb, cp);       \
        if (__ret == NF_ACCEPT) {                               \
                nf_reset(skb);                                  \
                (skb)->ip_summed = CHECKSUM_NONE;               \
        }                                                       \
        __ret;                                                  \
})

#define IP_VS_XMIT_NAT(pf, skb, cp)                             \
do {                                                            \
        (skb)->ipvs_property = 1;                               \
        if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))         \
                ip_vs_notrack(skb);                             \
        else                                                    \
                ip_vs_update_conntrack(skb, cp, 1);             \
        skb_forward_csum(skb);                                  \
        NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,             \
                skb_dst(skb)->dev, dst_output);                 \
} while (0)

#define IP_VS_XMIT(pf, skb, cp)                                 \
do {                                                            \
        (skb)->ipvs_property = 1;                               \
        if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))         \
                ip_vs_notrack(skb);                             \
        skb_forward_csum(skb);                                  \
        NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,             \
                skb_dst(skb)->dev, dst_output);                 \
} while (0)


/*
 *      NULL transmitter (do nothing except return NF_ACCEPT)
 */
int
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                struct ip_vs_protocol *pp)
{
        /* we do not touch skb and do not need pskb ptr */
        return NF_ACCEPT;
}


/*
 *      Bypass transmitter
 *      Let packets bypass the destination when the destination is not
 *      available; it is meant to be used only in a transparent cache cluster.
 */
int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                  struct ip_vs_protocol *pp)
{
        struct net *net = dev_net(skb->dev);
        struct rtable *rt;                      /* Route to the other host */
        struct iphdr  *iph = ip_hdr(skb);
        u8     tos = iph->tos;
        int    mtu;
        struct flowi fl = {
                .oif = 0,
                .nl_u = {
                        .ip4_u = {
                                .daddr = iph->daddr,
                                .saddr = 0,
                                .tos = RT_TOS(tos), } },
        };

        EnterFunction(10);

        if (ip_route_output_key(net, &rt, &fl)) {
                IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n",
                             __func__, &iph->daddr);
                goto tx_error_icmp;
        }

        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
        if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
                ip_rt_put(rt);
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error;
        }

        /*
         * Call ip_send_check because we are not sure it is called
         * after ip_defrag. Is copy-on-write needed?
         */
        if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
                ip_rt_put(rt);
                return NF_STOLEN;
        }
        ip_send_check(ip_hdr(skb));

        /* drop old route */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT(NFPROTO_IPV4, skb, cp);

        LeaveFunction(10);
        return NF_STOLEN;

 tx_error_icmp:
        dst_link_failure(skb);
 tx_error:
        kfree_skb(skb);
        LeaveFunction(10);
        return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                     struct ip_vs_protocol *pp)
{
        struct net *net = dev_net(skb->dev);
        struct dst_entry *dst;
        struct rt6_info *rt;                    /* Route to the other host */
        struct ipv6hdr  *iph = ipv6_hdr(skb);
        int    mtu;

        EnterFunction(10);

        dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0);
        if (!dst)
                goto tx_error_icmp;
        rt = (struct rt6_info *) dst;

        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
        if (skb->len > mtu) {
                dst_release(&rt->dst);
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error;
        }

        /*
         * Call ip_send_check because we are not sure it is called
         * after ip_defrag. Is copy-on-write needed?
         */
        skb = skb_share_check(skb, GFP_ATOMIC);
        if (unlikely(skb == NULL)) {
                dst_release(&rt->dst);
                return NF_STOLEN;
        }

        /* drop old route */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT(NFPROTO_IPV6, skb, cp);

        LeaveFunction(10);
        return NF_STOLEN;

 tx_error_icmp:
        dst_link_failure(skb);
 tx_error:
        kfree_skb(skb);
        LeaveFunction(10);
        return NF_STOLEN;
}
#endif

/*
 *      NAT transmitter (only for outside-to-inside nat forwarding)
 *      Not used for related ICMP
 */
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
               struct ip_vs_protocol *pp)
{
        struct rtable *rt;              /* Route to the other host */
        int mtu;
        struct iphdr *iph = ip_hdr(skb);

        EnterFunction(10);

        /* check if it is a connection of no-client-port */
        if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
                __be16 _pt, *p;
                p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
                if (p == NULL)
                        goto tx_error;
                ip_vs_conn_fill_cport(cp, *p);
                IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
        }

        if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
                goto tx_error_icmp;

        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
        if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
                ip_rt_put(rt);
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
                goto tx_error;
        }

        /* copy-on-write the packet before mangling it */
        if (!skb_make_writable(skb, sizeof(struct iphdr)))
                goto tx_error_put;

        if (skb_cow(skb, rt->dst.dev->hard_header_len))
                goto tx_error_put;

        /* drop old route */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /* mangle the packet */
        if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
                goto tx_error;
        ip_hdr(skb)->daddr = cp->daddr.ip;
        ip_send_check(ip_hdr(skb));

        IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

        /* FIXME: when application helper enlarges the packet and the length
           is larger than the MTU of outgoing device, there will be still
           MTU problem. */

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp);

        LeaveFunction(10);
        return NF_STOLEN;

 tx_error_icmp:
        dst_link_failure(skb);
 tx_error:
        kfree_skb(skb);
        LeaveFunction(10);
        return NF_STOLEN;
 tx_error_put:
        ip_rt_put(rt);
        goto tx_error;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                  struct ip_vs_protocol *pp)
{
        struct rt6_info *rt;            /* Route to the other host */
        int mtu;

        EnterFunction(10);

        /* check if it is a connection of no-client-port */
        if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
                __be16 _pt, *p;
                p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
                                       sizeof(_pt), &_pt);
                if (p == NULL)
                        goto tx_error;
                ip_vs_conn_fill_cport(cp, *p);
                IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
        }

        rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
        if (!rt)
                goto tx_error_icmp;

        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
        if (skb->len > mtu) {
                dst_release(&rt->dst);
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP_VS_DBG_RL_PKT(0, pp, skb, 0,
                                 "ip_vs_nat_xmit_v6(): frag needed for");
                goto tx_error;
        }

        /* copy-on-write the packet before mangling it */
        if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
                goto tx_error_put;

        if (skb_cow(skb, rt->dst.dev->hard_header_len))
                goto tx_error_put;

        /* drop old route */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /* mangle the packet */
        if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
                goto tx_error;
        ipv6_hdr(skb)->daddr = cp->daddr.in6;

        IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

        /* FIXME: when application helper enlarges the packet and the length
           is larger than the MTU of outgoing device, there will be still
           MTU problem. */

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp);

        LeaveFunction(10);
        return NF_STOLEN;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        LeaveFunction(10);
        kfree_skb(skb);
        return NF_STOLEN;
tx_error_put:
        dst_release(&rt->dst);
        goto tx_error;
}
#endif


/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, whose
 *   destination will be set to cp->daddr. Most of the code in this
 *   function is taken from ipip.c.
 *
 *   It is used in a VS/TUN cluster. The load balancer selects a real
 *   server from the cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, process
 *   the request and return the response packets directly to the client
 *   without passing through the load balancer. This can greatly increase
 *   the scalability of the virtual server.
 *
 *   Used for ANY protocol
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                  struct ip_vs_protocol *pp)
{
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                /* Device to other host */
        struct iphdr  *old_iph = ip_hdr(skb);
        u8     tos = old_iph->tos;
        __be16 df = old_iph->frag_off;
        struct iphdr  *iph;                     /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        int    mtu;
        int ret;

        EnterFunction(10);

        if (skb->protocol != htons(ETH_P_IP)) {
                IP_VS_DBG_RL("%s(): protocol error, "
                             "ETH_P_IP: %d, skb protocol: %d\n",
                             __func__, htons(ETH_P_IP), skb->protocol);
                goto tx_error;
        }

        if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos))))
                goto tx_error_icmp;

        tdev = rt->dst.dev;

        mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
        if (mtu < 68) {
                ip_rt_put(rt);
                IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
                goto tx_error;
        }
        if (skb_dst(skb))
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

        df |= (old_iph->frag_off & htons(IP_DF));

        if ((old_iph->frag_off & htons(IP_DF))
            && mtu < ntohs(old_iph->tot_len)) {
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                ip_rt_put(rt);
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

        if (skb_headroom(skb) < max_headroom
            || skb_cloned(skb) || skb_shared(skb)) {
                struct sk_buff *new_skb =
                        skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        kfree_skb(skb);
                        IP_VS_ERR_RL("%s(): no memory\n", __func__);
                        return NF_STOLEN;
                }
                kfree_skb(skb);
                skb = new_skb;
                old_iph = ip_hdr(skb);
        }

        skb->transport_header = skb->network_header;

        /* fix old IP header checksum */
        ip_send_check(old_iph);

        skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

        /* drop old route */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /*
         * Push down and install the IPIP header.
         */
        iph = ip_hdr(skb);
        iph->version = 4;
        iph->ihl = sizeof(struct iphdr)>>2;
        iph->frag_off = df;
        iph->protocol = IPPROTO_IPIP;
        iph->tos = tos;
        iph->daddr = rt->rt_dst;
        iph->saddr = rt->rt_src;
        iph->ttl = old_iph->ttl;
        ip_select_ident(iph, &rt->dst, NULL);

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        ret = IP_VS_XMIT_TUNNEL(skb, cp);
        if (ret == NF_ACCEPT)
                ip_local_out(skb);
        else if (ret == NF_DROP)
                kfree_skb(skb);

        LeaveFunction(10);

        return NF_STOLEN;

 tx_error_icmp:
        dst_link_failure(skb);
 tx_error:
        kfree_skb(skb);
        LeaveFunction(10);
        return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                     struct ip_vs_protocol *pp)
{
        struct rt6_info *rt;                    /* Route to the other host */
        struct in6_addr saddr;                  /* Source for tunnel */
        struct net_device *tdev;                /* Device to other host */
        struct ipv6hdr  *old_iph = ipv6_hdr(skb);
        struct ipv6hdr  *iph;                   /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        int    mtu;
        int ret;

        EnterFunction(10);

        if (skb->protocol != htons(ETH_P_IPV6)) {
                IP_VS_DBG_RL("%s(): protocol error, "
                             "ETH_P_IPV6: %d, skb protocol: %d\n",
                             __func__, htons(ETH_P_IPV6), skb->protocol);
                goto tx_error;
        }

        rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1);
        if (!rt)
                goto tx_error_icmp;

        tdev = rt->dst.dev;

        mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
        if (mtu < IPV6_MIN_MTU) {
                dst_release(&rt->dst);
                IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
                             IPV6_MIN_MTU);
                goto tx_error;
        }
        if (skb_dst(skb))
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

        if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                dst_release(&rt->dst);
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

        if (skb_headroom(skb) < max_headroom
            || skb_cloned(skb) || skb_shared(skb)) {
                struct sk_buff *new_skb =
                        skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        dst_release(&rt->dst);
                        kfree_skb(skb);
                        IP_VS_ERR_RL("%s(): no memory\n", __func__);
                        return NF_STOLEN;
                }
                kfree_skb(skb);
                skb = new_skb;
                old_iph = ipv6_hdr(skb);
        }

        skb->transport_header = skb->network_header;

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

        /* drop old route */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /*
         * Push down and install the IPIP header.
         */
        iph = ipv6_hdr(skb);
        iph->version = 6;
        iph->nexthdr = IPPROTO_IPV6;
        iph->payload_len = old_iph->payload_len;
        be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
        iph->priority = old_iph->priority;
        memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
        ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
        ipv6_addr_copy(&iph->saddr, &saddr);
        iph->hop_limit = old_iph->hop_limit;

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        ret = IP_VS_XMIT_TUNNEL(skb, cp);
        if (ret == NF_ACCEPT)
                ip6_local_out(skb);
        else if (ret == NF_DROP)
                kfree_skb(skb);

        LeaveFunction(10);

        return NF_STOLEN;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        kfree_skb(skb);
        LeaveFunction(10);
        return NF_STOLEN;
}
#endif


/*
 *      Direct Routing transmitter
 *      Used for ANY protocol
 */
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
              struct ip_vs_protocol *pp)
{
        struct rtable *rt;                      /* Route to the other host */
        struct iphdr  *iph = ip_hdr(skb);
        int    mtu;

        EnterFunction(10);

        if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
                goto tx_error_icmp;

        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
        if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                ip_rt_put(rt);
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error;
        }

        /*
         * Call ip_send_check because we are not sure it is called
         * after ip_defrag. Is copy-on-write needed?
         */
        if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
                ip_rt_put(rt);
                return NF_STOLEN;
        }
        ip_send_check(ip_hdr(skb));

        /* drop old route */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT(NFPROTO_IPV4, skb, cp);

        LeaveFunction(10);
        return NF_STOLEN;

 tx_error_icmp:
        dst_link_failure(skb);
 tx_error:
        kfree_skb(skb);
        LeaveFunction(10);
        return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                 struct ip_vs_protocol *pp)
{
        struct rt6_info *rt;                    /* Route to the other host */
        int    mtu;

        EnterFunction(10);

        rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
        if (!rt)
                goto tx_error_icmp;

        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
        if (skb->len > mtu) {
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                dst_release(&rt->dst);
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error;
        }

        /*
         * Call ip_send_check because we are not sure it is called
         * after ip_defrag. Is copy-on-write needed?
         */
        skb = skb_share_check(skb, GFP_ATOMIC);
        if (unlikely(skb == NULL)) {
                dst_release(&rt->dst);
                return NF_STOLEN;
        }

        /* drop old route */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT(NFPROTO_IPV6, skb, cp);

        LeaveFunction(10);
        return NF_STOLEN;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        kfree_skb(skb);
        LeaveFunction(10);
        return NF_STOLEN;
}
#endif


/*
 *      ICMP packet transmitter
 *      called by the ip_vs_in_icmp
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                struct ip_vs_protocol *pp, int offset)
{
        struct rtable *rt;      /* Route to the other host */
        int mtu;
        int rc;

        EnterFunction(10);

        /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
           forwarded directly here, because there is no need to
           translate address/port back */
        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
                if (cp->packet_xmit)
                        rc = cp->packet_xmit(skb, cp, pp);
                else
                        rc = NF_ACCEPT;
                /* do not touch skb anymore */
                atomic_inc(&cp->in_pkts);
                goto out;
        }

        /*
         * mangle and send the packet here (only for VS/NAT)
         */

        if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos))))
                goto tx_error_icmp;

        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
        if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
                ip_rt_put(rt);
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error;
        }

        /* copy-on-write the packet before mangling it */
        if (!skb_make_writable(skb, offset))
                goto tx_error_put;

        if (skb_cow(skb, rt->dst.dev->hard_header_len))
                goto tx_error_put;

        /* drop the old route when skb is not shared */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        ip_vs_nat_icmp(skb, pp, cp, 0);

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT(NFPROTO_IPV4, skb, cp);

        rc = NF_STOLEN;
        goto out;

 tx_error_icmp:
        dst_link_failure(skb);
 tx_error:
        dev_kfree_skb(skb);
        rc = NF_STOLEN;
 out:
        LeaveFunction(10);
        return rc;
 tx_error_put:
        ip_rt_put(rt);
        goto tx_error;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                   struct ip_vs_protocol *pp, int offset)
{
        struct rt6_info *rt;    /* Route to the other host */
        int mtu;
        int rc;

        EnterFunction(10);

        /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
           forwarded directly here, because there is no need to
           translate address/port back */
        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
                if (cp->packet_xmit)
                        rc = cp->packet_xmit(skb, cp, pp);
                else
                        rc = NF_ACCEPT;
                /* do not touch skb anymore */
                atomic_inc(&cp->in_pkts);
                goto out;
        }

        /*
         * mangle and send the packet here (only for VS/NAT)
         */

        rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
        if (!rt)
                goto tx_error_icmp;

        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
        if (skb->len > mtu) {
                dst_release(&rt->dst);
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error;
        }

        /* copy-on-write the packet before mangling it */
        if (!skb_make_writable(skb, offset))
                goto tx_error_put;

        if (skb_cow(skb, rt->dst.dev->hard_header_len))
                goto tx_error_put;

        /* drop the old route when skb is not shared */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        ip_vs_nat_icmp_v6(skb, pp, cp, 0);

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT(NFPROTO_IPV6, skb, cp);

        rc = NF_STOLEN;
        goto out;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        dev_kfree_skb(skb);
        rc = NF_STOLEN;
out:
        LeaveFunction(10);
        return rc;
tx_error_put:
        dst_release(&rt->dst);
        goto tx_error;
}
#endif