/*
 * net/netfilter/ipvs/ip_vs_xmit.c
 *
 * Imported from the net-next-2.6 tree (bbs.cooldavid.org git mirror),
 * blame taken at commit "ipvs: move ip_route_me_harder for ICMP".
 */
1/*
2 * ip_vs_xmit.c: various packet transmitters for IPVS
3 *
4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Changes:
13 *
14 */
15
16#define KMSG_COMPONENT "IPVS"
17#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
19#include <linux/kernel.h>
20#include <linux/slab.h>
21#include <linux/tcp.h> /* for tcphdr */
22#include <net/ip.h>
23#include <net/tcp.h> /* for csum_tcpudp_magic */
24#include <net/udp.h>
25#include <net/icmp.h> /* for icmp_send */
26#include <net/route.h> /* for ip_route_output */
27#include <net/ipv6.h>
28#include <net/ip6_route.h>
29#include <net/addrconf.h>
30#include <linux/icmpv6.h>
31#include <linux/netfilter.h>
32#include <linux/netfilter_ipv4.h>
33
34#include <net/ip_vs.h>
35
36
37/*
38 * Destination cache to speed up outgoing route lookup
39 */
40static inline void
41__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
42 u32 dst_cookie)
43{
44 struct dst_entry *old_dst;
45
46 old_dst = dest->dst_cache;
47 dest->dst_cache = dst;
48 dest->dst_rtos = rtos;
49 dest->dst_cookie = dst_cookie;
50 dst_release(old_dst);
51}
52
/*
 * Return the cached route of @dest with an extra reference, or NULL.
 *
 * The cached dst is revalidated only when it is marked obsolete or the
 * requested @rtos differs from the TOS it was cached with; in that case
 * dst->ops->check() (with the stored cookie) decides whether it is
 * still usable.  A stale entry is dropped from the cache (its reference
 * released) and NULL is returned so the caller does a fresh lookup.
 *
 * Caller must hold dest->dst_lock (see __ip_vs_get_out_rt*()).
 */
static inline struct dst_entry *
__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
{
	struct dst_entry *dst = dest->dst_cache;

	if (!dst)
		return NULL;
	if ((dst->obsolete || rtos != dest->dst_rtos) &&
	    dst->ops->check(dst, dest->dst_cookie) == NULL) {
		/* Stale: drop the cache's reference and report a miss */
		dest->dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}
	/* Hand out a reference of our own to the caller */
	dst_hold(dst);
	return dst;
}
69
/*
 * Look up the IPv4 route towards the real server for connection @cp.
 *
 * When the connection has a struct ip_vs_dest, a per-destination route
 * cache is consulted under dest->dst_lock and refilled on miss; the
 * returned rtable carries its own reference (taken by
 * __ip_vs_dst_check() on a hit, or by ip_route_output_key() on a miss).
 * Without a dest (e.g. expired destination) a plain uncached lookup to
 * cp->daddr is done.
 *
 * Returns a referenced rtable, or NULL on routing failure.  The caller
 * owns the reference (dropped via ip_rt_put() or consumed by
 * skb_dst_set()).
 */
static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos)
{
	struct net *net = dev_net(skb->dev);
	struct rtable *rt;			/* Route to the other host */
	struct ip_vs_dest *dest = cp->dest;

	if (dest) {
		spin_lock(&dest->dst_lock);
		if (!(rt = (struct rtable *)
		      __ip_vs_dst_check(dest, rtos))) {
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip4_u = {
						.daddr = dest->addr.ip,
						.saddr = 0,
						.tos = rtos, } },
			};

			if (ip_route_output_key(net, &rt, &fl)) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
					     &dest->addr.ip);
				return NULL;
			}
			/* Cache takes its own ref (dst_clone); ours goes
			 * to the caller. */
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
				  &dest->addr.ip,
				  atomic_read(&rt->dst.__refcnt), rtos);
		}
		spin_unlock(&dest->dst_lock);
	} else {
		struct flowi fl = {
			.oif = 0,
			.nl_u = {
				.ip4_u = {
					.daddr = cp->daddr.ip,
					.saddr = 0,
					.tos = rtos, } },
		};

		if (ip_route_output_key(net, &rt, &fl)) {
			IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
				     &cp->daddr.ip);
			return NULL;
		}
	}

	return rt;
}
121
122#ifdef CONFIG_IP_VS_IPV6
123
/*
 * Uncached IPv6 route lookup towards @daddr.
 *
 * On success returns a referenced dst_entry.  If @ret_saddr is non-NULL
 * the selected source address is copied there, choosing one via
 * ipv6_dev_get_saddr() when the flow did not already fix a source.
 * With @do_xfrm set, the route additionally goes through xfrm_lookup()
 * so IPsec policy is applied.  On any failure the dst reference is
 * released internally, a rate-limited debug message is logged and NULL
 * is returned.
 */
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
			struct in6_addr *ret_saddr, int do_xfrm)
{
	struct dst_entry *dst;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *daddr,
			},
		},
	};

	dst = ip6_route_output(net, NULL, &fl);
	if (dst->error)
		goto out_err;
	if (!ret_saddr)
		return dst;		/* caller does not need a saddr */
	if (ipv6_addr_any(&fl.fl6_src) &&
	    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
			       &fl.fl6_dst, 0, &fl.fl6_src) < 0)
		goto out_err;
	if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0)
		goto out_err;
	ipv6_addr_copy(ret_saddr, &fl.fl6_src);
	return dst;

out_err:
	dst_release(dst);
	IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
	return NULL;
}
157
/*
 * Look up the IPv6 route towards the real server for connection @cp,
 * mirroring __ip_vs_get_out_rt() for IPv4.
 *
 * With a struct ip_vs_dest, the route and its chosen source address
 * (kept in dest->dst_saddr) are cached under dest->dst_lock; the fib6
 * node's fn_sernum is stored as the cookie so __ip_vs_dst_check() can
 * detect routing-table changes.  Without a dest an uncached lookup to
 * cp->daddr is done.  @ret_saddr and @do_xfrm are forwarded to
 * __ip_vs_route_output_v6().
 *
 * Returns a referenced rt6_info, or NULL on failure; the caller owns
 * the reference.
 */
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct in6_addr *ret_saddr, int do_xfrm)
{
	struct net *net = dev_net(skb->dev);
	struct rt6_info *rt;			/* Route to the other host */
	struct ip_vs_dest *dest = cp->dest;
	struct dst_entry *dst;

	if (dest) {
		spin_lock(&dest->dst_lock);
		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
		if (!rt) {
			u32 cookie;

			dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
						      &dest->dst_saddr,
						      do_xfrm);
			if (!dst) {
				spin_unlock(&dest->dst_lock);
				return NULL;
			}
			rt = (struct rt6_info *) dst;
			cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
			/* Cache takes its own ref; ours goes to the caller */
			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
				  &dest->addr.in6, &dest->dst_saddr,
				  atomic_read(&rt->dst.__refcnt));
		}
		if (ret_saddr)
			ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
		spin_unlock(&dest->dst_lock);
	} else {
		dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr,
					      do_xfrm);
		if (!dst)
			return NULL;
		rt = (struct rt6_info *) dst;
	}

	return rt;
}
200#endif
201
202
203/*
204 * Release dest->dst_cache before a dest is removed
205 */
206void
207ip_vs_dst_reset(struct ip_vs_dest *dest)
208{
209 struct dst_entry *old_dst;
210
211 old_dst = dest->dst_cache;
212 dest->dst_cache = NULL;
213 dst_release(old_dst);
214}
215
/*
 * Prepare an encapsulated skb for transmission (VS/TUN path).
 * Marks the skb as IPVS property, confirms the conntrack entry when the
 * connection uses netfilter conntrack, and on NF_ACCEPT clears the
 * skb's netfilter state and forwards the checksum state.  Evaluates to
 * the resulting netfilter verdict; the caller decides whether to send
 * or free the skb based on it.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __ret = NF_ACCEPT;					\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__ret = ip_vs_confirm_conntrack(skb, cp);	\
	if (__ret == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__ret;							\
})
229
/*
 * Send a NAT-mangled skb through the LOCAL_OUT netfilter hook.
 * Marks the skb as IPVS property; when the connection does not use
 * netfilter conntrack the skb is untracked, otherwise the conntrack
 * tuple is updated to match the mangling.  The skb is then handed to
 * dst_output() via NF_HOOK, so ownership passes to the stack.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp)				\
do {								\
	(skb)->ipvs_property = 1;				\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))		\
		ip_vs_notrack(skb);				\
	else							\
		ip_vs_update_conntrack(skb, cp, 1);		\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
241
/*
 * Send an unmodified (non-NAT) skb through the LOCAL_OUT netfilter
 * hook.  Same as IP_VS_XMIT_NAT but never updates conntrack, since the
 * packet contents were not mangled.  Ownership of the skb passes to
 * the stack.
 */
#define IP_VS_XMIT(pf, skb, cp)					\
do {								\
	(skb)->ipvs_property = 1;				\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))		\
		ip_vs_notrack(skb);				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
251
252
253/*
254 * NULL transmitter (do nothing except return NF_ACCEPT)
255 */
256int
257ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
258 struct ip_vs_protocol *pp)
259{
260 /* we do not touch skb and do not need pskb ptr */
261 return NF_ACCEPT;
262}
263
264
265/*
266 * Bypass transmitter
267 * Let packets bypass the destination when the destination is not
268 * available, it may be only used in transparent cache cluster.
269 */
270int
271ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
272 struct ip_vs_protocol *pp)
273{
274 struct net *net = dev_net(skb->dev);
275 struct rtable *rt; /* Route to the other host */
276 struct iphdr *iph = ip_hdr(skb);
277 u8 tos = iph->tos;
278 int mtu;
279 struct flowi fl = {
280 .oif = 0,
281 .nl_u = {
282 .ip4_u = {
283 .daddr = iph->daddr,
284 .saddr = 0,
285 .tos = RT_TOS(tos), } },
286 };
287
288 EnterFunction(10);
289
290 if (ip_route_output_key(net, &rt, &fl)) {
291 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n",
292 __func__, &iph->daddr);
293 goto tx_error_icmp;
294 }
295
296 /* MTU checking */
297 mtu = dst_mtu(&rt->dst);
298 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
299 ip_rt_put(rt);
300 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
301 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
302 goto tx_error;
303 }
304
305 /*
306 * Call ip_send_check because we are not sure it is called
307 * after ip_defrag. Is copy-on-write needed?
308 */
309 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
310 ip_rt_put(rt);
311 return NF_STOLEN;
312 }
313 ip_send_check(ip_hdr(skb));
314
315 /* drop old route */
316 skb_dst_drop(skb);
317 skb_dst_set(skb, &rt->dst);
318
319 /* Another hack: avoid icmp_send in ip_fragment */
320 skb->local_df = 1;
321
322 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
323
324 LeaveFunction(10);
325 return NF_STOLEN;
326
327 tx_error_icmp:
328 dst_link_failure(skb);
329 tx_error:
330 kfree_skb(skb);
331 LeaveFunction(10);
332 return NF_STOLEN;
333}
334
335#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6 counterpart of ip_vs_bypass_xmit(): route to the packet's
 * original destination, check the MTU (no DF bit in IPv6 — any
 * oversize packet gets ICMPV6_PKT_TOOBIG), then send via IP_VS_XMIT.
 * Always returns NF_STOLEN.
 */
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct net *net = dev_net(skb->dev);
	struct dst_entry *dst;
	struct rt6_info *rt;			/* Route to the other host */
	struct ipv6hdr  *iph = ipv6_hdr(skb);
	int    mtu;

	EnterFunction(10);

	dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0);
	if (!dst)
		goto tx_error_icmp;
	rt = (struct rt6_info *) dst;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu) {
		dst_release(&rt->dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->dst);
		return NF_STOLEN;
	}

	/* drop old route; rt's reference is consumed by skb_dst_set */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, cp);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
391#endif
392
393/*
394 * NAT transmitter (only for outside-to-inside nat forwarding)
395 * Not used for related ICMP
396 */
397int
398ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
399 struct ip_vs_protocol *pp)
400{
401 struct rtable *rt; /* Route to the other host */
402 int mtu;
403 struct iphdr *iph = ip_hdr(skb);
404
405 EnterFunction(10);
406
407 /* check if it is a connection of no-client-port */
408 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
409 __be16 _pt, *p;
410 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
411 if (p == NULL)
412 goto tx_error;
413 ip_vs_conn_fill_cport(cp, *p);
414 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
415 }
416
417 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
418 goto tx_error_icmp;
419
420 /* MTU checking */
421 mtu = dst_mtu(&rt->dst);
422 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
423 ip_rt_put(rt);
424 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
425 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
426 goto tx_error;
427 }
428
429 /* copy-on-write the packet before mangling it */
430 if (!skb_make_writable(skb, sizeof(struct iphdr)))
431 goto tx_error_put;
432
433 if (skb_cow(skb, rt->dst.dev->hard_header_len))
434 goto tx_error_put;
435
436 /* drop old route */
437 skb_dst_drop(skb);
438 skb_dst_set(skb, &rt->dst);
439
440 /* mangle the packet */
441 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
442 goto tx_error;
443 ip_hdr(skb)->daddr = cp->daddr.ip;
444 ip_send_check(ip_hdr(skb));
445
446 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
447
448 /* FIXME: when application helper enlarges the packet and the length
449 is larger than the MTU of outgoing device, there will be still
450 MTU problem. */
451
452 /* Another hack: avoid icmp_send in ip_fragment */
453 skb->local_df = 1;
454
455 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp);
456
457 LeaveFunction(10);
458 return NF_STOLEN;
459
460 tx_error_icmp:
461 dst_link_failure(skb);
462 tx_error:
463 kfree_skb(skb);
464 LeaveFunction(10);
465 return NF_STOLEN;
466 tx_error_put:
467 ip_rt_put(rt);
468 goto tx_error;
469}
470
471#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6 counterpart of ip_vs_nat_xmit(): fill the client port if
 * missing, route to the real server, DNAT the transport header and the
 * IPv6 destination address, then send via IP_VS_XMIT_NAT.  No IP
 * checksum exists in IPv6, so only the transport handler touches
 * checksums.  Always returns NF_STOLEN.
 */
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	int mtu;

	EnterFunction(10);

	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		/* first 16 bits after the IPv6 header = source port */
		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
				       sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu) {
		dst_release(&rt->dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): frag needed for");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop old route; rt's reference is consumed by skb_dst_set */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error;
	ipv6_hdr(skb)->daddr = cp->daddr.in6;

	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	return NF_STOLEN;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
546#endif
547
548
549/*
550 * IP Tunneling transmitter
551 *
552 * This function encapsulates the packet in a new IP packet, its
553 * destination will be set to cp->daddr. Most code of this function
554 * is taken from ipip.c.
555 *
556 * It is used in VS/TUN cluster. The load balancer selects a real
557 * server from a cluster based on a scheduling algorithm,
558 * encapsulates the request packet and forwards it to the selected
559 * server. For example, all real servers are configured with
560 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
561 * the encapsulated packet, it will decapsulate the packet, processe
562 * the request and return the response packets directly to the client
563 * without passing the load balancer. This can greatly increase the
564 * scalability of virtual server.
565 *
566 * Used for ANY protocol
567 */
568int
569ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
570 struct ip_vs_protocol *pp)
571{
572 struct rtable *rt; /* Route to the other host */
573 struct net_device *tdev; /* Device to other host */
574 struct iphdr *old_iph = ip_hdr(skb);
575 u8 tos = old_iph->tos;
576 __be16 df = old_iph->frag_off;
577 struct iphdr *iph; /* Our new IP header */
578 unsigned int max_headroom; /* The extra header space needed */
579 int mtu;
580 int ret;
581
582 EnterFunction(10);
583
584 if (skb->protocol != htons(ETH_P_IP)) {
585 IP_VS_DBG_RL("%s(): protocol error, "
586 "ETH_P_IP: %d, skb protocol: %d\n",
587 __func__, htons(ETH_P_IP), skb->protocol);
588 goto tx_error;
589 }
590
591 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos))))
592 goto tx_error_icmp;
593
594 tdev = rt->dst.dev;
595
596 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
597 if (mtu < 68) {
598 ip_rt_put(rt);
599 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
600 goto tx_error;
601 }
602 if (skb_dst(skb))
603 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
604
605 df |= (old_iph->frag_off & htons(IP_DF));
606
607 if ((old_iph->frag_off & htons(IP_DF))
608 && mtu < ntohs(old_iph->tot_len)) {
609 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
610 ip_rt_put(rt);
611 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
612 goto tx_error;
613 }
614
615 /*
616 * Okay, now see if we can stuff it in the buffer as-is.
617 */
618 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
619
620 if (skb_headroom(skb) < max_headroom
621 || skb_cloned(skb) || skb_shared(skb)) {
622 struct sk_buff *new_skb =
623 skb_realloc_headroom(skb, max_headroom);
624 if (!new_skb) {
625 ip_rt_put(rt);
626 kfree_skb(skb);
627 IP_VS_ERR_RL("%s(): no memory\n", __func__);
628 return NF_STOLEN;
629 }
630 kfree_skb(skb);
631 skb = new_skb;
632 old_iph = ip_hdr(skb);
633 }
634
635 skb->transport_header = skb->network_header;
636
637 /* fix old IP header checksum */
638 ip_send_check(old_iph);
639
640 skb_push(skb, sizeof(struct iphdr));
641 skb_reset_network_header(skb);
642 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
643
644 /* drop old route */
645 skb_dst_drop(skb);
646 skb_dst_set(skb, &rt->dst);
647
648 /*
649 * Push down and install the IPIP header.
650 */
651 iph = ip_hdr(skb);
652 iph->version = 4;
653 iph->ihl = sizeof(struct iphdr)>>2;
654 iph->frag_off = df;
655 iph->protocol = IPPROTO_IPIP;
656 iph->tos = tos;
657 iph->daddr = rt->rt_dst;
658 iph->saddr = rt->rt_src;
659 iph->ttl = old_iph->ttl;
660 ip_select_ident(iph, &rt->dst, NULL);
661
662 /* Another hack: avoid icmp_send in ip_fragment */
663 skb->local_df = 1;
664
665 ret = IP_VS_XMIT_TUNNEL(skb, cp);
666 if (ret == NF_ACCEPT)
667 ip_local_out(skb);
668 else if (ret == NF_DROP)
669 kfree_skb(skb);
670
671 LeaveFunction(10);
672
673 return NF_STOLEN;
674
675 tx_error_icmp:
676 dst_link_failure(skb);
677 tx_error:
678 kfree_skb(skb);
679 LeaveFunction(10);
680 return NF_STOLEN;
681}
682
683#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6 counterpart of ip_vs_tunnel_xmit(): encapsulate the packet in a
 * new IPv6 header (nexthdr = IPPROTO_IPV6) addressed to cp->daddr,
 * using the source address selected during route lookup, and hand the
 * result to ip6_local_out() after the IP_VS_XMIT_TUNNEL verdict.
 * Always returns NF_STOLEN.
 */
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	struct in6_addr saddr;		/* Source for tunnel */
	struct net_device *tdev;	/* Device to other host */
	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
	struct ipv6hdr  *iph;		/* Our new IP header */
	unsigned int max_headroom;	/* The extra header space needed */
	int    mtu;
	int ret;

	EnterFunction(10);

	/* Only IPv6 payloads can be encapsulated here */
	if (skb->protocol != htons(ETH_P_IPV6)) {
		IP_VS_DBG_RL("%s(): protocol error, "
			     "ETH_P_IPV6: %d, skb protocol: %d\n",
			     __func__, htons(ETH_P_IPV6), skb->protocol);
		goto tx_error;
	}

	/* do_xfrm=1: tunnel output must honour IPsec policy */
	rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1);
	if (!rt)
		goto tx_error_icmp;

	tdev = rt->dst.dev;

	/* Effective MTU shrinks by the outer IPv6 header we will add */
	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
	if (mtu < IPV6_MIN_MTU) {
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
			     IPV6_MIN_MTU);
		goto tx_error;
	}
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			dst_release(&rt->dst);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		kfree_skb(skb);
		skb = new_skb;
		/* header pointers moved with the reallocated buffer */
		old_iph = ipv6_hdr(skb);
	}

	skb->transport_header = skb->network_header;

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route; rt's reference is consumed by skb_dst_set */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 * Push down and install the IPIP header.
	 */
	iph			=	ipv6_hdr(skb);
	iph->version		=	6;
	iph->nexthdr		=	IPPROTO_IPV6;
	iph->payload_len	=	old_iph->payload_len;
	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
	iph->priority		=	old_iph->priority;
	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
	ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
	ipv6_addr_copy(&iph->saddr, &saddr);
	iph->hop_limit		=	old_iph->hop_limit;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = IP_VS_XMIT_TUNNEL(skb, cp);
	if (ret == NF_ACCEPT)
		ip6_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);

	LeaveFunction(10);

	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
793#endif
794
795
796/*
797 * Direct Routing transmitter
798 * Used for ANY protocol
799 */
800int
801ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
802 struct ip_vs_protocol *pp)
803{
804 struct rtable *rt; /* Route to the other host */
805 struct iphdr *iph = ip_hdr(skb);
806 int mtu;
807
808 EnterFunction(10);
809
810 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
811 goto tx_error_icmp;
812
813 /* MTU checking */
814 mtu = dst_mtu(&rt->dst);
815 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
816 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
817 ip_rt_put(rt);
818 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
819 goto tx_error;
820 }
821
822 /*
823 * Call ip_send_check because we are not sure it is called
824 * after ip_defrag. Is copy-on-write needed?
825 */
826 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
827 ip_rt_put(rt);
828 return NF_STOLEN;
829 }
830 ip_send_check(ip_hdr(skb));
831
832 /* drop old route */
833 skb_dst_drop(skb);
834 skb_dst_set(skb, &rt->dst);
835
836 /* Another hack: avoid icmp_send in ip_fragment */
837 skb->local_df = 1;
838
839 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
840
841 LeaveFunction(10);
842 return NF_STOLEN;
843
844 tx_error_icmp:
845 dst_link_failure(skb);
846 tx_error:
847 kfree_skb(skb);
848 LeaveFunction(10);
849 return NF_STOLEN;
850}
851
852#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6 counterpart of ip_vs_dr_xmit(): route to the real server and
 * send the packet unmodified; oversize packets get ICMPV6_PKT_TOOBIG.
 * Always returns NF_STOLEN.
 */
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	int    mtu;

	EnterFunction(10);

	rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->dst);
		return NF_STOLEN;
	}

	/* drop old route; rt's reference is consumed by skb_dst_set */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, cp);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
904#endif
905
906
907/*
908 * ICMP packet transmitter
909 * called by the ip_vs_in_icmp
910 */
911int
912ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
913 struct ip_vs_protocol *pp, int offset)
914{
915 struct rtable *rt; /* Route to the other host */
916 int mtu;
917 int rc;
918
919 EnterFunction(10);
920
921 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
922 forwarded directly here, because there is no need to
923 translate address/port back */
924 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
925 if (cp->packet_xmit)
926 rc = cp->packet_xmit(skb, cp, pp);
927 else
928 rc = NF_ACCEPT;
929 /* do not touch skb anymore */
930 atomic_inc(&cp->in_pkts);
931 goto out;
932 }
933
934 /*
935 * mangle and send the packet here (only for VS/NAT)
936 */
937
938 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos))))
939 goto tx_error_icmp;
940
941 /* MTU checking */
942 mtu = dst_mtu(&rt->dst);
943 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
944 ip_rt_put(rt);
945 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
946 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
947 goto tx_error;
948 }
949
950 /* copy-on-write the packet before mangling it */
951 if (!skb_make_writable(skb, offset))
952 goto tx_error_put;
953
954 if (skb_cow(skb, rt->dst.dev->hard_header_len))
955 goto tx_error_put;
956
957 /* drop the old route when skb is not shared */
958 skb_dst_drop(skb);
959 skb_dst_set(skb, &rt->dst);
960
961 ip_vs_nat_icmp(skb, pp, cp, 0);
962
963 /* Another hack: avoid icmp_send in ip_fragment */
964 skb->local_df = 1;
965
966 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
967
968 rc = NF_STOLEN;
969 goto out;
970
971 tx_error_icmp:
972 dst_link_failure(skb);
973 tx_error:
974 dev_kfree_skb(skb);
975 rc = NF_STOLEN;
976 out:
977 LeaveFunction(10);
978 return rc;
979 tx_error_put:
980 ip_rt_put(rt);
981 goto tx_error;
982}
983
984#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6 counterpart of ip_vs_icmp_xmit(): for non-NAT methods the
 * related ICMPv6 packet is delegated to the connection's packet_xmit
 * (or accepted); for VS/NAT the embedded header is mangled via
 * ip_vs_nat_icmp_v6() before sending.  @offset is how many bytes from
 * the start must be writable for mangling.
 */
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp, int offset)
{
	struct rt6_info	*rt;	/* Route to the other host */
	int mtu;
	int rc;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu) {
		dst_release(&rt->dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop the old route when skb is not shared */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, cp);

	rc = NF_STOLEN;
	goto out;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	/* NOTE(review): dev_kfree_skb here vs kfree_skb in the other
	 * transmitters — confirm this difference is intentional */
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
out:
	LeaveFunction(10);
	return rc;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
1058#endif