]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/netfilter/ipvs/ip_vs_xmit.c
ipvs: fix CHECKSUM_PARTIAL for TUN method
[net-next-2.6.git] / net / netfilter / ipvs / ip_vs_xmit.c
1 /*
2  * ip_vs_xmit.c: various packet transmitters for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #define KMSG_COMPONENT "IPVS"
17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
19 #include <linux/kernel.h>
20 #include <linux/slab.h>
21 #include <linux/tcp.h>                  /* for tcphdr */
22 #include <net/ip.h>
23 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
24 #include <net/udp.h>
25 #include <net/icmp.h>                   /* for icmp_send */
26 #include <net/route.h>                  /* for ip_route_output */
27 #include <net/ipv6.h>
28 #include <net/ip6_route.h>
29 #include <net/addrconf.h>
30 #include <linux/icmpv6.h>
31 #include <linux/netfilter.h>
32 #include <linux/netfilter_ipv4.h>
33
34 #include <net/ip_vs.h>
35
36
37 /*
38  *      Destination cache to speed up outgoing route lookup
39  */
40 static inline void
41 __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
42                 u32 dst_cookie)
43 {
44         struct dst_entry *old_dst;
45
46         old_dst = dest->dst_cache;
47         dest->dst_cache = dst;
48         dest->dst_rtos = rtos;
49         dest->dst_cookie = dst_cookie;
50         dst_release(old_dst);
51 }
52
53 static inline struct dst_entry *
54 __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
55 {
56         struct dst_entry *dst = dest->dst_cache;
57
58         if (!dst)
59                 return NULL;
60         if ((dst->obsolete || rtos != dest->dst_rtos) &&
61             dst->ops->check(dst, dest->dst_cookie) == NULL) {
62                 dest->dst_cache = NULL;
63                 dst_release(dst);
64                 return NULL;
65         }
66         dst_hold(dst);
67         return dst;
68 }
69
70 static struct rtable *
71 __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos)
72 {
73         struct net *net = dev_net(skb->dev);
74         struct rtable *rt;                      /* Route to the other host */
75         struct ip_vs_dest *dest = cp->dest;
76
77         if (dest) {
78                 spin_lock(&dest->dst_lock);
79                 if (!(rt = (struct rtable *)
80                       __ip_vs_dst_check(dest, rtos))) {
81                         struct flowi fl = {
82                                 .oif = 0,
83                                 .nl_u = {
84                                         .ip4_u = {
85                                                 .daddr = dest->addr.ip,
86                                                 .saddr = 0,
87                                                 .tos = rtos, } },
88                         };
89
90                         if (ip_route_output_key(net, &rt, &fl)) {
91                                 spin_unlock(&dest->dst_lock);
92                                 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
93                                              &dest->addr.ip);
94                                 return NULL;
95                         }
96                         __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
97                         IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
98                                   &dest->addr.ip,
99                                   atomic_read(&rt->dst.__refcnt), rtos);
100                 }
101                 spin_unlock(&dest->dst_lock);
102         } else {
103                 struct flowi fl = {
104                         .oif = 0,
105                         .nl_u = {
106                                 .ip4_u = {
107                                         .daddr = cp->daddr.ip,
108                                         .saddr = 0,
109                                         .tos = rtos, } },
110                 };
111
112                 if (ip_route_output_key(net, &rt, &fl)) {
113                         IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
114                                      &cp->daddr.ip);
115                         return NULL;
116                 }
117         }
118
119         return rt;
120 }
121
122 #ifdef CONFIG_IP_VS_IPV6
123
124 static struct dst_entry *
125 __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
126                         struct in6_addr *ret_saddr, int do_xfrm)
127 {
128         struct dst_entry *dst;
129         struct flowi fl = {
130                 .oif = 0,
131                 .nl_u = {
132                         .ip6_u = {
133                                 .daddr = *daddr,
134                         },
135                 },
136         };
137
138         dst = ip6_route_output(net, NULL, &fl);
139         if (dst->error)
140                 goto out_err;
141         if (!ret_saddr)
142                 return dst;
143         if (ipv6_addr_any(&fl.fl6_src) &&
144             ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
145                                &fl.fl6_dst, 0, &fl.fl6_src) < 0)
146                 goto out_err;
147         if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0)
148                 goto out_err;
149         ipv6_addr_copy(ret_saddr, &fl.fl6_src);
150         return dst;
151
152 out_err:
153         dst_release(dst);
154         IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
155         return NULL;
156 }
157
158 static struct rt6_info *
159 __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
160                       struct in6_addr *ret_saddr, int do_xfrm)
161 {
162         struct net *net = dev_net(skb->dev);
163         struct rt6_info *rt;                    /* Route to the other host */
164         struct ip_vs_dest *dest = cp->dest;
165         struct dst_entry *dst;
166
167         if (dest) {
168                 spin_lock(&dest->dst_lock);
169                 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
170                 if (!rt) {
171                         u32 cookie;
172
173                         dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
174                                                       &dest->dst_saddr,
175                                                       do_xfrm);
176                         if (!dst) {
177                                 spin_unlock(&dest->dst_lock);
178                                 return NULL;
179                         }
180                         rt = (struct rt6_info *) dst;
181                         cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
182                         __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
183                         IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
184                                   &dest->addr.in6, &dest->dst_saddr,
185                                   atomic_read(&rt->dst.__refcnt));
186                 }
187                 if (ret_saddr)
188                         ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
189                 spin_unlock(&dest->dst_lock);
190         } else {
191                 dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr,
192                                               do_xfrm);
193                 if (!dst)
194                         return NULL;
195                 rt = (struct rt6_info *) dst;
196         }
197
198         return rt;
199 }
200 #endif
201
202
203 /*
204  *      Release dest->dst_cache before a dest is removed
205  */
206 void
207 ip_vs_dst_reset(struct ip_vs_dest *dest)
208 {
209         struct dst_entry *old_dst;
210
211         old_dst = dest->dst_cache;
212         dest->dst_cache = NULL;
213         dst_release(old_dst);
214 }
215
/*
 * Prepare a tunnelled skb for sending: mark it as IPVS property and, for
 * connections flagged IP_VS_CONN_F_NFCT, ask ip_vs_confirm_conntrack() for
 * a verdict.  On NF_ACCEPT the skb goes through nf_reset() and
 * skb_forward_csum().  Evaluates to the verdict; the actual send is left
 * to the caller.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)					\
({									\
	int __verdict;							\
									\
	(skb)->ipvs_property = 1;					\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))			\
		__verdict = NF_ACCEPT;					\
	else								\
		__verdict = ip_vs_confirm_conntrack(skb, cp);		\
	if (__verdict == NF_ACCEPT) {					\
		nf_reset(skb);						\
		skb_forward_csum(skb);					\
	}								\
	__verdict;							\
})
229
/*
 * Send a NATed skb through the LOCAL_OUT hook of family @pf.  Connections
 * flagged IP_VS_CONN_F_NFCT get their conntrack entry updated, all others
 * are marked as not tracked.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp)					\
do {									\
	(skb)->ipvs_property = 1;					\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))			\
		ip_vs_update_conntrack(skb, cp, 1);			\
	else								\
		ip_vs_notrack(skb);					\
	skb_forward_csum(skb);						\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,			\
		skb_dst(skb)->dev, dst_output);				\
} while (0)
241
/*
 * Send an skb unchanged through the LOCAL_OUT hook of family @pf.  Unless
 * the connection is flagged IP_VS_CONN_F_NFCT the skb is marked as not
 * tracked first.
 */
#define IP_VS_XMIT(pf, skb, cp)						\
do {									\
	(skb)->ipvs_property = 1;					\
	if (likely(((cp)->flags & IP_VS_CONN_F_NFCT) == 0))		\
		ip_vs_notrack(skb);					\
	skb_forward_csum(skb);						\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,			\
		skb_dst(skb)->dev, dst_output);				\
} while (0)
251
252
253 /*
254  *      NULL transmitter (do nothing except return NF_ACCEPT)
255  */
256 int
257 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
258                 struct ip_vs_protocol *pp)
259 {
260         /* we do not touch skb and do not need pskb ptr */
261         return NF_ACCEPT;
262 }
263
264
265 /*
266  *      Bypass transmitter
267  *      Let packets bypass the destination when the destination is not
268  *      available, it may be only used in transparent cache cluster.
269  */
270 int
271 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
272                   struct ip_vs_protocol *pp)
273 {
274         struct net *net = dev_net(skb->dev);
275         struct rtable *rt;                      /* Route to the other host */
276         struct iphdr  *iph = ip_hdr(skb);
277         u8     tos = iph->tos;
278         int    mtu;
279         struct flowi fl = {
280                 .oif = 0,
281                 .nl_u = {
282                         .ip4_u = {
283                                 .daddr = iph->daddr,
284                                 .saddr = 0,
285                                 .tos = RT_TOS(tos), } },
286         };
287
288         EnterFunction(10);
289
290         if (ip_route_output_key(net, &rt, &fl)) {
291                 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n",
292                              __func__, &iph->daddr);
293                 goto tx_error_icmp;
294         }
295
296         /* MTU checking */
297         mtu = dst_mtu(&rt->dst);
298         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
299                 ip_rt_put(rt);
300                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
301                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
302                 goto tx_error;
303         }
304
305         /*
306          * Call ip_send_check because we are not sure it is called
307          * after ip_defrag. Is copy-on-write needed?
308          */
309         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
310                 ip_rt_put(rt);
311                 return NF_STOLEN;
312         }
313         ip_send_check(ip_hdr(skb));
314
315         /* drop old route */
316         skb_dst_drop(skb);
317         skb_dst_set(skb, &rt->dst);
318
319         /* Another hack: avoid icmp_send in ip_fragment */
320         skb->local_df = 1;
321
322         IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
323
324         LeaveFunction(10);
325         return NF_STOLEN;
326
327  tx_error_icmp:
328         dst_link_failure(skb);
329  tx_error:
330         kfree_skb(skb);
331         LeaveFunction(10);
332         return NF_STOLEN;
333 }
334
335 #ifdef CONFIG_IP_VS_IPV6
336 int
337 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
338                      struct ip_vs_protocol *pp)
339 {
340         struct net *net = dev_net(skb->dev);
341         struct dst_entry *dst;
342         struct rt6_info *rt;                    /* Route to the other host */
343         struct ipv6hdr  *iph = ipv6_hdr(skb);
344         int    mtu;
345
346         EnterFunction(10);
347
348         dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0);
349         if (!dst)
350                 goto tx_error_icmp;
351         rt = (struct rt6_info *) dst;
352
353         /* MTU checking */
354         mtu = dst_mtu(&rt->dst);
355         if (skb->len > mtu) {
356                 dst_release(&rt->dst);
357                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
358                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
359                 goto tx_error;
360         }
361
362         /*
363          * Call ip_send_check because we are not sure it is called
364          * after ip_defrag. Is copy-on-write needed?
365          */
366         skb = skb_share_check(skb, GFP_ATOMIC);
367         if (unlikely(skb == NULL)) {
368                 dst_release(&rt->dst);
369                 return NF_STOLEN;
370         }
371
372         /* drop old route */
373         skb_dst_drop(skb);
374         skb_dst_set(skb, &rt->dst);
375
376         /* Another hack: avoid icmp_send in ip_fragment */
377         skb->local_df = 1;
378
379         IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
380
381         LeaveFunction(10);
382         return NF_STOLEN;
383
384  tx_error_icmp:
385         dst_link_failure(skb);
386  tx_error:
387         kfree_skb(skb);
388         LeaveFunction(10);
389         return NF_STOLEN;
390 }
391 #endif
392
393 /*
394  *      NAT transmitter (only for outside-to-inside nat forwarding)
395  *      Not used for related ICMP
396  */
397 int
398 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
399                struct ip_vs_protocol *pp)
400 {
401         struct rtable *rt;              /* Route to the other host */
402         int mtu;
403         struct iphdr *iph = ip_hdr(skb);
404
405         EnterFunction(10);
406
407         /* check if it is a connection of no-client-port */
408         if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
409                 __be16 _pt, *p;
410                 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
411                 if (p == NULL)
412                         goto tx_error;
413                 ip_vs_conn_fill_cport(cp, *p);
414                 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
415         }
416
417         if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
418                 goto tx_error_icmp;
419
420         /* MTU checking */
421         mtu = dst_mtu(&rt->dst);
422         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
423                 ip_rt_put(rt);
424                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
425                 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
426                 goto tx_error;
427         }
428
429         /* copy-on-write the packet before mangling it */
430         if (!skb_make_writable(skb, sizeof(struct iphdr)))
431                 goto tx_error_put;
432
433         if (skb_cow(skb, rt->dst.dev->hard_header_len))
434                 goto tx_error_put;
435
436         /* drop old route */
437         skb_dst_drop(skb);
438         skb_dst_set(skb, &rt->dst);
439
440         /* mangle the packet */
441         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
442                 goto tx_error;
443         ip_hdr(skb)->daddr = cp->daddr.ip;
444         ip_send_check(ip_hdr(skb));
445
446         IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
447
448         /* FIXME: when application helper enlarges the packet and the length
449            is larger than the MTU of outgoing device, there will be still
450            MTU problem. */
451
452         /* Another hack: avoid icmp_send in ip_fragment */
453         skb->local_df = 1;
454
455         IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp);
456
457         LeaveFunction(10);
458         return NF_STOLEN;
459
460   tx_error_icmp:
461         dst_link_failure(skb);
462   tx_error:
463         kfree_skb(skb);
464         LeaveFunction(10);
465         return NF_STOLEN;
466   tx_error_put:
467         ip_rt_put(rt);
468         goto tx_error;
469 }
470
471 #ifdef CONFIG_IP_VS_IPV6
472 int
473 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
474                   struct ip_vs_protocol *pp)
475 {
476         struct rt6_info *rt;            /* Route to the other host */
477         int mtu;
478
479         EnterFunction(10);
480
481         /* check if it is a connection of no-client-port */
482         if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
483                 __be16 _pt, *p;
484                 p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
485                                        sizeof(_pt), &_pt);
486                 if (p == NULL)
487                         goto tx_error;
488                 ip_vs_conn_fill_cport(cp, *p);
489                 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
490         }
491
492         rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
493         if (!rt)
494                 goto tx_error_icmp;
495
496         /* MTU checking */
497         mtu = dst_mtu(&rt->dst);
498         if (skb->len > mtu) {
499                 dst_release(&rt->dst);
500                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
501                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
502                                  "ip_vs_nat_xmit_v6(): frag needed for");
503                 goto tx_error;
504         }
505
506         /* copy-on-write the packet before mangling it */
507         if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
508                 goto tx_error_put;
509
510         if (skb_cow(skb, rt->dst.dev->hard_header_len))
511                 goto tx_error_put;
512
513         /* drop old route */
514         skb_dst_drop(skb);
515         skb_dst_set(skb, &rt->dst);
516
517         /* mangle the packet */
518         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
519                 goto tx_error;
520         ipv6_hdr(skb)->daddr = cp->daddr.in6;
521
522         IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
523
524         /* FIXME: when application helper enlarges the packet and the length
525            is larger than the MTU of outgoing device, there will be still
526            MTU problem. */
527
528         /* Another hack: avoid icmp_send in ip_fragment */
529         skb->local_df = 1;
530
531         IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp);
532
533         LeaveFunction(10);
534         return NF_STOLEN;
535
536 tx_error_icmp:
537         dst_link_failure(skb);
538 tx_error:
539         LeaveFunction(10);
540         kfree_skb(skb);
541         return NF_STOLEN;
542 tx_error_put:
543         dst_release(&rt->dst);
544         goto tx_error;
545 }
546 #endif
547
548
549 /*
550  *   IP Tunneling transmitter
551  *
552  *   This function encapsulates the packet in a new IP packet, its
553  *   destination will be set to cp->daddr. Most code of this function
554  *   is taken from ipip.c.
555  *
556  *   It is used in VS/TUN cluster. The load balancer selects a real
557  *   server from a cluster based on a scheduling algorithm,
558  *   encapsulates the request packet and forwards it to the selected
559  *   server. For example, all real servers are configured with
560  *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
561  *   the encapsulated packet, it will decapsulate the packet, processe
562  *   the request and return the response packets directly to the client
563  *   without passing the load balancer. This can greatly increase the
564  *   scalability of virtual server.
565  *
566  *   Used for ANY protocol
567  */
568 int
569 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
570                   struct ip_vs_protocol *pp)
571 {
572         struct rtable *rt;                      /* Route to the other host */
573         struct net_device *tdev;                /* Device to other host */
574         struct iphdr  *old_iph = ip_hdr(skb);
575         u8     tos = old_iph->tos;
576         __be16 df = old_iph->frag_off;
577         struct iphdr  *iph;                     /* Our new IP header */
578         unsigned int max_headroom;              /* The extra header space needed */
579         int    mtu;
580         int ret;
581
582         EnterFunction(10);
583
584         if (skb->protocol != htons(ETH_P_IP)) {
585                 IP_VS_DBG_RL("%s(): protocol error, "
586                              "ETH_P_IP: %d, skb protocol: %d\n",
587                              __func__, htons(ETH_P_IP), skb->protocol);
588                 goto tx_error;
589         }
590
591         if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos))))
592                 goto tx_error_icmp;
593
594         tdev = rt->dst.dev;
595
596         mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
597         if (mtu < 68) {
598                 ip_rt_put(rt);
599                 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
600                 goto tx_error;
601         }
602         if (skb_dst(skb))
603                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
604
605         df |= (old_iph->frag_off & htons(IP_DF));
606
607         if ((old_iph->frag_off & htons(IP_DF))
608             && mtu < ntohs(old_iph->tot_len)) {
609                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
610                 ip_rt_put(rt);
611                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
612                 goto tx_error;
613         }
614
615         /*
616          * Okay, now see if we can stuff it in the buffer as-is.
617          */
618         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
619
620         if (skb_headroom(skb) < max_headroom
621             || skb_cloned(skb) || skb_shared(skb)) {
622                 struct sk_buff *new_skb =
623                         skb_realloc_headroom(skb, max_headroom);
624                 if (!new_skb) {
625                         ip_rt_put(rt);
626                         kfree_skb(skb);
627                         IP_VS_ERR_RL("%s(): no memory\n", __func__);
628                         return NF_STOLEN;
629                 }
630                 kfree_skb(skb);
631                 skb = new_skb;
632                 old_iph = ip_hdr(skb);
633         }
634
635         skb->transport_header = skb->network_header;
636
637         /* fix old IP header checksum */
638         ip_send_check(old_iph);
639
640         skb_push(skb, sizeof(struct iphdr));
641         skb_reset_network_header(skb);
642         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
643
644         /* drop old route */
645         skb_dst_drop(skb);
646         skb_dst_set(skb, &rt->dst);
647
648         /*
649          *      Push down and install the IPIP header.
650          */
651         iph                     =       ip_hdr(skb);
652         iph->version            =       4;
653         iph->ihl                =       sizeof(struct iphdr)>>2;
654         iph->frag_off           =       df;
655         iph->protocol           =       IPPROTO_IPIP;
656         iph->tos                =       tos;
657         iph->daddr              =       rt->rt_dst;
658         iph->saddr              =       rt->rt_src;
659         iph->ttl                =       old_iph->ttl;
660         ip_select_ident(iph, &rt->dst, NULL);
661
662         /* Another hack: avoid icmp_send in ip_fragment */
663         skb->local_df = 1;
664
665         ret = IP_VS_XMIT_TUNNEL(skb, cp);
666         if (ret == NF_ACCEPT)
667                 ip_local_out(skb);
668         else if (ret == NF_DROP)
669                 kfree_skb(skb);
670
671         LeaveFunction(10);
672
673         return NF_STOLEN;
674
675   tx_error_icmp:
676         dst_link_failure(skb);
677   tx_error:
678         kfree_skb(skb);
679         LeaveFunction(10);
680         return NF_STOLEN;
681 }
682
683 #ifdef CONFIG_IP_VS_IPV6
684 int
685 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
686                      struct ip_vs_protocol *pp)
687 {
688         struct rt6_info *rt;            /* Route to the other host */
689         struct in6_addr saddr;          /* Source for tunnel */
690         struct net_device *tdev;        /* Device to other host */
691         struct ipv6hdr  *old_iph = ipv6_hdr(skb);
692         struct ipv6hdr  *iph;           /* Our new IP header */
693         unsigned int max_headroom;      /* The extra header space needed */
694         int    mtu;
695         int ret;
696
697         EnterFunction(10);
698
699         if (skb->protocol != htons(ETH_P_IPV6)) {
700                 IP_VS_DBG_RL("%s(): protocol error, "
701                              "ETH_P_IPV6: %d, skb protocol: %d\n",
702                              __func__, htons(ETH_P_IPV6), skb->protocol);
703                 goto tx_error;
704         }
705
706         rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1);
707         if (!rt)
708                 goto tx_error_icmp;
709
710         tdev = rt->dst.dev;
711
712         mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
713         if (mtu < IPV6_MIN_MTU) {
714                 dst_release(&rt->dst);
715                 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
716                              IPV6_MIN_MTU);
717                 goto tx_error;
718         }
719         if (skb_dst(skb))
720                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
721
722         if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
723                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
724                 dst_release(&rt->dst);
725                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
726                 goto tx_error;
727         }
728
729         /*
730          * Okay, now see if we can stuff it in the buffer as-is.
731          */
732         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
733
734         if (skb_headroom(skb) < max_headroom
735             || skb_cloned(skb) || skb_shared(skb)) {
736                 struct sk_buff *new_skb =
737                         skb_realloc_headroom(skb, max_headroom);
738                 if (!new_skb) {
739                         dst_release(&rt->dst);
740                         kfree_skb(skb);
741                         IP_VS_ERR_RL("%s(): no memory\n", __func__);
742                         return NF_STOLEN;
743                 }
744                 kfree_skb(skb);
745                 skb = new_skb;
746                 old_iph = ipv6_hdr(skb);
747         }
748
749         skb->transport_header = skb->network_header;
750
751         skb_push(skb, sizeof(struct ipv6hdr));
752         skb_reset_network_header(skb);
753         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
754
755         /* drop old route */
756         skb_dst_drop(skb);
757         skb_dst_set(skb, &rt->dst);
758
759         /*
760          *      Push down and install the IPIP header.
761          */
762         iph                     =       ipv6_hdr(skb);
763         iph->version            =       6;
764         iph->nexthdr            =       IPPROTO_IPV6;
765         iph->payload_len        =       old_iph->payload_len;
766         be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
767         iph->priority           =       old_iph->priority;
768         memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
769         ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
770         ipv6_addr_copy(&iph->saddr, &saddr);
771         iph->hop_limit          =       old_iph->hop_limit;
772
773         /* Another hack: avoid icmp_send in ip_fragment */
774         skb->local_df = 1;
775
776         ret = IP_VS_XMIT_TUNNEL(skb, cp);
777         if (ret == NF_ACCEPT)
778                 ip6_local_out(skb);
779         else if (ret == NF_DROP)
780                 kfree_skb(skb);
781
782         LeaveFunction(10);
783
784         return NF_STOLEN;
785
786 tx_error_icmp:
787         dst_link_failure(skb);
788 tx_error:
789         kfree_skb(skb);
790         LeaveFunction(10);
791         return NF_STOLEN;
792 }
793 #endif
794
795
796 /*
797  *      Direct Routing transmitter
798  *      Used for ANY protocol
799  */
800 int
801 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
802               struct ip_vs_protocol *pp)
803 {
804         struct rtable *rt;                      /* Route to the other host */
805         struct iphdr  *iph = ip_hdr(skb);
806         int    mtu;
807
808         EnterFunction(10);
809
810         if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
811                 goto tx_error_icmp;
812
813         /* MTU checking */
814         mtu = dst_mtu(&rt->dst);
815         if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
816                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
817                 ip_rt_put(rt);
818                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
819                 goto tx_error;
820         }
821
822         /*
823          * Call ip_send_check because we are not sure it is called
824          * after ip_defrag. Is copy-on-write needed?
825          */
826         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
827                 ip_rt_put(rt);
828                 return NF_STOLEN;
829         }
830         ip_send_check(ip_hdr(skb));
831
832         /* drop old route */
833         skb_dst_drop(skb);
834         skb_dst_set(skb, &rt->dst);
835
836         /* Another hack: avoid icmp_send in ip_fragment */
837         skb->local_df = 1;
838
839         IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
840
841         LeaveFunction(10);
842         return NF_STOLEN;
843
844   tx_error_icmp:
845         dst_link_failure(skb);
846   tx_error:
847         kfree_skb(skb);
848         LeaveFunction(10);
849         return NF_STOLEN;
850 }
851
852 #ifdef CONFIG_IP_VS_IPV6
853 int
854 ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
855                  struct ip_vs_protocol *pp)
856 {
857         struct rt6_info *rt;                    /* Route to the other host */
858         int    mtu;
859
860         EnterFunction(10);
861
862         rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
863         if (!rt)
864                 goto tx_error_icmp;
865
866         /* MTU checking */
867         mtu = dst_mtu(&rt->dst);
868         if (skb->len > mtu) {
869                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
870                 dst_release(&rt->dst);
871                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
872                 goto tx_error;
873         }
874
875         /*
876          * Call ip_send_check because we are not sure it is called
877          * after ip_defrag. Is copy-on-write needed?
878          */
879         skb = skb_share_check(skb, GFP_ATOMIC);
880         if (unlikely(skb == NULL)) {
881                 dst_release(&rt->dst);
882                 return NF_STOLEN;
883         }
884
885         /* drop old route */
886         skb_dst_drop(skb);
887         skb_dst_set(skb, &rt->dst);
888
889         /* Another hack: avoid icmp_send in ip_fragment */
890         skb->local_df = 1;
891
892         IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
893
894         LeaveFunction(10);
895         return NF_STOLEN;
896
897 tx_error_icmp:
898         dst_link_failure(skb);
899 tx_error:
900         kfree_skb(skb);
901         LeaveFunction(10);
902         return NF_STOLEN;
903 }
904 #endif
905
906
907 /*
908  *      ICMP packet transmitter
909  *      called by the ip_vs_in_icmp
910  */
911 int
912 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
913                 struct ip_vs_protocol *pp, int offset)
914 {
915         struct rtable   *rt;    /* Route to the other host */
916         int mtu;
917         int rc;
918
919         EnterFunction(10);
920
921         /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
922            forwarded directly here, because there is no need to
923            translate address/port back */
924         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
925                 if (cp->packet_xmit)
926                         rc = cp->packet_xmit(skb, cp, pp);
927                 else
928                         rc = NF_ACCEPT;
929                 /* do not touch skb anymore */
930                 atomic_inc(&cp->in_pkts);
931                 goto out;
932         }
933
934         /*
935          * mangle and send the packet here (only for VS/NAT)
936          */
937
938         if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos))))
939                 goto tx_error_icmp;
940
941         /* MTU checking */
942         mtu = dst_mtu(&rt->dst);
943         if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
944                 ip_rt_put(rt);
945                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
946                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
947                 goto tx_error;
948         }
949
950         /* copy-on-write the packet before mangling it */
951         if (!skb_make_writable(skb, offset))
952                 goto tx_error_put;
953
954         if (skb_cow(skb, rt->dst.dev->hard_header_len))
955                 goto tx_error_put;
956
957         /* drop the old route when skb is not shared */
958         skb_dst_drop(skb);
959         skb_dst_set(skb, &rt->dst);
960
961         ip_vs_nat_icmp(skb, pp, cp, 0);
962
963         /* Another hack: avoid icmp_send in ip_fragment */
964         skb->local_df = 1;
965
966         IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
967
968         rc = NF_STOLEN;
969         goto out;
970
971   tx_error_icmp:
972         dst_link_failure(skb);
973   tx_error:
974         dev_kfree_skb(skb);
975         rc = NF_STOLEN;
976   out:
977         LeaveFunction(10);
978         return rc;
979   tx_error_put:
980         ip_rt_put(rt);
981         goto tx_error;
982 }
983
984 #ifdef CONFIG_IP_VS_IPV6
985 int
986 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
987                 struct ip_vs_protocol *pp, int offset)
988 {
989         struct rt6_info *rt;    /* Route to the other host */
990         int mtu;
991         int rc;
992
993         EnterFunction(10);
994
995         /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
996            forwarded directly here, because there is no need to
997            translate address/port back */
998         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
999                 if (cp->packet_xmit)
1000                         rc = cp->packet_xmit(skb, cp, pp);
1001                 else
1002                         rc = NF_ACCEPT;
1003                 /* do not touch skb anymore */
1004                 atomic_inc(&cp->in_pkts);
1005                 goto out;
1006         }
1007
1008         /*
1009          * mangle and send the packet here (only for VS/NAT)
1010          */
1011
1012         rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
1013         if (!rt)
1014                 goto tx_error_icmp;
1015
1016         /* MTU checking */
1017         mtu = dst_mtu(&rt->dst);
1018         if (skb->len > mtu) {
1019                 dst_release(&rt->dst);
1020                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1021                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1022                 goto tx_error;
1023         }
1024
1025         /* copy-on-write the packet before mangling it */
1026         if (!skb_make_writable(skb, offset))
1027                 goto tx_error_put;
1028
1029         if (skb_cow(skb, rt->dst.dev->hard_header_len))
1030                 goto tx_error_put;
1031
1032         /* drop the old route when skb is not shared */
1033         skb_dst_drop(skb);
1034         skb_dst_set(skb, &rt->dst);
1035
1036         ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1037
1038         /* Another hack: avoid icmp_send in ip_fragment */
1039         skb->local_df = 1;
1040
1041         IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
1042
1043         rc = NF_STOLEN;
1044         goto out;
1045
1046 tx_error_icmp:
1047         dst_link_failure(skb);
1048 tx_error:
1049         dev_kfree_skb(skb);
1050         rc = NF_STOLEN;
1051 out:
1052         LeaveFunction(10);
1053         return rc;
1054 tx_error_put:
1055         dst_release(&rt->dst);
1056         goto tx_error;
1057 }
1058 #endif