]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/netfilter/ipvs/ip_vs_xmit.c
netfilter: ctnetlink: add expectation deletion events
[net-next-2.6.git] / net / netfilter / ipvs / ip_vs_xmit.c
CommitLineData
1da177e4
LT
1/*
2 * ip_vs_xmit.c: various packet transmitters for IPVS
3 *
1da177e4
LT
4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Changes:
13 *
14 */
15
9aada7ac
HE
16#define KMSG_COMPONENT "IPVS"
17#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
1da177e4 19#include <linux/kernel.h>
5a0e3ad6 20#include <linux/slab.h>
1da177e4 21#include <linux/tcp.h> /* for tcphdr */
c439cb2e 22#include <net/ip.h>
1da177e4
LT
23#include <net/tcp.h> /* for csum_tcpudp_magic */
24#include <net/udp.h>
25#include <net/icmp.h> /* for icmp_send */
26#include <net/route.h> /* for ip_route_output */
38cdcc9a
JV
27#include <net/ipv6.h>
28#include <net/ip6_route.h>
29#include <linux/icmpv6.h>
1da177e4
LT
30#include <linux/netfilter.h>
31#include <linux/netfilter_ipv4.h>
32
33#include <net/ip_vs.h>
34
35
36/*
37 * Destination cache to speed up outgoing route lookup
38 */
39static inline void
40__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
41{
42 struct dst_entry *old_dst;
43
44 old_dst = dest->dst_cache;
45 dest->dst_cache = dst;
46 dest->dst_rtos = rtos;
47 dst_release(old_dst);
48}
49
50static inline struct dst_entry *
51__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
52{
53 struct dst_entry *dst = dest->dst_cache;
54
55 if (!dst)
56 return NULL;
38cdcc9a
JV
57 if ((dst->obsolete
58 || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
1da177e4
LT
59 dst->ops->check(dst, cookie) == NULL) {
60 dest->dst_cache = NULL;
61 dst_release(dst);
62 return NULL;
63 }
64 dst_hold(dst);
65 return dst;
66}
67
ad1b30b1 68static struct rtable *
1da177e4
LT
69__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
70{
71 struct rtable *rt; /* Route to the other host */
72 struct ip_vs_dest *dest = cp->dest;
73
74 if (dest) {
75 spin_lock(&dest->dst_lock);
76 if (!(rt = (struct rtable *)
77 __ip_vs_dst_check(dest, rtos, 0))) {
78 struct flowi fl = {
79 .oif = 0,
80 .nl_u = {
81 .ip4_u = {
e7ade46a 82 .daddr = dest->addr.ip,
1da177e4
LT
83 .saddr = 0,
84 .tos = rtos, } },
85 };
86
f206351a 87 if (ip_route_output_key(&init_net, &rt, &fl)) {
1da177e4 88 spin_unlock(&dest->dst_lock);
14d5e834
HH
89 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
90 &dest->addr.ip);
1da177e4
LT
91 return NULL;
92 }
d8d1f30b 93 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst));
14d5e834
HH
94 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
95 &dest->addr.ip,
d8d1f30b 96 atomic_read(&rt->dst.__refcnt), rtos);
1da177e4
LT
97 }
98 spin_unlock(&dest->dst_lock);
99 } else {
100 struct flowi fl = {
101 .oif = 0,
102 .nl_u = {
103 .ip4_u = {
e7ade46a 104 .daddr = cp->daddr.ip,
1da177e4
LT
105 .saddr = 0,
106 .tos = rtos, } },
107 };
108
f206351a 109 if (ip_route_output_key(&init_net, &rt, &fl)) {
14d5e834
HH
110 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
111 &cp->daddr.ip);
1da177e4
LT
112 return NULL;
113 }
114 }
115
116 return rt;
117}
118
38cdcc9a
JV
119#ifdef CONFIG_IP_VS_IPV6
120static struct rt6_info *
121__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
122{
123 struct rt6_info *rt; /* Route to the other host */
124 struct ip_vs_dest *dest = cp->dest;
125
126 if (dest) {
127 spin_lock(&dest->dst_lock);
128 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
129 if (!rt) {
130 struct flowi fl = {
131 .oif = 0,
132 .nl_u = {
133 .ip6_u = {
134 .daddr = dest->addr.in6,
135 .saddr = {
136 .s6_addr32 =
137 { 0, 0, 0, 0 },
138 },
139 },
140 },
141 };
142
143 rt = (struct rt6_info *)ip6_route_output(&init_net,
144 NULL, &fl);
145 if (!rt) {
146 spin_unlock(&dest->dst_lock);
5b095d98 147 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
38ff4fa4 148 &dest->addr.in6);
38cdcc9a
JV
149 return NULL;
150 }
d8d1f30b 151 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst));
5b095d98 152 IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
38ff4fa4 153 &dest->addr.in6,
d8d1f30b 154 atomic_read(&rt->dst.__refcnt));
38cdcc9a
JV
155 }
156 spin_unlock(&dest->dst_lock);
157 } else {
158 struct flowi fl = {
159 .oif = 0,
160 .nl_u = {
161 .ip6_u = {
162 .daddr = cp->daddr.in6,
163 .saddr = {
164 .s6_addr32 = { 0, 0, 0, 0 },
165 },
166 },
167 },
168 };
169
170 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
171 if (!rt) {
5b095d98 172 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
38ff4fa4 173 &cp->daddr.in6);
38cdcc9a
JV
174 return NULL;
175 }
176 }
177
178 return rt;
179}
180#endif
181
1da177e4
LT
182
183/*
184 * Release dest->dst_cache before a dest is removed
185 */
186void
187ip_vs_dst_reset(struct ip_vs_dest *dest)
188{
189 struct dst_entry *old_dst;
190
191 old_dst = dest->dst_cache;
192 dest->dst_cache = NULL;
193 dst_release(old_dst);
194}
195
f4bc17cd
JA
/*
 * Final step before handing a tunnelled skb to the local output path.
 * Evaluates to a netfilter verdict: NF_ACCEPT, unless the connection
 * has IP_VS_CONN_F_NFCT set, in which case the verdict is whatever
 * ip_vs_confirm_conntrack() returns.  Only on NF_ACCEPT is nf_reset()
 * applied and the checksum state set to CHECKSUM_NONE.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __verdict = NF_ACCEPT;				\
								\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__verdict = ip_vs_confirm_conntrack(skb, cp);	\
	if (__verdict == NF_ACCEPT) {				\
		nf_reset(skb);					\
		(skb)->ip_summed = CHECKSUM_NONE;		\
	}							\
	__verdict;						\
})
208
/*
 * Send a NAT-ed skb through the NF_INET_LOCAL_OUT hook of family @pf.
 * Connections with IP_VS_CONN_F_NFCT get their conntrack entry updated;
 * all others have the skb marked with ipvs_property instead.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp)				\
do {								\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		ip_vs_update_conntrack(skb, cp, 1);		\
	else							\
		(skb)->ipvs_property = 1;			\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
219
/*
 * Send an skb through the NF_INET_LOCAL_OUT hook of family @pf.  The
 * skb is marked with ipvs_property unless the connection has
 * IP_VS_CONN_F_NFCT set.
 */
#define IP_VS_XMIT(pf, skb, cp)					\
do {								\
	if (likely(((cp)->flags & IP_VS_CONN_F_NFCT) == 0))	\
		(skb)->ipvs_property = 1;			\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
228
229
230/*
231 * NULL transmitter (do nothing except return NF_ACCEPT)
232 */
233int
234ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
235 struct ip_vs_protocol *pp)
236{
237 /* we do not touch skb and do not need pskb ptr */
238 return NF_ACCEPT;
239}
240
241
242/*
243 * Bypass transmitter
244 * Let packets bypass the destination when the destination is not
245 * available, it may be only used in transparent cache cluster.
246 */
247int
248ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
249 struct ip_vs_protocol *pp)
250{
251 struct rtable *rt; /* Route to the other host */
eddc9ec5 252 struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
253 u8 tos = iph->tos;
254 int mtu;
255 struct flowi fl = {
256 .oif = 0,
257 .nl_u = {
258 .ip4_u = {
259 .daddr = iph->daddr,
260 .saddr = 0,
261 .tos = RT_TOS(tos), } },
262 };
263
264 EnterFunction(10);
265
f206351a 266 if (ip_route_output_key(&init_net, &rt, &fl)) {
1e3e238e
HE
267 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n",
268 __func__, &iph->daddr);
1da177e4
LT
269 goto tx_error_icmp;
270 }
271
272 /* MTU checking */
d8d1f30b 273 mtu = dst_mtu(&rt->dst);
4412ec49 274 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
1da177e4
LT
275 ip_rt_put(rt);
276 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1e3e238e 277 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1da177e4
LT
278 goto tx_error;
279 }
280
281 /*
282 * Call ip_send_check because we are not sure it is called
283 * after ip_defrag. Is copy-on-write needed?
284 */
285 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
286 ip_rt_put(rt);
287 return NF_STOLEN;
288 }
eddc9ec5 289 ip_send_check(ip_hdr(skb));
1da177e4
LT
290
291 /* drop old route */
adf30907 292 skb_dst_drop(skb);
d8d1f30b 293 skb_dst_set(skb, &rt->dst);
1da177e4
LT
294
295 /* Another hack: avoid icmp_send in ip_fragment */
296 skb->local_df = 1;
297
f4bc17cd 298 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
1da177e4
LT
299
300 LeaveFunction(10);
301 return NF_STOLEN;
302
303 tx_error_icmp:
304 dst_link_failure(skb);
305 tx_error:
306 kfree_skb(skb);
307 LeaveFunction(10);
308 return NF_STOLEN;
309}
310
b3cdd2a7
JV
311#ifdef CONFIG_IP_VS_IPV6
312int
313ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
314 struct ip_vs_protocol *pp)
315{
316 struct rt6_info *rt; /* Route to the other host */
317 struct ipv6hdr *iph = ipv6_hdr(skb);
318 int mtu;
319 struct flowi fl = {
320 .oif = 0,
321 .nl_u = {
322 .ip6_u = {
323 .daddr = iph->daddr,
324 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
325 };
326
327 EnterFunction(10);
328
329 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
330 if (!rt) {
1e3e238e
HE
331 IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n",
332 __func__, &iph->daddr);
b3cdd2a7
JV
333 goto tx_error_icmp;
334 }
335
336 /* MTU checking */
d8d1f30b 337 mtu = dst_mtu(&rt->dst);
b3cdd2a7 338 if (skb->len > mtu) {
d8d1f30b 339 dst_release(&rt->dst);
3ffe533c 340 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1e3e238e 341 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
b3cdd2a7
JV
342 goto tx_error;
343 }
344
345 /*
346 * Call ip_send_check because we are not sure it is called
347 * after ip_defrag. Is copy-on-write needed?
348 */
349 skb = skb_share_check(skb, GFP_ATOMIC);
350 if (unlikely(skb == NULL)) {
d8d1f30b 351 dst_release(&rt->dst);
b3cdd2a7
JV
352 return NF_STOLEN;
353 }
354
355 /* drop old route */
adf30907 356 skb_dst_drop(skb);
d8d1f30b 357 skb_dst_set(skb, &rt->dst);
b3cdd2a7
JV
358
359 /* Another hack: avoid icmp_send in ip_fragment */
360 skb->local_df = 1;
361
f4bc17cd 362 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
b3cdd2a7
JV
363
364 LeaveFunction(10);
365 return NF_STOLEN;
366
367 tx_error_icmp:
368 dst_link_failure(skb);
369 tx_error:
370 kfree_skb(skb);
371 LeaveFunction(10);
372 return NF_STOLEN;
373}
374#endif
1da177e4
LT
375
376/*
377 * NAT transmitter (only for outside-to-inside nat forwarding)
378 * Not used for related ICMP
379 */
380int
381ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
382 struct ip_vs_protocol *pp)
383{
384 struct rtable *rt; /* Route to the other host */
385 int mtu;
eddc9ec5 386 struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
387
388 EnterFunction(10);
389
390 /* check if it is a connection of no-client-port */
391 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
014d730d 392 __be16 _pt, *p;
1da177e4
LT
393 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
394 if (p == NULL)
395 goto tx_error;
396 ip_vs_conn_fill_cport(cp, *p);
397 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
398 }
399
400 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
401 goto tx_error_icmp;
402
403 /* MTU checking */
d8d1f30b 404 mtu = dst_mtu(&rt->dst);
4412ec49 405 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
1da177e4
LT
406 ip_rt_put(rt);
407 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
408 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
409 goto tx_error;
410 }
411
412 /* copy-on-write the packet before mangling it */
af1e1cf0 413 if (!skb_make_writable(skb, sizeof(struct iphdr)))
1da177e4
LT
414 goto tx_error_put;
415
d8d1f30b 416 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1da177e4
LT
417 goto tx_error_put;
418
419 /* drop old route */
adf30907 420 skb_dst_drop(skb);
d8d1f30b 421 skb_dst_set(skb, &rt->dst);
1da177e4
LT
422
423 /* mangle the packet */
3db05fea 424 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
1da177e4 425 goto tx_error;
e7ade46a 426 ip_hdr(skb)->daddr = cp->daddr.ip;
eddc9ec5 427 ip_send_check(ip_hdr(skb));
1da177e4
LT
428
429 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
430
431 /* FIXME: when application helper enlarges the packet and the length
432 is larger than the MTU of outgoing device, there will be still
433 MTU problem. */
434
435 /* Another hack: avoid icmp_send in ip_fragment */
436 skb->local_df = 1;
437
f4bc17cd 438 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp);
1da177e4
LT
439
440 LeaveFunction(10);
441 return NF_STOLEN;
442
443 tx_error_icmp:
444 dst_link_failure(skb);
445 tx_error:
1da177e4 446 kfree_skb(skb);
f4bc17cd 447 LeaveFunction(10);
1da177e4
LT
448 return NF_STOLEN;
449 tx_error_put:
450 ip_rt_put(rt);
451 goto tx_error;
452}
453
b3cdd2a7
JV
454#ifdef CONFIG_IP_VS_IPV6
455int
456ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
457 struct ip_vs_protocol *pp)
458{
459 struct rt6_info *rt; /* Route to the other host */
460 int mtu;
461
462 EnterFunction(10);
463
464 /* check if it is a connection of no-client-port */
465 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
466 __be16 _pt, *p;
467 p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
468 sizeof(_pt), &_pt);
469 if (p == NULL)
470 goto tx_error;
471 ip_vs_conn_fill_cport(cp, *p);
472 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
473 }
474
475 rt = __ip_vs_get_out_rt_v6(cp);
476 if (!rt)
477 goto tx_error_icmp;
478
479 /* MTU checking */
d8d1f30b 480 mtu = dst_mtu(&rt->dst);
b3cdd2a7 481 if (skb->len > mtu) {
d8d1f30b 482 dst_release(&rt->dst);
3ffe533c 483 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
b3cdd2a7
JV
484 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
485 "ip_vs_nat_xmit_v6(): frag needed for");
486 goto tx_error;
487 }
488
489 /* copy-on-write the packet before mangling it */
490 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
491 goto tx_error_put;
492
d8d1f30b 493 if (skb_cow(skb, rt->dst.dev->hard_header_len))
b3cdd2a7
JV
494 goto tx_error_put;
495
496 /* drop old route */
adf30907 497 skb_dst_drop(skb);
d8d1f30b 498 skb_dst_set(skb, &rt->dst);
b3cdd2a7
JV
499
500 /* mangle the packet */
501 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
502 goto tx_error;
503 ipv6_hdr(skb)->daddr = cp->daddr.in6;
504
505 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
506
507 /* FIXME: when application helper enlarges the packet and the length
508 is larger than the MTU of outgoing device, there will be still
509 MTU problem. */
510
511 /* Another hack: avoid icmp_send in ip_fragment */
512 skb->local_df = 1;
513
f4bc17cd 514 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp);
b3cdd2a7
JV
515
516 LeaveFunction(10);
517 return NF_STOLEN;
518
519tx_error_icmp:
520 dst_link_failure(skb);
521tx_error:
522 LeaveFunction(10);
523 kfree_skb(skb);
524 return NF_STOLEN;
525tx_error_put:
d8d1f30b 526 dst_release(&rt->dst);
b3cdd2a7
JV
527 goto tx_error;
528}
529#endif
530
1da177e4
LT
531
532/*
533 * IP Tunneling transmitter
534 *
535 * This function encapsulates the packet in a new IP packet, its
536 * destination will be set to cp->daddr. Most code of this function
537 * is taken from ipip.c.
538 *
539 * It is used in VS/TUN cluster. The load balancer selects a real
540 * server from a cluster based on a scheduling algorithm,
541 * encapsulates the request packet and forwards it to the selected
542 * server. For example, all real servers are configured with
543 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
544 * the encapsulated packet, it will decapsulate the packet, process
545 * the request and return the response packets directly to the client
546 * without passing the load balancer. This can greatly increase the
547 * scalability of virtual server.
548 *
549 * Used for ANY protocol
550 */
551int
552ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
553 struct ip_vs_protocol *pp)
554{
555 struct rtable *rt; /* Route to the other host */
556 struct net_device *tdev; /* Device to other host */
eddc9ec5 557 struct iphdr *old_iph = ip_hdr(skb);
1da177e4 558 u8 tos = old_iph->tos;
76ab608d 559 __be16 df = old_iph->frag_off;
2e07fa9c 560 sk_buff_data_t old_transport_header = skb->transport_header;
1da177e4 561 struct iphdr *iph; /* Our new IP header */
c2636b4d 562 unsigned int max_headroom; /* The extra header space needed */
1da177e4 563 int mtu;
f4bc17cd 564 int ret;
1da177e4
LT
565
566 EnterFunction(10);
567
4412ec49 568 if (skb->protocol != htons(ETH_P_IP)) {
1e3e238e 569 IP_VS_DBG_RL("%s(): protocol error, "
1da177e4 570 "ETH_P_IP: %d, skb protocol: %d\n",
1e3e238e 571 __func__, htons(ETH_P_IP), skb->protocol);
1da177e4
LT
572 goto tx_error;
573 }
574
575 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
576 goto tx_error_icmp;
577
d8d1f30b 578 tdev = rt->dst.dev;
1da177e4 579
d8d1f30b 580 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
1da177e4
LT
581 if (mtu < 68) {
582 ip_rt_put(rt);
1e3e238e 583 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
1da177e4
LT
584 goto tx_error;
585 }
adf30907
ED
586 if (skb_dst(skb))
587 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
1da177e4 588
4412ec49 589 df |= (old_iph->frag_off & htons(IP_DF));
1da177e4 590
4412ec49 591 if ((old_iph->frag_off & htons(IP_DF))
1da177e4
LT
592 && mtu < ntohs(old_iph->tot_len)) {
593 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
594 ip_rt_put(rt);
1e3e238e 595 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1da177e4
LT
596 goto tx_error;
597 }
598
599 /*
600 * Okay, now see if we can stuff it in the buffer as-is.
601 */
602 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
603
604 if (skb_headroom(skb) < max_headroom
605 || skb_cloned(skb) || skb_shared(skb)) {
606 struct sk_buff *new_skb =
607 skb_realloc_headroom(skb, max_headroom);
608 if (!new_skb) {
609 ip_rt_put(rt);
610 kfree_skb(skb);
1e3e238e 611 IP_VS_ERR_RL("%s(): no memory\n", __func__);
1da177e4
LT
612 return NF_STOLEN;
613 }
614 kfree_skb(skb);
615 skb = new_skb;
eddc9ec5 616 old_iph = ip_hdr(skb);
1da177e4
LT
617 }
618
b0e380b1 619 skb->transport_header = old_transport_header;
1da177e4
LT
620
621 /* fix old IP header checksum */
622 ip_send_check(old_iph);
623
e2d1bca7
ACM
624 skb_push(skb, sizeof(struct iphdr));
625 skb_reset_network_header(skb);
1da177e4
LT
626 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
627
628 /* drop old route */
adf30907 629 skb_dst_drop(skb);
d8d1f30b 630 skb_dst_set(skb, &rt->dst);
1da177e4
LT
631
632 /*
633 * Push down and install the IPIP header.
634 */
eddc9ec5 635 iph = ip_hdr(skb);
1da177e4
LT
636 iph->version = 4;
637 iph->ihl = sizeof(struct iphdr)>>2;
638 iph->frag_off = df;
639 iph->protocol = IPPROTO_IPIP;
640 iph->tos = tos;
641 iph->daddr = rt->rt_dst;
642 iph->saddr = rt->rt_src;
643 iph->ttl = old_iph->ttl;
d8d1f30b 644 ip_select_ident(iph, &rt->dst, NULL);
1da177e4
LT
645
646 /* Another hack: avoid icmp_send in ip_fragment */
647 skb->local_df = 1;
648
f4bc17cd
JA
649 ret = IP_VS_XMIT_TUNNEL(skb, cp);
650 if (ret == NF_ACCEPT)
651 ip_local_out(skb);
652 else if (ret == NF_DROP)
653 kfree_skb(skb);
1da177e4
LT
654
655 LeaveFunction(10);
656
657 return NF_STOLEN;
658
659 tx_error_icmp:
660 dst_link_failure(skb);
661 tx_error:
662 kfree_skb(skb);
663 LeaveFunction(10);
664 return NF_STOLEN;
665}
666
b3cdd2a7
JV
667#ifdef CONFIG_IP_VS_IPV6
668int
669ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
670 struct ip_vs_protocol *pp)
671{
672 struct rt6_info *rt; /* Route to the other host */
673 struct net_device *tdev; /* Device to other host */
674 struct ipv6hdr *old_iph = ipv6_hdr(skb);
675 sk_buff_data_t old_transport_header = skb->transport_header;
676 struct ipv6hdr *iph; /* Our new IP header */
677 unsigned int max_headroom; /* The extra header space needed */
678 int mtu;
f4bc17cd 679 int ret;
b3cdd2a7
JV
680
681 EnterFunction(10);
682
683 if (skb->protocol != htons(ETH_P_IPV6)) {
1e3e238e 684 IP_VS_DBG_RL("%s(): protocol error, "
b3cdd2a7 685 "ETH_P_IPV6: %d, skb protocol: %d\n",
1e3e238e 686 __func__, htons(ETH_P_IPV6), skb->protocol);
b3cdd2a7
JV
687 goto tx_error;
688 }
689
690 rt = __ip_vs_get_out_rt_v6(cp);
691 if (!rt)
692 goto tx_error_icmp;
693
d8d1f30b 694 tdev = rt->dst.dev;
b3cdd2a7 695
d8d1f30b 696 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
b3cdd2a7
JV
697 /* TODO IPv6: do we need this check in IPv6? */
698 if (mtu < 1280) {
d8d1f30b 699 dst_release(&rt->dst);
1e3e238e 700 IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__);
b3cdd2a7
JV
701 goto tx_error;
702 }
adf30907
ED
703 if (skb_dst(skb))
704 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
b3cdd2a7
JV
705
706 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
3ffe533c 707 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
d8d1f30b 708 dst_release(&rt->dst);
1e3e238e 709 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
b3cdd2a7
JV
710 goto tx_error;
711 }
712
713 /*
714 * Okay, now see if we can stuff it in the buffer as-is.
715 */
716 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
717
718 if (skb_headroom(skb) < max_headroom
719 || skb_cloned(skb) || skb_shared(skb)) {
720 struct sk_buff *new_skb =
721 skb_realloc_headroom(skb, max_headroom);
722 if (!new_skb) {
d8d1f30b 723 dst_release(&rt->dst);
b3cdd2a7 724 kfree_skb(skb);
1e3e238e 725 IP_VS_ERR_RL("%s(): no memory\n", __func__);
b3cdd2a7
JV
726 return NF_STOLEN;
727 }
728 kfree_skb(skb);
729 skb = new_skb;
730 old_iph = ipv6_hdr(skb);
731 }
732
733 skb->transport_header = old_transport_header;
734
735 skb_push(skb, sizeof(struct ipv6hdr));
736 skb_reset_network_header(skb);
737 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
738
739 /* drop old route */
adf30907 740 skb_dst_drop(skb);
d8d1f30b 741 skb_dst_set(skb, &rt->dst);
b3cdd2a7
JV
742
743 /*
744 * Push down and install the IPIP header.
745 */
746 iph = ipv6_hdr(skb);
747 iph->version = 6;
748 iph->nexthdr = IPPROTO_IPV6;
b7b45f47
HH
749 iph->payload_len = old_iph->payload_len;
750 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
b3cdd2a7
JV
751 iph->priority = old_iph->priority;
752 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
753 iph->daddr = rt->rt6i_dst.addr;
754 iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */
755 iph->hop_limit = old_iph->hop_limit;
756
757 /* Another hack: avoid icmp_send in ip_fragment */
758 skb->local_df = 1;
759
f4bc17cd
JA
760 ret = IP_VS_XMIT_TUNNEL(skb, cp);
761 if (ret == NF_ACCEPT)
762 ip6_local_out(skb);
763 else if (ret == NF_DROP)
764 kfree_skb(skb);
b3cdd2a7
JV
765
766 LeaveFunction(10);
767
768 return NF_STOLEN;
769
770tx_error_icmp:
771 dst_link_failure(skb);
772tx_error:
773 kfree_skb(skb);
774 LeaveFunction(10);
775 return NF_STOLEN;
776}
777#endif
778
1da177e4
LT
779
780/*
781 * Direct Routing transmitter
782 * Used for ANY protocol
783 */
784int
785ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
786 struct ip_vs_protocol *pp)
787{
788 struct rtable *rt; /* Route to the other host */
eddc9ec5 789 struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
790 int mtu;
791
792 EnterFunction(10);
793
794 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
795 goto tx_error_icmp;
796
797 /* MTU checking */
d8d1f30b 798 mtu = dst_mtu(&rt->dst);
4412ec49 799 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
1da177e4
LT
800 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
801 ip_rt_put(rt);
1e3e238e 802 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1da177e4
LT
803 goto tx_error;
804 }
805
806 /*
807 * Call ip_send_check because we are not sure it is called
808 * after ip_defrag. Is copy-on-write needed?
809 */
810 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
811 ip_rt_put(rt);
812 return NF_STOLEN;
813 }
eddc9ec5 814 ip_send_check(ip_hdr(skb));
1da177e4
LT
815
816 /* drop old route */
adf30907 817 skb_dst_drop(skb);
d8d1f30b 818 skb_dst_set(skb, &rt->dst);
1da177e4
LT
819
820 /* Another hack: avoid icmp_send in ip_fragment */
821 skb->local_df = 1;
822
f4bc17cd 823 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
1da177e4
LT
824
825 LeaveFunction(10);
826 return NF_STOLEN;
827
828 tx_error_icmp:
829 dst_link_failure(skb);
830 tx_error:
831 kfree_skb(skb);
832 LeaveFunction(10);
833 return NF_STOLEN;
834}
835
b3cdd2a7
JV
836#ifdef CONFIG_IP_VS_IPV6
837int
838ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
839 struct ip_vs_protocol *pp)
840{
841 struct rt6_info *rt; /* Route to the other host */
842 int mtu;
843
844 EnterFunction(10);
845
846 rt = __ip_vs_get_out_rt_v6(cp);
847 if (!rt)
848 goto tx_error_icmp;
849
850 /* MTU checking */
d8d1f30b 851 mtu = dst_mtu(&rt->dst);
b3cdd2a7 852 if (skb->len > mtu) {
3ffe533c 853 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
d8d1f30b 854 dst_release(&rt->dst);
1e3e238e 855 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
b3cdd2a7
JV
856 goto tx_error;
857 }
858
859 /*
860 * Call ip_send_check because we are not sure it is called
861 * after ip_defrag. Is copy-on-write needed?
862 */
863 skb = skb_share_check(skb, GFP_ATOMIC);
864 if (unlikely(skb == NULL)) {
d8d1f30b 865 dst_release(&rt->dst);
b3cdd2a7
JV
866 return NF_STOLEN;
867 }
868
869 /* drop old route */
adf30907 870 skb_dst_drop(skb);
d8d1f30b 871 skb_dst_set(skb, &rt->dst);
b3cdd2a7
JV
872
873 /* Another hack: avoid icmp_send in ip_fragment */
874 skb->local_df = 1;
875
f4bc17cd 876 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
b3cdd2a7
JV
877
878 LeaveFunction(10);
879 return NF_STOLEN;
880
881tx_error_icmp:
882 dst_link_failure(skb);
883tx_error:
884 kfree_skb(skb);
885 LeaveFunction(10);
886 return NF_STOLEN;
887}
888#endif
889
1da177e4
LT
890
891/*
892 * ICMP packet transmitter
893 * called by the ip_vs_in_icmp
894 */
895int
896ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
897 struct ip_vs_protocol *pp, int offset)
898{
899 struct rtable *rt; /* Route to the other host */
900 int mtu;
901 int rc;
902
903 EnterFunction(10);
904
905 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
906 forwarded directly here, because there is no need to
907 translate address/port back */
908 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
909 if (cp->packet_xmit)
910 rc = cp->packet_xmit(skb, cp, pp);
911 else
912 rc = NF_ACCEPT;
913 /* do not touch skb anymore */
914 atomic_inc(&cp->in_pkts);
1da177e4
LT
915 goto out;
916 }
917
918 /*
919 * mangle and send the packet here (only for VS/NAT)
920 */
921
eddc9ec5 922 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
1da177e4
LT
923 goto tx_error_icmp;
924
925 /* MTU checking */
d8d1f30b 926 mtu = dst_mtu(&rt->dst);
eddc9ec5 927 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
1da177e4
LT
928 ip_rt_put(rt);
929 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1e3e238e 930 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1da177e4
LT
931 goto tx_error;
932 }
933
934 /* copy-on-write the packet before mangling it */
af1e1cf0 935 if (!skb_make_writable(skb, offset))
1da177e4
LT
936 goto tx_error_put;
937
d8d1f30b 938 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1da177e4
LT
939 goto tx_error_put;
940
941 /* drop the old route when skb is not shared */
adf30907 942 skb_dst_drop(skb);
d8d1f30b 943 skb_dst_set(skb, &rt->dst);
1da177e4
LT
944
945 ip_vs_nat_icmp(skb, pp, cp, 0);
946
947 /* Another hack: avoid icmp_send in ip_fragment */
948 skb->local_df = 1;
949
f4bc17cd 950 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
1da177e4
LT
951
952 rc = NF_STOLEN;
953 goto out;
954
955 tx_error_icmp:
956 dst_link_failure(skb);
957 tx_error:
958 dev_kfree_skb(skb);
959 rc = NF_STOLEN;
960 out:
961 LeaveFunction(10);
962 return rc;
963 tx_error_put:
964 ip_rt_put(rt);
965 goto tx_error;
966}
b3cdd2a7
JV
967
968#ifdef CONFIG_IP_VS_IPV6
969int
970ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
971 struct ip_vs_protocol *pp, int offset)
972{
973 struct rt6_info *rt; /* Route to the other host */
974 int mtu;
975 int rc;
976
977 EnterFunction(10);
978
979 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
980 forwarded directly here, because there is no need to
981 translate address/port back */
982 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
983 if (cp->packet_xmit)
984 rc = cp->packet_xmit(skb, cp, pp);
985 else
986 rc = NF_ACCEPT;
987 /* do not touch skb anymore */
988 atomic_inc(&cp->in_pkts);
989 goto out;
990 }
991
992 /*
993 * mangle and send the packet here (only for VS/NAT)
994 */
995
996 rt = __ip_vs_get_out_rt_v6(cp);
997 if (!rt)
998 goto tx_error_icmp;
999
1000 /* MTU checking */
d8d1f30b 1001 mtu = dst_mtu(&rt->dst);
b3cdd2a7 1002 if (skb->len > mtu) {
d8d1f30b 1003 dst_release(&rt->dst);
3ffe533c 1004 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1e3e238e 1005 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
b3cdd2a7
JV
1006 goto tx_error;
1007 }
1008
1009 /* copy-on-write the packet before mangling it */
1010 if (!skb_make_writable(skb, offset))
1011 goto tx_error_put;
1012
d8d1f30b 1013 if (skb_cow(skb, rt->dst.dev->hard_header_len))
b3cdd2a7
JV
1014 goto tx_error_put;
1015
1016 /* drop the old route when skb is not shared */
adf30907 1017 skb_dst_drop(skb);
d8d1f30b 1018 skb_dst_set(skb, &rt->dst);
b3cdd2a7
JV
1019
1020 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1021
1022 /* Another hack: avoid icmp_send in ip_fragment */
1023 skb->local_df = 1;
1024
f4bc17cd 1025 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
b3cdd2a7
JV
1026
1027 rc = NF_STOLEN;
1028 goto out;
1029
1030tx_error_icmp:
1031 dst_link_failure(skb);
1032tx_error:
1033 dev_kfree_skb(skb);
1034 rc = NF_STOLEN;
1035out:
1036 LeaveFunction(10);
1037 return rc;
1038tx_error_put:
d8d1f30b 1039 dst_release(&rt->dst);
b3cdd2a7
JV
1040 goto tx_error;
1041}
1042#endif