/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

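/*
 * Pick the Fragment header identification for an outgoing packet.
 * A single global counter, protected by a spinlock, is shared by all
 * flows; it wraps from 0xffffffff back to 1, so an identification of 0
 * is never handed out.
 */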
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}

static inline int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct hh_cache *hh = dst->hh;

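	/*
	 * If a link-layer header for this destination is already cached,
	 * copy it in front of the packet and hand the frame straight to
	 * the device; otherwise go through the neighbour output function,
	 * which resolves the link-layer address first.
	 */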
	if (hh) {
		int hh_alen;

		read_lock_bh(&hh->hh_lock);
		hh_alen = HH_DATA_ALIGN(hh->hh_len);
		memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
		read_unlock_bh(&hh->hh_lock);
		skb_push(skb, hh->hh_len);
		return hh->hh_output(skb);
	} else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;

}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	newskb->mac.raw = newskb->data;
	__skb_pull(newskb, newskb->nh.raw - newskb->data);
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}


static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
					&skb->nh.ipv6h->saddr)) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
					newskb->dev,
					ip6_dev_loopback_xmit);

			if (skb->nh.ipv6h->hop_limit == 0) {
				IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev, ip6_output_finish);
}

int ip6_output(struct sk_buff *skb)
{
	if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			kfree_skb(skb);
			skb = skb2;
			if (skb == NULL) {
				IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
				return -ENOBUFS;
			}
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hlimit < 0)
		hlimit = ipv6_get_hoplimit(dst->dev);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

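	/*
	 * First 32 bits of the IPv6 header: 4-bit version (6), 8-bit
	 * traffic class, 20-bit flow label.
	 */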
	*(u32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok) {
		IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       struct in6_addr *saddr, struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
	skb->nh.ipv6h = hdr;

	*(u32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = skb->nh.ipv6h;
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb->ip_summed = CHECKSUM_NONE;

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be a mistake; RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb->nh.raw + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same;
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
						|IPV6_ADDR_LINKLOCAL)) {
		/* This check is security critical. */
		goto error;
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = skb->nh.ipv6h;

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);

error:
	IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
#ifdef CONFIG_NETFILTER
	to->nfmark = from->nfmark;
	/* Connection association is same as pre-frag packet */
	nf_conntrack_put(to->nfct);
	to->nfct = from->nfct;
	nf_conntrack_get(to->nfct);
	to->nfctinfo = from->nfctinfo;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	nf_conntrack_put_reasm(to->nfct_reasm);
	to->nfct_reasm = from->nfct_reasm;
	nf_conntrack_get_reasm(to->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	nf_bridge_put(to->nf_bridge);
	to->nf_bridge = from->nf_bridge;
	nf_bridge_get(to->nf_bridge);
#endif
#endif
}

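/*
 * Find where a Fragment header would have to be inserted: skip the
 * extension headers that belong to the unfragmentable part (Hop-by-Hop,
 * Routing, and a Destination Options header that appears before a
 * Routing header) and return the offset just past them.  *nexthdr is
 * left pointing at the Next Header field the caller will rewrite to
 * NEXTHDR_FRAGMENT.
 */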
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
	unsigned int packet_len = skb->tail - skb->nh.raw;
	int found_rhdr = 0;
	*nexthdr = &skb->nh.ipv6h->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
		case NEXTHDR_ROUTING:
		case NEXTHDR_DEST:
			if (**nexthdr == NEXTHDR_ROUTING)
				found_rhdr = 1;
			if (**nexthdr == NEXTHDR_DEST && found_rhdr)
				return offset;
			offset += ipv6_optlen(exthdr);
			*nexthdr = &exthdr->nexthdr;
			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
			break;
		default:
			return offset;
		}
	}

	return offset;
}

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	u32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = dst_mtu(&rt->u.dst);
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

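	/*
	 * Fast path: the skb already carries a list of pre-sized pieces
	 * in its frag_list (for example one built by ip6_append_data()).
	 * If every piece fits the fragment MTU, is a multiple of 8 bytes
	 * (except the last) and has headroom for the headers, just prepend
	 * a Fragment header to each piece; otherwise fall through to the
	 * slow path, which copies the payload into freshly allocated
	 * fragments.
	 */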
	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				skb->truesize -= frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		*prevhdr = NEXTHDR_FRAGMENT;
		memcpy(tmp_hdr, skb->nh.raw, hlen);
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		skb->nh.raw = __skb_push(skb, hlen);
		memcpy(skb->nh.raw, tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));


		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				frag->h.raw = frag->data;
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				frag->nh.raw = __skb_push(frag, hlen);
				memcpy(frag->nh.raw, tmp_hdr, hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		frag->nh.raw = frag->data;
		fh = (struct frag_hdr*)(frag->data + hlen);
		frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		memcpy(frag->nh.raw, skb->data, hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, frag->h.raw, len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */

		IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);

		err = output(frag);
		if (err)
			goto fail;
	}
	kfree_skb(skb);
	IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
	return err;

fail:
	kfree_skb(skb);
	IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
	return err;
}

int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	int err = 0;

	*dst = NULL;
	if (sk) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		*dst = sk_dst_check(sk, np->dst_cookie);
		if (*dst) {
			struct rt6_info *rt = (struct rt6_info*)*dst;

			/* Yes, checking route validity in the unconnected
			   case is not very simple. Take into account that
			   we do not support routing by source, TOS,
			   and MSG_DONTROUTE		--ANK (980726)

			   1. If route was host route, check that
			      cached destination is current.
			      If it is network route, we still may
			      check its validity using saved pointer
			      to the last used address: daddr_cache.
			      We do not want to save whole address now
			      (because main consumer of this service
			      is tcp, which does not have this problem),
			      so the last trick works only on connected
			      sockets.
			   2. oif also should be the same.
			 */

			if (((rt->rt6i_dst.plen != 128 ||
			      !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
			     && (np->daddr_cache == NULL ||
				 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
			    || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
				dst_release(*dst);
				*dst = NULL;
			}
		}
	}

	if (*dst == NULL)
		*dst = ip6_route_output(sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);

		if (err)
			goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;
	return err;
}

EXPORT_SYMBOL_GPL(ip6_dst_lookup);

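/*
 * UDP fragmentation offload (UFO) helper for ip6_append_data(): build
 * or extend a single large skb holding the whole datagram, append the
 * payload as page fragments, and record the per-fragment payload size
 * and the fragment ID in the shared info so the UFO-capable device can
 * generate the on-wire fragments later.
 */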
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb->nh.raw = skb->data;

		/* initialize protocol header pointer */
		skb->h.raw = skb->data + fragheaderlen;

		skb->ip_summed = CHECKSUM_HW;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) -
					    sizeof(struct frag_hdr);
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa */
		}
		dst_hold(&rt->u.dst);
		np->cork.rt = rt;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = dst_mtu(rt->u.dst.path);
		if (np && np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = np->cork.rt;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

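	/*
	 * fragheaderlen counts the IPv6 header plus the options that are
	 * replicated in every fragment; maxfraglen is the length at which
	 * a queued packet is cut so that, once a Fragment header is added,
	 * it still fits the MTU with a payload that is a multiple of
	 * 8 bytes.
	 */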
	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		if (ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					fragheaderlen, transhdrlen, mtu, flags))
			goto error;

		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb->nh.raw = data + exthdrlen;
			data += fragheaderlen;
			skb->h.raw = data + exthdrlen;

			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				skb_trim(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
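			/*
			 * Scatter-gather capable device: append the data as
			 * page fragments, reusing the socket's current send
			 * page (sk_sndmsg_page) while it has room and
			 * allocating a fresh page when it is full.
			 */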
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
				skb->truesize += PAGE_SIZE;
				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = np->cork.rt;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

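	/*
	 * Glue every skb still sitting on the write queue onto the
	 * frag_list of the first one, then prepend the IPv6 header and
	 * push the whole datagram out; if the result exceeds the MTU it
	 * is normally split by ip6_fragment() along this frag_list (the
	 * fast path above).
	 */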
	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb->nh.raw)
		__skb_pull(skb, skb->nh.raw - skb->data);
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb->h.raw - skb->nh.raw);
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));

	*(u32*)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	else
		hdr->payload_len = 0;
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
	return err;
error:
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	inet->cork.flags &= ~IPCORK_OPT;

	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}