]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/ip6_output.c
[IPV6]: Move nfheader_len into rt6_info
[net-next-2.6.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
9  *
10  *      Based on linux/net/ipv4/ip_output.c
11  *
12  *      This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  *
17  *      Changes:
18  *      A.N.Kuznetsov   :       arithmetics in fragmentation.
19  *                              extension headers are implemented.
20  *                              route changes now work.
21  *                              ip6_forward does not confuse sniffers.
22  *                              etc.
23  *
24  *      H. von Brand    :       Added missing #include <linux/string.h>
25  *      Imran Patel     :       frag id should be in NBO
26  *      Kazunori MIYAZAWA @USAGI
27  *                      :       add ip6_append_data and related functions
28  *                              for datagram xmit
29  */
30
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/string.h>
34 #include <linux/socket.h>
35 #include <linux/net.h>
36 #include <linux/netdevice.h>
37 #include <linux/if_arp.h>
38 #include <linux/in6.h>
39 #include <linux/tcp.h>
40 #include <linux/route.h>
41 #include <linux/module.h>
42
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
45
46 #include <net/sock.h>
47 #include <net/snmp.h>
48
49 #include <net/ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
55 #include <net/icmp.h>
56 #include <net/xfrm.h>
57 #include <net/checksum.h>
58
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62 {
63         static u32 ipv6_fragmentation_id = 1;
64         static DEFINE_SPINLOCK(ip6_id_lock);
65
66         spin_lock_bh(&ip6_id_lock);
67         fhdr->identification = htonl(ipv6_fragmentation_id);
68         if (++ipv6_fragmentation_id == 0)
69                 ipv6_fragmentation_id = 1;
70         spin_unlock_bh(&ip6_id_lock);
71 }
72
73 static int ip6_output_finish(struct sk_buff *skb)
74 {
75         struct dst_entry *dst = skb->dst;
76
77         if (dst->hh)
78                 return neigh_hh_output(dst->hh, skb);
79         else if (dst->neighbour)
80                 return dst->neighbour->output(skb);
81
82         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
83         kfree_skb(skb);
84         return -EINVAL;
85
86 }
87
88 /* dev_loopback_xmit for use with netfilter. */
89 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
90 {
91         skb_reset_mac_header(newskb);
92         __skb_pull(newskb, skb_network_offset(newskb));
93         newskb->pkt_type = PACKET_LOOPBACK;
94         newskb->ip_summed = CHECKSUM_UNNECESSARY;
95         BUG_TRAP(newskb->dst);
96
97         netif_rx(newskb);
98         return 0;
99 }
100
101
102 static int ip6_output2(struct sk_buff *skb)
103 {
104         struct dst_entry *dst = skb->dst;
105         struct net_device *dev = dst->dev;
106
107         skb->protocol = htons(ETH_P_IPV6);
108         skb->dev = dev;
109
110         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
111                 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
112                 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
113
114                 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
115                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
116                                         &ipv6_hdr(skb)->saddr)) {
117                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
118
119                         /* Do not check for IFF_ALLMULTI; multicast routing
120                            is not supported in any case.
121                          */
122                         if (newskb)
123                                 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
124                                         newskb->dev,
125                                         ip6_dev_loopback_xmit);
126
127                         if (ipv6_hdr(skb)->hop_limit == 0) {
128                                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
129                                 kfree_skb(skb);
130                                 return 0;
131                         }
132                 }
133
134                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
135         }
136
137         return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
138 }
139
140 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
141 {
142         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
143
144         return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
145                skb->dst->dev->mtu : dst_mtu(skb->dst);
146 }
147
148 int ip6_output(struct sk_buff *skb)
149 {
150         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
151                                 dst_allfrag(skb->dst))
152                 return ip6_fragment(skb, ip6_output2);
153         else
154                 return ip6_output2(skb);
155 }
156
157 /*
158  *      xmit an sk_buff (used by TCP)
159  */
160
161 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
162              struct ipv6_txoptions *opt, int ipfragok)
163 {
164         struct ipv6_pinfo *np = inet6_sk(sk);
165         struct in6_addr *first_hop = &fl->fl6_dst;
166         struct dst_entry *dst = skb->dst;
167         struct ipv6hdr *hdr;
168         u8  proto = fl->proto;
169         int seg_len = skb->len;
170         int hlimit, tclass;
171         u32 mtu;
172
173         if (opt) {
174                 unsigned int head_room;
175
176                 /* First: exthdrs may take lots of space (~8K for now)
177                    MAX_HEADER is not enough.
178                  */
179                 head_room = opt->opt_nflen + opt->opt_flen;
180                 seg_len += head_room;
181                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
182
183                 if (skb_headroom(skb) < head_room) {
184                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
185                         if (skb2 == NULL) {
186                                 IP6_INC_STATS(ip6_dst_idev(skb->dst),
187                                               IPSTATS_MIB_OUTDISCARDS);
188                                 kfree_skb(skb);
189                                 return -ENOBUFS;
190                         }
191                         kfree_skb(skb);
192                         skb = skb2;
193                         if (sk)
194                                 skb_set_owner_w(skb, sk);
195                 }
196                 if (opt->opt_flen)
197                         ipv6_push_frag_opts(skb, opt, &proto);
198                 if (opt->opt_nflen)
199                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
200         }
201
202         skb_push(skb, sizeof(struct ipv6hdr));
203         skb_reset_network_header(skb);
204         hdr = ipv6_hdr(skb);
205
206         /*
207          *      Fill in the IPv6 header
208          */
209
210         hlimit = -1;
211         if (np)
212                 hlimit = np->hop_limit;
213         if (hlimit < 0)
214                 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
215         if (hlimit < 0)
216                 hlimit = ipv6_get_hoplimit(dst->dev);
217
218         tclass = -1;
219         if (np)
220                 tclass = np->tclass;
221         if (tclass < 0)
222                 tclass = 0;
223
224         *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
225
226         hdr->payload_len = htons(seg_len);
227         hdr->nexthdr = proto;
228         hdr->hop_limit = hlimit;
229
230         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
231         ipv6_addr_copy(&hdr->daddr, first_hop);
232
233         skb->priority = sk->sk_priority;
234
235         mtu = dst_mtu(dst);
236         if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
237                 IP6_INC_STATS(ip6_dst_idev(skb->dst),
238                               IPSTATS_MIB_OUTREQUESTS);
239                 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
240                                 dst_output);
241         }
242
243         if (net_ratelimit())
244                 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
245         skb->dev = dst->dev;
246         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
247         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
248         kfree_skb(skb);
249         return -EMSGSIZE;
250 }
251
252 EXPORT_SYMBOL(ip6_xmit);
253
254 /*
255  *      To avoid extra problems ND packets are send through this
256  *      routine. It's code duplication but I really want to avoid
257  *      extra checks since ipv6_build_header is used by TCP (which
258  *      is for us performance critical)
259  */
260
261 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
262                struct in6_addr *saddr, struct in6_addr *daddr,
263                int proto, int len)
264 {
265         struct ipv6_pinfo *np = inet6_sk(sk);
266         struct ipv6hdr *hdr;
267         int totlen;
268
269         skb->protocol = htons(ETH_P_IPV6);
270         skb->dev = dev;
271
272         totlen = len + sizeof(struct ipv6hdr);
273
274         skb_reset_network_header(skb);
275         skb_put(skb, sizeof(struct ipv6hdr));
276         hdr = ipv6_hdr(skb);
277
278         *(__be32*)hdr = htonl(0x60000000);
279
280         hdr->payload_len = htons(len);
281         hdr->nexthdr = proto;
282         hdr->hop_limit = np->hop_limit;
283
284         ipv6_addr_copy(&hdr->saddr, saddr);
285         ipv6_addr_copy(&hdr->daddr, daddr);
286
287         return 0;
288 }
289
290 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
291 {
292         struct ip6_ra_chain *ra;
293         struct sock *last = NULL;
294
295         read_lock(&ip6_ra_lock);
296         for (ra = ip6_ra_chain; ra; ra = ra->next) {
297                 struct sock *sk = ra->sk;
298                 if (sk && ra->sel == sel &&
299                     (!sk->sk_bound_dev_if ||
300                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
301                         if (last) {
302                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
303                                 if (skb2)
304                                         rawv6_rcv(last, skb2);
305                         }
306                         last = sk;
307                 }
308         }
309
310         if (last) {
311                 rawv6_rcv(last, skb);
312                 read_unlock(&ip6_ra_lock);
313                 return 1;
314         }
315         read_unlock(&ip6_ra_lock);
316         return 0;
317 }
318
319 static int ip6_forward_proxy_check(struct sk_buff *skb)
320 {
321         struct ipv6hdr *hdr = ipv6_hdr(skb);
322         u8 nexthdr = hdr->nexthdr;
323         int offset;
324
325         if (ipv6_ext_hdr(nexthdr)) {
326                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
327                 if (offset < 0)
328                         return 0;
329         } else
330                 offset = sizeof(struct ipv6hdr);
331
332         if (nexthdr == IPPROTO_ICMPV6) {
333                 struct icmp6hdr *icmp6;
334
335                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
336                                          offset + 1 - skb->data)))
337                         return 0;
338
339                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
340
341                 switch (icmp6->icmp6_type) {
342                 case NDISC_ROUTER_SOLICITATION:
343                 case NDISC_ROUTER_ADVERTISEMENT:
344                 case NDISC_NEIGHBOUR_SOLICITATION:
345                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
346                 case NDISC_REDIRECT:
347                         /* For reaction involving unicast neighbor discovery
348                          * message destined to the proxied address, pass it to
349                          * input function.
350                          */
351                         return 1;
352                 default:
353                         break;
354                 }
355         }
356
357         /*
358          * The proxying router can't forward traffic sent to a link-local
359          * address, so signal the sender and discard the packet. This
360          * behavior is clarified by the MIPv6 specification.
361          */
362         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
363                 dst_link_failure(skb);
364                 return -1;
365         }
366
367         return 0;
368 }
369
/*
 * Continuation for the FORWARD netfilter hook: pass the packet to the
 * route's output method and return its result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
374
375 int ip6_forward(struct sk_buff *skb)
376 {
377         struct dst_entry *dst = skb->dst;
378         struct ipv6hdr *hdr = ipv6_hdr(skb);
379         struct inet6_skb_parm *opt = IP6CB(skb);
380
381         if (ipv6_devconf.forwarding == 0)
382                 goto error;
383
384         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
385                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
386                 goto drop;
387         }
388
389         skb_forward_csum(skb);
390
391         /*
392          *      We DO NOT make any processing on
393          *      RA packets, pushing them to user level AS IS
394          *      without ane WARRANTY that application will be able
395          *      to interpret them. The reason is that we
396          *      cannot make anything clever here.
397          *
398          *      We are not end-node, so that if packet contains
399          *      AH/ESP, we cannot make anything.
400          *      Defragmentation also would be mistake, RA packets
401          *      cannot be fragmented, because there is no warranty
402          *      that different fragments will go along one path. --ANK
403          */
404         if (opt->ra) {
405                 u8 *ptr = skb_network_header(skb) + opt->ra;
406                 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
407                         return 0;
408         }
409
410         /*
411          *      check and decrement ttl
412          */
413         if (hdr->hop_limit <= 1) {
414                 /* Force OUTPUT device used as source address */
415                 skb->dev = dst->dev;
416                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
417                             0, skb->dev);
418                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
419
420                 kfree_skb(skb);
421                 return -ETIMEDOUT;
422         }
423
424         /* XXX: idev->cnf.proxy_ndp? */
425         if (ipv6_devconf.proxy_ndp &&
426             pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
427                 int proxied = ip6_forward_proxy_check(skb);
428                 if (proxied > 0)
429                         return ip6_input(skb);
430                 else if (proxied < 0) {
431                         IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
432                         goto drop;
433                 }
434         }
435
436         if (!xfrm6_route_forward(skb)) {
437                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
438                 goto drop;
439         }
440         dst = skb->dst;
441
442         /* IPv6 specs say nothing about it, but it is clear that we cannot
443            send redirects to source routed frames.
444            We don't send redirects to frames decapsulated from IPsec.
445          */
446         if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
447             !skb->sp) {
448                 struct in6_addr *target = NULL;
449                 struct rt6_info *rt;
450                 struct neighbour *n = dst->neighbour;
451
452                 /*
453                  *      incoming and outgoing devices are the same
454                  *      send a redirect.
455                  */
456
457                 rt = (struct rt6_info *) dst;
458                 if ((rt->rt6i_flags & RTF_GATEWAY))
459                         target = (struct in6_addr*)&n->primary_key;
460                 else
461                         target = &hdr->daddr;
462
463                 /* Limit redirects both by destination (here)
464                    and by source (inside ndisc_send_redirect)
465                  */
466                 if (xrlim_allow(dst, 1*HZ))
467                         ndisc_send_redirect(skb, n, target);
468         } else {
469                 int addrtype = ipv6_addr_type(&hdr->saddr);
470
471                 /* This check is security critical. */
472                 if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
473                         goto error;
474                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
475                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
476                                 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
477                         goto error;
478                 }
479         }
480
481         if (skb->len > dst_mtu(dst)) {
482                 /* Again, force OUTPUT device used as source address */
483                 skb->dev = dst->dev;
484                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
485                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
486                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
487                 kfree_skb(skb);
488                 return -EMSGSIZE;
489         }
490
491         if (skb_cow(skb, dst->dev->hard_header_len)) {
492                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
493                 goto drop;
494         }
495
496         hdr = ipv6_hdr(skb);
497
498         /* Mangling hops number delayed to point after skb COW */
499
500         hdr->hop_limit--;
501
502         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
503         return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
504
505 error:
506         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
507 drop:
508         kfree_skb(skb);
509         return -EINVAL;
510 }
511
512 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
513 {
514         to->pkt_type = from->pkt_type;
515         to->priority = from->priority;
516         to->protocol = from->protocol;
517         dst_release(to->dst);
518         to->dst = dst_clone(from->dst);
519         to->dev = from->dev;
520         to->mark = from->mark;
521
522 #ifdef CONFIG_NET_SCHED
523         to->tc_index = from->tc_index;
524 #endif
525         nf_copy(to, from);
526 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
527     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
528         to->nf_trace = from->nf_trace;
529 #endif
530         skb_copy_secmark(to, from);
531 }
532
533 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
534 {
535         u16 offset = sizeof(struct ipv6hdr);
536         struct ipv6_opt_hdr *exthdr =
537                                 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
538         unsigned int packet_len = skb->tail - skb->network_header;
539         int found_rhdr = 0;
540         *nexthdr = &ipv6_hdr(skb)->nexthdr;
541
542         while (offset + 1 <= packet_len) {
543
544                 switch (**nexthdr) {
545
546                 case NEXTHDR_HOP:
547                         break;
548                 case NEXTHDR_ROUTING:
549                         found_rhdr = 1;
550                         break;
551                 case NEXTHDR_DEST:
552 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
553                         if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
554                                 break;
555 #endif
556                         if (found_rhdr)
557                                 return offset;
558                         break;
559                 default :
560                         return offset;
561                 }
562
563                 offset += ipv6_optlen(exthdr);
564                 *nexthdr = &exthdr->nexthdr;
565                 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
566                                                  offset);
567         }
568
569         return offset;
570 }
571 EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
572
573 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
574 {
575         struct net_device *dev;
576         struct sk_buff *frag;
577         struct rt6_info *rt = (struct rt6_info*)skb->dst;
578         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
579         struct ipv6hdr *tmp_hdr;
580         struct frag_hdr *fh;
581         unsigned int mtu, hlen, left, len;
582         __be32 frag_id = 0;
583         int ptr, offset = 0, err=0;
584         u8 *prevhdr, nexthdr = 0;
585
586         dev = rt->u.dst.dev;
587         hlen = ip6_find_1stfragopt(skb, &prevhdr);
588         nexthdr = *prevhdr;
589
590         mtu = ip6_skb_dst_mtu(skb);
591
592         /* We must not fragment if the socket is set to force MTU discovery
593          * or if the skb it not generated by a local socket.  (This last
594          * check should be redundant, but it's free.)
595          */
596         if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
597                 skb->dev = skb->dst->dev;
598                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
599                 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
600                 kfree_skb(skb);
601                 return -EMSGSIZE;
602         }
603
604         if (np && np->frag_size < mtu) {
605                 if (np->frag_size)
606                         mtu = np->frag_size;
607         }
608         mtu -= hlen + sizeof(struct frag_hdr);
609
610         if (skb_shinfo(skb)->frag_list) {
611                 int first_len = skb_pagelen(skb);
612
613                 if (first_len - hlen > mtu ||
614                     ((first_len - hlen) & 7) ||
615                     skb_cloned(skb))
616                         goto slow_path;
617
618                 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
619                         /* Correct geometry. */
620                         if (frag->len > mtu ||
621                             ((frag->len & 7) && frag->next) ||
622                             skb_headroom(frag) < hlen)
623                             goto slow_path;
624
625                         /* Partially cloned skb? */
626                         if (skb_shared(frag))
627                                 goto slow_path;
628
629                         BUG_ON(frag->sk);
630                         if (skb->sk) {
631                                 sock_hold(skb->sk);
632                                 frag->sk = skb->sk;
633                                 frag->destructor = sock_wfree;
634                                 skb->truesize -= frag->truesize;
635                         }
636                 }
637
638                 err = 0;
639                 offset = 0;
640                 frag = skb_shinfo(skb)->frag_list;
641                 skb_shinfo(skb)->frag_list = NULL;
642                 /* BUILD HEADER */
643
644                 *prevhdr = NEXTHDR_FRAGMENT;
645                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
646                 if (!tmp_hdr) {
647                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
648                         return -ENOMEM;
649                 }
650
651                 __skb_pull(skb, hlen);
652                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
653                 __skb_push(skb, hlen);
654                 skb_reset_network_header(skb);
655                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
656
657                 ipv6_select_ident(skb, fh);
658                 fh->nexthdr = nexthdr;
659                 fh->reserved = 0;
660                 fh->frag_off = htons(IP6_MF);
661                 frag_id = fh->identification;
662
663                 first_len = skb_pagelen(skb);
664                 skb->data_len = first_len - skb_headlen(skb);
665                 skb->len = first_len;
666                 ipv6_hdr(skb)->payload_len = htons(first_len -
667                                                    sizeof(struct ipv6hdr));
668
669                 dst_hold(&rt->u.dst);
670
671                 for (;;) {
672                         /* Prepare header of the next frame,
673                          * before previous one went down. */
674                         if (frag) {
675                                 frag->ip_summed = CHECKSUM_NONE;
676                                 skb_reset_transport_header(frag);
677                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
678                                 __skb_push(frag, hlen);
679                                 skb_reset_network_header(frag);
680                                 memcpy(skb_network_header(frag), tmp_hdr,
681                                        hlen);
682                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
683                                 fh->nexthdr = nexthdr;
684                                 fh->reserved = 0;
685                                 fh->frag_off = htons(offset);
686                                 if (frag->next != NULL)
687                                         fh->frag_off |= htons(IP6_MF);
688                                 fh->identification = frag_id;
689                                 ipv6_hdr(frag)->payload_len =
690                                                 htons(frag->len -
691                                                       sizeof(struct ipv6hdr));
692                                 ip6_copy_metadata(frag, skb);
693                         }
694
695                         err = output(skb);
696                         if(!err)
697                                 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
698
699                         if (err || !frag)
700                                 break;
701
702                         skb = frag;
703                         frag = skb->next;
704                         skb->next = NULL;
705                 }
706
707                 kfree(tmp_hdr);
708
709                 if (err == 0) {
710                         IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
711                         dst_release(&rt->u.dst);
712                         return 0;
713                 }
714
715                 while (frag) {
716                         skb = frag->next;
717                         kfree_skb(frag);
718                         frag = skb;
719                 }
720
721                 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
722                 dst_release(&rt->u.dst);
723                 return err;
724         }
725
726 slow_path:
727         left = skb->len - hlen;         /* Space per frame */
728         ptr = hlen;                     /* Where to start from */
729
730         /*
731          *      Fragment the datagram.
732          */
733
734         *prevhdr = NEXTHDR_FRAGMENT;
735
736         /*
737          *      Keep copying data until we run out.
738          */
739         while(left > 0) {
740                 len = left;
741                 /* IF: it doesn't fit, use 'mtu' - the data space left */
742                 if (len > mtu)
743                         len = mtu;
744                 /* IF: we are not sending upto and including the packet end
745                    then align the next start on an eight byte boundary */
746                 if (len < left) {
747                         len &= ~7;
748                 }
749                 /*
750                  *      Allocate buffer.
751                  */
752
753                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
754                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
755                         IP6_INC_STATS(ip6_dst_idev(skb->dst),
756                                       IPSTATS_MIB_FRAGFAILS);
757                         err = -ENOMEM;
758                         goto fail;
759                 }
760
761                 /*
762                  *      Set up data on packet
763                  */
764
765                 ip6_copy_metadata(frag, skb);
766                 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
767                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
768                 skb_reset_network_header(frag);
769                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
770                 frag->transport_header = (frag->network_header + hlen +
771                                           sizeof(struct frag_hdr));
772
773                 /*
774                  *      Charge the memory for the fragment to any owner
775                  *      it might possess
776                  */
777                 if (skb->sk)
778                         skb_set_owner_w(frag, skb->sk);
779
780                 /*
781                  *      Copy the packet header into the new buffer.
782                  */
783                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
784
785                 /*
786                  *      Build fragment header.
787                  */
788                 fh->nexthdr = nexthdr;
789                 fh->reserved = 0;
790                 if (!frag_id) {
791                         ipv6_select_ident(skb, fh);
792                         frag_id = fh->identification;
793                 } else
794                         fh->identification = frag_id;
795
796                 /*
797                  *      Copy a block of the IP datagram.
798                  */
799                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
800                         BUG();
801                 left -= len;
802
803                 fh->frag_off = htons(offset);
804                 if (left > 0)
805                         fh->frag_off |= htons(IP6_MF);
806                 ipv6_hdr(frag)->payload_len = htons(frag->len -
807                                                     sizeof(struct ipv6hdr));
808
809                 ptr += len;
810                 offset += len;
811
812                 /*
813                  *      Put this fragment into the sending queue.
814                  */
815                 err = output(frag);
816                 if (err)
817                         goto fail;
818
819                 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
820         }
821         IP6_INC_STATS(ip6_dst_idev(skb->dst),
822                       IPSTATS_MIB_FRAGOKS);
823         kfree_skb(skb);
824         return err;
825
826 fail:
827         IP6_INC_STATS(ip6_dst_idev(skb->dst),
828                       IPSTATS_MIB_FRAGFAILS);
829         kfree_skb(skb);
830         return err;
831 }
832
833 static inline int ip6_rt_check(struct rt6key *rt_key,
834                                struct in6_addr *fl_addr,
835                                struct in6_addr *addr_cache)
836 {
837         return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
838                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
839 }
840
841 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
842                                           struct dst_entry *dst,
843                                           struct flowi *fl)
844 {
845         struct ipv6_pinfo *np = inet6_sk(sk);
846         struct rt6_info *rt = (struct rt6_info *)dst;
847
848         if (!dst)
849                 goto out;
850
851         /* Yes, checking route validity in not connected
852          * case is not very simple. Take into account,
853          * that we do not support routing by source, TOS,
854          * and MSG_DONTROUTE            --ANK (980726)
855          *
856          * 1. ip6_rt_check(): If route was host route,
857          *    check that cached destination is current.
858          *    If it is network route, we still may
859          *    check its validity using saved pointer
860          *    to the last used address: daddr_cache.
861          *    We do not want to save whole address now,
862          *    (because main consumer of this service
863          *    is tcp, which has not this problem),
864          *    so that the last trick works only on connected
865          *    sockets.
866          * 2. oif also should be the same.
867          */
868         if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
869 #ifdef CONFIG_IPV6_SUBTREES
870             ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
871 #endif
872             (fl->oif && fl->oif != dst->dev->ifindex)) {
873                 dst_release(dst);
874                 dst = NULL;
875         }
876
877 out:
878         return dst;
879 }
880
881 static int ip6_dst_lookup_tail(struct sock *sk,
882                                struct dst_entry **dst, struct flowi *fl)
883 {
884         int err;
885
886         if (*dst == NULL)
887                 *dst = ip6_route_output(sk, fl);
888
889         if ((err = (*dst)->error))
890                 goto out_err_release;
891
892         if (ipv6_addr_any(&fl->fl6_src)) {
893                 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
894                 if (err)
895                         goto out_err_release;
896         }
897
898 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
899                 /*
900                  * Here if the dst entry we've looked up
901                  * has a neighbour entry that is in the INCOMPLETE
902                  * state and the src address from the flow is
903                  * marked as OPTIMISTIC, we release the found
904                  * dst entry and replace it instead with the
905                  * dst entry of the nexthop router
906                  */
907                 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
908                         struct inet6_ifaddr *ifp;
909                         struct flowi fl_gw;
910                         int redirect;
911
912                         ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
913
914                         redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
915                         if (ifp)
916                                 in6_ifa_put(ifp);
917
918                         if (redirect) {
919                                 /*
920                                  * We need to get the dst entry for the
921                                  * default router instead
922                                  */
923                                 dst_release(*dst);
924                                 memcpy(&fl_gw, fl, sizeof(struct flowi));
925                                 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
926                                 *dst = ip6_route_output(sk, &fl_gw);
927                                 if ((err = (*dst)->error))
928                                         goto out_err_release;
929                         }
930                 }
931 #endif
932
933         return 0;
934
935 out_err_release:
936         if (err == -ENETUNREACH)
937                 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
938         dst_release(*dst);
939         *dst = NULL;
940         return err;
941 }
942
943 /**
944  *      ip6_dst_lookup - perform route lookup on flow
945  *      @sk: socket which provides route info
946  *      @dst: pointer to dst_entry * for result
947  *      @fl: flow to lookup
948  *
949  *      This function performs a route lookup on the given flow.
950  *
951  *      It returns zero on success, or a standard errno code on error.
952  */
953 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
954 {
955         *dst = NULL;
956         return ip6_dst_lookup_tail(sk, dst, fl);
957 }
958 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
959
960 /**
961  *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
962  *      @sk: socket which provides the dst cache and route info
963  *      @dst: pointer to dst_entry * for result
964  *      @fl: flow to lookup
965  *
966  *      This function performs a route lookup on the given flow with the
967  *      possibility of using the cached route in the socket if it is valid.
968  *      It will take the socket dst lock when operating on the dst cache.
969  *      As a result, this function can only be used in process context.
970  *
971  *      It returns zero on success, or a standard errno code on error.
972  */
973 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
974 {
975         *dst = NULL;
976         if (sk) {
977                 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
978                 *dst = ip6_sk_dst_check(sk, *dst, fl);
979         }
980
981         return ip6_dst_lookup_tail(sk, dst, fl);
982 }
983 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
984
985 static inline int ip6_ufo_append_data(struct sock *sk,
986                         int getfrag(void *from, char *to, int offset, int len,
987                         int odd, struct sk_buff *skb),
988                         void *from, int length, int hh_len, int fragheaderlen,
989                         int transhdrlen, int mtu,unsigned int flags)
990
991 {
992         struct sk_buff *skb;
993         int err;
994
995         /* There is support for UDP large send offload by network
996          * device, so create one single skb packet containing complete
997          * udp datagram
998          */
999         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1000                 skb = sock_alloc_send_skb(sk,
1001                         hh_len + fragheaderlen + transhdrlen + 20,
1002                         (flags & MSG_DONTWAIT), &err);
1003                 if (skb == NULL)
1004                         return -ENOMEM;
1005
1006                 /* reserve space for Hardware header */
1007                 skb_reserve(skb, hh_len);
1008
1009                 /* create space for UDP/IP header */
1010                 skb_put(skb,fragheaderlen + transhdrlen);
1011
1012                 /* initialize network header pointer */
1013                 skb_reset_network_header(skb);
1014
1015                 /* initialize protocol header pointer */
1016                 skb->transport_header = skb->network_header + fragheaderlen;
1017
1018                 skb->ip_summed = CHECKSUM_PARTIAL;
1019                 skb->csum = 0;
1020                 sk->sk_sndmsg_off = 0;
1021         }
1022
1023         err = skb_append_datato_frags(sk,skb, getfrag, from,
1024                                       (length - transhdrlen));
1025         if (!err) {
1026                 struct frag_hdr fhdr;
1027
1028                 /* specify the length of each IP datagram fragment*/
1029                 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1030                                             sizeof(struct frag_hdr);
1031                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1032                 ipv6_select_ident(skb, &fhdr);
1033                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1034                 __skb_queue_tail(&sk->sk_write_queue, skb);
1035
1036                 return 0;
1037         }
1038         /* There is not enough support do UPD LSO,
1039          * so follow normal path
1040          */
1041         kfree_skb(skb);
1042
1043         return err;
1044 }
1045
/*
 *      ip6_append_data - append data to the socket's pending write queue.
 *
 *      On the first call for a datagram (sk->sk_write_queue is empty) the
 *      options, route, flow, hop limit, traffic class and fragment size
 *      are saved in the cork state (np->cork / inet->cork) and a reference
 *      is taken on @rt.  On later calls @rt and @fl (and @opt, when
 *      IPCORK_OPT is set) are replaced by the saved values and
 *      transhdrlen/exthdrlen are forced to 0.
 *
 *      Data is obtained through getfrag(from, to, offset, len, odd, skb);
 *      a negative return from it is reported as -EFAULT.
 *
 *      Returns 0 on success (and immediately when MSG_PROBE is set),
 *      otherwise a negative errno.  Failures that go through the "error"
 *      label subtract the not-yet-queued length from inet->cork.length and
 *      bump IPSTATS_MIB_OUTDISCARDS; skbs already on the queue are left
 *      there.
 */
1046 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1047         int offset, int len, int odd, struct sk_buff *skb),
1048         void *from, int length, int transhdrlen,
1049         int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1050         struct rt6_info *rt, unsigned int flags)
1051 {
1052         struct inet_sock *inet = inet_sk(sk);
1053         struct ipv6_pinfo *np = inet6_sk(sk);
1054         struct sk_buff *skb;
1055         unsigned int maxfraglen, fragheaderlen;
1056         int exthdrlen;
1057         int hh_len;
1058         int mtu;
1059         int copy;
1060         int err;
1061         int offset = 0;
1062         int csummode = CHECKSUM_NONE;
1063
             /* MSG_PROBE: nothing is queued, report success right away. */
1064         if (flags&MSG_PROBE)
1065                 return 0;
1066         if (skb_queue_empty(&sk->sk_write_queue)) {
1067                 /*
1068                  * setup for corking
1069                  */
1070                 if (opt) {
                             /*
                              * Keep a private copy of the options.  An existing
                              * cork buffer is reused only if it is large enough.
                              */
1071                         if (np->cork.opt == NULL) {
1072                                 np->cork.opt = kmalloc(opt->tot_len,
1073                                                        sk->sk_allocation);
1074                                 if (unlikely(np->cork.opt == NULL))
1075                                         return -ENOBUFS;
1076                         } else if (np->cork.opt->tot_len < opt->tot_len) {
1077                                 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1078                                 return -EINVAL;
1079                         }
1080                         memcpy(np->cork.opt, opt, opt->tot_len);
1081                         inet->cork.flags |= IPCORK_OPT;
1082                         /* need source address above miyazawa*/
1083                 }
                     /* The cork holds its own reference on the route. */
1084                 dst_hold(&rt->u.dst);
1085                 np->cork.rt = rt;
1086                 inet->cork.fl = *fl;
1087                 np->cork.hop_limit = hlimit;
1088                 np->cork.tclass = tclass;
                     /* IPV6_PMTUDISC_PROBE uses the device MTU instead of the path MTU. */
1089                 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1090                       rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
                     /* A non-zero np->frag_size below the MTU caps the fragment size. */
1091                 if (np->frag_size < mtu) {
1092                         if (np->frag_size)
1093                                 mtu = np->frag_size;
1094                 }
1095                 inet->cork.fragsize = mtu;
1096                 if (dst_allfrag(rt->u.dst.path))
1097                         inet->cork.flags |= IPCORK_ALLFRAG;
1098                 inet->cork.length = 0;
1099                 sk->sk_sndmsg_page = NULL;
1100                 sk->sk_sndmsg_off = 0;
                     /*
                      * Header bytes in front of the transport header on the
                      * first skb: dst header_len plus fragmentable options,
                      * minus rt->nfheader_len (which is counted in
                      * fragheaderlen below instead).
                      */
1101                 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1102                             rt->nfheader_len;
1103                 length += exthdrlen;
1104                 transhdrlen += exthdrlen;
1105         } else {
                     /* Continuing a corked datagram: use the saved state. */
1106                 rt = np->cork.rt;
1107                 fl = &inet->cork.fl;
1108                 if (inet->cork.flags & IPCORK_OPT)
1109                         opt = np->cork.opt;
1110                 transhdrlen = 0;
1111                 exthdrlen = 0;
1112                 mtu = inet->cork.fragsize;
1113         }
1114
1115         hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1116
1117         fragheaderlen = sizeof(struct ipv6hdr) + rt->nfheader_len +
1118                         (opt ? opt->opt_nflen : 0);
             /*
              * maxfraglen: fragheaderlen plus the space after it rounded down
              * to a multiple of 8, less the size of a fragment header.
              */
1119         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1120
             /*
              * Unless the MTU itself is above sizeof(ipv6hdr) + IPV6_MAXPLEN,
              * refuse a datagram whose accumulated length would exceed that
              * limit less fragheaderlen.
              */
1121         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1122                 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1123                         ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1124                         return -EMSGSIZE;
1125                 }
1126         }
1127
1128         /*
1129          * Let's try using as much space as possible.
1130          * Use MTU if total length of the message fits into the MTU.
1131          * Otherwise, we need to reserve fragment header and
1132          * fragment alignment (= 8-15 octets, in total).
1133          *
1134          * Note that we may need to "move" the data from the tail
1135          * of the buffer to the new fragment when we split
1136          * the message.
1137          *
1138          * FIXME: It may be fragmented into multiple chunks
1139          *        at once if non-fragmentable extension headers
1140          *        are too large.
1141          * --yoshfuji
1142          */
1143
1144         inet->cork.length += length;
             /*
              * UDP data larger than the MTU on a device advertising
              * NETIF_F_UFO is handed to ip6_ufo_append_data() as a whole.
              */
1145         if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1146             (rt->u.dst.dev->features & NETIF_F_UFO)) {
1147
1148                 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1149                                           fragheaderlen, transhdrlen, mtu,
1150                                           flags);
1151                 if (err)
1152                         goto error;
1153                 return 0;
1154         }
1155
             /* Empty queue: jump straight into the allocation path with skb == NULL. */
1156         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1157                 goto alloc_new_skb;
1158
1159         while (length > 0) {
1160                 /* Check if the remaining data fits into current packet. */
                     /*
                      * The tail skb may grow to mtu when everything corked so
                      * far fits in one MTU and IPCORK_ALLFRAG is clear,
                      * otherwise only to maxfraglen.
                      */
1161                 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                     /* More data than room: this skb is limited to maxfraglen. */
1162                 if (copy < length)
1163                         copy = maxfraglen - skb->len;
1164
1165                 if (copy <= 0) {
1166                         char *data;
1167                         unsigned int datalen;
1168                         unsigned int fraglen;
1169                         unsigned int fraggap;
1170                         unsigned int alloclen;
1171                         struct sk_buff *skb_prev;
1172 alloc_new_skb:
1173                         skb_prev = skb;
1174
1175                         /* There's no room in the current skb */
                             /*
                              * fraggap: bytes the previous skb holds beyond
                              * maxfraglen; they are moved into the new skb below.
                              */
1176                         if (skb_prev)
1177                                 fraggap = skb_prev->len - maxfraglen;
1178                         else
1179                                 fraggap = 0;
1180
1181                         /*
1182                          * If remaining data exceeds the mtu,
1183                          * we know we need more fragment(s).
1184                          */
1185                         datalen = length + fraggap;
1186                         if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1187                                 datalen = maxfraglen - fragheaderlen;
1188
1189                         fraglen = datalen + fragheaderlen;
                             /*
                              * MSG_MORE on a device without scatter-gather:
                              * allocate a full mtu (presumably so later calls
                              * can extend the linear area - TODO confirm).
                              */
1190                         if ((flags & MSG_MORE) &&
1191                             !(rt->u.dst.dev->features&NETIF_F_SG))
1192                                 alloclen = mtu;
1193                         else
1194                                 alloclen = datalen + fragheaderlen;
1195
1196                         /*
1197                          * The last fragment gets additional space at tail.
1198                          * Note: we overallocate on fragments with MSG_MORE
1199                          * because we have no idea if we're the last one.
1200                          */
1201                         if (datalen == length + fraggap)
1202                                 alloclen += rt->u.dst.trailer_len;
1203
1204                         /*
1205                          * We just reserve space for fragment header.
1206                          * Note: this may be overallocation if the message
1207                          * (without MSG_MORE) fits into the MTU.
1208                          */
1209                         alloclen += sizeof(struct frag_hdr);
1210
                             /*
                              * transhdrlen is non-zero only for the first skb of
                              * a datagram; that one is allocated with
                              * sock_alloc_send_skb(), honouring MSG_DONTWAIT.
                              * Later skbs use sock_wmalloc(), and only while
                              * sk_wmem_alloc is at most twice sk_sndbuf;
                              * otherwise the call fails with -ENOBUFS.
                              */
1211                         if (transhdrlen) {
1212                                 skb = sock_alloc_send_skb(sk,
1213                                                 alloclen + hh_len,
1214                                                 (flags & MSG_DONTWAIT), &err);
1215                         } else {
1216                                 skb = NULL;
1217                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1218                                     2 * sk->sk_sndbuf)
1219                                         skb = sock_wmalloc(sk,
1220                                                            alloclen + hh_len, 1,
1221                                                            sk->sk_allocation);
1222                                 if (unlikely(skb == NULL))
1223                                         err = -ENOBUFS;
1224                         }
1225                         if (skb == NULL)
1226                                 goto error;
1227                         /*
1228                          *      Fill in the control structures
1229                          */
1230                         skb->ip_summed = csummode;
1231                         skb->csum = 0;
1232                         /* reserve for fragmentation */
1233                         skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1234
1235                         /*
1236                          *      Find where to start putting bytes
1237                          */
1238                         data = skb_put(skb, fraglen);
                             /* exthdrlen is non-zero only on the first skb. */
1239                         skb_set_network_header(skb, exthdrlen);
1240                         data += fragheaderlen;
1241                         skb->transport_header = (skb->network_header +
1242                                                  fragheaderlen);
                             /*
                              * Move the overhang out of skb_prev, adjust both
                              * checksums, then trim skb_prev to maxfraglen.
                              */
1243                         if (fraggap) {
1244                                 skb->csum = skb_copy_and_csum_bits(
1245                                         skb_prev, maxfraglen,
1246                                         data + transhdrlen, fraggap, 0);
1247                                 skb_prev->csum = csum_sub(skb_prev->csum,
1248                                                           skb->csum);
1249                                 data += fraggap;
1250                                 pskb_trim_unique(skb_prev, maxfraglen);
1251                         }
                             /* New user data for this skb; the new skb is freed on failure. */
1252                         copy = datalen - transhdrlen - fraggap;
1253                         if (copy < 0) {
1254                                 err = -EINVAL;
1255                                 kfree_skb(skb);
1256                                 goto error;
1257                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1258                                 err = -EFAULT;
1259                                 kfree_skb(skb);
1260                                 goto error;
1261                         }
1262
1263                         offset += copy;
1264                         length -= datalen - fraggap;
                             /* Header room is accounted for once, on the first skb only. */
1265                         transhdrlen = 0;
1266                         exthdrlen = 0;
1267                         csummode = CHECKSUM_NONE;
1268
1269                         /*
1270                          * Put the packet on the pending queue
1271                          */
1272                         __skb_queue_tail(&sk->sk_write_queue, skb);
1273                         continue;
1274                 }
1275
1276                 if (copy > length)
1277                         copy = length;
1278
                     /*
                      * No scatter-gather: copy into the linear area of the tail
                      * skb and undo the skb_put() if the copy fails.
                      */
1279                 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1280                         unsigned int off;
1281
1282                         off = skb->len;
1283                         if (getfrag(from, skb_put(skb, copy),
1284                                                 offset, copy, off, skb) < 0) {
1285                                 __skb_trim(skb, off);
1286                                 err = -EFAULT;
1287                                 goto error;
1288                         }
1289                 } else {
                             /*
                              * Scatter-gather: copy into page fragments, reusing
                              * the page remembered in sk->sk_sndmsg_page while
                              * it still has room.
                              *
                              * NOTE(review): with nr_frags == 0, frag points
                              * before frags[0]; it is read in the
                              * "page != frag->page" test whenever a remembered
                              * page exists - confirm that implies nr_frags > 0.
                              */
1290                         int i = skb_shinfo(skb)->nr_frags;
1291                         skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1292                         struct page *page = sk->sk_sndmsg_page;
1293                         int off = sk->sk_sndmsg_off;
1294                         unsigned int left;
1295
1296                         if (page && (left = PAGE_SIZE - off) > 0) {
1297                                 if (copy >= left)
1298                                         copy = left;
                                     /* Remembered page not yet attached to this skb. */
1299                                 if (page != frag->page) {
1300                                         if (i == MAX_SKB_FRAGS) {
1301                                                 err = -EMSGSIZE;
1302                                                 goto error;
1303                                         }
1304                                         get_page(page);
1305                                         skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1306                                         frag = &skb_shinfo(skb)->frags[i];
1307                                 }
1308                         } else if(i < MAX_SKB_FRAGS) {
                                     /* Start a fresh page; at most one page per pass. */
1309                                 if (copy > PAGE_SIZE)
1310                                         copy = PAGE_SIZE;
1311                                 page = alloc_pages(sk->sk_allocation, 0);
1312                                 if (page == NULL) {
1313                                         err = -ENOMEM;
1314                                         goto error;
1315                                 }
1316                                 sk->sk_sndmsg_page = page;
1317                                 sk->sk_sndmsg_off = 0;
1318
1319                                 skb_fill_page_desc(skb, i, page, 0, 0);
1320                                 frag = &skb_shinfo(skb)->frags[i];
1321                         } else {
1322                                 err = -EMSGSIZE;
1323                                 goto error;
1324                         }
1325                         if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1326                                 err = -EFAULT;
1327                                 goto error;
1328                         }
                             /* Account the copied bytes in the fragment, the skb and the socket. */
1329                         sk->sk_sndmsg_off += copy;
1330                         frag->size += copy;
1331                         skb->len += copy;
1332                         skb->data_len += copy;
1333                         skb->truesize += copy;
1334                         atomic_add(copy, &sk->sk_wmem_alloc);
1335                 }
1336                 offset += copy;
1337                 length -= copy;
1338         }
1339         return 0;
1340 error:
             /* length is what was not queued; take it back out of the corked total. */
1341         inet->cork.length -= length;
1342         IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1343         return err;
1344 }
1345
1346 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1347 {
1348         inet->cork.flags &= ~IPCORK_OPT;
1349         kfree(np->cork.opt);
1350         np->cork.opt = NULL;
1351         if (np->cork.rt) {
1352                 dst_release(&np->cork.rt->u.dst);
1353                 np->cork.rt = NULL;
1354                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1355         }
1356         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1357 }
1358
1359 int ip6_push_pending_frames(struct sock *sk)
1360 {
1361         struct sk_buff *skb, *tmp_skb;
1362         struct sk_buff **tail_skb;
1363         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1364         struct inet_sock *inet = inet_sk(sk);
1365         struct ipv6_pinfo *np = inet6_sk(sk);
1366         struct ipv6hdr *hdr;
1367         struct ipv6_txoptions *opt = np->cork.opt;
1368         struct rt6_info *rt = np->cork.rt;
1369         struct flowi *fl = &inet->cork.fl;
1370         unsigned char proto = fl->proto;
1371         int err = 0;
1372
1373         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1374                 goto out;
1375         tail_skb = &(skb_shinfo(skb)->frag_list);
1376
1377         /* move skb->data to ip header from ext header */
1378         if (skb->data < skb_network_header(skb))
1379                 __skb_pull(skb, skb_network_offset(skb));
1380         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1381                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1382                 *tail_skb = tmp_skb;
1383                 tail_skb = &(tmp_skb->next);
1384                 skb->len += tmp_skb->len;
1385                 skb->data_len += tmp_skb->len;
1386                 skb->truesize += tmp_skb->truesize;
1387                 __sock_put(tmp_skb->sk);
1388                 tmp_skb->destructor = NULL;
1389                 tmp_skb->sk = NULL;
1390         }
1391
1392         ipv6_addr_copy(final_dst, &fl->fl6_dst);
1393         __skb_pull(skb, skb_network_header_len(skb));
1394         if (opt && opt->opt_flen)
1395                 ipv6_push_frag_opts(skb, opt, &proto);
1396         if (opt && opt->opt_nflen)
1397                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1398
1399         skb_push(skb, sizeof(struct ipv6hdr));
1400         skb_reset_network_header(skb);
1401         hdr = ipv6_hdr(skb);
1402
1403         *(__be32*)hdr = fl->fl6_flowlabel |
1404                      htonl(0x60000000 | ((int)np->cork.tclass << 20));
1405
1406         if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1407                 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1408         else
1409                 hdr->payload_len = 0;
1410         hdr->hop_limit = np->cork.hop_limit;
1411         hdr->nexthdr = proto;
1412         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1413         ipv6_addr_copy(&hdr->daddr, final_dst);
1414
1415         skb->priority = sk->sk_priority;
1416
1417         skb->dst = dst_clone(&rt->u.dst);
1418         IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1419         if (proto == IPPROTO_ICMPV6) {
1420                 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1421
1422                 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1423                 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1424         }
1425
1426         err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1427         if (err) {
1428                 if (err > 0)
1429                         err = np->recverr ? net_xmit_errno(err) : 0;
1430                 if (err)
1431                         goto error;
1432         }
1433
1434 out:
1435         ip6_cork_release(inet, np);
1436         return err;
1437 error:
1438         goto out;
1439 }
1440
1441 void ip6_flush_pending_frames(struct sock *sk)
1442 {
1443         struct sk_buff *skb;
1444
1445         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1446                 if (skb->dst)
1447                         IP6_INC_STATS(ip6_dst_idev(skb->dst),
1448                                       IPSTATS_MIB_OUTDISCARDS);
1449                 kfree_skb(skb);
1450         }
1451
1452         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1453 }