]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/ip6_output.c
udpv6: Remove unused skb argument of ipv6_select_ident()
[net-next-2.6.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       arithmetics in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
43
44 #include <net/sock.h>
45 #include <net/snmp.h>
46
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
57
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
60 int __ip6_local_out(struct sk_buff *skb)
61 {
62         int len;
63
64         len = skb->len - sizeof(struct ipv6hdr);
65         if (len > IPV6_MAXPLEN)
66                 len = 0;
67         ipv6_hdr(skb)->payload_len = htons(len);
68
69         return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
70                        dst_output);
71 }
72
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, continue with dst_output().  Any other value from
 * __ip6_local_out() is returned to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
84
85 static int ip6_output_finish(struct sk_buff *skb)
86 {
87         struct dst_entry *dst = skb_dst(skb);
88
89         if (dst->hh)
90                 return neigh_hh_output(dst->hh, skb);
91         else if (dst->neighbour)
92                 return dst->neighbour->output(skb);
93
94         IP6_INC_STATS_BH(dev_net(dst->dev),
95                          ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
96         kfree_skb(skb);
97         return -EINVAL;
98
99 }
100
101 /* dev_loopback_xmit for use with netfilter. */
102 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
103 {
104         skb_reset_mac_header(newskb);
105         __skb_pull(newskb, skb_network_offset(newskb));
106         newskb->pkt_type = PACKET_LOOPBACK;
107         newskb->ip_summed = CHECKSUM_UNNECESSARY;
108         WARN_ON(!skb_dst(newskb));
109
110         netif_rx(newskb);
111         return 0;
112 }
113
114
115 static int ip6_output2(struct sk_buff *skb)
116 {
117         struct dst_entry *dst = skb_dst(skb);
118         struct net_device *dev = dst->dev;
119
120         skb->protocol = htons(ETH_P_IPV6);
121         skb->dev = dev;
122
123         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
124                 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
125                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
126
127                 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
128                     ((mroute6_socket(dev_net(dev)) &&
129                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
130                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
131                                          &ipv6_hdr(skb)->saddr))) {
132                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
133
134                         /* Do not check for IFF_ALLMULTI; multicast routing
135                            is not supported in any case.
136                          */
137                         if (newskb)
138                                 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
139                                         NULL, newskb->dev,
140                                         ip6_dev_loopback_xmit);
141
142                         if (ipv6_hdr(skb)->hop_limit == 0) {
143                                 IP6_INC_STATS(dev_net(dev), idev,
144                                               IPSTATS_MIB_OUTDISCARDS);
145                                 kfree_skb(skb);
146                                 return 0;
147                         }
148                 }
149
150                 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
151                                 skb->len);
152         }
153
154         return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
155                        ip6_output_finish);
156 }
157
158 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
159 {
160         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
161
162         return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
163                skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
164 }
165
166 int ip6_output(struct sk_buff *skb)
167 {
168         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
169         if (unlikely(idev->cnf.disable_ipv6)) {
170                 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
171                               IPSTATS_MIB_OUTDISCARDS);
172                 kfree_skb(skb);
173                 return 0;
174         }
175
176         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
177                                 dst_allfrag(skb_dst(skb)))
178                 return ip6_fragment(skb, ip6_output2);
179         else
180                 return ip6_output2(skb);
181 }
182
183 /*
184  *      xmit an sk_buff (used by TCP)
185  */
186
187 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188              struct ipv6_txoptions *opt, int ipfragok)
189 {
190         struct net *net = sock_net(sk);
191         struct ipv6_pinfo *np = inet6_sk(sk);
192         struct in6_addr *first_hop = &fl->fl6_dst;
193         struct dst_entry *dst = skb_dst(skb);
194         struct ipv6hdr *hdr;
195         u8  proto = fl->proto;
196         int seg_len = skb->len;
197         int hlimit, tclass;
198         u32 mtu;
199
200         if (opt) {
201                 unsigned int head_room;
202
203                 /* First: exthdrs may take lots of space (~8K for now)
204                    MAX_HEADER is not enough.
205                  */
206                 head_room = opt->opt_nflen + opt->opt_flen;
207                 seg_len += head_room;
208                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
209
210                 if (skb_headroom(skb) < head_room) {
211                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
212                         if (skb2 == NULL) {
213                                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
214                                               IPSTATS_MIB_OUTDISCARDS);
215                                 kfree_skb(skb);
216                                 return -ENOBUFS;
217                         }
218                         kfree_skb(skb);
219                         skb = skb2;
220                         if (sk)
221                                 skb_set_owner_w(skb, sk);
222                 }
223                 if (opt->opt_flen)
224                         ipv6_push_frag_opts(skb, opt, &proto);
225                 if (opt->opt_nflen)
226                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
227         }
228
229         skb_push(skb, sizeof(struct ipv6hdr));
230         skb_reset_network_header(skb);
231         hdr = ipv6_hdr(skb);
232
233         /* Allow local fragmentation. */
234         if (ipfragok)
235                 skb->local_df = 1;
236
237         /*
238          *      Fill in the IPv6 header
239          */
240
241         hlimit = -1;
242         if (np)
243                 hlimit = np->hop_limit;
244         if (hlimit < 0)
245                 hlimit = ip6_dst_hoplimit(dst);
246
247         tclass = -1;
248         if (np)
249                 tclass = np->tclass;
250         if (tclass < 0)
251                 tclass = 0;
252
253         *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
254
255         hdr->payload_len = htons(seg_len);
256         hdr->nexthdr = proto;
257         hdr->hop_limit = hlimit;
258
259         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
260         ipv6_addr_copy(&hdr->daddr, first_hop);
261
262         skb->priority = sk->sk_priority;
263         skb->mark = sk->sk_mark;
264
265         mtu = dst_mtu(dst);
266         if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
267                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
268                               IPSTATS_MIB_OUT, skb->len);
269                 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
270                                 dst_output);
271         }
272
273         if (net_ratelimit())
274                 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
275         skb->dev = dst->dev;
276         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
277         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
278         kfree_skb(skb);
279         return -EMSGSIZE;
280 }
281
282 EXPORT_SYMBOL(ip6_xmit);
283
284 /*
285  *      To avoid extra problems ND packets are send through this
286  *      routine. It's code duplication but I really want to avoid
287  *      extra checks since ipv6_build_header is used by TCP (which
288  *      is for us performance critical)
289  */
290
291 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
292                const struct in6_addr *saddr, const struct in6_addr *daddr,
293                int proto, int len)
294 {
295         struct ipv6_pinfo *np = inet6_sk(sk);
296         struct ipv6hdr *hdr;
297         int totlen;
298
299         skb->protocol = htons(ETH_P_IPV6);
300         skb->dev = dev;
301
302         totlen = len + sizeof(struct ipv6hdr);
303
304         skb_reset_network_header(skb);
305         skb_put(skb, sizeof(struct ipv6hdr));
306         hdr = ipv6_hdr(skb);
307
308         *(__be32*)hdr = htonl(0x60000000);
309
310         hdr->payload_len = htons(len);
311         hdr->nexthdr = proto;
312         hdr->hop_limit = np->hop_limit;
313
314         ipv6_addr_copy(&hdr->saddr, saddr);
315         ipv6_addr_copy(&hdr->daddr, daddr);
316
317         return 0;
318 }
319
320 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
321 {
322         struct ip6_ra_chain *ra;
323         struct sock *last = NULL;
324
325         read_lock(&ip6_ra_lock);
326         for (ra = ip6_ra_chain; ra; ra = ra->next) {
327                 struct sock *sk = ra->sk;
328                 if (sk && ra->sel == sel &&
329                     (!sk->sk_bound_dev_if ||
330                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
331                         if (last) {
332                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
333                                 if (skb2)
334                                         rawv6_rcv(last, skb2);
335                         }
336                         last = sk;
337                 }
338         }
339
340         if (last) {
341                 rawv6_rcv(last, skb);
342                 read_unlock(&ip6_ra_lock);
343                 return 1;
344         }
345         read_unlock(&ip6_ra_lock);
346         return 0;
347 }
348
349 static int ip6_forward_proxy_check(struct sk_buff *skb)
350 {
351         struct ipv6hdr *hdr = ipv6_hdr(skb);
352         u8 nexthdr = hdr->nexthdr;
353         int offset;
354
355         if (ipv6_ext_hdr(nexthdr)) {
356                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
357                 if (offset < 0)
358                         return 0;
359         } else
360                 offset = sizeof(struct ipv6hdr);
361
362         if (nexthdr == IPPROTO_ICMPV6) {
363                 struct icmp6hdr *icmp6;
364
365                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
366                                          offset + 1 - skb->data)))
367                         return 0;
368
369                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
370
371                 switch (icmp6->icmp6_type) {
372                 case NDISC_ROUTER_SOLICITATION:
373                 case NDISC_ROUTER_ADVERTISEMENT:
374                 case NDISC_NEIGHBOUR_SOLICITATION:
375                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
376                 case NDISC_REDIRECT:
377                         /* For reaction involving unicast neighbor discovery
378                          * message destined to the proxied address, pass it to
379                          * input function.
380                          */
381                         return 1;
382                 default:
383                         break;
384                 }
385         }
386
387         /*
388          * The proxying router can't forward traffic sent to a link-local
389          * address, so signal the sender and discard the packet. This
390          * behavior is clarified by the MIPv6 specification.
391          */
392         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
393                 dst_link_failure(skb);
394                 return -1;
395         }
396
397         return 0;
398 }
399
/*
 * Continuation for the NF_INET_FORWARD hook: hand the packet to
 * dst_output() and return its result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
404
405 int ip6_forward(struct sk_buff *skb)
406 {
407         struct dst_entry *dst = skb_dst(skb);
408         struct ipv6hdr *hdr = ipv6_hdr(skb);
409         struct inet6_skb_parm *opt = IP6CB(skb);
410         struct net *net = dev_net(dst->dev);
411
412         if (net->ipv6.devconf_all->forwarding == 0)
413                 goto error;
414
415         if (skb_warn_if_lro(skb))
416                 goto drop;
417
418         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
419                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
420                 goto drop;
421         }
422
423         skb_forward_csum(skb);
424
425         /*
426          *      We DO NOT make any processing on
427          *      RA packets, pushing them to user level AS IS
428          *      without ane WARRANTY that application will be able
429          *      to interpret them. The reason is that we
430          *      cannot make anything clever here.
431          *
432          *      We are not end-node, so that if packet contains
433          *      AH/ESP, we cannot make anything.
434          *      Defragmentation also would be mistake, RA packets
435          *      cannot be fragmented, because there is no warranty
436          *      that different fragments will go along one path. --ANK
437          */
438         if (opt->ra) {
439                 u8 *ptr = skb_network_header(skb) + opt->ra;
440                 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
441                         return 0;
442         }
443
444         /*
445          *      check and decrement ttl
446          */
447         if (hdr->hop_limit <= 1) {
448                 /* Force OUTPUT device used as source address */
449                 skb->dev = dst->dev;
450                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
451                             0, skb->dev);
452                 IP6_INC_STATS_BH(net,
453                                  ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
454
455                 kfree_skb(skb);
456                 return -ETIMEDOUT;
457         }
458
459         /* XXX: idev->cnf.proxy_ndp? */
460         if (net->ipv6.devconf_all->proxy_ndp &&
461             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
462                 int proxied = ip6_forward_proxy_check(skb);
463                 if (proxied > 0)
464                         return ip6_input(skb);
465                 else if (proxied < 0) {
466                         IP6_INC_STATS(net, ip6_dst_idev(dst),
467                                       IPSTATS_MIB_INDISCARDS);
468                         goto drop;
469                 }
470         }
471
472         if (!xfrm6_route_forward(skb)) {
473                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
474                 goto drop;
475         }
476         dst = skb_dst(skb);
477
478         /* IPv6 specs say nothing about it, but it is clear that we cannot
479            send redirects to source routed frames.
480            We don't send redirects to frames decapsulated from IPsec.
481          */
482         if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
483             !skb_sec_path(skb)) {
484                 struct in6_addr *target = NULL;
485                 struct rt6_info *rt;
486                 struct neighbour *n = dst->neighbour;
487
488                 /*
489                  *      incoming and outgoing devices are the same
490                  *      send a redirect.
491                  */
492
493                 rt = (struct rt6_info *) dst;
494                 if ((rt->rt6i_flags & RTF_GATEWAY))
495                         target = (struct in6_addr*)&n->primary_key;
496                 else
497                         target = &hdr->daddr;
498
499                 /* Limit redirects both by destination (here)
500                    and by source (inside ndisc_send_redirect)
501                  */
502                 if (xrlim_allow(dst, 1*HZ))
503                         ndisc_send_redirect(skb, n, target);
504         } else {
505                 int addrtype = ipv6_addr_type(&hdr->saddr);
506
507                 /* This check is security critical. */
508                 if (addrtype == IPV6_ADDR_ANY ||
509                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
510                         goto error;
511                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
512                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
513                                 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
514                         goto error;
515                 }
516         }
517
518         if (skb->len > dst_mtu(dst)) {
519                 /* Again, force OUTPUT device used as source address */
520                 skb->dev = dst->dev;
521                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
522                 IP6_INC_STATS_BH(net,
523                                  ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
524                 IP6_INC_STATS_BH(net,
525                                  ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
526                 kfree_skb(skb);
527                 return -EMSGSIZE;
528         }
529
530         if (skb_cow(skb, dst->dev->hard_header_len)) {
531                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
532                 goto drop;
533         }
534
535         hdr = ipv6_hdr(skb);
536
537         /* Mangling hops number delayed to point after skb COW */
538
539         hdr->hop_limit--;
540
541         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
542         return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
543                        ip6_forward_finish);
544
545 error:
546         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
547 drop:
548         kfree_skb(skb);
549         return -EINVAL;
550 }
551
552 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
553 {
554         to->pkt_type = from->pkt_type;
555         to->priority = from->priority;
556         to->protocol = from->protocol;
557         skb_dst_drop(to);
558         skb_dst_set(to, dst_clone(skb_dst(from)));
559         to->dev = from->dev;
560         to->mark = from->mark;
561
562 #ifdef CONFIG_NET_SCHED
563         to->tc_index = from->tc_index;
564 #endif
565         nf_copy(to, from);
566 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
567     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
568         to->nf_trace = from->nf_trace;
569 #endif
570         skb_copy_secmark(to, from);
571 }
572
573 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
574 {
575         u16 offset = sizeof(struct ipv6hdr);
576         struct ipv6_opt_hdr *exthdr =
577                                 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
578         unsigned int packet_len = skb->tail - skb->network_header;
579         int found_rhdr = 0;
580         *nexthdr = &ipv6_hdr(skb)->nexthdr;
581
582         while (offset + 1 <= packet_len) {
583
584                 switch (**nexthdr) {
585
586                 case NEXTHDR_HOP:
587                         break;
588                 case NEXTHDR_ROUTING:
589                         found_rhdr = 1;
590                         break;
591                 case NEXTHDR_DEST:
592 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
593                         if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
594                                 break;
595 #endif
596                         if (found_rhdr)
597                                 return offset;
598                         break;
599                 default :
600                         return offset;
601                 }
602
603                 offset += ipv6_optlen(exthdr);
604                 *nexthdr = &exthdr->nexthdr;
605                 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
606                                                  offset);
607         }
608
609         return offset;
610 }
611
612 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
613 {
614         struct sk_buff *frag;
615         struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
616         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
617         struct ipv6hdr *tmp_hdr;
618         struct frag_hdr *fh;
619         unsigned int mtu, hlen, left, len;
620         __be32 frag_id = 0;
621         int ptr, offset = 0, err=0;
622         u8 *prevhdr, nexthdr = 0;
623         struct net *net = dev_net(skb_dst(skb)->dev);
624
625         hlen = ip6_find_1stfragopt(skb, &prevhdr);
626         nexthdr = *prevhdr;
627
628         mtu = ip6_skb_dst_mtu(skb);
629
630         /* We must not fragment if the socket is set to force MTU discovery
631          * or if the skb it not generated by a local socket.  (This last
632          * check should be redundant, but it's free.)
633          */
634         if (!skb->local_df) {
635                 skb->dev = skb_dst(skb)->dev;
636                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
637                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
638                               IPSTATS_MIB_FRAGFAILS);
639                 kfree_skb(skb);
640                 return -EMSGSIZE;
641         }
642
643         if (np && np->frag_size < mtu) {
644                 if (np->frag_size)
645                         mtu = np->frag_size;
646         }
647         mtu -= hlen + sizeof(struct frag_hdr);
648
649         if (skb_has_frags(skb)) {
650                 int first_len = skb_pagelen(skb);
651                 int truesizes = 0;
652
653                 if (first_len - hlen > mtu ||
654                     ((first_len - hlen) & 7) ||
655                     skb_cloned(skb))
656                         goto slow_path;
657
658                 skb_walk_frags(skb, frag) {
659                         /* Correct geometry. */
660                         if (frag->len > mtu ||
661                             ((frag->len & 7) && frag->next) ||
662                             skb_headroom(frag) < hlen)
663                             goto slow_path;
664
665                         /* Partially cloned skb? */
666                         if (skb_shared(frag))
667                                 goto slow_path;
668
669                         BUG_ON(frag->sk);
670                         if (skb->sk) {
671                                 frag->sk = skb->sk;
672                                 frag->destructor = sock_wfree;
673                                 truesizes += frag->truesize;
674                         }
675                 }
676
677                 err = 0;
678                 offset = 0;
679                 frag = skb_shinfo(skb)->frag_list;
680                 skb_frag_list_init(skb);
681                 /* BUILD HEADER */
682
683                 *prevhdr = NEXTHDR_FRAGMENT;
684                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
685                 if (!tmp_hdr) {
686                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
687                                       IPSTATS_MIB_FRAGFAILS);
688                         return -ENOMEM;
689                 }
690
691                 __skb_pull(skb, hlen);
692                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
693                 __skb_push(skb, hlen);
694                 skb_reset_network_header(skb);
695                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
696
697                 ipv6_select_ident(fh);
698                 fh->nexthdr = nexthdr;
699                 fh->reserved = 0;
700                 fh->frag_off = htons(IP6_MF);
701                 frag_id = fh->identification;
702
703                 first_len = skb_pagelen(skb);
704                 skb->data_len = first_len - skb_headlen(skb);
705                 skb->truesize -= truesizes;
706                 skb->len = first_len;
707                 ipv6_hdr(skb)->payload_len = htons(first_len -
708                                                    sizeof(struct ipv6hdr));
709
710                 dst_hold(&rt->u.dst);
711
712                 for (;;) {
713                         /* Prepare header of the next frame,
714                          * before previous one went down. */
715                         if (frag) {
716                                 frag->ip_summed = CHECKSUM_NONE;
717                                 skb_reset_transport_header(frag);
718                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
719                                 __skb_push(frag, hlen);
720                                 skb_reset_network_header(frag);
721                                 memcpy(skb_network_header(frag), tmp_hdr,
722                                        hlen);
723                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
724                                 fh->nexthdr = nexthdr;
725                                 fh->reserved = 0;
726                                 fh->frag_off = htons(offset);
727                                 if (frag->next != NULL)
728                                         fh->frag_off |= htons(IP6_MF);
729                                 fh->identification = frag_id;
730                                 ipv6_hdr(frag)->payload_len =
731                                                 htons(frag->len -
732                                                       sizeof(struct ipv6hdr));
733                                 ip6_copy_metadata(frag, skb);
734                         }
735
736                         err = output(skb);
737                         if(!err)
738                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
739                                               IPSTATS_MIB_FRAGCREATES);
740
741                         if (err || !frag)
742                                 break;
743
744                         skb = frag;
745                         frag = skb->next;
746                         skb->next = NULL;
747                 }
748
749                 kfree(tmp_hdr);
750
751                 if (err == 0) {
752                         IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
753                                       IPSTATS_MIB_FRAGOKS);
754                         dst_release(&rt->u.dst);
755                         return 0;
756                 }
757
758                 while (frag) {
759                         skb = frag->next;
760                         kfree_skb(frag);
761                         frag = skb;
762                 }
763
764                 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
765                               IPSTATS_MIB_FRAGFAILS);
766                 dst_release(&rt->u.dst);
767                 return err;
768         }
769
770 slow_path:
771         left = skb->len - hlen;         /* Space per frame */
772         ptr = hlen;                     /* Where to start from */
773
774         /*
775          *      Fragment the datagram.
776          */
777
778         *prevhdr = NEXTHDR_FRAGMENT;
779
780         /*
781          *      Keep copying data until we run out.
782          */
783         while(left > 0) {
784                 len = left;
785                 /* IF: it doesn't fit, use 'mtu' - the data space left */
786                 if (len > mtu)
787                         len = mtu;
788                 /* IF: we are not sending upto and including the packet end
789                    then align the next start on an eight byte boundary */
790                 if (len < left) {
791                         len &= ~7;
792                 }
793                 /*
794                  *      Allocate buffer.
795                  */
796
797                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
798                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
799                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
800                                       IPSTATS_MIB_FRAGFAILS);
801                         err = -ENOMEM;
802                         goto fail;
803                 }
804
805                 /*
806                  *      Set up data on packet
807                  */
808
809                 ip6_copy_metadata(frag, skb);
810                 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
811                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
812                 skb_reset_network_header(frag);
813                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
814                 frag->transport_header = (frag->network_header + hlen +
815                                           sizeof(struct frag_hdr));
816
817                 /*
818                  *      Charge the memory for the fragment to any owner
819                  *      it might possess
820                  */
821                 if (skb->sk)
822                         skb_set_owner_w(frag, skb->sk);
823
824                 /*
825                  *      Copy the packet header into the new buffer.
826                  */
827                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
828
829                 /*
830                  *      Build fragment header.
831                  */
832                 fh->nexthdr = nexthdr;
833                 fh->reserved = 0;
834                 if (!frag_id) {
835                         ipv6_select_ident(fh);
836                         frag_id = fh->identification;
837                 } else
838                         fh->identification = frag_id;
839
840                 /*
841                  *      Copy a block of the IP datagram.
842                  */
843                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
844                         BUG();
845                 left -= len;
846
847                 fh->frag_off = htons(offset);
848                 if (left > 0)
849                         fh->frag_off |= htons(IP6_MF);
850                 ipv6_hdr(frag)->payload_len = htons(frag->len -
851                                                     sizeof(struct ipv6hdr));
852
853                 ptr += len;
854                 offset += len;
855
856                 /*
857                  *      Put this fragment into the sending queue.
858                  */
859                 err = output(frag);
860                 if (err)
861                         goto fail;
862
863                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
864                               IPSTATS_MIB_FRAGCREATES);
865         }
866         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
867                       IPSTATS_MIB_FRAGOKS);
868         kfree_skb(skb);
869         return err;
870
871 fail:
872         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
873                       IPSTATS_MIB_FRAGFAILS);
874         kfree_skb(skb);
875         return err;
876 }
877
878 static inline int ip6_rt_check(struct rt6key *rt_key,
879                                struct in6_addr *fl_addr,
880                                struct in6_addr *addr_cache)
881 {
882         return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
883                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
884 }
885
886 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
887                                           struct dst_entry *dst,
888                                           struct flowi *fl)
889 {
890         struct ipv6_pinfo *np = inet6_sk(sk);
891         struct rt6_info *rt = (struct rt6_info *)dst;
892
893         if (!dst)
894                 goto out;
895
896         /* Yes, checking route validity in not connected
897          * case is not very simple. Take into account,
898          * that we do not support routing by source, TOS,
899          * and MSG_DONTROUTE            --ANK (980726)
900          *
901          * 1. ip6_rt_check(): If route was host route,
902          *    check that cached destination is current.
903          *    If it is network route, we still may
904          *    check its validity using saved pointer
905          *    to the last used address: daddr_cache.
906          *    We do not want to save whole address now,
907          *    (because main consumer of this service
908          *    is tcp, which has not this problem),
909          *    so that the last trick works only on connected
910          *    sockets.
911          * 2. oif also should be the same.
912          */
913         if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
914 #ifdef CONFIG_IPV6_SUBTREES
915             ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
916 #endif
917             (fl->oif && fl->oif != dst->dev->ifindex)) {
918                 dst_release(dst);
919                 dst = NULL;
920         }
921
922 out:
923         return dst;
924 }
925
926 static int ip6_dst_lookup_tail(struct sock *sk,
927                                struct dst_entry **dst, struct flowi *fl)
928 {
929         int err;
930         struct net *net = sock_net(sk);
931
932         if (*dst == NULL)
933                 *dst = ip6_route_output(net, sk, fl);
934
935         if ((err = (*dst)->error))
936                 goto out_err_release;
937
938         if (ipv6_addr_any(&fl->fl6_src)) {
939                 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
940                                          &fl->fl6_dst,
941                                          sk ? inet6_sk(sk)->srcprefs : 0,
942                                          &fl->fl6_src);
943                 if (err)
944                         goto out_err_release;
945         }
946
947 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
948         /*
949          * Here if the dst entry we've looked up
950          * has a neighbour entry that is in the INCOMPLETE
951          * state and the src address from the flow is
952          * marked as OPTIMISTIC, we release the found
953          * dst entry and replace it instead with the
954          * dst entry of the nexthop router
955          */
956         if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
957                 struct inet6_ifaddr *ifp;
958                 struct flowi fl_gw;
959                 int redirect;
960
961                 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
962                                       (*dst)->dev, 1);
963
964                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
965                 if (ifp)
966                         in6_ifa_put(ifp);
967
968                 if (redirect) {
969                         /*
970                          * We need to get the dst entry for the
971                          * default router instead
972                          */
973                         dst_release(*dst);
974                         memcpy(&fl_gw, fl, sizeof(struct flowi));
975                         memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
976                         *dst = ip6_route_output(net, sk, &fl_gw);
977                         if ((err = (*dst)->error))
978                                 goto out_err_release;
979                 }
980         }
981 #endif
982
983         return 0;
984
985 out_err_release:
986         if (err == -ENETUNREACH)
987                 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
988         dst_release(*dst);
989         *dst = NULL;
990         return err;
991 }
992
993 /**
994  *      ip6_dst_lookup - perform route lookup on flow
995  *      @sk: socket which provides route info
996  *      @dst: pointer to dst_entry * for result
997  *      @fl: flow to lookup
998  *
999  *      This function performs a route lookup on the given flow.
1000  *
1001  *      It returns zero on success, or a standard errno code on error.
1002  */
1003 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1004 {
1005         *dst = NULL;
1006         return ip6_dst_lookup_tail(sk, dst, fl);
1007 }
1008 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1009
1010 /**
1011  *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
1012  *      @sk: socket which provides the dst cache and route info
1013  *      @dst: pointer to dst_entry * for result
1014  *      @fl: flow to lookup
1015  *
1016  *      This function performs a route lookup on the given flow with the
1017  *      possibility of using the cached route in the socket if it is valid.
1018  *      It will take the socket dst lock when operating on the dst cache.
1019  *      As a result, this function can only be used in process context.
1020  *
1021  *      It returns zero on success, or a standard errno code on error.
1022  */
1023 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1024 {
1025         *dst = NULL;
1026         if (sk) {
1027                 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1028                 *dst = ip6_sk_dst_check(sk, *dst, fl);
1029         }
1030
1031         return ip6_dst_lookup_tail(sk, dst, fl);
1032 }
1033 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1034
1035 static inline int ip6_ufo_append_data(struct sock *sk,
1036                         int getfrag(void *from, char *to, int offset, int len,
1037                         int odd, struct sk_buff *skb),
1038                         void *from, int length, int hh_len, int fragheaderlen,
1039                         int transhdrlen, int mtu,unsigned int flags)
1040
1041 {
1042         struct sk_buff *skb;
1043         int err;
1044
1045         /* There is support for UDP large send offload by network
1046          * device, so create one single skb packet containing complete
1047          * udp datagram
1048          */
1049         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1050                 skb = sock_alloc_send_skb(sk,
1051                         hh_len + fragheaderlen + transhdrlen + 20,
1052                         (flags & MSG_DONTWAIT), &err);
1053                 if (skb == NULL)
1054                         return -ENOMEM;
1055
1056                 /* reserve space for Hardware header */
1057                 skb_reserve(skb, hh_len);
1058
1059                 /* create space for UDP/IP header */
1060                 skb_put(skb,fragheaderlen + transhdrlen);
1061
1062                 /* initialize network header pointer */
1063                 skb_reset_network_header(skb);
1064
1065                 /* initialize protocol header pointer */
1066                 skb->transport_header = skb->network_header + fragheaderlen;
1067
1068                 skb->ip_summed = CHECKSUM_PARTIAL;
1069                 skb->csum = 0;
1070                 sk->sk_sndmsg_off = 0;
1071         }
1072
1073         err = skb_append_datato_frags(sk,skb, getfrag, from,
1074                                       (length - transhdrlen));
1075         if (!err) {
1076                 struct frag_hdr fhdr;
1077
1078                 /* Specify the length of each IPv6 datagram fragment.
1079                  * It has to be a multiple of 8.
1080                  */
1081                 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1082                                              sizeof(struct frag_hdr)) & ~7;
1083                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1084                 ipv6_select_ident(&fhdr);
1085                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1086                 __skb_queue_tail(&sk->sk_write_queue, skb);
1087
1088                 return 0;
1089         }
1090         /* There is not enough support do UPD LSO,
1091          * so follow normal path
1092          */
1093         kfree_skb(skb);
1094
1095         return err;
1096 }
1097
1098 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1099                                                gfp_t gfp)
1100 {
1101         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1102 }
1103
1104 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1105                                                 gfp_t gfp)
1106 {
1107         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1108 }
1109
1110 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1111         int offset, int len, int odd, struct sk_buff *skb),
1112         void *from, int length, int transhdrlen,
1113         int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1114         struct rt6_info *rt, unsigned int flags)
1115 {
1116         struct inet_sock *inet = inet_sk(sk);
1117         struct ipv6_pinfo *np = inet6_sk(sk);
1118         struct sk_buff *skb;
1119         unsigned int maxfraglen, fragheaderlen;
1120         int exthdrlen;
1121         int hh_len;
1122         int mtu;
1123         int copy;
1124         int err;
1125         int offset = 0;
1126         int csummode = CHECKSUM_NONE;
1127
1128         if (flags&MSG_PROBE)
1129                 return 0;
1130         if (skb_queue_empty(&sk->sk_write_queue)) {
1131                 /*
1132                  * setup for corking
1133                  */
1134                 if (opt) {
1135                         if (WARN_ON(np->cork.opt))
1136                                 return -EINVAL;
1137
1138                         np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1139                         if (unlikely(np->cork.opt == NULL))
1140                                 return -ENOBUFS;
1141
1142                         np->cork.opt->tot_len = opt->tot_len;
1143                         np->cork.opt->opt_flen = opt->opt_flen;
1144                         np->cork.opt->opt_nflen = opt->opt_nflen;
1145
1146                         np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1147                                                             sk->sk_allocation);
1148                         if (opt->dst0opt && !np->cork.opt->dst0opt)
1149                                 return -ENOBUFS;
1150
1151                         np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1152                                                             sk->sk_allocation);
1153                         if (opt->dst1opt && !np->cork.opt->dst1opt)
1154                                 return -ENOBUFS;
1155
1156                         np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1157                                                            sk->sk_allocation);
1158                         if (opt->hopopt && !np->cork.opt->hopopt)
1159                                 return -ENOBUFS;
1160
1161                         np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1162                                                             sk->sk_allocation);
1163                         if (opt->srcrt && !np->cork.opt->srcrt)
1164                                 return -ENOBUFS;
1165
1166                         /* need source address above miyazawa*/
1167                 }
1168                 dst_hold(&rt->u.dst);
1169                 inet->cork.dst = &rt->u.dst;
1170                 inet->cork.fl = *fl;
1171                 np->cork.hop_limit = hlimit;
1172                 np->cork.tclass = tclass;
1173                 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1174                       rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1175                 if (np->frag_size < mtu) {
1176                         if (np->frag_size)
1177                                 mtu = np->frag_size;
1178                 }
1179                 inet->cork.fragsize = mtu;
1180                 if (dst_allfrag(rt->u.dst.path))
1181                         inet->cork.flags |= IPCORK_ALLFRAG;
1182                 inet->cork.length = 0;
1183                 sk->sk_sndmsg_page = NULL;
1184                 sk->sk_sndmsg_off = 0;
1185                 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1186                             rt->rt6i_nfheader_len;
1187                 length += exthdrlen;
1188                 transhdrlen += exthdrlen;
1189         } else {
1190                 rt = (struct rt6_info *)inet->cork.dst;
1191                 fl = &inet->cork.fl;
1192                 opt = np->cork.opt;
1193                 transhdrlen = 0;
1194                 exthdrlen = 0;
1195                 mtu = inet->cork.fragsize;
1196         }
1197
1198         hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1199
1200         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1201                         (opt ? opt->opt_nflen : 0);
1202         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1203
1204         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1205                 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1206                         ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1207                         return -EMSGSIZE;
1208                 }
1209         }
1210
1211         /*
1212          * Let's try using as much space as possible.
1213          * Use MTU if total length of the message fits into the MTU.
1214          * Otherwise, we need to reserve fragment header and
1215          * fragment alignment (= 8-15 octects, in total).
1216          *
1217          * Note that we may need to "move" the data from the tail of
1218          * of the buffer to the new fragment when we split
1219          * the message.
1220          *
1221          * FIXME: It may be fragmented into multiple chunks
1222          *        at once if non-fragmentable extension headers
1223          *        are too large.
1224          * --yoshfuji
1225          */
1226
1227         inet->cork.length += length;
1228         if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1229             (rt->u.dst.dev->features & NETIF_F_UFO)) {
1230
1231                 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1232                                           fragheaderlen, transhdrlen, mtu,
1233                                           flags);
1234                 if (err)
1235                         goto error;
1236                 return 0;
1237         }
1238
1239         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1240                 goto alloc_new_skb;
1241
1242         while (length > 0) {
1243                 /* Check if the remaining data fits into current packet. */
1244                 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1245                 if (copy < length)
1246                         copy = maxfraglen - skb->len;
1247
1248                 if (copy <= 0) {
1249                         char *data;
1250                         unsigned int datalen;
1251                         unsigned int fraglen;
1252                         unsigned int fraggap;
1253                         unsigned int alloclen;
1254                         struct sk_buff *skb_prev;
1255 alloc_new_skb:
1256                         skb_prev = skb;
1257
1258                         /* There's no room in the current skb */
1259                         if (skb_prev)
1260                                 fraggap = skb_prev->len - maxfraglen;
1261                         else
1262                                 fraggap = 0;
1263
1264                         /*
1265                          * If remaining data exceeds the mtu,
1266                          * we know we need more fragment(s).
1267                          */
1268                         datalen = length + fraggap;
1269                         if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1270                                 datalen = maxfraglen - fragheaderlen;
1271
1272                         fraglen = datalen + fragheaderlen;
1273                         if ((flags & MSG_MORE) &&
1274                             !(rt->u.dst.dev->features&NETIF_F_SG))
1275                                 alloclen = mtu;
1276                         else
1277                                 alloclen = datalen + fragheaderlen;
1278
1279                         /*
1280                          * The last fragment gets additional space at tail.
1281                          * Note: we overallocate on fragments with MSG_MODE
1282                          * because we have no idea if we're the last one.
1283                          */
1284                         if (datalen == length + fraggap)
1285                                 alloclen += rt->u.dst.trailer_len;
1286
1287                         /*
1288                          * We just reserve space for fragment header.
1289                          * Note: this may be overallocation if the message
1290                          * (without MSG_MORE) fits into the MTU.
1291                          */
1292                         alloclen += sizeof(struct frag_hdr);
1293
1294                         if (transhdrlen) {
1295                                 skb = sock_alloc_send_skb(sk,
1296                                                 alloclen + hh_len,
1297                                                 (flags & MSG_DONTWAIT), &err);
1298                         } else {
1299                                 skb = NULL;
1300                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1301                                     2 * sk->sk_sndbuf)
1302                                         skb = sock_wmalloc(sk,
1303                                                            alloclen + hh_len, 1,
1304                                                            sk->sk_allocation);
1305                                 if (unlikely(skb == NULL))
1306                                         err = -ENOBUFS;
1307                         }
1308                         if (skb == NULL)
1309                                 goto error;
1310                         /*
1311                          *      Fill in the control structures
1312                          */
1313                         skb->ip_summed = csummode;
1314                         skb->csum = 0;
1315                         /* reserve for fragmentation */
1316                         skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1317
1318                         /*
1319                          *      Find where to start putting bytes
1320                          */
1321                         data = skb_put(skb, fraglen);
1322                         skb_set_network_header(skb, exthdrlen);
1323                         data += fragheaderlen;
1324                         skb->transport_header = (skb->network_header +
1325                                                  fragheaderlen);
1326                         if (fraggap) {
1327                                 skb->csum = skb_copy_and_csum_bits(
1328                                         skb_prev, maxfraglen,
1329                                         data + transhdrlen, fraggap, 0);
1330                                 skb_prev->csum = csum_sub(skb_prev->csum,
1331                                                           skb->csum);
1332                                 data += fraggap;
1333                                 pskb_trim_unique(skb_prev, maxfraglen);
1334                         }
1335                         copy = datalen - transhdrlen - fraggap;
1336                         if (copy < 0) {
1337                                 err = -EINVAL;
1338                                 kfree_skb(skb);
1339                                 goto error;
1340                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1341                                 err = -EFAULT;
1342                                 kfree_skb(skb);
1343                                 goto error;
1344                         }
1345
1346                         offset += copy;
1347                         length -= datalen - fraggap;
1348                         transhdrlen = 0;
1349                         exthdrlen = 0;
1350                         csummode = CHECKSUM_NONE;
1351
1352                         /*
1353                          * Put the packet on the pending queue
1354                          */
1355                         __skb_queue_tail(&sk->sk_write_queue, skb);
1356                         continue;
1357                 }
1358
1359                 if (copy > length)
1360                         copy = length;
1361
1362                 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1363                         unsigned int off;
1364
1365                         off = skb->len;
1366                         if (getfrag(from, skb_put(skb, copy),
1367                                                 offset, copy, off, skb) < 0) {
1368                                 __skb_trim(skb, off);
1369                                 err = -EFAULT;
1370                                 goto error;
1371                         }
1372                 } else {
1373                         int i = skb_shinfo(skb)->nr_frags;
1374                         skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1375                         struct page *page = sk->sk_sndmsg_page;
1376                         int off = sk->sk_sndmsg_off;
1377                         unsigned int left;
1378
1379                         if (page && (left = PAGE_SIZE - off) > 0) {
1380                                 if (copy >= left)
1381                                         copy = left;
1382                                 if (page != frag->page) {
1383                                         if (i == MAX_SKB_FRAGS) {
1384                                                 err = -EMSGSIZE;
1385                                                 goto error;
1386                                         }
1387                                         get_page(page);
1388                                         skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1389                                         frag = &skb_shinfo(skb)->frags[i];
1390                                 }
1391                         } else if(i < MAX_SKB_FRAGS) {
1392                                 if (copy > PAGE_SIZE)
1393                                         copy = PAGE_SIZE;
1394                                 page = alloc_pages(sk->sk_allocation, 0);
1395                                 if (page == NULL) {
1396                                         err = -ENOMEM;
1397                                         goto error;
1398                                 }
1399                                 sk->sk_sndmsg_page = page;
1400                                 sk->sk_sndmsg_off = 0;
1401
1402                                 skb_fill_page_desc(skb, i, page, 0, 0);
1403                                 frag = &skb_shinfo(skb)->frags[i];
1404                         } else {
1405                                 err = -EMSGSIZE;
1406                                 goto error;
1407                         }
1408                         if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1409                                 err = -EFAULT;
1410                                 goto error;
1411                         }
1412                         sk->sk_sndmsg_off += copy;
1413                         frag->size += copy;
1414                         skb->len += copy;
1415                         skb->data_len += copy;
1416                         skb->truesize += copy;
1417                         atomic_add(copy, &sk->sk_wmem_alloc);
1418                 }
1419                 offset += copy;
1420                 length -= copy;
1421         }
1422         return 0;
1423 error:
1424         inet->cork.length -= length;
1425         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1426         return err;
1427 }
1428
1429 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1430 {
1431         if (np->cork.opt) {
1432                 kfree(np->cork.opt->dst0opt);
1433                 kfree(np->cork.opt->dst1opt);
1434                 kfree(np->cork.opt->hopopt);
1435                 kfree(np->cork.opt->srcrt);
1436                 kfree(np->cork.opt);
1437                 np->cork.opt = NULL;
1438         }
1439
1440         if (inet->cork.dst) {
1441                 dst_release(inet->cork.dst);
1442                 inet->cork.dst = NULL;
1443                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1444         }
1445         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1446 }
1447
1448 int ip6_push_pending_frames(struct sock *sk)
1449 {
1450         struct sk_buff *skb, *tmp_skb;
1451         struct sk_buff **tail_skb;
1452         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1453         struct inet_sock *inet = inet_sk(sk);
1454         struct ipv6_pinfo *np = inet6_sk(sk);
1455         struct net *net = sock_net(sk);
1456         struct ipv6hdr *hdr;
1457         struct ipv6_txoptions *opt = np->cork.opt;
1458         struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1459         struct flowi *fl = &inet->cork.fl;
1460         unsigned char proto = fl->proto;
1461         int err = 0;
1462
1463         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1464                 goto out;
1465         tail_skb = &(skb_shinfo(skb)->frag_list);
1466
1467         /* move skb->data to ip header from ext header */
1468         if (skb->data < skb_network_header(skb))
1469                 __skb_pull(skb, skb_network_offset(skb));
1470         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1471                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1472                 *tail_skb = tmp_skb;
1473                 tail_skb = &(tmp_skb->next);
1474                 skb->len += tmp_skb->len;
1475                 skb->data_len += tmp_skb->len;
1476                 skb->truesize += tmp_skb->truesize;
1477                 __sock_put(tmp_skb->sk);
1478                 tmp_skb->destructor = NULL;
1479                 tmp_skb->sk = NULL;
1480         }
1481
1482         /* Allow local fragmentation. */
1483         if (np->pmtudisc < IPV6_PMTUDISC_DO)
1484                 skb->local_df = 1;
1485
1486         ipv6_addr_copy(final_dst, &fl->fl6_dst);
1487         __skb_pull(skb, skb_network_header_len(skb));
1488         if (opt && opt->opt_flen)
1489                 ipv6_push_frag_opts(skb, opt, &proto);
1490         if (opt && opt->opt_nflen)
1491                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1492
1493         skb_push(skb, sizeof(struct ipv6hdr));
1494         skb_reset_network_header(skb);
1495         hdr = ipv6_hdr(skb);
1496
1497         *(__be32*)hdr = fl->fl6_flowlabel |
1498                      htonl(0x60000000 | ((int)np->cork.tclass << 20));
1499
1500         hdr->hop_limit = np->cork.hop_limit;
1501         hdr->nexthdr = proto;
1502         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1503         ipv6_addr_copy(&hdr->daddr, final_dst);
1504
1505         skb->priority = sk->sk_priority;
1506         skb->mark = sk->sk_mark;
1507
1508         skb_dst_set(skb, dst_clone(&rt->u.dst));
1509         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1510         if (proto == IPPROTO_ICMPV6) {
1511                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1512
1513                 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1514                 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1515         }
1516
1517         err = ip6_local_out(skb);
1518         if (err) {
1519                 if (err > 0)
1520                         err = np->recverr ? net_xmit_errno(err) : 0;
1521                 if (err)
1522                         goto error;
1523         }
1524
1525 out:
1526         ip6_cork_release(inet, np);
1527         return err;
1528 error:
1529         goto out;
1530 }
1531
1532 void ip6_flush_pending_frames(struct sock *sk)
1533 {
1534         struct sk_buff *skb;
1535
1536         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1537                 if (skb_dst(skb))
1538                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1539                                       IPSTATS_MIB_OUTDISCARDS);
1540                 kfree_skb(skb);
1541         }
1542
1543         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1544 }