/*
 * net/ipv6/ip6_output.c — captured from a net-next-2.6 git-blame view.
 * Commit context: "udpv6: Remove unused skb argument of ipv6_select_ident()".
 */
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
1da177e4
LT
40
41#include <linux/netfilter.h>
42#include <linux/netfilter_ipv6.h>
43
44#include <net/sock.h>
45#include <net/snmp.h>
46
47#include <net/ipv6.h>
48#include <net/ndisc.h>
49#include <net/protocol.h>
50#include <net/ip6_route.h>
51#include <net/addrconf.h>
52#include <net/rawv6.h>
53#include <net/icmp.h>
54#include <net/xfrm.h>
55#include <net/checksum.h>
7bc570c8 56#include <linux/mroute6.h>
1da177e4
LT
57
58static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
ef76bc23
HX
60int __ip6_local_out(struct sk_buff *skb)
61{
62 int len;
63
64 len = skb->len - sizeof(struct ipv6hdr);
65 if (len > IPV6_MAXPLEN)
66 len = 0;
67 ipv6_hdr(skb)->payload_len = htons(len);
68
adf30907 69 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
ef76bc23
HX
70 dst_output);
71}
72
/*
 * Emit a locally generated IPv6 packet: run the LOCAL_OUT hook and,
 * if netfilter accepts it (verdict 1), hand it to dst_output().
 */
int ip6_local_out(struct sk_buff *skb)
{
	int ret = __ip6_local_out(skb);

	if (likely(ret == 1))
		ret = dst_output(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
84
ad643a79 85static int ip6_output_finish(struct sk_buff *skb)
1da177e4 86{
adf30907 87 struct dst_entry *dst = skb_dst(skb);
1da177e4 88
3644f0ce
SH
89 if (dst->hh)
90 return neigh_hh_output(dst->hh, skb);
91 else if (dst->neighbour)
1da177e4
LT
92 return dst->neighbour->output(skb);
93
483a47d2
DL
94 IP6_INC_STATS_BH(dev_net(dst->dev),
95 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
96 kfree_skb(skb);
97 return -EINVAL;
98
99}
100
101/* dev_loopback_xmit for use with netfilter. */
102static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
103{
459a98ed 104 skb_reset_mac_header(newskb);
bbe735e4 105 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
106 newskb->pkt_type = PACKET_LOOPBACK;
107 newskb->ip_summed = CHECKSUM_UNNECESSARY;
adf30907 108 WARN_ON(!skb_dst(newskb));
1da177e4
LT
109
110 netif_rx(newskb);
111 return 0;
112}
113
114
115static int ip6_output2(struct sk_buff *skb)
116{
adf30907 117 struct dst_entry *dst = skb_dst(skb);
1da177e4
LT
118 struct net_device *dev = dst->dev;
119
120 skb->protocol = htons(ETH_P_IPV6);
121 skb->dev = dev;
122
0660e03f 123 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
1da177e4 124 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
adf30907 125 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4
LT
126
127 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
bd91b8bf
BT
128 ((mroute6_socket(dev_net(dev)) &&
129 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
130 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
131 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
132 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
133
134 /* Do not check for IFF_ALLMULTI; multicast routing
135 is not supported in any case.
136 */
137 if (newskb)
6e23ae2a
PM
138 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
139 NULL, newskb->dev,
1da177e4
LT
140 ip6_dev_loopback_xmit);
141
0660e03f 142 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
143 IP6_INC_STATS(dev_net(dev), idev,
144 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
145 kfree_skb(skb);
146 return 0;
147 }
148 }
149
edf391ff
NH
150 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
151 skb->len);
1da177e4
LT
152 }
153
6e23ae2a
PM
154 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
155 ip6_output_finish);
1da177e4
LT
156}
157
628a5c56
JH
158static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
159{
160 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
161
162 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
adf30907 163 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
628a5c56
JH
164}
165
1da177e4
LT
166int ip6_output(struct sk_buff *skb)
167{
adf30907 168 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
778d80be 169 if (unlikely(idev->cnf.disable_ipv6)) {
adf30907 170 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
3bd653c8 171 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
172 kfree_skb(skb);
173 return 0;
174 }
175
628a5c56 176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
adf30907 177 dst_allfrag(skb_dst(skb)))
1da177e4
LT
178 return ip6_fragment(skb, ip6_output2);
179 else
180 return ip6_output2(skb);
181}
182
1da177e4
LT
183/*
184 * xmit an sk_buff (used by TCP)
185 */
186
187int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 struct ipv6_txoptions *opt, int ipfragok)
189{
3bd653c8 190 struct net *net = sock_net(sk);
b30bd282 191 struct ipv6_pinfo *np = inet6_sk(sk);
1da177e4 192 struct in6_addr *first_hop = &fl->fl6_dst;
adf30907 193 struct dst_entry *dst = skb_dst(skb);
1da177e4
LT
194 struct ipv6hdr *hdr;
195 u8 proto = fl->proto;
196 int seg_len = skb->len;
41a1f8ea 197 int hlimit, tclass;
1da177e4
LT
198 u32 mtu;
199
200 if (opt) {
c2636b4d 201 unsigned int head_room;
1da177e4
LT
202
203 /* First: exthdrs may take lots of space (~8K for now)
204 MAX_HEADER is not enough.
205 */
206 head_room = opt->opt_nflen + opt->opt_flen;
207 seg_len += head_room;
208 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
209
210 if (skb_headroom(skb) < head_room) {
211 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d 212 if (skb2 == NULL) {
adf30907 213 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
214 IPSTATS_MIB_OUTDISCARDS);
215 kfree_skb(skb);
1da177e4
LT
216 return -ENOBUFS;
217 }
a11d206d
YH
218 kfree_skb(skb);
219 skb = skb2;
1da177e4
LT
220 if (sk)
221 skb_set_owner_w(skb, sk);
222 }
223 if (opt->opt_flen)
224 ipv6_push_frag_opts(skb, opt, &proto);
225 if (opt->opt_nflen)
226 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
227 }
228
e2d1bca7
ACM
229 skb_push(skb, sizeof(struct ipv6hdr));
230 skb_reset_network_header(skb);
0660e03f 231 hdr = ipv6_hdr(skb);
1da177e4 232
77e2f14f
WY
233 /* Allow local fragmentation. */
234 if (ipfragok)
235 skb->local_df = 1;
236
1da177e4
LT
237 /*
238 * Fill in the IPv6 header
239 */
240
1da177e4
LT
241 hlimit = -1;
242 if (np)
243 hlimit = np->hop_limit;
244 if (hlimit < 0)
6b75d090 245 hlimit = ip6_dst_hoplimit(dst);
1da177e4 246
41a1f8ea
YH
247 tclass = -1;
248 if (np)
249 tclass = np->tclass;
250 if (tclass < 0)
251 tclass = 0;
252
90bcaf7b 253 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
41a1f8ea 254
1da177e4
LT
255 hdr->payload_len = htons(seg_len);
256 hdr->nexthdr = proto;
257 hdr->hop_limit = hlimit;
258
259 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
260 ipv6_addr_copy(&hdr->daddr, first_hop);
261
a2c2064f 262 skb->priority = sk->sk_priority;
4a19ec58 263 skb->mark = sk->sk_mark;
a2c2064f 264
1da177e4 265 mtu = dst_mtu(dst);
283d07ac 266 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
adf30907 267 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 268 IPSTATS_MIB_OUT, skb->len);
6e23ae2a 269 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
6869c4d8 270 dst_output);
1da177e4
LT
271 }
272
273 if (net_ratelimit())
274 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
275 skb->dev = dst->dev;
276 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
adf30907 277 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
278 kfree_skb(skb);
279 return -EMSGSIZE;
280}
281
7159039a
YH
282EXPORT_SYMBOL(ip6_xmit);
283
1da177e4
LT
284/*
285 * To avoid extra problems ND packets are send through this
286 * routine. It's code duplication but I really want to avoid
287 * extra checks since ipv6_build_header is used by TCP (which
288 * is for us performance critical)
289 */
290
291int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
9acd9f3a 292 const struct in6_addr *saddr, const struct in6_addr *daddr,
1da177e4
LT
293 int proto, int len)
294{
295 struct ipv6_pinfo *np = inet6_sk(sk);
296 struct ipv6hdr *hdr;
297 int totlen;
298
299 skb->protocol = htons(ETH_P_IPV6);
300 skb->dev = dev;
301
302 totlen = len + sizeof(struct ipv6hdr);
303
55f79cc0
ACM
304 skb_reset_network_header(skb);
305 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 306 hdr = ipv6_hdr(skb);
1da177e4 307
ae08e1f0 308 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
309
310 hdr->payload_len = htons(len);
311 hdr->nexthdr = proto;
312 hdr->hop_limit = np->hop_limit;
313
314 ipv6_addr_copy(&hdr->saddr, saddr);
315 ipv6_addr_copy(&hdr->daddr, daddr);
316
317 return 0;
318}
319
320static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
321{
322 struct ip6_ra_chain *ra;
323 struct sock *last = NULL;
324
325 read_lock(&ip6_ra_lock);
326 for (ra = ip6_ra_chain; ra; ra = ra->next) {
327 struct sock *sk = ra->sk;
0bd1b59b
AM
328 if (sk && ra->sel == sel &&
329 (!sk->sk_bound_dev_if ||
330 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
331 if (last) {
332 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
333 if (skb2)
334 rawv6_rcv(last, skb2);
335 }
336 last = sk;
337 }
338 }
339
340 if (last) {
341 rawv6_rcv(last, skb);
342 read_unlock(&ip6_ra_lock);
343 return 1;
344 }
345 read_unlock(&ip6_ra_lock);
346 return 0;
347}
348
e21e0b5f
VN
349static int ip6_forward_proxy_check(struct sk_buff *skb)
350{
0660e03f 351 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f
VN
352 u8 nexthdr = hdr->nexthdr;
353 int offset;
354
355 if (ipv6_ext_hdr(nexthdr)) {
356 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
357 if (offset < 0)
358 return 0;
359 } else
360 offset = sizeof(struct ipv6hdr);
361
362 if (nexthdr == IPPROTO_ICMPV6) {
363 struct icmp6hdr *icmp6;
364
d56f90a7
ACM
365 if (!pskb_may_pull(skb, (skb_network_header(skb) +
366 offset + 1 - skb->data)))
e21e0b5f
VN
367 return 0;
368
d56f90a7 369 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
370
371 switch (icmp6->icmp6_type) {
372 case NDISC_ROUTER_SOLICITATION:
373 case NDISC_ROUTER_ADVERTISEMENT:
374 case NDISC_NEIGHBOUR_SOLICITATION:
375 case NDISC_NEIGHBOUR_ADVERTISEMENT:
376 case NDISC_REDIRECT:
377 /* For reaction involving unicast neighbor discovery
378 * message destined to the proxied address, pass it to
379 * input function.
380 */
381 return 1;
382 default:
383 break;
384 }
385 }
386
74553b09
VN
387 /*
388 * The proxying router can't forward traffic sent to a link-local
389 * address, so signal the sender and discard the packet. This
390 * behavior is clarified by the MIPv6 specification.
391 */
392 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
393 dst_link_failure(skb);
394 return -1;
395 }
396
e21e0b5f
VN
397 return 0;
398}
399
/* Tail of the FORWARD netfilter hook: hand the skb to the route's
 * output path. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
404
405int ip6_forward(struct sk_buff *skb)
406{
adf30907 407 struct dst_entry *dst = skb_dst(skb);
0660e03f 408 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 409 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 410 struct net *net = dev_net(dst->dev);
1ab1457c 411
53b7997f 412 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
413 goto error;
414
4497b076
BH
415 if (skb_warn_if_lro(skb))
416 goto drop;
417
1da177e4 418 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
3bd653c8 419 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
420 goto drop;
421 }
422
35fc92a9 423 skb_forward_csum(skb);
1da177e4
LT
424
425 /*
426 * We DO NOT make any processing on
427 * RA packets, pushing them to user level AS IS
428 * without ane WARRANTY that application will be able
429 * to interpret them. The reason is that we
430 * cannot make anything clever here.
431 *
432 * We are not end-node, so that if packet contains
433 * AH/ESP, we cannot make anything.
434 * Defragmentation also would be mistake, RA packets
435 * cannot be fragmented, because there is no warranty
436 * that different fragments will go along one path. --ANK
437 */
438 if (opt->ra) {
d56f90a7 439 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
440 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
441 return 0;
442 }
443
444 /*
445 * check and decrement ttl
446 */
447 if (hdr->hop_limit <= 1) {
448 /* Force OUTPUT device used as source address */
449 skb->dev = dst->dev;
450 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
451 0, skb->dev);
483a47d2
DL
452 IP6_INC_STATS_BH(net,
453 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
454
455 kfree_skb(skb);
456 return -ETIMEDOUT;
457 }
458
fbea49e1 459 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 460 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 461 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
462 int proxied = ip6_forward_proxy_check(skb);
463 if (proxied > 0)
e21e0b5f 464 return ip6_input(skb);
74553b09 465 else if (proxied < 0) {
3bd653c8
DL
466 IP6_INC_STATS(net, ip6_dst_idev(dst),
467 IPSTATS_MIB_INDISCARDS);
74553b09
VN
468 goto drop;
469 }
e21e0b5f
VN
470 }
471
1da177e4 472 if (!xfrm6_route_forward(skb)) {
3bd653c8 473 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
474 goto drop;
475 }
adf30907 476 dst = skb_dst(skb);
1da177e4
LT
477
478 /* IPv6 specs say nothing about it, but it is clear that we cannot
479 send redirects to source routed frames.
1e5dc146 480 We don't send redirects to frames decapsulated from IPsec.
1da177e4 481 */
1e5dc146 482 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
def8b4fa 483 !skb_sec_path(skb)) {
1da177e4
LT
484 struct in6_addr *target = NULL;
485 struct rt6_info *rt;
486 struct neighbour *n = dst->neighbour;
487
488 /*
489 * incoming and outgoing devices are the same
490 * send a redirect.
491 */
492
493 rt = (struct rt6_info *) dst;
494 if ((rt->rt6i_flags & RTF_GATEWAY))
495 target = (struct in6_addr*)&n->primary_key;
496 else
497 target = &hdr->daddr;
498
499 /* Limit redirects both by destination (here)
500 and by source (inside ndisc_send_redirect)
501 */
502 if (xrlim_allow(dst, 1*HZ))
503 ndisc_send_redirect(skb, n, target);
5bb1ab09
DS
504 } else {
505 int addrtype = ipv6_addr_type(&hdr->saddr);
506
1da177e4 507 /* This check is security critical. */
f81b2e7d
YH
508 if (addrtype == IPV6_ADDR_ANY ||
509 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
510 goto error;
511 if (addrtype & IPV6_ADDR_LINKLOCAL) {
512 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
513 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
514 goto error;
515 }
1da177e4
LT
516 }
517
518 if (skb->len > dst_mtu(dst)) {
519 /* Again, force OUTPUT device used as source address */
520 skb->dev = dst->dev;
521 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
483a47d2
DL
522 IP6_INC_STATS_BH(net,
523 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
524 IP6_INC_STATS_BH(net,
525 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
526 kfree_skb(skb);
527 return -EMSGSIZE;
528 }
529
530 if (skb_cow(skb, dst->dev->hard_header_len)) {
3bd653c8 531 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
532 goto drop;
533 }
534
0660e03f 535 hdr = ipv6_hdr(skb);
1da177e4
LT
536
537 /* Mangling hops number delayed to point after skb COW */
1ab1457c 538
1da177e4
LT
539 hdr->hop_limit--;
540
483a47d2 541 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
6e23ae2a
PM
542 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
543 ip6_forward_finish);
1da177e4
LT
544
545error:
483a47d2 546 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
547drop:
548 kfree_skb(skb);
549 return -EINVAL;
550}
551
552static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
553{
554 to->pkt_type = from->pkt_type;
555 to->priority = from->priority;
556 to->protocol = from->protocol;
adf30907
ED
557 skb_dst_drop(to);
558 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 559 to->dev = from->dev;
82e91ffe 560 to->mark = from->mark;
1da177e4
LT
561
562#ifdef CONFIG_NET_SCHED
563 to->tc_index = from->tc_index;
564#endif
e7ac05f3 565 nf_copy(to, from);
ba9dda3a
JK
566#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
567 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
568 to->nf_trace = from->nf_trace;
569#endif
984bc16c 570 skb_copy_secmark(to, from);
1da177e4
LT
571}
572
573int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
574{
575 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
576 struct ipv6_opt_hdr *exthdr =
577 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 578 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 579 int found_rhdr = 0;
0660e03f 580 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
581
582 while (offset + 1 <= packet_len) {
583
584 switch (**nexthdr) {
585
586 case NEXTHDR_HOP:
27637df9 587 break;
1da177e4 588 case NEXTHDR_ROUTING:
27637df9
MN
589 found_rhdr = 1;
590 break;
1da177e4 591 case NEXTHDR_DEST:
59fbb3a6 592#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
593 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
594 break;
595#endif
596 if (found_rhdr)
597 return offset;
1da177e4
LT
598 break;
599 default :
600 return offset;
601 }
27637df9
MN
602
603 offset += ipv6_optlen(exthdr);
604 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
605 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
606 offset);
1da177e4
LT
607 }
608
609 return offset;
610}
611
612static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
613{
1da177e4 614 struct sk_buff *frag;
adf30907 615 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
d91675f9 616 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
617 struct ipv6hdr *tmp_hdr;
618 struct frag_hdr *fh;
619 unsigned int mtu, hlen, left, len;
ae08e1f0 620 __be32 frag_id = 0;
1da177e4
LT
621 int ptr, offset = 0, err=0;
622 u8 *prevhdr, nexthdr = 0;
adf30907 623 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 624
1da177e4
LT
625 hlen = ip6_find_1stfragopt(skb, &prevhdr);
626 nexthdr = *prevhdr;
627
628a5c56 628 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
629
630 /* We must not fragment if the socket is set to force MTU discovery
631 * or if the skb it not generated by a local socket. (This last
632 * check should be redundant, but it's free.)
633 */
b5c15fc0 634 if (!skb->local_df) {
adf30907 635 skb->dev = skb_dst(skb)->dev;
b881ef76 636 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
adf30907 637 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 638 IPSTATS_MIB_FRAGFAILS);
b881ef76
JH
639 kfree_skb(skb);
640 return -EMSGSIZE;
641 }
642
d91675f9
YH
643 if (np && np->frag_size < mtu) {
644 if (np->frag_size)
645 mtu = np->frag_size;
646 }
647 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 648
4d9092bb 649 if (skb_has_frags(skb)) {
1da177e4 650 int first_len = skb_pagelen(skb);
29ffe1a5 651 int truesizes = 0;
1da177e4
LT
652
653 if (first_len - hlen > mtu ||
654 ((first_len - hlen) & 7) ||
655 skb_cloned(skb))
656 goto slow_path;
657
4d9092bb 658 skb_walk_frags(skb, frag) {
1da177e4
LT
659 /* Correct geometry. */
660 if (frag->len > mtu ||
661 ((frag->len & 7) && frag->next) ||
662 skb_headroom(frag) < hlen)
663 goto slow_path;
664
1da177e4
LT
665 /* Partially cloned skb? */
666 if (skb_shared(frag))
667 goto slow_path;
2fdba6b0
HX
668
669 BUG_ON(frag->sk);
670 if (skb->sk) {
2fdba6b0
HX
671 frag->sk = skb->sk;
672 frag->destructor = sock_wfree;
29ffe1a5 673 truesizes += frag->truesize;
2fdba6b0 674 }
1da177e4
LT
675 }
676
677 err = 0;
678 offset = 0;
679 frag = skb_shinfo(skb)->frag_list;
4d9092bb 680 skb_frag_list_init(skb);
1da177e4
LT
681 /* BUILD HEADER */
682
9a217a1c 683 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 684 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 685 if (!tmp_hdr) {
adf30907 686 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 687 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
688 return -ENOMEM;
689 }
690
1da177e4
LT
691 __skb_pull(skb, hlen);
692 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
693 __skb_push(skb, hlen);
694 skb_reset_network_header(skb);
d56f90a7 695 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 696
7ea2f2c5 697 ipv6_select_ident(fh);
1da177e4
LT
698 fh->nexthdr = nexthdr;
699 fh->reserved = 0;
700 fh->frag_off = htons(IP6_MF);
701 frag_id = fh->identification;
702
703 first_len = skb_pagelen(skb);
704 skb->data_len = first_len - skb_headlen(skb);
29ffe1a5 705 skb->truesize -= truesizes;
1da177e4 706 skb->len = first_len;
0660e03f
ACM
707 ipv6_hdr(skb)->payload_len = htons(first_len -
708 sizeof(struct ipv6hdr));
a11d206d
YH
709
710 dst_hold(&rt->u.dst);
1da177e4
LT
711
712 for (;;) {
713 /* Prepare header of the next frame,
714 * before previous one went down. */
715 if (frag) {
716 frag->ip_summed = CHECKSUM_NONE;
badff6d0 717 skb_reset_transport_header(frag);
1da177e4 718 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
719 __skb_push(frag, hlen);
720 skb_reset_network_header(frag);
d56f90a7
ACM
721 memcpy(skb_network_header(frag), tmp_hdr,
722 hlen);
1da177e4
LT
723 offset += skb->len - hlen - sizeof(struct frag_hdr);
724 fh->nexthdr = nexthdr;
725 fh->reserved = 0;
726 fh->frag_off = htons(offset);
727 if (frag->next != NULL)
728 fh->frag_off |= htons(IP6_MF);
729 fh->identification = frag_id;
0660e03f
ACM
730 ipv6_hdr(frag)->payload_len =
731 htons(frag->len -
732 sizeof(struct ipv6hdr));
1da177e4
LT
733 ip6_copy_metadata(frag, skb);
734 }
1ab1457c 735
1da177e4 736 err = output(skb);
dafee490 737 if(!err)
3bd653c8
DL
738 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
739 IPSTATS_MIB_FRAGCREATES);
dafee490 740
1da177e4
LT
741 if (err || !frag)
742 break;
743
744 skb = frag;
745 frag = skb->next;
746 skb->next = NULL;
747 }
748
a51482bd 749 kfree(tmp_hdr);
1da177e4
LT
750
751 if (err == 0) {
3bd653c8
DL
752 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
753 IPSTATS_MIB_FRAGOKS);
a11d206d 754 dst_release(&rt->u.dst);
1da177e4
LT
755 return 0;
756 }
757
758 while (frag) {
759 skb = frag->next;
760 kfree_skb(frag);
761 frag = skb;
762 }
763
3bd653c8
DL
764 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
765 IPSTATS_MIB_FRAGFAILS);
a11d206d 766 dst_release(&rt->u.dst);
1da177e4
LT
767 return err;
768 }
769
770slow_path:
771 left = skb->len - hlen; /* Space per frame */
772 ptr = hlen; /* Where to start from */
773
774 /*
775 * Fragment the datagram.
776 */
777
778 *prevhdr = NEXTHDR_FRAGMENT;
779
780 /*
781 * Keep copying data until we run out.
782 */
783 while(left > 0) {
784 len = left;
785 /* IF: it doesn't fit, use 'mtu' - the data space left */
786 if (len > mtu)
787 len = mtu;
788 /* IF: we are not sending upto and including the packet end
789 then align the next start on an eight byte boundary */
790 if (len < left) {
791 len &= ~7;
792 }
793 /*
794 * Allocate buffer.
795 */
796
f5184d26 797 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
64ce2073 798 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
adf30907 799 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 800 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
801 err = -ENOMEM;
802 goto fail;
803 }
804
805 /*
806 * Set up data on packet
807 */
808
809 ip6_copy_metadata(frag, skb);
810 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
811 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 812 skb_reset_network_header(frag);
badff6d0 813 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
814 frag->transport_header = (frag->network_header + hlen +
815 sizeof(struct frag_hdr));
1da177e4
LT
816
817 /*
818 * Charge the memory for the fragment to any owner
819 * it might possess
820 */
821 if (skb->sk)
822 skb_set_owner_w(frag, skb->sk);
823
824 /*
825 * Copy the packet header into the new buffer.
826 */
d626f62b 827 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
828
829 /*
830 * Build fragment header.
831 */
832 fh->nexthdr = nexthdr;
833 fh->reserved = 0;
f36d6ab1 834 if (!frag_id) {
7ea2f2c5 835 ipv6_select_ident(fh);
1da177e4
LT
836 frag_id = fh->identification;
837 } else
838 fh->identification = frag_id;
839
840 /*
841 * Copy a block of the IP datagram.
842 */
8984e41d 843 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
844 BUG();
845 left -= len;
846
847 fh->frag_off = htons(offset);
848 if (left > 0)
849 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
850 ipv6_hdr(frag)->payload_len = htons(frag->len -
851 sizeof(struct ipv6hdr));
1da177e4
LT
852
853 ptr += len;
854 offset += len;
855
856 /*
857 * Put this fragment into the sending queue.
858 */
1da177e4
LT
859 err = output(frag);
860 if (err)
861 goto fail;
dafee490 862
adf30907 863 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 864 IPSTATS_MIB_FRAGCREATES);
1da177e4 865 }
adf30907 866 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 867 IPSTATS_MIB_FRAGOKS);
1da177e4 868 kfree_skb(skb);
1da177e4
LT
869 return err;
870
871fail:
adf30907 872 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 873 IPSTATS_MIB_FRAGFAILS);
1ab1457c 874 kfree_skb(skb);
1da177e4
LT
875 return err;
876}
877
cf6b1982
YH
878static inline int ip6_rt_check(struct rt6key *rt_key,
879 struct in6_addr *fl_addr,
880 struct in6_addr *addr_cache)
881{
882 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
883 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
884}
885
497c615a
HX
886static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
887 struct dst_entry *dst,
888 struct flowi *fl)
1da177e4 889{
497c615a
HX
890 struct ipv6_pinfo *np = inet6_sk(sk);
891 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 892
497c615a
HX
893 if (!dst)
894 goto out;
895
896 /* Yes, checking route validity in not connected
897 * case is not very simple. Take into account,
898 * that we do not support routing by source, TOS,
899 * and MSG_DONTROUTE --ANK (980726)
900 *
cf6b1982
YH
901 * 1. ip6_rt_check(): If route was host route,
902 * check that cached destination is current.
497c615a
HX
903 * If it is network route, we still may
904 * check its validity using saved pointer
905 * to the last used address: daddr_cache.
906 * We do not want to save whole address now,
907 * (because main consumer of this service
908 * is tcp, which has not this problem),
909 * so that the last trick works only on connected
910 * sockets.
911 * 2. oif also should be the same.
912 */
cf6b1982 913 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
8e1ef0a9
YH
914#ifdef CONFIG_IPV6_SUBTREES
915 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
916#endif
cf6b1982 917 (fl->oif && fl->oif != dst->dev->ifindex)) {
497c615a
HX
918 dst_release(dst);
919 dst = NULL;
1da177e4
LT
920 }
921
497c615a
HX
922out:
923 return dst;
924}
925
926static int ip6_dst_lookup_tail(struct sock *sk,
927 struct dst_entry **dst, struct flowi *fl)
928{
929 int err;
3b1e0a65 930 struct net *net = sock_net(sk);
497c615a 931
1da177e4 932 if (*dst == NULL)
8a3edd80 933 *dst = ip6_route_output(net, sk, fl);
1da177e4
LT
934
935 if ((err = (*dst)->error))
936 goto out_err_release;
937
938 if (ipv6_addr_any(&fl->fl6_src)) {
191cd582 939 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
7cbca67c
YH
940 &fl->fl6_dst,
941 sk ? inet6_sk(sk)->srcprefs : 0,
942 &fl->fl6_src);
44456d37 943 if (err)
1da177e4 944 goto out_err_release;
1da177e4
LT
945 }
946
95c385b4 947#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
948 /*
949 * Here if the dst entry we've looked up
950 * has a neighbour entry that is in the INCOMPLETE
951 * state and the src address from the flow is
952 * marked as OPTIMISTIC, we release the found
953 * dst entry and replace it instead with the
954 * dst entry of the nexthop router
955 */
956 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
957 struct inet6_ifaddr *ifp;
958 struct flowi fl_gw;
959 int redirect;
960
961 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
962 (*dst)->dev, 1);
963
964 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
965 if (ifp)
966 in6_ifa_put(ifp);
967
968 if (redirect) {
969 /*
970 * We need to get the dst entry for the
971 * default router instead
972 */
973 dst_release(*dst);
974 memcpy(&fl_gw, fl, sizeof(struct flowi));
975 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
976 *dst = ip6_route_output(net, sk, &fl_gw);
977 if ((err = (*dst)->error))
978 goto out_err_release;
95c385b4 979 }
e550dfb0 980 }
95c385b4
NH
981#endif
982
1da177e4
LT
983 return 0;
984
985out_err_release:
ca46f9c8 986 if (err == -ENETUNREACH)
483a47d2 987 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
988 dst_release(*dst);
989 *dst = NULL;
990 return err;
991}
34a0b3cd 992
497c615a
HX
993/**
994 * ip6_dst_lookup - perform route lookup on flow
995 * @sk: socket which provides route info
996 * @dst: pointer to dst_entry * for result
997 * @fl: flow to lookup
998 *
999 * This function performs a route lookup on the given flow.
1000 *
1001 * It returns zero on success, or a standard errno code on error.
1002 */
1003int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1004{
1005 *dst = NULL;
1006 return ip6_dst_lookup_tail(sk, dst, fl);
1007}
3cf3dc6c
ACM
1008EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1009
497c615a
HX
1010/**
1011 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1012 * @sk: socket which provides the dst cache and route info
1013 * @dst: pointer to dst_entry * for result
1014 * @fl: flow to lookup
1015 *
1016 * This function performs a route lookup on the given flow with the
1017 * possibility of using the cached route in the socket if it is valid.
1018 * It will take the socket dst lock when operating on the dst cache.
1019 * As a result, this function can only be used in process context.
1020 *
1021 * It returns zero on success, or a standard errno code on error.
1022 */
1023int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1024{
1025 *dst = NULL;
1026 if (sk) {
1027 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1028 *dst = ip6_sk_dst_check(sk, *dst, fl);
1029 }
1030
1031 return ip6_dst_lookup_tail(sk, dst, fl);
1032}
1033EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1034
34a0b3cd 1035static inline int ip6_ufo_append_data(struct sock *sk,
e89e9cf5
AR
1036 int getfrag(void *from, char *to, int offset, int len,
1037 int odd, struct sk_buff *skb),
1038 void *from, int length, int hh_len, int fragheaderlen,
1039 int transhdrlen, int mtu,unsigned int flags)
1040
1041{
1042 struct sk_buff *skb;
1043 int err;
1044
1045 /* There is support for UDP large send offload by network
1046 * device, so create one single skb packet containing complete
1047 * udp datagram
1048 */
1049 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1050 skb = sock_alloc_send_skb(sk,
1051 hh_len + fragheaderlen + transhdrlen + 20,
1052 (flags & MSG_DONTWAIT), &err);
1053 if (skb == NULL)
1054 return -ENOMEM;
1055
1056 /* reserve space for Hardware header */
1057 skb_reserve(skb, hh_len);
1058
1059 /* create space for UDP/IP header */
1060 skb_put(skb,fragheaderlen + transhdrlen);
1061
1062 /* initialize network header pointer */
c1d2bbe1 1063 skb_reset_network_header(skb);
e89e9cf5
AR
1064
1065 /* initialize protocol header pointer */
b0e380b1 1066 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1067
84fa7933 1068 skb->ip_summed = CHECKSUM_PARTIAL;
e89e9cf5
AR
1069 skb->csum = 0;
1070 sk->sk_sndmsg_off = 0;
1071 }
1072
1073 err = skb_append_datato_frags(sk,skb, getfrag, from,
1074 (length - transhdrlen));
1075 if (!err) {
1076 struct frag_hdr fhdr;
1077
c31d5326
SS
1078 /* Specify the length of each IPv6 datagram fragment.
1079 * It has to be a multiple of 8.
1080 */
1081 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1082 sizeof(struct frag_hdr)) & ~7;
f83ef8c0 1083 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
7ea2f2c5 1084 ipv6_select_ident(&fhdr);
e89e9cf5
AR
1085 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1086 __skb_queue_tail(&sk->sk_write_queue, skb);
1087
1088 return 0;
1089 }
1090 /* There is not enough support do UPD LSO,
1091 * so follow normal path
1092 */
1093 kfree_skb(skb);
1094
1095 return err;
1096}
1da177e4 1097
0178b695
HX
1098static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1099 gfp_t gfp)
1100{
1101 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1102}
1103
1104static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1105 gfp_t gfp)
1106{
1107 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1108}
1109
41a1f8ea
YH
/**
 *	ip6_append_data - append payload to the socket's pending (corked) queue
 *	@sk: socket whose sk_write_queue receives the data
 *	@getfrag: callback that copies user data into the skb
 *	@from: opaque cookie passed through to @getfrag
 *	@length: number of bytes to append on this call
 *	@transhdrlen: transport header length (non-zero only on the first call)
 *	@hlimit: hop limit, stored in the cork on the first call
 *	@tclass: traffic class, stored in the cork on the first call
 *	@opt: IPv6 tx options, deep-copied into np->cork.opt on the first call
 *	@fl: flow describing the destination
 *	@rt: route to use for the cork
 *	@flags: MSG_* flags (MSG_MORE, MSG_DONTWAIT, MSG_PROBE)
 *
 *	On the first call (empty write queue) the cork state is set up:
 *	options are duplicated, the route is held, and the effective MTU is
 *	chosen.  On subsequent calls @opt/@fl/@rt are replaced by the corked
 *	values.  Data is packed into MTU/fragment-boundary sized skbs, either
 *	linearly or into page frags when the device supports scatter-gather.
 *
 *	Returns 0 on success or a negative errno.  On error the caller is
 *	expected to flush the pending frames, which also releases the cork
 *	state via ip6_cork_release().
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	/* First skb of a corking sequence may use hardware checksumming;
	 * reset to CHECKSUM_NONE after the first packet is queued.
	 */
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			/* A previous cork must have been released before
			 * options can be installed again.
			 */
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			/* NOTE(review): kmalloc does not zero; only the
			 * fields assigned below are initialized.  Confirm
			 * that every ipv6_txoptions member read later
			 * (and freed by ip6_cork_release()) is covered.
			 */
			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			/* On any ENOBUFS below, the partially-populated
			 * np->cork.opt is left in place; ip6_cork_release()
			 * (reached via the caller's flush) frees the pieces.
			 */
			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* In PMTUDISC_PROBE mode use the device MTU directly,
		 * otherwise the path MTU; a smaller per-socket frag_size
		 * overrides either.
		 */
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		/* Extra header bytes (tunnel/xfrm header room plus
		 * fragmentable options) counted as part of the first
		 * packet's "transport" area.
		 */
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Subsequent append: everything comes from the cork. */
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	/* Per-fragment header overhead: IPv6 header, nexthop-specific
	 * headers, and non-fragmentable options.
	 */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest 8-byte-aligned fragment payload boundary, leaving room
	 * for the fragment header itself.
	 */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	/* Over-MTU UDP with a UFO-capable device: hand the whole datagram
	 * to the offload path instead of fragmenting here.
	 */
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First packet: may block per MSG_DONTWAIT. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Follow-up packets: non-blocking, bounded by
				 * twice the send buffer.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang of the previous skb into
				 * this one, keeping both checksums correct,
				 * then trim the previous skb back to the
				 * fragment boundary.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Headers are only present in the first packet. */
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy linearly into the skb,
			 * rolling back the tail on copy failure.
			 */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into the per-socket spare
			 * page (sk_sndmsg_page/sk_sndmsg_off), allocating a
			 * fresh page when the current one is full.
			 */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if(i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	/* Roll back the optimistic cork.length increment for the bytes
	 * that were never queued and count the discard.
	 */
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1428
bf138862
PE
1429static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1430{
0178b695
HX
1431 if (np->cork.opt) {
1432 kfree(np->cork.opt->dst0opt);
1433 kfree(np->cork.opt->dst1opt);
1434 kfree(np->cork.opt->hopopt);
1435 kfree(np->cork.opt->srcrt);
1436 kfree(np->cork.opt);
1437 np->cork.opt = NULL;
1438 }
1439
c8cdaf99
YH
1440 if (inet->cork.dst) {
1441 dst_release(inet->cork.dst);
1442 inet->cork.dst = NULL;
bf138862
PE
1443 inet->cork.flags &= ~IPCORK_ALLFRAG;
1444 }
1445 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1446}
1447
1da177e4
LT
/**
 *	ip6_push_pending_frames - finish the corked datagram and send it
 *	@sk: socket whose sk_write_queue holds the pending fragments
 *
 *	Coalesces every skb queued by ip6_append_data() into one packet
 *	(first skb linear, the rest chained on its frag_list), pushes the
 *	corked extension headers and the IPv6 header, updates statistics,
 *	and hands the result to ip6_local_out().  The cork state is always
 *	released before returning, on success and on error alike.
 *
 *	Returns 0 on success or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining skbs onto the first one's frag_list,
	 * stripping their duplicated network headers and transferring
	 * their accounting (each skb's socket reference is dropped and
	 * its destructor cleared, since only the head skb stays owned).
	 */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* Routing headers may rewrite the destination; start from the
	 * flow's daddr and let ipv6_push_nfrag_opts() adjust it.
	 */
	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* First 32 bits: version 6, corked traffic class, flow label. */
	*(__be32*)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->u.dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* NOTE(review): the _BH stats variants are used here even
		 * though this path is typically entered from process
		 * context -- confirm this is intentional.
		 */
		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive return: qdisc congestion indication; report it
		 * only when the socket asked for error queueing.
		 */
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	/* Error path still releases the cork before returning err. */
	goto out;
}
1531
1532void ip6_flush_pending_frames(struct sock *sk)
1533{
1da177e4
LT
1534 struct sk_buff *skb;
1535
1536 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
adf30907
ED
1537 if (skb_dst(skb))
1538 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1539 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1540 kfree_skb(skb);
1541 }
1542
bf138862 1543 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1544}