/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
 * Version:	$Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <Alan.Cox@linux.org>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Hirokazu Takahashi, <taka@valinux.co.jp>
 *
 *	See ip_input.c for original log
 *
 *	Fixes:
 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
 *		Bradford Johnson:	Fix faulty handling of some frames when
 *					no route is found.
 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
 *					(in case if packet not accepted by
 *					output firewall rules)
 *		Mike McLagan	:	Routing by source
 *		Alexey Kuznetsov:	use new route cache
 *		Andi Kleen	:	Fix broken PMTU recovery and remove
 *					some redundant tests.
 *		Vitaly E. Lavrov:	Transparent proxy revived after year coma.
 *		Andi Kleen	:	Replace ip_reply with ip_send_reply.
 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
 *					for decreased register pressure on x86
 *					and more readability.
 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
 *					silently drop skb instead of failing with -EPERM.
 *		Detlev Wengorz	:	Copy protocol for fragments.
 *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
 *					datagrams.
 *		Hirokazu Takahashi:	sendfile() on UDP works now.
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/config.h>

#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
#include <net/checksum.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
#include <linux/tcp.h>

int sysctl_ip_default_ttl = IPDEFTTL;

static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));

/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}

/* dev_loopback_xmit for use with netfilter. */
static int ip_dev_loopback_xmit(struct sk_buff *newskb)
{
	newskb->mac.raw = newskb->data;
	__skb_pull(newskb, newskb->nh.raw - newskb->data);
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);
	netif_rx(newskb);
	return 0;
}

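/*
 * Select the TTL for an outgoing unicast packet: use the value set on
 * the socket via IP_TTL if there is one, otherwise fall back to the
 * route's hop-limit metric (the system default).
 */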
static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
	int ttl = inet->uc_ttl;

	if (ttl < 0)
		ttl = dst_metric(dst, RTAX_HOPLIMIT);
	return ttl;
}

/*
 *		Add an ip header to a skbuff and send it out.
 *
 */
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			  u32 saddr, u32 daddr, struct ip_options *opt)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = (struct rtable *)skb->dst;
	struct iphdr *iph;

	/* Build the IP header. */
	if (opt)
		iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr) + opt->optlen);
	else
		iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));

	iph->version  = 4;
	iph->ihl      = 5;
	iph->tos      = inet->tos;
	if (ip_dont_fragment(sk, &rt->u.dst))
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
	iph->daddr    = rt->rt_dst;
	iph->saddr    = rt->rt_src;
	iph->protocol = sk->sk_protocol;
	iph->tot_len  = htons(skb->len);
	ip_select_ident(iph, &rt->u.dst, sk);
	skb->nh.iph   = iph;

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, daddr, rt, 0);
	}
	ip_send_check(iph);

	skb->priority = sk->sk_priority;

	/* Send it out. */
	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		       dst_output);
}

EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);

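/*
 * Final step of the output path: make sure the skb has enough headroom
 * for the link-layer header, then hand it to the cached hardware header
 * (hh) output routine, or to the neighbour's output function if no
 * header cache exists yet.
 */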
static inline int ip_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct hh_cache *hh = dst->hh;
	struct net_device *dev = dst->dev;
	int hh_len = LL_RESERVED_SPACE(dev);

	/* Be paranoid, rather than too clever. */
	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
		struct sk_buff *skb2;

		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
		if (skb2 == NULL) {
			kfree_skb(skb);
			return -ENOMEM;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		kfree_skb(skb);
		skb = skb2;
	}

	if (hh) {
		int hh_alen;

		read_lock_bh(&hh->hh_lock);
		hh_alen = HH_DATA_ALIGN(hh->hh_len);
		memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
		read_unlock_bh(&hh->hh_lock);
		skb_push(skb, hh->hh_len);
		return hh->hh_output(skb);
	} else if (dst->neighbour)
		return dst->neighbour->output(skb);

	if (net_ratelimit())
		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
	kfree_skb(skb);
	return -EINVAL;
}

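/*
 * Decide whether the packet still needs IP fragmentation before the
 * final output step.  Oversized skbs built for UFO or TSO are passed
 * through untouched; the device will segment them itself.
 */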
static inline int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb->dst->xfrm != NULL)
		return xfrm4_output_finish(skb);
#endif
	if (skb->len > dst_mtu(skb->dst) &&
	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
		return ip_fragment(skb, ip_finish_output2);
	else
		return ip_finish_output2(skb);
}

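/*
 * Output routine for packets routed to a multicast or broadcast
 * destination: loop a copy back to local listeners where appropriate,
 * then send the original out through netfilter and ip_finish_output().
 */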
int ip_mc_output(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct rtable *rt = (struct rtable*)skb->dst;
	struct net_device *dev = rt->u.dst.dev;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	/*
	 *	Multicasts are looped back for other local users
	 */

	if (rt->rt_flags&RTCF_MULTICAST) {
		if ((!sk || inet_sk(sk)->mc_loop)
#ifdef CONFIG_IP_MROUTE
		/* Small optimization: do not loop back non-local frames
		   that came back after forwarding; they will be dropped
		   by ip_mr_input in any case.
		   Note that local frames are looped back to be delivered
		   to local recipients.

		   This check is duplicated in ip_mr_input at the moment.
		 */
		    && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
#endif
		) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
			if (newskb)
				NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
					newskb->dev,
					ip_dev_loopback_xmit);
		}

		/* Multicasts with ttl 0 must not go beyond the host */

		if (skb->nh.iph->ttl == 0) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (rt->rt_flags&RTCF_BROADCAST) {
		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
		if (newskb)
			NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
				newskb->dev, ip_dev_loopback_xmit);
	}

	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
		       ip_finish_output);
}

int ip_output(struct sk_buff *skb)
{
	struct net_device *dev = skb->dst->dev;

	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
		       ip_finish_output);
}

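/*
 * Queue a packet for transmission on behalf of a socket (TCP and other
 * connected transports): route it if it is not routed already, build
 * the IP header and hand it to netfilter/dst_output().  The ipfragok
 * argument lets the caller permit fragmentation even when the socket's
 * PMTU settings would normally set the DF bit.
 */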
int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ip_options *opt = inet->opt;
	struct rtable *rt;
	struct iphdr *iph;

	/* Skip all of this if the packet is already routed,
	 * f.e. by something like SCTP.
	 */
	rt = (struct rtable *) skb->dst;
	if (rt != NULL)
		goto packet_routed;

	/* Make sure we can route this packet. */
	rt = (struct rtable *)__sk_dst_check(sk, 0);
	if (rt == NULL) {
		u32 daddr;

		/* Use correct destination address if we have options. */
		daddr = inet->daddr;
		if (opt && opt->srr)
			daddr = opt->faddr;

		{
			struct flowi fl = { .oif = sk->sk_bound_dev_if,
					    .nl_u = { .ip4_u =
						      { .daddr = daddr,
							.saddr = inet->saddr,
							.tos = RT_CONN_FLAGS(sk) } },
					    .proto = sk->sk_protocol,
					    .uli_u = { .ports =
						       { .sport = inet->sport,
							 .dport = inet->dport } } };

			/* If this fails, the retransmit mechanism of the
			 * transport layer will keep trying until the route
			 * appears or the connection times itself out.
			 */
			if (ip_route_output_flow(&rt, &fl, sk, 0))
				goto no_route;
		}
		sk_setup_caps(sk, &rt->u.dst);
	}
	skb->dst = dst_clone(&rt->u.dst);

packet_routed:
	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
		goto no_route;

	/* OK, we know where to send it, allocate and build IP header. */
	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
	*((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
	iph->tot_len = htons(skb->len);
	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
	iph->protocol = sk->sk_protocol;
	iph->saddr    = rt->rt_src;
	iph->daddr    = rt->rt_dst;
	skb->nh.iph   = iph;
	/* The transport layer sets skb->h.foo itself. */

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen >> 2;
		ip_options_build(skb, opt, inet->daddr, rt, 0);
	}

	ip_select_ident_more(iph, &rt->u.dst, sk,
			     (skb_shinfo(skb)->tso_segs ?: 1) - 1);

	/* Add an IP checksum. */
	ip_send_check(iph);

	skb->priority = sk->sk_priority;

	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		       dst_output);

no_route:
	IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EHOSTUNREACH;
}


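/*
 * Copy the metadata that every fragment must inherit from the original
 * skb: packet type, priority, protocol, dst, traffic-control index and
 * netfilter/conntrack state.
 */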
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;

	/* Copy the flags to each fragment. */
	IPCB(to)->flags = IPCB(from)->flags;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
#ifdef CONFIG_NETFILTER
	to->nfmark = from->nfmark;
	/* Connection association is same as pre-frag packet */
	nf_conntrack_put(to->nfct);
	to->nfct = from->nfct;
	nf_conntrack_get(to->nfct);
	to->nfctinfo = from->nfctinfo;
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	to->ipvs_property = from->ipvs_property;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	nf_bridge_put(to->nf_bridge);
	to->nf_bridge = from->nf_bridge;
	nf_bridge_get(to->nf_bridge);
#endif
#endif
}

/*
 *	This IP datagram is too large to be sent in one piece.  Break it up into
 *	smaller pieces (each of size equal to IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending.
 */

static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
{
	struct iphdr *iph;
	int raw = 0;
	int ptr;
	struct net_device *dev;
	struct sk_buff *skb2;
	unsigned int mtu, hlen, left, len, ll_rs;
	int offset;
	__be16 not_last_frag;
	struct rtable *rt = (struct rtable*)skb->dst;
	int err = 0;

	dev = rt->u.dst.dev;

	/*
	 *	Point into the IP datagram header.
	 */

	iph = skb->nh.iph;

	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(dst_mtu(&rt->u.dst)));
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/*
	 *	Setup starting values.
	 */

	hlen = iph->ihl * 4;
	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;

	/* When frag_list is given, use it.  First, check its validity:
	 * some transformers could create wrong frag_list or break existing
	 * one, it is not prohibited.  In this case fall back to copying.
	 *
	 * LATER: this step can be merged to real generation of fragments,
	 * we can switch to copy when see the first bad fragment.
	 */
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *frag;
		int first_len = skb_pagelen(skb);

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				skb->truesize -= frag->truesize;
			}
		}

		/* Everything is OK.  Generate! */

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		iph->tot_len = htons(first_len);
		iph->frag_off = htons(IP_MF);
		ip_send_check(iph);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				frag->h.raw = frag->data;
				frag->nh.raw = __skb_push(frag, hlen);
				memcpy(frag->nh.raw, iph, hlen);
				iph = frag->nh.iph;
				iph->tot_len = htons(frag->len);
				ip_copy_metadata(frag, skb);
				if (offset == 0)
					ip_options_fragment(frag);
				offset += skb->len - hlen;
				iph->frag_off = htons(offset>>3);
				if (frag->next != NULL)
					iph->frag_off |= htons(IP_MF);
				/* Ready, complete checksum */
				ip_send_check(iph);
			}

			err = output(skb);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		if (err == 0) {
			IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}
		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
		return err;
	}

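	/*
	 * Slow path: allocate a fresh skb for every fragment and copy the
	 * corresponding block of data (plus a copy of the IP header) into it.
	 */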
slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = raw + hlen;		/* Where to start from */

#ifdef CONFIG_BRIDGE_NETFILTER
	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
	 * we need to make room for the encapsulating header */
	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb));
	mtu -= nf_bridge_pad(skb);
#else
	ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
#endif
	/*
	 *	Fragment the datagram.
	 */

	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	not_last_frag = iph->frag_off & htons(IP_MF);

	/*
	 *	Keep copying data until we run out.
	 */

	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip_copy_metadata(skb2, skb);
		skb_reserve(skb2, ll_rs);
		skb_put(skb2, len + hlen);
		skb2->nh.raw = skb2->data;
		skb2->h.raw = skb2->data + hlen;

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */

		memcpy(skb2->nh.raw, skb->data, hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
			BUG();
		left -= len;

		/*
		 *	Fill in the new header fields.
		 */
		iph = skb2->nh.iph;
		iph->frag_off = htons((offset >> 3));

		/* ANK: dirty, but effective trick. Upgrade options only if
		 * the segment to be fragmented was THE FIRST (otherwise,
		 * options are already fixed) and make it ONCE
		 * on the initial skb, so that all the following fragments
		 * will inherit fixed options.
		 */
		if (offset == 0)
			ip_options_fragment(skb);

		/*
		 *	Added AC: if we are fragmenting a fragment that's not the
		 *	last fragment then keep MF on each fragment
		 */
		if (left > 0 || not_last_frag)
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);

		iph->tot_len = htons(len + hlen);

		ip_send_check(iph);

		err = output(skb2);
		if (err)
			goto fail;
	}
	kfree_skb(skb);
	IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
	return err;

fail:
	kfree_skb(skb);
	IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
	return err;
}

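/*
 * getfrag() callback used with ip_append_data() for data coming from a
 * user-space iovec.  When the device will checksum the packet we only
 * copy; otherwise we checksum while copying and fold the partial sum
 * into skb->csum.
 */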
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct iovec *iov = from;

	if (skb->ip_summed == CHECKSUM_HW) {
		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
			return -EFAULT;
	} else {
		unsigned int csum = 0;
		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
			return -EFAULT;
		skb->csum = csum_block_add(skb->csum, csum, odd);
	}
	return 0;
}

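/* Checksum 'copy' bytes of a page, starting at 'offset'. */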
static inline unsigned int
csum_page(struct page *page, int offset, int copy)
{
	char *kaddr;
	unsigned int csum;
	kaddr = kmap(page);
	csum = csum_partial(kaddr + offset, copy, 0);
	kunmap(page);
	return csum;
}

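/*
 * Append data for a device that supports UDP fragmentation offload
 * (NETIF_F_UFO): build (or extend) one large skb holding the complete
 * datagram in page fragments and record the fragment size, so the
 * device can split it into MTU-sized frames itself.
 */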
static inline int ip_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP fragmentation offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);

		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb->nh.raw = skb->data;

		/* initialize protocol header pointer */
		skb->h.raw = skb->data + fragheaderlen;

		skb->ip_summed = CHECKSUM_HW;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UFO,
	 * so follow the normal path.
	 */
	kfree_skb(skb);
	return err;
}

/*
 *	ip_append_data() and ip_append_page() can make one large IP datagram
 *	from many pieces of data.  Each piece will be held on the socket
 *	until ip_push_pending_frames() is called.  Each piece can be a page
 *	or non-page data.
 *
 *	Not only UDP, other transport protocols - e.g. raw sockets - can use
 *	this interface potentially.
 *
 *	LATER: length must be adjusted by pad at tail, when it is required.
 */
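/*
 * For a usage example within this file, see ip_send_reply() below: it
 * appends the reply data with ip_append_data() and then flushes the
 * pending queue with ip_push_pending_frames().
 */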
int ip_append_data(struct sock *sk,
		   int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
		   void *from, int length, int transhdrlen,
		   struct ipcm_cookie *ipc, struct rtable *rt,
		   unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

	struct ip_options *opt = NULL;
	int hh_len;
	int exthdrlen;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	unsigned int maxfraglen, fragheaderlen;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking.
		 */
		opt = ipc->opt;
		if (opt) {
			if (inet->cork.opt == NULL) {
				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
				if (unlikely(inet->cork.opt == NULL))
					return -ENOBUFS;
			}
			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
			inet->cork.flags |= IPCORK_OPT;
			inet->cork.addr = ipc->addr;
		}
		dst_hold(&rt->u.dst);
		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
		inet->cork.rt = rt;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		if ((exthdrlen = rt->u.dst.header_len) != 0) {
			length += exthdrlen;
			transhdrlen += exthdrlen;
		}
	} else {
		rt = inet->cork.rt;
		if (inet->cork.flags & IPCORK_OPT)
			opt = inet->cork.opt;

		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}
	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
		return -EMSGSIZE;
	}

	/*
	 * transhdrlen > 0 means that this is the first fragment and we wish
	 * it won't be fragmented in the future.
	 */
	if (transhdrlen &&
	    length + fragheaderlen <= mtu &&
	    rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
	    !exthdrlen)
		csummode = CHECKSUM_HW;

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		if (ip_ufo_append_data(sk, getfrag, from, length, hh_len,
				       fragheaderlen, transhdrlen, mtu, flags))
			goto error;

		return 0;
	}

	/* So, what's going on in the loop below?
	 *
	 * We use the calculated fragment length to generate a chained skb,
	 * each segment of which is an IP fragment ready for sending to the
	 * network after adding the appropriate IP header.
	 */

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;
		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;
			fraglen = datalen + fragheaderlen;

			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/* The last fragment gets additional space at tail.
			 * Note, with MSG_MORE we overallocate on fragments,
			 * because we have no idea what fragment will be
			 * the last.
			 */
			if (datalen == length)
				alloclen += rt->u.dst.trailer_len;

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len + 15,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len + 15, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/*
			 *	Find where to start putting bytes.
			 */
			data = skb_put(skb, fraglen);
			skb->nh.raw = data + exthdrlen;
			data += fragheaderlen;
			skb->h.raw = data + exthdrlen;

			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				skb_trim(skb_prev, maxfraglen);
			}

			copy = datalen - transhdrlen - fraggap;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
					offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
				skb->truesize += PAGE_SIZE;
				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error:
	inet->cork.length -= length;
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err;
}

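/*
 * Zero-copy companion to ip_append_data(): append a page (e.g. from a
 * sendfile()-style caller) as a fragment of the pending datagram.  Only
 * valid for scatter-gather capable devices and for a queue that has
 * already been started with ip_append_data().
 */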
ssize_t	ip_append_page(struct sock *sk, struct page *page,
		       int offset, size_t size, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	struct rtable *rt;
	struct ip_options *opt = NULL;
	int hh_len;
	int mtu;
	int len;
	int err;
	unsigned int maxfraglen, fragheaderlen, fraggap;

	if (inet->hdrincl)
		return -EPERM;

	if (flags&MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue))
		return -EINVAL;

	rt = inet->cork.rt;
	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (!(rt->u.dst.dev->features&NETIF_F_SG))
		return -EOPNOTSUPP;

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
	mtu = inet->cork.fragsize;

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
		return -EMSGSIZE;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		return -EINVAL;

	inet->cork.length += size;
	if ((sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO))
		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);


	while (size > 0) {
		int i;

		if (skb_shinfo(skb)->ufo_size)
			len = size;
		else {

			/* Check if the remaining data fits into current packet. */
			len = mtu - skb->len;
			if (len < size)
				len = maxfraglen - skb->len;
		}
		if (len <= 0) {
			struct sk_buff *skb_prev;
			char *data;
			struct iphdr *iph;
			int alloclen;

			skb_prev = skb;
			fraggap = skb_prev->len - maxfraglen;

			alloclen = fragheaderlen + hh_len + fraggap + 15;
			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
			if (unlikely(!skb)) {
				err = -ENOBUFS;
				goto error;
			}

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/*
			 *	Find where to start putting bytes.
			 */
			data = skb_put(skb, fragheaderlen + fraggap);
			skb->nh.iph = iph = (struct iphdr *)data;
			data += fragheaderlen;
			skb->h.raw = data;

			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				skb_trim(skb_prev, maxfraglen);
			}

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		i = skb_shinfo(skb)->nr_frags;
		if (len > size)
			len = size;
		if (skb_can_coalesce(skb, i, page, offset)) {
			skb_shinfo(skb)->frags[i-1].size += len;
		} else if (i < MAX_SKB_FRAGS) {
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, len);
		} else {
			err = -EMSGSIZE;
			goto error;
		}

		if (skb->ip_summed == CHECKSUM_NONE) {
			unsigned int csum;
			csum = csum_page(page, offset, len);
			skb->csum = csum_block_add(skb->csum, csum, skb->len);
		}

		skb->len += len;
		skb->data_len += len;
		offset += len;
		size -= len;
	}
	return 0;

error:
	inet->cork.length -= size;
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err;
}

/*
 *	Combine all pending IP fragments on the socket as one IP datagram
 *	and push them out.
 */
int ip_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct inet_sock *inet = inet_sk(sk);
	struct ip_options *opt = NULL;
	struct rtable *rt = inet->cork.rt;
	struct iphdr *iph;
	__be16 df = 0;
	__u8 ttl;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb->nh.raw)
		__skb_pull(skb, skb->nh.raw - skb->data);
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Unless the user demanded real pmtu discovery (IP_PMTUDISC_DO), we
	 * allow the frame generated here to be fragmented.  No matter how
	 * transforms change the size of the packet, it will come out.
	 */
	if (inet->pmtudisc != IP_PMTUDISC_DO)
		skb->local_df = 1;

	/* DF bit is set when we want to see DF on outgoing frames.
	 * If local_df is set too, we still allow to fragment this frame
	 * locally. */
	if (inet->pmtudisc == IP_PMTUDISC_DO ||
	    (skb->len <= dst_mtu(&rt->u.dst) &&
	     ip_dont_fragment(sk, &rt->u.dst)))
		df = htons(IP_DF);

	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (rt->rt_type == RTN_MULTICAST)
		ttl = inet->mc_ttl;
	else
		ttl = ip_select_ttl(inet, &rt->u.dst);

	iph = (struct iphdr *)skb->data;
	iph->version = 4;
	iph->ihl = 5;
	if (opt) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
	}
	iph->tos = inet->tos;
	iph->tot_len = htons(skb->len);
	iph->frag_off = df;
	if (!df) {
		__ip_select_ident(iph, &rt->u.dst, 0);
	} else {
		iph->id = htons(inet->id++);
	}
	iph->ttl = ttl;
	iph->protocol = sk->sk_protocol;
	iph->saddr = rt->rt_src;
	iph->daddr = rt->rt_dst;
	ip_send_check(iph);

	skb->priority = sk->sk_priority;
	skb->dst = dst_clone(&rt->u.dst);

	/* Netfilter gets the whole, not yet fragmented skb. */
	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
		      skb->dst->dev, dst_output);
	if (err) {
		if (err > 0)
			err = inet->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(inet->cork.opt);
	inet->cork.opt = NULL;
	if (inet->cork.rt) {
		ip_rt_put(inet->cork.rt);
		inet->cork.rt = NULL;
	}
	return err;

error:
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

/*
 *	Throw away all pending data on the socket.
 */
void ip_flush_pending_frames(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
		kfree_skb(skb);

	inet->cork.flags &= ~IPCORK_OPT;
	kfree(inet->cork.opt);
	inet->cork.opt = NULL;
	if (inet->cork.rt) {
		ip_rt_put(inet->cork.rt);
		inet->cork.rt = NULL;
	}
}


/*
 *	Fetch data from kernel space and fill in checksum if needed.
 */
static int ip_reply_glue_bits(void *dptr, char *to, int offset,
			      int len, int odd, struct sk_buff *skb)
{
	unsigned int csum;

	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	return 0;
}

/*
 *	Generic function to send a packet as reply to another packet.
 *	Used to send TCP resets so far.  ICMP should use this function too.
 *
 *	Should run single threaded per socket because it uses the sock
 *	structure to pass arguments.
 *
 *	LATER: switch from ip_build_xmit to ip_append_*
 */
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
		   unsigned int len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct {
		struct ip_options	opt;
		char			data[40];
	} replyopts;
	struct ipcm_cookie ipc;
	u32 daddr;
	struct rtable *rt = (struct rtable*)skb->dst;

	if (ip_options_echo(&replyopts.opt, skb))
		return;

	daddr = ipc.addr = rt->rt_src;
	ipc.opt = NULL;

	if (replyopts.opt.optlen) {
		ipc.opt = &replyopts.opt;

		if (ipc.opt->srr)
			daddr = replyopts.opt.faddr;
	}

	{
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = daddr,
						.saddr = rt->rt_spec_dst,
						.tos = RT_TOS(skb->nh.iph->tos) } },
				    /* Not quite clean, but right. */
				    .uli_u = { .ports =
					       { .sport = skb->h.th->dest,
						 .dport = skb->h.th->source } },
				    .proto = sk->sk_protocol };
		if (ip_route_output_key(&rt, &fl))
			return;
	}

	/* And let IP do all the hard work.
	 *
	 * This chunk is not reenterable, hence spinlock.
	 * Note that it relies on the fact that this function is called
	 * with locally disabled BHs and that sk cannot be already spinlocked.
	 */
	bh_lock_sock(sk);
	inet->tos = skb->nh.iph->tos;
	sk->sk_priority = skb->priority;
	sk->sk_protocol = skb->nh.iph->protocol;
	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
		       &ipc, rt, MSG_DONTWAIT);
	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
		if (arg->csumoffset >= 0)
			*((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
		skb->ip_summed = CHECKSUM_NONE;
		ip_push_pending_frames(sk);
	}

	bh_unlock_sock(sk);

	ip_rt_put(rt);
}

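/*
 * Initialise the IP output layer: routing tables, the inet peer cache,
 * and, when multicast and procfs are enabled, the IGMP /proc entries.
 */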
void __init ip_init(void)
{
	ip_rt_init();
	inet_initpeers();

#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
	igmp_mc_proc_init();
#endif
}

EXPORT_SYMBOL(ip_generic_getfrag);
EXPORT_SYMBOL(ip_queue_xmit);
EXPORT_SYMBOL(ip_send_check);