]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/core/skbuff.c
net: Rename skb_has_frags to skb_has_frag_list
[net-next-2.6.git] / net / core / skbuff.c
CommitLineData
1da177e4
LT
1/*
2 * Routines having to do with the 'struct sk_buff' memory handlers.
3 *
113aa838 4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 * Florian La Roche <rzsfl@rz.uni-sb.de>
6 *
1da177e4
LT
7 * Fixes:
8 * Alan Cox : Fixed the worst of the load
9 * balancer bugs.
10 * Dave Platt : Interrupt stacking fix.
11 * Richard Kooijman : Timestamp fixes.
12 * Alan Cox : Changed buffer format.
13 * Alan Cox : destructor hook for AF_UNIX etc.
14 * Linus Torvalds : Better skb_clone.
15 * Alan Cox : Added skb_copy.
16 * Alan Cox : Added all the changed routines Linus
17 * only put in the headers
18 * Ray VanTassle : Fixed --skb->lock in free
19 * Alan Cox : skb_copy copy arp field
20 * Andi Kleen : slabified it.
21 * Robert Olsson : Removed skb_head_pool
22 *
23 * NOTE:
24 * The __skb_ routines should be called with interrupts
25 * disabled, or you better be *real* sure that the operation is atomic
26 * with respect to whatever list is being frobbed (e.g. via lock_sock()
27 * or via disabling bottom half handlers, etc).
28 *
29 * This program is free software; you can redistribute it and/or
30 * modify it under the terms of the GNU General Public License
31 * as published by the Free Software Foundation; either version
32 * 2 of the License, or (at your option) any later version.
33 */
34
35/*
36 * The functions in this file will not compile correctly with gcc 2.4.x
37 */
38
1da177e4
LT
39#include <linux/module.h>
40#include <linux/types.h>
41#include <linux/kernel.h>
fe55f6d5 42#include <linux/kmemcheck.h>
1da177e4
LT
43#include <linux/mm.h>
44#include <linux/interrupt.h>
45#include <linux/in.h>
46#include <linux/inet.h>
47#include <linux/slab.h>
48#include <linux/netdevice.h>
49#ifdef CONFIG_NET_CLS_ACT
50#include <net/pkt_sched.h>
51#endif
52#include <linux/string.h>
53#include <linux/skbuff.h>
9c55e01c 54#include <linux/splice.h>
1da177e4
LT
55#include <linux/cache.h>
56#include <linux/rtnetlink.h>
57#include <linux/init.h>
716ea3a7 58#include <linux/scatterlist.h>
ac45f602 59#include <linux/errqueue.h>
1da177e4
LT
60
61#include <net/protocol.h>
62#include <net/dst.h>
63#include <net/sock.h>
64#include <net/checksum.h>
65#include <net/xfrm.h>
66
67#include <asm/uaccess.h>
68#include <asm/system.h>
ad8d75ff 69#include <trace/events/skb.h>
1da177e4 70
a1f8e7f7
AV
71#include "kmap_skb.h"
72
e18b890b
CL
73static struct kmem_cache *skbuff_head_cache __read_mostly;
74static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1da177e4 75
9c55e01c
JA
76static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
77 struct pipe_buffer *buf)
78{
8b9d3728 79 put_page(buf->page);
9c55e01c
JA
80}
81
82static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
83 struct pipe_buffer *buf)
84{
8b9d3728 85 get_page(buf->page);
9c55e01c
JA
86}
87
/*
 * ->steal() hook of sock_pipe_buf_ops.
 *
 * Neither argument is inspected; the result is unconditionally 1.
 * NOTE(review): presumably a non-zero return tells the pipe layer that
 * the page cannot be stolen - confirm against the pipe_buf_operations
 * contract.
 */
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
			       struct pipe_buffer *buf)
{
	return 1;
}
93
94
95/* Pipe buffer operations for a socket. */
28dfef8f 96static const struct pipe_buf_operations sock_pipe_buf_ops = {
9c55e01c
JA
97 .can_merge = 0,
98 .map = generic_pipe_buf_map,
99 .unmap = generic_pipe_buf_unmap,
100 .confirm = generic_pipe_buf_confirm,
101 .release = sock_pipe_buf_release,
102 .steal = sock_pipe_buf_steal,
103 .get = sock_pipe_buf_get,
104};
105
1da177e4
LT
106/*
107 * Keep out-of-line to prevent kernel bloat.
108 * __builtin_return_address is not used because it is not always
109 * reliable.
110 */
111
112/**
113 * skb_over_panic - private function
114 * @skb: buffer
115 * @sz: size
116 * @here: address
117 *
118 * Out of line support code for skb_put(). Not user callable.
119 */
ccb7c773 120static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
1da177e4 121{
26095455 122 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
4305b541 123 "data:%p tail:%#lx end:%#lx dev:%s\n",
27a884dc 124 here, skb->len, sz, skb->head, skb->data,
4305b541 125 (unsigned long)skb->tail, (unsigned long)skb->end,
26095455 126 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
127 BUG();
128}
129
130/**
131 * skb_under_panic - private function
132 * @skb: buffer
133 * @sz: size
134 * @here: address
135 *
136 * Out of line support code for skb_push(). Not user callable.
137 */
138
ccb7c773 139static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
1da177e4 140{
26095455 141 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
4305b541 142 "data:%p tail:%#lx end:%#lx dev:%s\n",
27a884dc 143 here, skb->len, sz, skb->head, skb->data,
4305b541 144 (unsigned long)skb->tail, (unsigned long)skb->end,
26095455 145 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
146 BUG();
147}
148
149/* Allocate a new skbuff. We do this ourselves so we can fill in a few
150 * 'private' fields and also do memory statistics to find all the
151 * [BEEP] leaks.
152 *
153 */
154
155/**
d179cd12 156 * __alloc_skb - allocate a network buffer
1da177e4
LT
157 * @size: size to allocate
158 * @gfp_mask: allocation mask
c83c2486
RD
159 * @fclone: allocate from fclone cache instead of head cache
160 * and allocate a cloned (child) skb
b30973f8 161 * @node: numa node to allocate memory on
1da177e4
LT
162 *
163 * Allocate a new &sk_buff. The returned buffer has no headroom and a
164 * tail room of size bytes. The object has a reference count of one.
165 * The return is the buffer. On a failure the return is %NULL.
166 *
167 * Buffers may only be allocated from interrupts using a @gfp_mask of
168 * %GFP_ATOMIC.
169 */
dd0fc66f 170struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
b30973f8 171 int fclone, int node)
1da177e4 172{
e18b890b 173 struct kmem_cache *cache;
4947d3ef 174 struct skb_shared_info *shinfo;
1da177e4
LT
175 struct sk_buff *skb;
176 u8 *data;
177
8798b3fb
HX
178 cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
179
1da177e4 180 /* Get the HEAD */
b30973f8 181 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
1da177e4
LT
182 if (!skb)
183 goto out;
ec7d2f2c 184 prefetchw(skb);
1da177e4 185
1da177e4 186 size = SKB_DATA_ALIGN(size);
b30973f8
CH
187 data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
188 gfp_mask, node);
1da177e4
LT
189 if (!data)
190 goto nodata;
ec7d2f2c 191 prefetchw(data + size);
1da177e4 192
ca0605a7 193 /*
c8005785
JB
194 * Only clear those fields we need to clear, not those that we will
195 * actually initialise below. Hence, don't put any more fields after
196 * the tail pointer in struct sk_buff!
ca0605a7
ACM
197 */
198 memset(skb, 0, offsetof(struct sk_buff, tail));
1da177e4
LT
199 skb->truesize = size + sizeof(struct sk_buff);
200 atomic_set(&skb->users, 1);
201 skb->head = data;
202 skb->data = data;
27a884dc 203 skb_reset_tail_pointer(skb);
4305b541 204 skb->end = skb->tail + size;
fe55f6d5
VN
205 kmemcheck_annotate_bitfield(skb, flags1);
206 kmemcheck_annotate_bitfield(skb, flags2);
19633e12
SH
207#ifdef NET_SKBUFF_DATA_USES_OFFSET
208 skb->mac_header = ~0U;
209#endif
210
4947d3ef
BL
211 /* make sure we initialize shinfo sequentially */
212 shinfo = skb_shinfo(skb);
ec7d2f2c 213 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
4947d3ef 214 atomic_set(&shinfo->dataref, 1);
4947d3ef 215
d179cd12
DM
216 if (fclone) {
217 struct sk_buff *child = skb + 1;
218 atomic_t *fclone_ref = (atomic_t *) (child + 1);
1da177e4 219
fe55f6d5
VN
220 kmemcheck_annotate_bitfield(child, flags1);
221 kmemcheck_annotate_bitfield(child, flags2);
d179cd12
DM
222 skb->fclone = SKB_FCLONE_ORIG;
223 atomic_set(fclone_ref, 1);
224
225 child->fclone = SKB_FCLONE_UNAVAILABLE;
226 }
1da177e4
LT
227out:
228 return skb;
229nodata:
8798b3fb 230 kmem_cache_free(cache, skb);
1da177e4
LT
231 skb = NULL;
232 goto out;
1da177e4 233}
b4ac530f 234EXPORT_SYMBOL(__alloc_skb);
1da177e4 235
8af27456
CH
236/**
237 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
238 * @dev: network device to receive on
239 * @length: length to allocate
240 * @gfp_mask: get_free_pages mask, passed to alloc_skb
241 *
242 * Allocate a new &sk_buff and assign it a usage count of one. The
243 * buffer has unspecified headroom built in. Users should allocate
244 * the headroom they think they need without accounting for the
245 * built in space. The built in space is used for optimisations.
246 *
247 * %NULL is returned if there is no free memory.
248 */
249struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
250 unsigned int length, gfp_t gfp_mask)
251{
43cb76d9 252 int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
8af27456
CH
253 struct sk_buff *skb;
254
4ec93edb 255 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
7b2e497a 256 if (likely(skb)) {
8af27456 257 skb_reserve(skb, NET_SKB_PAD);
7b2e497a
CH
258 skb->dev = dev;
259 }
8af27456
CH
260 return skb;
261}
b4ac530f 262EXPORT_SYMBOL(__netdev_alloc_skb);
1da177e4 263
654bed16
PZ
264struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
265{
266 int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
267 struct page *page;
268
269 page = alloc_pages_node(node, gfp_mask, 0);
270 return page;
271}
272EXPORT_SYMBOL(__netdev_alloc_page);
273
274void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
275 int size)
276{
277 skb_fill_page_desc(skb, i, page, off, size);
278 skb->len += size;
279 skb->data_len += size;
280 skb->truesize += size;
281}
282EXPORT_SYMBOL(skb_add_rx_frag);
283
f58518e6
IJ
284/**
285 * dev_alloc_skb - allocate an skbuff for receiving
286 * @length: length to allocate
287 *
288 * Allocate a new &sk_buff and assign it a usage count of one. The
289 * buffer has unspecified headroom built in. Users should allocate
290 * the headroom they think they need without accounting for the
291 * built in space. The built in space is used for optimisations.
292 *
293 * %NULL is returned if there is no free memory. Although this function
294 * allocates memory it can be called from an interrupt.
295 */
296struct sk_buff *dev_alloc_skb(unsigned int length)
297{
1483b874
DV
298 /*
299 * There is more code here than it seems:
a0f55e0e 300 * __dev_alloc_skb is an inline
1483b874 301 */
f58518e6
IJ
302 return __dev_alloc_skb(length, GFP_ATOMIC);
303}
304EXPORT_SYMBOL(dev_alloc_skb);
305
27b437c8 306static void skb_drop_list(struct sk_buff **listp)
1da177e4 307{
27b437c8 308 struct sk_buff *list = *listp;
1da177e4 309
27b437c8 310 *listp = NULL;
1da177e4
LT
311
312 do {
313 struct sk_buff *this = list;
314 list = list->next;
315 kfree_skb(this);
316 } while (list);
317}
318
27b437c8
HX
319static inline void skb_drop_fraglist(struct sk_buff *skb)
320{
321 skb_drop_list(&skb_shinfo(skb)->frag_list);
322}
323
1da177e4
LT
324static void skb_clone_fraglist(struct sk_buff *skb)
325{
326 struct sk_buff *list;
327
fbb398a8 328 skb_walk_frags(skb, list)
1da177e4
LT
329 skb_get(list);
330}
331
5bba1712 332static void skb_release_data(struct sk_buff *skb)
1da177e4
LT
333{
334 if (!skb->cloned ||
335 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
336 &skb_shinfo(skb)->dataref)) {
337 if (skb_shinfo(skb)->nr_frags) {
338 int i;
339 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
340 put_page(skb_shinfo(skb)->frags[i].page);
341 }
342
21dc3301 343 if (skb_has_frag_list(skb))
1da177e4
LT
344 skb_drop_fraglist(skb);
345
346 kfree(skb->head);
347 }
348}
349
350/*
351 * Free an skbuff by memory without cleaning the state.
352 */
2d4baff8 353static void kfree_skbmem(struct sk_buff *skb)
1da177e4 354{
d179cd12
DM
355 struct sk_buff *other;
356 atomic_t *fclone_ref;
357
d179cd12
DM
358 switch (skb->fclone) {
359 case SKB_FCLONE_UNAVAILABLE:
360 kmem_cache_free(skbuff_head_cache, skb);
361 break;
362
363 case SKB_FCLONE_ORIG:
364 fclone_ref = (atomic_t *) (skb + 2);
365 if (atomic_dec_and_test(fclone_ref))
366 kmem_cache_free(skbuff_fclone_cache, skb);
367 break;
368
369 case SKB_FCLONE_CLONE:
370 fclone_ref = (atomic_t *) (skb + 1);
371 other = skb - 1;
372
373 /* The clone portion is available for
374 * fast-cloning again.
375 */
376 skb->fclone = SKB_FCLONE_UNAVAILABLE;
377
378 if (atomic_dec_and_test(fclone_ref))
379 kmem_cache_free(skbuff_fclone_cache, other);
380 break;
3ff50b79 381 }
1da177e4
LT
382}
383
04a4bb55 384static void skb_release_head_state(struct sk_buff *skb)
1da177e4 385{
adf30907 386 skb_dst_drop(skb);
1da177e4
LT
387#ifdef CONFIG_XFRM
388 secpath_put(skb->sp);
389#endif
9c2b3328
SH
390 if (skb->destructor) {
391 WARN_ON(in_irq());
1da177e4
LT
392 skb->destructor(skb);
393 }
9fb9cbb1 394#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
5f79e0f9 395 nf_conntrack_put(skb->nfct);
9fb9cbb1
YK
396 nf_conntrack_put_reasm(skb->nfct_reasm);
397#endif
1da177e4
LT
398#ifdef CONFIG_BRIDGE_NETFILTER
399 nf_bridge_put(skb->nf_bridge);
400#endif
1da177e4
LT
401/* XXX: IS this still necessary? - JHS */
402#ifdef CONFIG_NET_SCHED
403 skb->tc_index = 0;
404#ifdef CONFIG_NET_CLS_ACT
405 skb->tc_verd = 0;
1da177e4
LT
406#endif
407#endif
04a4bb55
LB
408}
409
/*
 * Free everything but the sk_buff shell: first the head state, then the
 * data area.
 */
static void skb_release_all(struct sk_buff *skb)
{
	/* attached state first ... */
	skb_release_head_state(skb);

	/* ... then the data area */
	skb_release_data(skb);
}
416
/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb
 */
void __kfree_skb(struct sk_buff *skb)
{
	/* Drop attached state and data before the shell itself is freed. */
	skb_release_all(skb);

	kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);
1da177e4 432