[PATCH] slab: implement /proc/slab_allocators
[deliverable/linux.git] / net / core / skbuff.c
CommitLineData
1da177e4
LT
1/*
2 * Routines having to do with the 'struct sk_buff' memory handlers.
3 *
4 * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
5 * Florian La Roche <rzsfl@rz.uni-sb.de>
6 *
7 * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
8 *
9 * Fixes:
10 * Alan Cox : Fixed the worst of the load
11 * balancer bugs.
12 * Dave Platt : Interrupt stacking fix.
13 * Richard Kooijman : Timestamp fixes.
14 * Alan Cox : Changed buffer format.
15 * Alan Cox : destructor hook for AF_UNIX etc.
16 * Linus Torvalds : Better skb_clone.
17 * Alan Cox : Added skb_copy.
18 * Alan Cox : Added all the changed routines Linus
19 * only put in the headers
20 * Ray VanTassle : Fixed --skb->lock in free
21 * Alan Cox : skb_copy copy arp field
22 * Andi Kleen : slabified it.
23 * Robert Olsson : Removed skb_head_pool
24 *
25 * NOTE:
26 * The __skb_ routines should be called with interrupts
27 * disabled, or you better be *real* sure that the operation is atomic
28 * with respect to whatever list is being frobbed (e.g. via lock_sock()
29 * or via disabling bottom half handlers, etc).
30 *
31 * This program is free software; you can redistribute it and/or
32 * modify it under the terms of the GNU General Public License
33 * as published by the Free Software Foundation; either version
34 * 2 of the License, or (at your option) any later version.
35 */
36
37/*
38 * The functions in this file will not compile correctly with gcc 2.4.x
39 */
40
41#include <linux/config.h>
42#include <linux/module.h>
43#include <linux/types.h>
44#include <linux/kernel.h>
45#include <linux/sched.h>
46#include <linux/mm.h>
47#include <linux/interrupt.h>
48#include <linux/in.h>
49#include <linux/inet.h>
50#include <linux/slab.h>
51#include <linux/netdevice.h>
52#ifdef CONFIG_NET_CLS_ACT
53#include <net/pkt_sched.h>
54#endif
55#include <linux/string.h>
56#include <linux/skbuff.h>
57#include <linux/cache.h>
58#include <linux/rtnetlink.h>
59#include <linux/init.h>
60#include <linux/highmem.h>
61
62#include <net/protocol.h>
63#include <net/dst.h>
64#include <net/sock.h>
65#include <net/checksum.h>
66#include <net/xfrm.h>
67
68#include <asm/uaccess.h>
69#include <asm/system.h>
70
ba89966c
ED
/*
 * Slab caches for struct sk_buff heads.
 *
 * skbuff_head_cache:   used by __alloc_skb() when @fclone is zero and by
 *                      alloc_skb_from_cache(); objects are freed back to it
 *                      for skbs marked SKB_FCLONE_UNAVAILABLE.
 * skbuff_fclone_cache: used by __alloc_skb() when @fclone is non-zero.
 *                      __alloc_skb() treats each object as an sk_buff,
 *                      a second (child) sk_buff directly after it, and an
 *                      atomic_t reference count after the child.
 */
71static kmem_cache_t *skbuff_head_cache __read_mostly;
72static kmem_cache_t *skbuff_fclone_cache __read_mostly;
1da177e4
LT
73
74/*
75 * Keep out-of-line to prevent kernel bloat.
76 * __builtin_return_address is not used because it is not always
77 * reliable.
78 */
79
80/**
81 * skb_over_panic - private function
82 * @skb: buffer
83 * @sz: size
84 * @here: address
85 *
86 * Out of line support code for skb_put(). Not user callable.
87 */
88void skb_over_panic(struct sk_buff *skb, int sz, void *here)
89{
26095455
PM
90 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
91 "data:%p tail:%p end:%p dev:%s\n",
92 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
93 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
94 BUG();
95}
96
97/**
98 * skb_under_panic - private function
99 * @skb: buffer
100 * @sz: size
101 * @here: address
102 *
103 * Out of line support code for skb_push(). Not user callable.
104 */
105
106void skb_under_panic(struct sk_buff *skb, int sz, void *here)
107{
26095455
PM
108 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
109 "data:%p tail:%p end:%p dev:%s\n",
110 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
111 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
112 BUG();
113}
114
115/* Allocate a new skbuff. We do this ourselves so we can fill in a few
116 * 'private' fields and also do memory statistics to find all the
117 * [BEEP] leaks.
118 *
119 */
120
121/**
d179cd12 122 * __alloc_skb - allocate a network buffer
1da177e4
LT
123 * @size: size to allocate
124 * @gfp_mask: allocation mask
c83c2486
RD
125 * @fclone: allocate from fclone cache instead of head cache
126 * and allocate a cloned (child) skb
1da177e4
LT
127 *
128 * Allocate a new &sk_buff. The returned buffer has no headroom and a
129 * tail room of size bytes. The object has a reference count of one.
130 * The return is the buffer. On a failure the return is %NULL.
131 *
132 * Buffers may only be allocated from interrupts using a @gfp_mask of
133 * %GFP_ATOMIC.
134 */
dd0fc66f 135struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
d179cd12 136 int fclone)
1da177e4 137{
8798b3fb 138 kmem_cache_t *cache;
4947d3ef 139 struct skb_shared_info *shinfo;
1da177e4
LT
140 struct sk_buff *skb;
141 u8 *data;
142
8798b3fb
HX
143 cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
144
1da177e4 145 /* Get the HEAD */
8798b3fb 146 skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA);
1da177e4
LT
147 if (!skb)
148 goto out;
149
150 /* Get the DATA. Size must match skb_add_mtu(). */
151 size = SKB_DATA_ALIGN(size);
871751e2 152 data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1da177e4
LT
153 if (!data)
154 goto nodata;
155
156 memset(skb, 0, offsetof(struct sk_buff, truesize));
157 skb->truesize = size + sizeof(struct sk_buff);
158 atomic_set(&skb->users, 1);
159 skb->head = data;
160 skb->data = data;
161 skb->tail = data;
162 skb->end = data + size;
4947d3ef
BL
163 /* make sure we initialize shinfo sequentially */
164 shinfo = skb_shinfo(skb);
165 atomic_set(&shinfo->dataref, 1);
166 shinfo->nr_frags = 0;
167 shinfo->tso_size = 0;
168 shinfo->tso_segs = 0;
169 shinfo->ufo_size = 0;
170 shinfo->ip6_frag_id = 0;
171 shinfo->frag_list = NULL;
172
d179cd12
DM
173 if (fclone) {
174 struct sk_buff *child = skb + 1;
175 atomic_t *fclone_ref = (atomic_t *) (child + 1);
1da177e4 176
d179cd12
DM
177 skb->fclone = SKB_FCLONE_ORIG;
178 atomic_set(fclone_ref, 1);
179
180 child->fclone = SKB_FCLONE_UNAVAILABLE;
181 }
1da177e4
LT
182out:
183 return skb;
184nodata:
8798b3fb 185 kmem_cache_free(cache, skb);
1da177e4
LT
186 skb = NULL;
187 goto out;
188}
189
190/**
191 * alloc_skb_from_cache - allocate a network buffer
192 * @cp: kmem_cache from which to allocate the data area
193 * (object size must be big enough for @size bytes + skb overheads)
194 * @size: size to allocate
195 * @gfp_mask: allocation mask
196 *
197 * Allocate a new &sk_buff. The returned buffer has no headroom and
198 * tail room of size bytes. The object has a reference count of one.
199 * The return is the buffer. On a failure the return is %NULL.
200 *
201 * Buffers may only be allocated from interrupts using a @gfp_mask of
202 * %GFP_ATOMIC.
203 */
204struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
86a76caf 205 unsigned int size,
dd0fc66f 206 gfp_t gfp_mask)
1da177e4
LT
207{
208 struct sk_buff *skb;
209 u8 *data;
210
211 /* Get the HEAD */
212 skb = kmem_cache_alloc(skbuff_head_cache,
213 gfp_mask & ~__GFP_DMA);
214 if (!skb)
215 goto out;
216
217 /* Get the DATA. */
218 size = SKB_DATA_ALIGN(size);
219 data = kmem_cache_alloc(cp, gfp_mask);
220 if (!data)
221 goto nodata;
222
223 memset(skb, 0, offsetof(struct sk_buff, truesize));
224 skb->truesize = size + sizeof(struct sk_buff);
225 atomic_set(&skb->users, 1);
226 skb->head = data;
227 skb->data = data;
228 skb->tail = data;
229 skb->end = data + size;
230
231 atomic_set(&(skb_shinfo(skb)->dataref), 1);
232 skb_shinfo(skb)->nr_frags = 0;
233 skb_shinfo(skb)->tso_size = 0;
234 skb_shinfo(skb)->tso_segs = 0;
235 skb_shinfo(skb)->frag_list = NULL;
236out:
237 return skb;
238nodata:
239 kmem_cache_free(skbuff_head_cache, skb);
240 skb = NULL;
241 goto out;
242}
243
244
245static void skb_drop_fraglist(struct sk_buff *skb)
246{
247 struct sk_buff *list = skb_shinfo(skb)->frag_list;
248
249 skb_shinfo(skb)->frag_list = NULL;
250
251 do {
252 struct sk_buff *this = list;
253 list = list->next;
254 kfree_skb(this);
255 } while (list);
256}
257
258static void skb_clone_fraglist(struct sk_buff *skb)
259{
260 struct sk_buff *list;
261
262 for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
263 skb_get(list);
264}
265
266void skb_release_data(struct sk_buff *skb)
267{
268 if (!skb->cloned ||
269 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
270 &skb_shinfo(skb)->dataref)) {
271 if (skb_shinfo(skb)->nr_frags) {
272 int i;
273 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
274 put_page(skb_shinfo(skb)->frags[i].page);
275 }
276
277 if (skb_shinfo(skb)->frag_list)
278 skb_drop_fraglist(skb);
279
280 kfree(skb->head);
281 }
282}
283
284/*
285 * Free an skbuff by memory without cleaning the state.
286 */
287void kfree_skbmem(struct sk_buff *skb)
288{
d179cd12
DM
289 struct sk_buff *other;
290 atomic_t *fclone_ref;
291
1da177e4 292 skb_release_data(skb);
d179cd12
DM
293 switch (skb->fclone) {
294 case SKB_FCLONE_UNAVAILABLE:
295 kmem_cache_free(skbuff_head_cache, skb);
296 break;
297
298 case SKB_FCLONE_ORIG:
299 fclone_ref = (atomic_t *) (skb + 2);
300 if (atomic_dec_and_test(fclone_ref))
301 kmem_cache_free(skbuff_fclone_cache, skb);
302 break;
303
304 case SKB_FCLONE_CLONE:
305 fclone_ref = (atomic_t *) (skb + 1);
306 other = skb - 1;
307
308 /* The clone portion is available for
309 * fast-cloning again.
310 */
311 skb->fclone = SKB_FCLONE_UNAVAILABLE;
312
313 if (atomic_dec_and_test(fclone_ref))
314 kmem_cache_free(skbuff_fclone_cache, other);
315 break;
316 };
1da177e4
LT
317}
318
319/**
320 * __kfree_skb - private function
321 * @skb: buffer
322 *
323 * Free an sk_buff. Release anything attached to the buffer.
324 * Clean the state. This is an internal helper function. Users should
325 * always call kfree_skb
326 */
327
328void __kfree_skb(struct sk_buff *skb)
329{
1da177e4
LT
330 dst_release(skb->dst);
331#ifdef CONFIG_XFRM
332 secpath_put(skb->sp);
333#endif
9c2b3328
SH
334 if (skb->destructor) {
335 WARN_ON(in_irq());
1da177e4
LT
336 skb->destructor(skb);
337 }
338#ifdef CONFIG_NETFILTER
339 nf_conntrack_put(skb->nfct);
9fb9cbb1
YK
340#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
341 nf_conntrack_put_reasm(skb->nfct_reasm);
342#endif
1da177e4
LT
343#ifdef CONFIG_BRIDGE_NETFILTER
344 nf_bridge_put(skb->nf_bridge);
345#endif
346#endif
347/* XXX: IS this still necessary? - JHS */
348#ifdef CONFIG_NET_SCHED
349 skb->tc_index = 0;
350#ifdef CONFIG_NET_CLS_ACT
351 skb->tc_verd = 0;
1da177e4
LT
352#endif
353#endif
354
355 kfree_skbmem(skb);
356}
357