[net-next-2.6.git] / mm / slob.c

/*
 * SLOB Allocator: Simple List Of Blocks
 *
 * Matt Mackall <mpm@selenic.com> 12/30/03
 *
 * NUMA support by Paul Mundt, 2007.
 *
 * How SLOB works:
 *
 * The core of SLOB is a traditional K&R style heap allocator, with
 * support for returning aligned objects. The granularity of this
 * allocator is as little as 2 bytes, however typically most architectures
 * will require 4 bytes on 32-bit and 8 bytes on 64-bit.
 *
 * The slob heap is a linked list of pages from alloc_pages(), and
 * within each page, there is a singly-linked list of free blocks (slob_t).
 * The heap is grown on demand and allocation from the heap is currently
 * first-fit.
 *
 * Above this is an implementation of kmalloc/kfree. Blocks returned
 * from kmalloc are prepended with a 4-byte header with the kmalloc size.
 * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
 * alloc_pages() directly, allocating compound pages so the page order
 * does not have to be separately tracked, and also stores the exact
 * allocation size in page->private so that it can be used to accurately
 * provide ksize(). These objects are detected in kfree() because slob_page()
 * is false for them.
 *
 * SLAB is emulated on top of SLOB by simply calling constructors and
 * destructors for every SLAB allocation. Objects are returned with the
 * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
 * case the low-level allocator will fragment blocks to create the proper
 * alignment. Again, objects of page-size or greater are allocated by
 * calling alloc_pages(). As SLAB objects know their size, no separate
 * size bookkeeping is necessary and there is essentially no allocation
 * space overhead, and compound pages aren't needed for multi-page
 * allocations.
 *
 * NUMA support in SLOB is fairly simplistic, pushing most of the real
 * logic down to the page allocator, and simply doing the node accounting
 * on the upper levels. In the event that a node id is explicitly
 * provided, alloc_pages_node() with the specified node id is used
 * instead. The common case (or when the node id isn't explicitly provided)
 * will default to the current node, as per numa_node_id().
 *
 * Node aware pages are still inserted in to the global freelist, and
 * these are scanned for by matching against the node id encoded in the
 * page flags. As a result, block allocations that can be satisfied from
 * the freelist will only be done so on pages residing on the same node,
 * in order to prevent random node placement.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <asm/atomic.h>

/*
 * slob_block has a field 'units', which indicates size of block if +ve,
 * or offset of next block if -ve (in SLOB_UNITs).
 *
 * Free blocks of size 1 unit simply contain the offset of the next block.
 * Those with larger size contain their size in the first SLOB_UNIT of
 * memory, and the offset of the next free block in the second SLOB_UNIT.
 *
 * slobidx_t is the integer type used for both sizes and in-page offsets.
 * A 16-bit index is selected when PAGE_SIZE <= 32767 * 2, i.e. when every
 * offset within a page, counted in 2-byte units, still fits in an s16;
 * otherwise a 32-bit index is used.
 */
#if PAGE_SIZE <= (32767 * 2)
typedef s16 slobidx_t;
#else
typedef s32 slobidx_t;
#endif

struct slob_block {
	slobidx_t units;
};
typedef struct slob_block slob_t;

/*
 * We use struct page fields to manage some slob allocation aspects,
 * however to avoid the horrible mess in include/linux/mm_types.h, we'll
 * just define our own struct page type variant here.
 *
 * The anonymous struct and 'struct page page' share storage through the
 * union, so a struct page pointer obtained from virt_to_page() can be cast
 * to struct slob_page (as slob_alloc(), slob_free(), kfree() and ksize()
 * do) and the underlying page is still reachable as sp->page.
 * NOTE(review): which struct page members 'units', 'pad', 'free' and
 * 'list' overlay depends on the struct page layout, which is not visible
 * here - confirm against mm_types.h before changing field order or sizes.
 */
struct slob_page {
	union {
		struct {
			unsigned long flags;	/* mandatory */
			atomic_t _count;	/* mandatory */
			slobidx_t units;	/* free units left in page */
			unsigned long pad[2];
			slob_t *free;		/* first free slob_t in page */
			struct list_head list;	/* linked list of free pages */
		};
		struct page page;
	};
};
/*
 * Compile-time guard: the build breaks if struct slob_page and struct page
 * ever differ in size. This function is never meant to be called; it only
 * exists to host the BUILD_BUG_ON().
 */
static inline void struct_slob_page_wrong_size(void)
{
	BUILD_BUG_ON(sizeof(struct page) != sizeof(struct slob_page));
}

/*
 * free_slob_page: call before a slob_page is returned to the page allocator.
 */
static inline void free_slob_page(struct slob_page *sp)
{
	reset_page_mapcount(&sp->page);
	sp->page.mapping = NULL;
}

/*
 * All (partially) free slob pages go on this list.
 *
 * Pages are linked through slob_page.list by set_slob_page_free() and
 * unlinked by clear_slob_page_free(); slob_alloc() walks the list looking
 * for a page with enough room.
 */
static LIST_HEAD(free_slob_pages);

/*
 * slob_page: test whether sp is managed by the slob heap.
 *
 * Non-zero for slob pages, zero for "bigblock" pages that came straight
 * from the page allocator. The PG_active page flag is used as the marker.
 */
static inline int slob_page(struct slob_page *sp)
{
	const unsigned long *page_flags = &sp->flags;

	return test_bit(PG_active, page_flags);
}

/*
 * set_slob_page: mark sp as a slob heap page (sets PG_active, non-atomic).
 */
static inline void set_slob_page(struct slob_page *sp)
{
	unsigned long *page_flags = &sp->flags;

	__set_bit(PG_active, page_flags);
}

/*
 * clear_slob_page: remove the slob heap marker from sp (clears PG_active,
 * non-atomic).
 */
static inline void clear_slob_page(struct slob_page *sp)
{
	unsigned long *page_flags = &sp->flags;

	__clear_bit(PG_active, page_flags);
}

/*
 * slob_page_free: test whether sp is currently linked on the
 * free_slob_pages list. The PG_private page flag records list membership.
 */
static inline int slob_page_free(struct slob_page *sp)
{
	const unsigned long *page_flags = &sp->flags;

	return test_bit(PG_private, page_flags);
}

/*
 * set_slob_page_free: link sp at the front of free_slob_pages and record
 * the membership in PG_private.
 */
static inline void set_slob_page_free(struct slob_page *sp)
{
	struct list_head *node = &sp->list;
	unsigned long *page_flags = &sp->flags;

	list_add(node, &free_slob_pages);
	__set_bit(PG_private, page_flags);
}

/*
 * clear_slob_page_free: unlink sp from free_slob_pages and drop the
 * PG_private membership marker.
 */
static inline void clear_slob_page_free(struct slob_page *sp)
{
	struct list_head *node = &sp->list;
	unsigned long *page_flags = &sp->flags;

	list_del(node);
	__clear_bit(PG_private, page_flags);
}

/*
 * SLOB_UNIT:        size in bytes of one allocation unit (one slob_t).
 * SLOB_UNITS(size): number of units needed to hold 'size' bytes, rounded up.
 * SLOB_ALIGN:       alignment applied to caches created with
 *                   SLAB_HWCACHE_ALIGN (one L1 cache line).
 */
#define SLOB_UNIT sizeof(slob_t)
#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
#define SLOB_ALIGN L1_CACHE_BYTES

/*
 * struct slob_rcu is inserted at the tail of allocated slob blocks, which
 * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
 * the block using call_rcu.
 *
 * 'size' holds the full object size of the owning cache (footer included),
 * so the RCU callback can locate the start of the object and free it
 * without a pointer back to the cache.
 */
struct slob_rcu {
	struct rcu_head head;
	int size;
};

/*
 * slob_lock protects all slob allocator structures.
 *
 * slob_alloc() and slob_free() take it with spin_lock_irqsave() around
 * every access to the free page list and the per-page free lists.
 */
static DEFINE_SPINLOCK(slob_lock);

/*
 * set_slob: write the free-block header for block s.
 *
 * 'next' is stored as an offset (in SLOB_UNITs) from the start of the page
 * containing s. A block larger than one unit keeps its size in s[0] and
 * the offset in s[1]; a one-unit block has room only for the negated
 * offset in s[0].
 */
static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
{
	slob_t *page_start = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t next_off = next - page_start;

	if (size <= 1) {
		s[0].units = -next_off;
		return;
	}

	s[0].units = size;
	s[1].units = next_off;
}

/*
 * slob_units: size of free block s in SLOB_UNITs.
 *
 * A positive header value is the size itself; anything else means the
 * header holds a (negated) next offset and the block is one unit long.
 */
static slobidx_t slob_units(slob_t *s)
{
	return (s->units > 0) ? s->units : 1;
}

/*
 * slob_next: decode the pointer to the free block that follows s.
 *
 * The stored offset is relative to the start of the page containing s.
 * One-unit blocks keep it negated in s[0]; larger blocks keep it in s[1].
 */
static slob_t *slob_next(slob_t *s)
{
	slob_t *page_start = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t head = s[0].units;
	slobidx_t next_off = (head < 0) ? -head : s[1].units;

	return page_start + next_off;
}

/*
 * slob_last: true if s is the last free block in its page, which is
 * encoded by a next pointer that is page aligned.
 */
static int slob_last(slob_t *s)
{
	unsigned long next_addr = (unsigned long)slob_next(s);

	return (next_addr & ~PAGE_MASK) == 0;
}

/*
 * slob_new_page: get 2^order pages from the page allocator.
 *
 * With CONFIG_NUMA and node != -1 the pages are requested from that node;
 * otherwise alloc_pages() picks the placement. Returns the kernel virtual
 * address of the first page, or NULL if the page allocator failed.
 */
static void *slob_new_page(gfp_t gfp, int order, int node)
{
	struct page *pg;

#ifdef CONFIG_NUMA
	if (node != -1)
		pg = alloc_pages_node(node, gfp, order);
	else
#endif
		pg = alloc_pages(gfp, order);

	if (pg == NULL)
		return NULL;

	return page_address(pg);
}

/*
 * Allocate a slob block within a given slob_page sp.
 *
 * sp:    page whose free list (starting at sp->free) is searched.
 * size:  request in bytes; rounded up to whole SLOB_UNITs.
 * align: required byte alignment of the returned block, 0 for none.
 *
 * First-fit walk over the page's free blocks. Returns the block, or NULL
 * once the last free block has been examined without finding room. On
 * success sp->units is reduced by the units handed out and the page is
 * taken off the free page list when no free units remain. Both callers in
 * this file hold slob_lock.
 */
static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
{
	slob_t *prev, *cur, *aligned = 0;
	int delta = 0, units = SLOB_UNITS(size);

	for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
		slobidx_t avail = slob_units(cur);

		if (align) {
			aligned = (slob_t *)ALIGN((unsigned long)cur, align);
			/* units to skip at the head of cur to reach alignment */
			delta = aligned - cur;
		}
		if (avail >= units + delta) { /* room enough? */
			slob_t *next;

			if (delta) { /* need to fragment head to align? */
				/*
				 * Split cur in two free blocks: the unaligned
				 * head (delta units) stays on the list and
				 * becomes prev; the aligned tail becomes cur.
				 */
				next = slob_next(cur);
				set_slob(aligned, avail - delta, next);
				set_slob(cur, delta, aligned);
				prev = cur;
				cur = aligned;
				avail = slob_units(cur);
			}

			next = slob_next(cur);
			if (avail == units) { /* exact fit? unlink. */
				if (prev)
					set_slob(prev, slob_units(prev), next);
				else
					sp->free = next;
			} else { /* fragment */
				/*
				 * Hand out the first 'units' of cur; the
				 * remainder becomes a new free block that
				 * takes cur's place in the list.
				 */
				if (prev)
					set_slob(prev, slob_units(prev), cur + units);
				else
					sp->free = cur + units;
				set_slob(cur + units, avail - units, next);
			}

			sp->units -= units;
			/* page is now full: take it off the free page list */
			if (!sp->units)
				clear_slob_page_free(sp);
			return cur;
		}
		if (slob_last(cur))
			return NULL;
	}
}

/*
 * slob_alloc: entry point into the slob allocator.
 *
 * size:  bytes requested (callers in this file only pass sub-page sizes).
 * gfp:   allocation flags; used when a fresh page is needed, and
 *        __GFP_ZERO additionally makes the returned block zero-filled.
 * align: byte alignment of the returned block, 0 for none.
 * node:  NUMA node to allocate from, or -1 for no preference.
 *
 * Tries every page on free_slob_pages first, then falls back to a new
 * page from the page allocator. Returns the block, or NULL if a new page
 * was needed and could not be obtained.
 */
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
{
	struct slob_page *sp;
	struct list_head *prev;
	slob_t *b = NULL;
	unsigned long flags;

	spin_lock_irqsave(&slob_lock, flags);
	/* Iterate through each partially free page, try to find room */
	list_for_each_entry(sp, &free_slob_pages, list) {
#ifdef CONFIG_NUMA
		/*
		 * If there's a node specification, search for a partial
		 * page with a matching node id in the freelist.
		 */
		if (node != -1 && page_to_nid(&sp->page) != node)
			continue;
#endif
		/*
		 * Enough room on this page? This only compares the total
		 * number of free units; the free space may be fragmented,
		 * so slob_page_alloc() below can still fail.
		 */
		if (sp->units < SLOB_UNITS(size))
			continue;

		/*
		 * Attempt to alloc. The predecessor is remembered first
		 * because a successful allocation may unlink sp from the
		 * list (when the page becomes full).
		 */
		prev = sp->list.prev;
		b = slob_page_alloc(sp, size, align);
		if (!b)
			continue;

		/* Improve fragment distribution and reduce our average
		 * search time by starting our next search here. (see
		 * Knuth vol 1, sec 2.5, pg 449)
		 *
		 * This is done by moving the list head itself so that it
		 * sits directly in front of prev->next. */
		if (prev != free_slob_pages.prev &&
				free_slob_pages.next != prev->next)
			list_move_tail(&free_slob_pages, prev->next);
		break;
	}
	spin_unlock_irqrestore(&slob_lock, flags);

	/* Not enough space: must allocate a new page */
	if (!b) {
		/*
		 * The page is requested without __GFP_ZERO and with the
		 * lock dropped; only the returned block is zeroed, by the
		 * memset() at the end.
		 */
		b = slob_new_page(gfp & ~__GFP_ZERO, 0, node);
		if (!b)
			return 0;
		sp = (struct slob_page *)virt_to_page(b);
		set_slob_page(sp);

		spin_lock_irqsave(&slob_lock, flags);
		/* The whole page starts out as one free block. */
		sp->units = SLOB_UNITS(PAGE_SIZE);
		sp->free = b;
		INIT_LIST_HEAD(&sp->list);
		set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
		set_slob_page_free(sp);
		b = slob_page_alloc(sp, size, align);
		BUG_ON(!b);
		spin_unlock_irqrestore(&slob_lock, flags);
	}
	if (unlikely((gfp & __GFP_ZERO) && b))
		memset(b, 0, size);
	return b;
}

/*
 * slob_free: entry point into the slob allocator.
 *
 * block: block previously returned by slob_alloc(); NULL and ZERO_SIZE_PTR
 *        are ignored.
 * size:  size in bytes that was passed to slob_alloc() for this block;
 *        must be non-zero.
 *
 * Returns the block to the free list of its page, keeping that list
 * sorted by address and merging the block with any free neighbour that
 * touches it. If the free makes the whole page free, the page is handed
 * back to the page allocator instead.
 */
static void slob_free(void *block, int size)
{
	struct slob_page *sp;
	slob_t *prev, *next, *b = (slob_t *)block;
	slobidx_t units;
	unsigned long flags;

	if (unlikely(ZERO_OR_NULL_PTR(block)))
		return;
	BUG_ON(!size);

	sp = (struct slob_page *)virt_to_page(block);
	units = SLOB_UNITS(size);

	spin_lock_irqsave(&slob_lock, flags);

	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
		/* Go directly to page allocator. Do not pass slob allocator */
		if (slob_page_free(sp))
			clear_slob_page_free(sp);
		clear_slob_page(sp);
		free_slob_page(sp);
		free_page((unsigned long)b);
		goto out;
	}

	if (!slob_page_free(sp)) {
		/*
		 * This slob page is about to become partially free. Easy!
		 * The block is the only free block, so its next pointer is
		 * the start of the following page, which marks it as last.
		 */
		sp->units = units;
		sp->free = b;
		set_slob(b, units,
			(void *)((unsigned long)(b +
					SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
		set_slob_page_free(sp);
		goto out;
	}

	/*
	 * Otherwise the page is already partially free, so find reinsertion
	 * point.
	 */
	sp->units += units;

	if (b < sp->free) {
		/*
		 * New head of the free list. If the block ends exactly where
		 * the old head starts, absorb the old head so the two do not
		 * remain as separate fragments.
		 */
		if (b + units == sp->free) {
			units += slob_units(sp->free);
			sp->free = slob_next(sp->free);
		}
		set_slob(b, units, sp->free);
		sp->free = b;
	} else {
		/* Find the free blocks on either side of b. */
		prev = sp->free;
		next = slob_next(prev);
		while (b > next) {
			prev = next;
			next = slob_next(prev);
		}

		/* Merge with the following free block if they touch. */
		if (!slob_last(prev) && b + units == next) {
			units += slob_units(next);
			set_slob(b, units, slob_next(next));
		} else
			set_slob(b, units, next);

		/* Merge with the preceding free block if they touch. */
		if (prev + slob_units(prev) == b) {
			units = slob_units(b) + slob_units(prev);
			set_slob(prev, units, slob_next(b));
		} else
			set_slob(prev, slob_units(prev), b);
	}
out:
	spin_unlock_irqrestore(&slob_lock, flags);
}

/*
 * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
 */

/*
 * Fallback minimum alignments for kmalloc() and slab objects, used when
 * the architecture does not supply its own: the natural alignment of
 * unsigned long.
 */
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
#endif

/*
 * __kmalloc_node: kmalloc() implementation.
 *
 * size: bytes requested; 0 yields ZERO_SIZE_PTR.
 * gfp:  allocation flags.
 * node: NUMA node to allocate from, or -1 for no preference.
 *
 * Small requests come from the slob heap: 'align' extra bytes are
 * allocated in front of the object and the requested size is stored at
 * the start of that header, where kfree() reads it back. Requests of
 * PAGE_SIZE - align bytes or more go straight to the page allocator as a
 * compound page, with the size recorded in page->private.
 *
 * Returns the object, or NULL if the allocation failed.
 */
void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
	unsigned int *m;
	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);

	if (size < PAGE_SIZE - align) {
		if (!size)
			return ZERO_SIZE_PTR;

		m = slob_alloc(size + align, gfp, align, node);
		/*
		 * Report failure as NULL; skipping past the header of a
		 * NULL block would produce a bogus non-NULL pointer.
		 */
		if (!m)
			return NULL;
		*m = size;
		return (void *)m + align;
	} else {
		void *ret;

		ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
		if (ret) {
			struct page *page;
			page = virt_to_page(ret);
			page->private = size;
		}
		return ret;
	}
}
EXPORT_SYMBOL(__kmalloc_node);

/*
 * kfree: release memory obtained from kmalloc().
 *
 * NULL and ZERO_SIZE_PTR are ignored. Objects living on a slob page are
 * preceded by a header of 'align' bytes whose first word holds the
 * requested size; header and object are returned to the slob heap
 * together. Anything else is a big block taken directly from the page
 * allocator and is released with put_page().
 */
void kfree(const void *block)
{
	struct slob_page *sp;
	unsigned int *hdr;
	int align;

	if (unlikely(ZERO_OR_NULL_PTR(block)))
		return;

	sp = (struct slob_page *)virt_to_page(block);
	if (!slob_page(sp)) {
		put_page(&sp->page);
		return;
	}

	align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	hdr = (unsigned int *)(block - align);
	slob_free(hdr, *hdr + align);
}
EXPORT_SYMBOL(kfree);

/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t ksize(const void *block)
{
	struct slob_page *sp;

	BUG_ON(!block);
	if (unlikely(block == ZERO_SIZE_PTR))
		return 0;

	sp = (struct slob_page *)virt_to_page(block);
	if (slob_page(sp))
		return ((slob_t *)block - 1)->units + SLOB_UNIT;
	else
		return sp->page.private;
}
EXPORT_SYMBOL(ksize);

/*
 * Cache descriptor used by the SLAB emulation. It only records the
 * parameters supplied to kmem_cache_create(); objects themselves come
 * from the slob heap or the page allocator.
 */
struct kmem_cache {
	/* object size in bytes (incl. RCU footer, if any) and alignment */
	unsigned int size, align;
	/* SLAB_* flags given at creation time */
	unsigned long flags;
	/* caller's name string; the pointer is stored, not a copy */
	const char *name;
	/* optional constructor, called for every allocated object */
	void (*ctor)(struct kmem_cache *, void *);
};

/*
 * kmem_cache_create: create a cache descriptor.
 *
 * name:  cache name; the pointer is stored, not copied.
 * size:  object size in bytes.
 * align: minimum object alignment requested by the caller.
 * flags: SLAB_* flags. SLAB_DESTROY_BY_RCU grows each object by a
 *        struct slob_rcu footer, SLAB_HWCACHE_ALIGN raises the alignment
 *        to SLOB_ALIGN, SLAB_PANIC panics instead of returning NULL.
 * ctor:  optional constructor run on every allocated object.
 *
 * Returns the new cache, or NULL if the descriptor could not be allocated
 * (and SLAB_PANIC was not set).
 */
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
	size_t align, unsigned long flags,
	void (*ctor)(struct kmem_cache *, void *))
{
	struct kmem_cache *c;

	/*
	 * 'flags' holds SLAB_* cache flags, not gfp flags, so it must not
	 * be handed to the allocator. The descriptor contains pointers and
	 * an unsigned long, so it also needs more than the heap's default
	 * SLOB_UNIT alignment.
	 */
	c = slob_alloc(sizeof(struct kmem_cache),
		GFP_KERNEL, ARCH_KMALLOC_MINALIGN, -1);

	if (c) {
		c->name = name;
		c->size = size;
		if (flags & SLAB_DESTROY_BY_RCU) {
			/* leave room for rcu footer at the end of object */
			c->size += sizeof(struct slob_rcu);
		}
		c->flags = flags;
		c->ctor = ctor;
		/* ignore alignment unless it's forced */
		c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
		if (c->align < ARCH_SLAB_MINALIGN)
			c->align = ARCH_SLAB_MINALIGN;
		if (c->align < align)
			c->align = align;
	} else if (flags & SLAB_PANIC)
		panic("Cannot create slab cache %s\n", name);

	return c;
}
EXPORT_SYMBOL(kmem_cache_create);

/*
 * kmem_cache_destroy: release a cache descriptor back to the slob heap.
 * Objects allocated from the cache are not tracked and are not touched.
 */
void kmem_cache_destroy(struct kmem_cache *c)
{
	const int descriptor_bytes = sizeof(struct kmem_cache);

	slob_free(c, descriptor_bytes);
}
EXPORT_SYMBOL(kmem_cache_destroy);

/*
 * kmem_cache_alloc_node: allocate one object from cache c.
 *
 * c:     cache describing object size, alignment and constructor.
 * flags: gfp allocation flags.
 * node:  NUMA node to allocate from, or -1 for no preference.
 *
 * Objects smaller than a page come from the slob heap; larger ones are
 * taken directly from the page allocator. The cache constructor, if any,
 * is run on the new object. Returns the object, or NULL on failure.
 */
void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
	void *b;

	if (c->size < PAGE_SIZE)
		b = slob_alloc(c->size, flags, c->align, node);
	else
		b = slob_new_page(flags, get_order(c->size), node);

	/* Never hand a failed (NULL) allocation to the constructor. */
	if (b && c->ctor)
		c->ctor(c, b);

	return b;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);

/*
 * __kmem_cache_free: release a cache object of 'size' bytes, using the
 * same size threshold as kmem_cache_alloc_node(): page-sized and larger
 * objects go back to the page allocator, the rest to the slob heap.
 */
static void __kmem_cache_free(void *b, int size)
{
	if (size >= PAGE_SIZE) {
		free_pages((unsigned long)b, get_order(size));
		return;
	}

	slob_free(b, size);
}

static void kmem_rcu_free(struct rcu_head *head)
{
	struct slob_rcu *slob_rcu = (struct slob_rcu *)head;
	void *b = (void *)slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));

	__kmem_cache_free(b, slob_rcu->size);
}

/*
 * kmem_cache_free: release object b belonging to cache c.
 *
 * Ordinary caches free the object immediately. For SLAB_DESTROY_BY_RCU
 * caches the footer at the tail of the object is filled in and the free
 * is deferred to kmem_rcu_free() through call_rcu().
 */
void kmem_cache_free(struct kmem_cache *c, void *b)
{
	struct slob_rcu *footer;

	if (likely(!(c->flags & SLAB_DESTROY_BY_RCU))) {
		__kmem_cache_free(b, c->size);
		return;
	}

	footer = b + (c->size - sizeof(struct slob_rcu));
	INIT_RCU_HEAD(&footer->head);
	footer->size = c->size;
	call_rcu(&footer->head, kmem_rcu_free);
}
EXPORT_SYMBOL(kmem_cache_free);

/*
 * kmem_cache_size: object size recorded for cache c. For
 * SLAB_DESTROY_BY_RCU caches this includes the struct slob_rcu footer
 * added by kmem_cache_create().
 */
unsigned int kmem_cache_size(struct kmem_cache *c)
{
	unsigned int object_size = c->size;

	return object_size;
}
EXPORT_SYMBOL(kmem_cache_size);

/*
 * kmem_cache_name: name pointer that was passed to kmem_cache_create().
 */
const char *kmem_cache_name(struct kmem_cache *c)
{
	const char *cache_name = c->name;

	return cache_name;
}
EXPORT_SYMBOL(kmem_cache_name);

/*
 * kmem_cache_shrink: no-op in SLOB; the argument is ignored and 0 is
 * always returned.
 */
int kmem_cache_shrink(struct kmem_cache *d)
{
	return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);

/*
 * kmem_ptr_validate: SLOB performs no validation; both arguments are
 * ignored and 0 is always returned.
 */
int kmem_ptr_validate(struct kmem_cache *a, const void *b)
{
	return 0;
}

/* Set to 1 by kmem_cache_init(); zero until then. */
static unsigned int slob_ready __read_mostly;

/*
 * slab_is_available: non-zero once kmem_cache_init() has run.
 */
int slab_is_available(void)
{
	return slob_ready;
}

/*
 * kmem_cache_init: SLOB needs no set-up of its own; this only records
 * that the allocator may now be used (see slab_is_available()).
 */
void __init kmem_cache_init(void)
{
	slob_ready = 1;
}
Commit	Line	Data
10cef602 MM	1	/*
	2	* SLOB Allocator: Simple List Of Blocks
	3	*
	4	* Matt Mackall <mpm@selenic.com> 12/30/03
	5	*
6193a2ff PM	6	* NUMA support by Paul Mundt, 2007.
6193a2ff PM	7	*
10cef602 MM	8	* How SLOB works:
	9	*
	10	* The core of SLOB is a traditional K&R style heap allocator, with
	11	* support for returning aligned objects. The granularity of this
55394849 NP	12	* allocator is as little as 2 bytes, however typically most architectures
55394849 NP	13	* will require 4 bytes on 32-bit and 8 bytes on 64-bit.
95b35127	14	*
6193a2ff	15	* The slob heap is a linked list of pages from alloc_pages(), and
95b35127 NP	16	* within each page, there is a singly-linked list of free blocks (slob_t).
	17	* The heap is grown on demand and allocation from the heap is currently
	18	* first-fit.
10cef602 MM	19	*
10cef602 MM	20	* Above this is an implementation of kmalloc/kfree. Blocks returned
55394849	21	* from kmalloc are prepended with a 4-byte header with the kmalloc size.
10cef602	22	* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
6193a2ff	23	* alloc_pages() directly, allocating compound pages so the page order
d87a133f NP	24	* does not have to be separately tracked, and also stores the exact
	25	* allocation size in page->private so that it can be used to accurately
	26	* provide ksize(). These objects are detected in kfree() because slob_page()
	27	* is false for them.
10cef602 MM	28	*
10cef602 MM	29	* SLAB is emulated on top of SLOB by simply calling constructors and
95b35127 NP	30	* destructors for every SLAB allocation. Objects are returned with the
	31	* 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
	32	* case the low-level allocator will fragment blocks to create the proper
	33	* alignment. Again, objects of page-size or greater are allocated by
6193a2ff	34	* calling alloc_pages(). As SLAB objects know their size, no separate
95b35127	35	* size bookkeeping is necessary and there is essentially no allocation
d87a133f NP	36	* space overhead, and compound pages aren't needed for multi-page
d87a133f NP	37	* allocations.
6193a2ff PM	38	*
	39	* NUMA support in SLOB is fairly simplistic, pushing most of the real
	40	* logic down to the page allocator, and simply doing the node accounting
	41	* on the upper levels. In the event that a node id is explicitly
	42	* provided, alloc_pages_node() with the specified node id is used
	43	* instead. The common case (or when the node id isn't explicitly provided)
	44	* will default to the current node, as per numa_node_id().
	45	*
	46	* Node aware pages are still inserted in to the global freelist, and
	47	* these are scanned for by matching against the node id encoded in the
	48	* page flags. As a result, block allocations that can be satisfied from
	49	* the freelist will only be done so on pages residing on the same node,
	50	* in order to prevent random node placement.
10cef602 MM	51	*/
10cef602 MM	52
95b35127	53	#include <linux/kernel.h>
10cef602 MM	54	#include <linux/slab.h>
	55	#include <linux/mm.h>
	56	#include <linux/cache.h>
	57	#include <linux/init.h>
	58	#include <linux/module.h>
afc0cedb	59	#include <linux/rcupdate.h>
95b35127 NP	60	#include <linux/list.h>
	61	#include <asm/atomic.h>
	62
95b35127 NP	63	/*
	64	* slob_block has a field 'units', which indicates size of block if +ve,
	65	* or offset of next block if -ve (in SLOB_UNITs).
	66	*
	67	* Free blocks of size 1 unit simply contain the offset of the next block.
	68	* Those with larger size contain their size in the first SLOB_UNIT of
	69	* memory, and the offset of the next free block in the second SLOB_UNIT.
	70	*/
55394849	71	#if PAGE_SIZE <= (32767 * 2)
95b35127 NP	72	typedef s16 slobidx_t;
	73	#else
	74	typedef s32 slobidx_t;
	75	#endif
	76
10cef602	77	struct slob_block {
95b35127	78	slobidx_t units;
55394849	79	};
10cef602 MM	80	typedef struct slob_block slob_t;
10cef602 MM	81
95b35127 NP	82	/*
	83	* We use struct page fields to manage some slob allocation aspects,
	84	* however to avoid the horrible mess in include/linux/mm_types.h, we'll
	85	* just define our own struct page type variant here.
	86	*/
	87	struct slob_page {
	88	union {
	89	struct {
	90	unsigned long flags; /* mandatory */
	91	atomic_t _count; /* mandatory */
	92	slobidx_t units; /* free units left in page */
	93	unsigned long pad[2];
	94	slob_t free; / first free slob_t in page */
	95	struct list_head list; /* linked list of free pages */
	96	};
	97	struct page page;
	98	};
	99	};
	100	static inline void struct_slob_page_wrong_size(void)
	101	{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }
	102
	103	/*
	104	* free_slob_page: call before a slob_page is returned to the page allocator.
	105	*/
	106	static inline void free_slob_page(struct slob_page *sp)
	107	{
	108	reset_page_mapcount(&sp->page);
	109	sp->page.mapping = NULL;
	110	}
	111
	112	/*
	113	* All (partially) free slob pages go on this list.
	114	*/
	115	static LIST_HEAD(free_slob_pages);
	116
	117	/*
	118	* slob_page: True for all slob pages (false for bigblock pages)
	119	*/
	120	static inline int slob_page(struct slob_page *sp)
	121	{
	122	return test_bit(PG_active, &sp->flags);
	123	}
	124
	125	static inline void set_slob_page(struct slob_page *sp)
	126	{
	127	__set_bit(PG_active, &sp->flags);
	128	}
	129
	130	static inline void clear_slob_page(struct slob_page *sp)
	131	{
	132	__clear_bit(PG_active, &sp->flags);
	133	}
	134
	135	/*
	136	* slob_page_free: true for pages on free_slob_pages list.
	137	*/
	138	static inline int slob_page_free(struct slob_page *sp)
	139	{
	140	return test_bit(PG_private, &sp->flags);
	141	}
	142
	143	static inline void set_slob_page_free(struct slob_page *sp)
	144	{
	145	list_add(&sp->list, &free_slob_pages);
146	__set_bit(PG_private, &sp->flags);
147	}
148
149	static inline void clear_slob_page_free(struct slob_page *sp)
150	{
151	list_del(&sp->list);
152	__clear_bit(PG_private, &sp->flags);
153	}
154
10cef602 MM	155	#define SLOB_UNIT sizeof(slob_t)
	156	#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
	157	#define SLOB_ALIGN L1_CACHE_BYTES
	158
afc0cedb NP	159	/*
	160	* struct slob_rcu is inserted at the tail of allocated slob blocks, which
	161	* were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
	162	* the block using call_rcu.
	163	*/
	164	struct slob_rcu {
	165	struct rcu_head head;
	166	int size;
	167	};
	168
95b35127 NP	169	/*
	170	* slob_lock protects all slob allocator structures.
	171	*/
10cef602	172	static DEFINE_SPINLOCK(slob_lock);
10cef602	173
95b35127 NP	174	/*
	175	* Encode the given size and next info into a free slob block s.
	176	*/
	177	static void set_slob(slob_t s, slobidx_t size, slob_t next)
	178	{
	179	slob_t base = (slob_t )((unsigned long)s & PAGE_MASK);
	180	slobidx_t offset = next - base;
bcb4ddb4	181
95b35127 NP	182	if (size > 1) {
	183	s[0].units = size;
	184	s[1].units = offset;
	185	} else
	186	s[0].units = -offset;
	187	}
10cef602	188
95b35127 NP	189	/*
	190	* Return the size of a slob block.
	191	*/
	192	static slobidx_t slob_units(slob_t *s)
	193	{
	194	if (s->units > 0)
	195	return s->units;
	196	return 1;
	197	}
	198
	199	/*
	200	* Return the next free slob block pointer after this one.
	201	*/
	202	static slob_t slob_next(slob_t s)
	203	{
	204	slob_t base = (slob_t )((unsigned long)s & PAGE_MASK);
	205	slobidx_t next;
	206
	207	if (s[0].units < 0)
	208	next = -s[0].units;
	209	else
	210	next = s[1].units;
	211	return base+next;
	212	}
	213
	214	/*
	215	* Returns true if s is the last free block in its page.
	216	*/
	217	static int slob_last(slob_t *s)
	218	{
	219	return !((unsigned long)slob_next(s) & ~PAGE_MASK);
	220	}
	221
6193a2ff PM	222	static void *slob_new_page(gfp_t gfp, int order, int node)
	223	{
	224	void *page;
	225
	226	#ifdef CONFIG_NUMA
	227	if (node != -1)
	228	page = alloc_pages_node(node, gfp, order);
	229	else
	230	#endif
	231	page = alloc_pages(gfp, order);
	232
	233	if (!page)
	234	return NULL;
	235
	236	return page_address(page);
	237	}
	238
95b35127 NP	239	/*
	240	* Allocate a slob block within a given slob_page sp.
	241	*/
	242	static void slob_page_alloc(struct slob_page sp, size_t size, int align)
10cef602 MM	243	{
	244	slob_t prev, cur, *aligned = 0;
	245	int delta = 0, units = SLOB_UNITS(size);
10cef602	246
95b35127 NP	247	for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
	248	slobidx_t avail = slob_units(cur);
	249
10cef602 MM	250	if (align) {
	251	aligned = (slob_t *)ALIGN((unsigned long)cur, align);
	252	delta = aligned - cur;
	253	}
95b35127 NP	254	if (avail >= units + delta) { /* room enough? */
	255	slob_t *next;
	256
10cef602	257	if (delta) { /* need to fragment head to align? */
95b35127 NP	258	next = slob_next(cur);
	259	set_slob(aligned, avail - delta, next);
	260	set_slob(cur, delta, aligned);
10cef602 MM	261	prev = cur;
10cef602 MM	262	cur = aligned;
95b35127	263	avail = slob_units(cur);
10cef602 MM	264	}
10cef602 MM	265
95b35127 NP	266	next = slob_next(cur);
	267	if (avail == units) { /* exact fit? unlink. */
	268	if (prev)
	269	set_slob(prev, slob_units(prev), next);
	270	else
	271	sp->free = next;
	272	} else { /* fragment */
	273	if (prev)
	274	set_slob(prev, slob_units(prev), cur + units);
	275	else
	276	sp->free = cur + units;
	277	set_slob(cur + units, avail - units, next);
10cef602 MM	278	}
10cef602 MM	279
95b35127 NP	280	sp->units -= units;
	281	if (!sp->units)
	282	clear_slob_page_free(sp);
10cef602 MM	283	return cur;
10cef602 MM	284	}
95b35127 NP	285	if (slob_last(cur))
	286	return NULL;
	287	}
	288	}
10cef602	289
95b35127 NP	290	/*
	291	* slob_alloc: entry point into the slob allocator.
	292	*/
6193a2ff	293	static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
95b35127 NP	294	{
95b35127 NP	295	struct slob_page *sp;
d6269543	296	struct list_head *prev;
95b35127 NP	297	slob_t *b = NULL;
95b35127 NP	298	unsigned long flags;
10cef602	299
95b35127 NP	300	spin_lock_irqsave(&slob_lock, flags);
	301	/* Iterate through each partially free page, try to find room */
	302	list_for_each_entry(sp, &free_slob_pages, list) {
6193a2ff PM	303	#ifdef CONFIG_NUMA
	304	/*
	305	* If there's a node specification, search for a partial
	306	* page with a matching node id in the freelist.
	307	*/
	308	if (node != -1 && page_to_nid(&sp->page) != node)
	309	continue;
	310	#endif
d6269543 MM	311	/* Enough room on this page? */
	312	if (sp->units < SLOB_UNITS(size))
	313	continue;
6193a2ff	314
d6269543 MM	315	/* Attempt to alloc */
	316	prev = sp->list.prev;
	317	b = slob_page_alloc(sp, size, align);
	318	if (!b)
	319	continue;
	320
	321	/* Improve fragment distribution and reduce our average
	322	* search time by starting our next search here. (see
	323	* Knuth vol 1, sec 2.5, pg 449) */
d32ddd8f NP	324	if (prev != free_slob_pages.prev &&
d32ddd8f NP	325	free_slob_pages.next != prev->next)
d6269543 MM	326	list_move_tail(&free_slob_pages, prev->next);
d6269543 MM	327	break;
10cef602	328	}
95b35127 NP	329	spin_unlock_irqrestore(&slob_lock, flags);
	330
	331	/* Not enough space: must allocate a new page */
	332	if (!b) {
7fd27255	333	b = slob_new_page(gfp & ~__GFP_ZERO, 0, node);
95b35127 NP	334	if (!b)
	335	return 0;
	336	sp = (struct slob_page *)virt_to_page(b);
	337	set_slob_page(sp);
	338
	339	spin_lock_irqsave(&slob_lock, flags);
	340	sp->units = SLOB_UNITS(PAGE_SIZE);
	341	sp->free = b;
	342	INIT_LIST_HEAD(&sp->list);
	343	set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
	344	set_slob_page_free(sp);
	345	b = slob_page_alloc(sp, size, align);
	346	BUG_ON(!b);
	347	spin_unlock_irqrestore(&slob_lock, flags);
	348	}
d07dbea4 CL	349	if (unlikely((gfp & __GFP_ZERO) && b))
d07dbea4 CL	350	memset(b, 0, size);
95b35127	351	return b;
10cef602 MM	352	}
10cef602 MM	353
95b35127 NP	354	/*
	355	* slob_free: entry point into the slob allocator.
	356	*/
10cef602 MM	357	static void slob_free(void *block, int size)
10cef602 MM	358	{
95b35127 NP	359	struct slob_page *sp;
	360	slob_t prev, next, b = (slob_t )block;
	361	slobidx_t units;
10cef602 MM	362	unsigned long flags;
10cef602 MM	363
2408c550	364	if (unlikely(ZERO_OR_NULL_PTR(block)))
10cef602	365	return;
95b35127	366	BUG_ON(!size);
10cef602	367
95b35127 NP	368	sp = (struct slob_page *)virt_to_page(block);
95b35127 NP	369	units = SLOB_UNITS(size);
10cef602	370
10cef602	371	spin_lock_irqsave(&slob_lock, flags);
10cef602	372
95b35127 NP	373	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
	374	/* Go directly to page allocator. Do not pass slob allocator */
	375	if (slob_page_free(sp))
	376	clear_slob_page_free(sp);
	377	clear_slob_page(sp);
	378	free_slob_page(sp);
	379	free_page((unsigned long)b);
	380	goto out;
	381	}
10cef602	382
95b35127 NP	383	if (!slob_page_free(sp)) {
	384	/* This slob page is about to become partially free. Easy! */
	385	sp->units = units;
	386	sp->free = b;
	387	set_slob(b, units,
	388	(void *)((unsigned long)(b +
	389	SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
	390	set_slob_page_free(sp);
	391	goto out;
	392	}
	393
	394	/*
	395	* Otherwise the page is already partially free, so find reinsertion
	396	* point.
	397	*/
	398	sp->units += units;
10cef602	399
95b35127 NP	400	if (b < sp->free) {
	401	set_slob(b, units, sp->free);
	402	sp->free = b;
	403	} else {
	404	prev = sp->free;
	405	next = slob_next(prev);
	406	while (b > next) {
	407	prev = next;
	408	next = slob_next(prev);
	409	}
10cef602	410
95b35127 NP	411	if (!slob_last(prev) && b + units == next) {
	412	units += slob_units(next);
	413	set_slob(b, units, slob_next(next));
	414	} else
	415	set_slob(b, units, next);
	416
	417	if (prev + slob_units(prev) == b) {
	418	units = slob_units(b) + slob_units(prev);
	419	set_slob(prev, units, slob_next(b));
	420	} else
	421	set_slob(prev, slob_units(prev), b);
	422	}
	423	out:
10cef602 MM	424	spin_unlock_irqrestore(&slob_lock, flags);
	425	}
	426
95b35127 NP	427	/*
	428	* End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
	429	*/
	430
55394849 NP	431	#ifndef ARCH_KMALLOC_MINALIGN
	432	#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
	433	#endif
	434
	435	#ifndef ARCH_SLAB_MINALIGN
	436	#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
	437	#endif
	438
6193a2ff	439	void *__kmalloc_node(size_t size, gfp_t gfp, int node)
10cef602	440	{
6cb8f913	441	unsigned int *m;
55394849 NP	442	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	443
	444	if (size < PAGE_SIZE - align) {
6cb8f913 CL	445	if (!size)
	446	return ZERO_SIZE_PTR;
	447
6193a2ff	448	m = slob_alloc(size + align, gfp, align, node);
95b35127	449	if (m)
55394849 NP	450	*m = size;
55394849 NP	451	return (void *)m + align;
d87a133f NP	452	} else {
	453	void *ret;
	454
6193a2ff	455	ret = slob_new_page(gfp \| __GFP_COMP, get_order(size), node);
d87a133f NP	456	if (ret) {
	457	struct page *page;
	458	page = virt_to_page(ret);
	459	page->private = size;
	460	}
	461	return ret;
10cef602	462	}
10cef602	463	}
6193a2ff	464	EXPORT_SYMBOL(__kmalloc_node);
10cef602 MM	465
	466	void kfree(const void *block)
	467	{
95b35127	468	struct slob_page *sp;
10cef602	469
2408c550	470	if (unlikely(ZERO_OR_NULL_PTR(block)))
10cef602 MM	471	return;
10cef602 MM	472
95b35127	473	sp = (struct slob_page *)virt_to_page(block);
d87a133f	474	if (slob_page(sp)) {
55394849 NP	475	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	476	unsigned int m = (unsigned int )(block - align);
	477	slob_free(m, *m + align);
d87a133f NP	478	} else
d87a133f NP	479	put_page(&sp->page);
10cef602	480	}
10cef602 MM	481	EXPORT_SYMBOL(kfree);
10cef602 MM	482
d87a133f	483	/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
fd76bab2	484	size_t ksize(const void *block)
10cef602	485	{
95b35127	486	struct slob_page *sp;
10cef602	487
ef8b4520 CL	488	BUG_ON(!block);
ef8b4520 CL	489	if (unlikely(block == ZERO_SIZE_PTR))
10cef602 MM	490	return 0;
10cef602 MM	491
95b35127	492	sp = (struct slob_page *)virt_to_page(block);
d87a133f NP	493	if (slob_page(sp))
	494	return ((slob_t *)block - 1)->units + SLOB_UNIT;
	495	else
	496	return sp->page.private;
10cef602	497	}
f8fcc933	498	EXPORT_SYMBOL(ksize);
10cef602 MM	499
	500	struct kmem_cache {
	501	unsigned int size, align;
afc0cedb	502	unsigned long flags;
10cef602	503	const char *name;
4ba9b9d0	504	void (ctor)(struct kmem_cache , void *);
10cef602 MM	505	};
	506
	507	struct kmem_cache kmem_cache_create(const char name, size_t size,
	508	size_t align, unsigned long flags,
4ba9b9d0	509	void (ctor)(struct kmem_cache , void *))
10cef602 MM	510	{
	511	struct kmem_cache *c;
	512
6193a2ff	513	c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1);
10cef602 MM	514
	515	if (c) {
	516	c->name = name;
	517	c->size = size;
afc0cedb	518	if (flags & SLAB_DESTROY_BY_RCU) {
afc0cedb NP	519	/* leave room for rcu footer at the end of object */
	520	c->size += sizeof(struct slob_rcu);
	521	}
	522	c->flags = flags;
10cef602	523	c->ctor = ctor;
10cef602	524	/* ignore alignment unless it's forced */
5af60839	525	c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
55394849 NP	526	if (c->align < ARCH_SLAB_MINALIGN)
55394849 NP	527	c->align = ARCH_SLAB_MINALIGN;
10cef602 MM	528	if (c->align < align)
10cef602 MM	529	c->align = align;
bc0055ae AM	530	} else if (flags & SLAB_PANIC)
bc0055ae AM	531	panic("Cannot create slab cache %s\n", name);
10cef602 MM	532
	533	return c;
	534	}
	535	EXPORT_SYMBOL(kmem_cache_create);
	536
133d205a	537	void kmem_cache_destroy(struct kmem_cache *c)
10cef602 MM	538	{
10cef602 MM	539	slob_free(c, sizeof(struct kmem_cache));
10cef602 MM	540	}
	541	EXPORT_SYMBOL(kmem_cache_destroy);
	542
6193a2ff	543	void kmem_cache_alloc_node(struct kmem_cache c, gfp_t flags, int node)
10cef602 MM	544	{
	545	void *b;
	546
	547	if (c->size < PAGE_SIZE)
6193a2ff	548	b = slob_alloc(c->size, flags, c->align, node);
10cef602	549	else
6193a2ff	550	b = slob_new_page(flags, get_order(c->size), node);
10cef602 MM	551
10cef602 MM	552	if (c->ctor)
4ba9b9d0	553	c->ctor(c, b);
10cef602 MM	554
	555	return b;
	556	}
6193a2ff	557	EXPORT_SYMBOL(kmem_cache_alloc_node);
10cef602	558
afc0cedb	559	static void __kmem_cache_free(void *b, int size)
10cef602	560	{
afc0cedb NP	561	if (size < PAGE_SIZE)
afc0cedb NP	562	slob_free(b, size);
10cef602	563	else
afc0cedb NP	564	free_pages((unsigned long)b, get_order(size));
	565	}
	566
	567	static void kmem_rcu_free(struct rcu_head *head)
	568	{
	569	struct slob_rcu slob_rcu = (struct slob_rcu )head;
	570	void b = (void )slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));
	571
	572	__kmem_cache_free(b, slob_rcu->size);
	573	}
	574
	575	void kmem_cache_free(struct kmem_cache c, void b)
	576	{
	577	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
	578	struct slob_rcu *slob_rcu;
	579	slob_rcu = b + (c->size - sizeof(struct slob_rcu));
	580	INIT_RCU_HEAD(&slob_rcu->head);
	581	slob_rcu->size = c->size;
	582	call_rcu(&slob_rcu->head, kmem_rcu_free);
	583	} else {
afc0cedb NP	584	__kmem_cache_free(b, c->size);
afc0cedb NP	585	}
10cef602 MM	586	}
	587	EXPORT_SYMBOL(kmem_cache_free);
	588
	589	unsigned int kmem_cache_size(struct kmem_cache *c)
	590	{
	591	return c->size;
	592	}
	593	EXPORT_SYMBOL(kmem_cache_size);
	594
	595	const char kmem_cache_name(struct kmem_cache c)
	596	{
	597	return c->name;
	598	}
	599	EXPORT_SYMBOL(kmem_cache_name);
	600
2e892f43 CL	601	int kmem_cache_shrink(struct kmem_cache *d)
	602	{
	603	return 0;
	604	}
	605	EXPORT_SYMBOL(kmem_cache_shrink);
	606
55935a34	607	int kmem_ptr_validate(struct kmem_cache a, const void b)
2e892f43 CL	608	{
	609	return 0;
	610	}
	611
84a01c2f PM	612	static unsigned int slob_ready __read_mostly;
	613
	614	int slab_is_available(void)
	615	{
	616	return slob_ready;
	617	}
	618
bcb4ddb4 DG	619	void __init kmem_cache_init(void)
bcb4ddb4 DG	620	{
84a01c2f	621	slob_ready = 1;
10cef602	622	}