[net-next-2.6.git] / mm / slob.c

/*
 * SLOB Allocator: Simple List Of Blocks
 *
 * Matt Mackall <mpm@selenic.com> 12/30/03
 *
 * NUMA support by Paul Mundt, 2007.
 *
 * How SLOB works:
 *
 * The core of SLOB is a traditional K&R style heap allocator, with
 * support for returning aligned objects. The granularity of this
 * allocator is as little as 2 bytes, however typically most architectures
 * will require 4 bytes on 32-bit and 8 bytes on 64-bit.
 *
 * The slob heap is a linked list of pages from alloc_pages(), and
 * within each page, there is a singly-linked list of free blocks (slob_t).
 * The heap is grown on demand and allocation from the heap is currently
 * first-fit.
 *
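 * Above this is an implementation of kmalloc/kfree. Blocks returned
 * from kmalloc are prepended with a header holding the kmalloc size; the
 * header occupies max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN) bytes
 * (typically 4 bytes on 32-bit and 8 bytes on 64-bit).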
 * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
 * alloc_pages() directly, allocating compound pages so the page order
 * does not have to be separately tracked, and also stores the exact
 * allocation size in page->private so that it can be used to accurately
 * provide ksize(). These objects are detected in kfree() because slob_page()
 * is false for them.
 *
 * SLAB is emulated on top of SLOB by simply calling the constructor (if
 * one was supplied) for every SLAB allocation. Objects are returned with
 * at least ARCH_SLAB_MINALIGN alignment, or more when the
 * SLAB_HWCACHE_ALIGN flag is set or a larger alignment is requested; the
 * low-level allocator will fragment blocks where needed to create the
 * proper alignment. Again, objects of page-size or greater are allocated by
 * calling alloc_pages(). As SLAB objects know their size, no separate
 * size bookkeeping is necessary and there is essentially no allocation
 * space overhead, and compound pages aren't needed for multi-page
 * allocations.
 *
 * NUMA support in SLOB is fairly simplistic, pushing most of the real
 * logic down to the page allocator, and simply doing the node accounting
 * on the upper levels. In the event that a node id is explicitly
 * provided, alloc_pages_node() with the specified node id is used
 * instead. The common case (or when the node id isn't explicitly provided)
 * will default to the current node, as per numa_node_id().
 *
 * Node aware pages are still inserted in to the global freelist, and
 * these are scanned for by matching against the node id encoded in the
 * page flags. As a result, block allocations that can be satisfied from
 * the freelist will only be done so on pages residing on the same node,
 * in order to prevent random node placement.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <asm/atomic.h>

/*
 * slob_block has a field 'units', which indicates size of block if +ve,
 * or offset of next block if -ve (in SLOB_UNITs).
 *
 * Free blocks of size 1 unit simply contain the offset of the next block.
 * Those with larger size contain their size in the first SLOB_UNIT of
 * memory, and the offset of the next free block in the second SLOB_UNIT.
 *
 * slobidx_t is the integer type used for those sizes and offsets. A signed
 * 16-bit index is used when PAGE_SIZE is at most 32767 * 2 bytes; larger
 * pages fall back to a signed 32-bit index. Because slob_t consists of a
 * single slobidx_t, the choice made here also fixes SLOB_UNIT.
 */
#if PAGE_SIZE <= (32767 * 2)
typedef s16 slobidx_t;
#else
typedef s32 slobidx_t;
#endif

struct slob_block {
	slobidx_t units;	/* size if > 0, negated next-offset if < 0 */
};
typedef struct slob_block slob_t;

/*
 * We use struct page fields to manage some slob allocation aspects,
 * however to avoid the horrible mess in include/linux/mm_types.h, we'll
 * just define our own struct page type variant here.
 *
 * The anonymous struct shares storage with a struct page through the union,
 * which is what allows the rest of this file to cast the result of
 * virt_to_page() to a struct slob_page pointer and to reach the underlying
 * page again through the 'page' member.
 */
struct slob_page {
	union {
		struct {
			unsigned long flags;	/* mandatory */
			atomic_t _count;	/* mandatory */
			slobidx_t units;	/* free units left in page */
			unsigned long pad[2];	/* NOTE(review): presumably skips struct page fields slob leaves alone - confirm */
			slob_t *free;		/* first free slob_t in page */
			struct list_head list;	/* linked list of free pages */
		};
		struct page page;
	};
};
/*
 * Compile-time check only: the build breaks if struct slob_page and
 * struct page ever differ in size, since the two are used interchangeably.
 */
static inline void struct_slob_page_wrong_size(void)
{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }

/*
 * free_slob_page: call before a slob_page is returned to the page allocator.
 *
 * Resets the mapcount of the underlying struct page and clears its mapping
 * pointer. NOTE(review): presumably needed because the slob bookkeeping
 * fields share storage with these struct page fields - confirm against the
 * struct page layout.
 */
static inline void free_slob_page(struct slob_page *sp)
{
	reset_page_mapcount(&sp->page);
	sp->page.mapping = NULL;
}

/*
 * All (partially) free slob pages go on this list. Pages are linked through
 * slob_page.list; set_slob_page_free() adds a page at the head and
 * clear_slob_page_free() removes it again.
 */
static LIST_HEAD(free_slob_pages);

/*
 * slob_page: True for all slob pages (false for bigblock pages)
 *
 * The PG_active page flag is reused as the "this page belongs to the slob
 * heap" marker; kfree() and ksize() rely on it to tell heap blocks apart
 * from allocations that came straight from the page allocator.
 */
static inline int slob_page(struct slob_page *sp)
{
	return test_bit(PG_active, &sp->flags);
}

/*
 * set_slob_page: mark a page as belonging to the slob heap by setting
 * PG_active in its flags.
 */
static inline void set_slob_page(struct slob_page *sp)
{
	__set_bit(PG_active, &sp->flags);
}

/*
 * clear_slob_page: remove the slob-heap marker (PG_active) from a page;
 * slob_free() does this before handing the page back to the page allocator.
 */
static inline void clear_slob_page(struct slob_page *sp)
{
	__clear_bit(PG_active, &sp->flags);
}

/*
 * slob_page_free: true for pages on free_slob_pages list.
 *
 * List membership is tracked with the PG_private page flag, which is set
 * and cleared together with the list insertion and removal.
 */
static inline int slob_page_free(struct slob_page *sp)
{
	return test_bit(PG_private, &sp->flags);
}

/*
 * set_slob_page_free: put a page at the head of free_slob_pages and record
 * that fact in PG_private.
 */
static inline void set_slob_page_free(struct slob_page *sp)
{
	list_add(&sp->list, &free_slob_pages);
	__set_bit(PG_private, &sp->flags);
}

/*
 * clear_slob_page_free: unlink a page from free_slob_pages and clear
 * PG_private. Used when the page has no free units left or is being
 * returned to the page allocator.
 */
static inline void clear_slob_page_free(struct slob_page *sp)
{
	list_del(&sp->list);
	__clear_bit(PG_private, &sp->flags);
}

/*
 * SLOB_UNIT is the allocation granule: the size in bytes of one slob_t.
 * SLOB_UNITS(size) converts a byte count into granules, rounding up.
 * SLOB_ALIGN (L1_CACHE_BYTES) is the alignment applied to caches created
 * with SLAB_HWCACHE_ALIGN.
 */
#define SLOB_UNIT sizeof(slob_t)
#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
#define SLOB_ALIGN L1_CACHE_BYTES

/*
 * struct slob_rcu is inserted at the tail of allocated slob blocks, which
 * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
 * the block using call_rcu.
 *
 * kmem_cache_create() enlarges the object size of such caches by
 * sizeof(struct slob_rcu) to make room for this footer. kmem_rcu_free()
 * casts the rcu_head pointer straight back to a struct slob_rcu, so 'head'
 * has to stay the first member.
 */
struct slob_rcu {
	struct rcu_head head;
	int size;		/* full object size, footer included */
};

/*
 * slob_lock protects all slob allocator structures. slob_alloc() and
 * slob_free() take it with spin_lock_irqsave() around every access to the
 * free page list and the per-page free lists.
 */
static DEFINE_SPINLOCK(slob_lock);

/*
 * Write the free-block header for s: its size in units and the location of
 * the following free block, stored as an offset from the start of s's page.
 *
 * A one-unit block only has room for a single field, so it stores the
 * negated offset; bigger blocks store the size first and the offset second.
 */
static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
{
	slob_t *page_start = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t next_off = next - page_start;

	if (size <= 1) {
		s[0].units = -next_off;
		return;
	}

	s[0].units = size;
	s[1].units = next_off;
}

/*
 * Size of free block s in units. A non-positive first field means the block
 * is a single unit whose field holds the (negated) next offset instead.
 */
static slobidx_t slob_units(slob_t *s)
{
	return s->units > 0 ? s->units : 1;
}

/*
 * Follow the link stored in free block s and return the free block it
 * points at. The link is an offset from the start of the page holding s.
 */
static slob_t *slob_next(slob_t *s)
{
	slob_t *page_start = (slob_t *)((unsigned long)s & PAGE_MASK);
	/* one-unit blocks keep the negated offset in their only field */
	slobidx_t off = s[0].units < 0 ? -s[0].units : s[1].units;

	return page_start + off;
}

/*
 * True when s terminates its page's free list, i.e. when its link points at
 * a page-aligned address rather than at another block inside the page.
 */
static int slob_last(slob_t *s)
{
	unsigned long in_page = (unsigned long)slob_next(s) & ~PAGE_MASK;

	return !in_page;
}

/*
 * Get 2^order pages from the page allocator and return their kernel virtual
 * address, or NULL on failure. With CONFIG_NUMA a node other than -1 selects
 * alloc_pages_node(); otherwise plain alloc_pages() is used.
 */
static void *slob_new_page(gfp_t gfp, int order, int node)
{
	struct page *pg;

#ifdef CONFIG_NUMA
	if (node != -1)
		pg = alloc_pages_node(node, gfp, order);
	else
#endif
		pg = alloc_pages(gfp, order);

	return pg ? page_address(pg) : NULL;
}

/*
 * Allocate a slob block within a given slob_page sp.
 *
 * Walks the page's free list and takes the first block that can hold
 * 'size' bytes (rounded up to whole units) at the requested alignment.
 * 'align' is in bytes; 0 means no alignment constraint.
 *
 * Returns the start of the allocated block, or NULL when no free block in
 * this page is big enough. On success sp->units is reduced and, if the page
 * has no free units left, the page is taken off free_slob_pages.
 *
 * Both callers in this file invoke it with slob_lock held.
 */
static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
{
	slob_t *prev, *cur, *aligned = 0;
	int delta = 0, units = SLOB_UNITS(size);

	for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
		slobidx_t avail = slob_units(cur);

		if (align) {
			/* delta = units to skip at the front of cur to reach alignment */
			aligned = (slob_t *)ALIGN((unsigned long)cur, align);
			delta = aligned - cur;
		}
		if (avail >= units + delta) { /* room enough? */
			slob_t *next;

			if (delta) { /* need to fragment head to align? */
				/*
				 * Split cur in two: the unaligned head stays on
				 * the free list (and becomes prev), the aligned
				 * remainder becomes the candidate block.
				 */
				next = slob_next(cur);
				set_slob(aligned, avail - delta, next);
				set_slob(cur, delta, aligned);
				prev = cur;
				cur = aligned;
				avail = slob_units(cur);
			}

			next = slob_next(cur);
			if (avail == units) { /* exact fit? unlink. */
				if (prev)
					set_slob(prev, slob_units(prev), next);
				else
					sp->free = next;
			} else { /* fragment */
				/* the tail beyond 'units' stays free and replaces cur */
				if (prev)
					set_slob(prev, slob_units(prev), cur + units);
				else
					sp->free = cur + units;
				set_slob(cur + units, avail - units, next);
			}

			sp->units -= units;
			if (!sp->units)
				clear_slob_page_free(sp);
			return cur;
		}
		/* end of this page's free list without a fit */
		if (slob_last(cur))
			return NULL;
	}
}

/*
 * slob_alloc: entry point into the slob allocator.
 *
 * Allocates 'size' bytes with the given byte alignment ('align' of 0 means
 * none). Partially free pages on free_slob_pages are tried first; with
 * CONFIG_NUMA and node != -1 only pages on that node are considered. If none
 * of them can satisfy the request a fresh page is obtained through
 * slob_new_page() and the block is carved out of it.
 *
 * Returns the block, or NULL if a new page was needed and could not be
 * allocated. The block is zeroed when gfp contains __GFP_ZERO.
 */
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
{
	struct slob_page *sp;
	slob_t *b = NULL;
	unsigned long flags;

	spin_lock_irqsave(&slob_lock, flags);
	/* Iterate through each partially free page, try to find room */
	list_for_each_entry(sp, &free_slob_pages, list) {
#ifdef CONFIG_NUMA
		/*
		 * If there's a node specification, search for a partial
		 * page with a matching node id in the freelist.
		 */
		if (node != -1 && page_to_nid(&sp->page) != node)
			continue;
#endif

		/*
		 * Cheap pre-check on the total free units; the page may
		 * still fail if its free space is fragmented or misaligned.
		 */
		if (sp->units >= SLOB_UNITS(size)) {
			b = slob_page_alloc(sp, size, align);
			if (b)
				break;
		}
	}
	spin_unlock_irqrestore(&slob_lock, flags);

	/* Not enough space: must allocate a new page */
	if (!b) {
		/* slob_lock is not held across the call into the page allocator */
		b = slob_new_page(gfp, 0, node);
		if (!b)
			return 0;
		sp = (struct slob_page *)virt_to_page(b);
		set_slob_page(sp);

		spin_lock_irqsave(&slob_lock, flags);
		/* the whole page starts out as one free block ending at the page boundary */
		sp->units = SLOB_UNITS(PAGE_SIZE);
		sp->free = b;
		INIT_LIST_HEAD(&sp->list);
		set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
		set_slob_page_free(sp);
		b = slob_page_alloc(sp, size, align);
		BUG_ON(!b);
		spin_unlock_irqrestore(&slob_lock, flags);
	}
	if (unlikely((gfp & __GFP_ZERO) && b))
		memset(b, 0, size);
	return b;
}

/*
 * slob_free: entry point into the slob allocator.
 *
 * Gives 'size' bytes starting at 'block' back to the slob page that
 * contains them. NULL and ZERO_SIZE_PTR are ignored; a zero size is a bug.
 *
 * Three cases are handled, all under slob_lock:
 *  - the free makes the whole page free: the page is returned to the page
 *    allocator;
 *  - the page had no free space: the block becomes its only free block and
 *    the page is put on free_slob_pages;
 *  - otherwise the block is inserted into the page's address-ordered free
 *    list and merged with any directly adjacent free neighbour.
 */
static void slob_free(void *block, int size)
{
	struct slob_page *sp;
	slob_t *prev, *next, *b = (slob_t *)block;
	slobidx_t units;
	unsigned long flags;

	if (ZERO_OR_NULL_PTR(block))
		return;
	BUG_ON(!size);

	sp = (struct slob_page *)virt_to_page(block);
	units = SLOB_UNITS(size);

	spin_lock_irqsave(&slob_lock, flags);

	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
		/* Go directly to page allocator. Do not pass slob allocator */
		if (slob_page_free(sp))
			clear_slob_page_free(sp);
		clear_slob_page(sp);
		free_slob_page(sp);
		free_page((unsigned long)b);
		goto out;
	}

	if (!slob_page_free(sp)) {
		/* This slob page is about to become partially free. Easy! */
		sp->units = units;
		sp->free = b;
		/* sole free block: its link is the start of the next page */
		set_slob(b, units,
			(void *)((unsigned long)(b +
					SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
		set_slob_page_free(sp);
		goto out;
	}

	/*
	 * Otherwise the page is already partially free, so find reinsertion
	 * point.
	 */
	sp->units += units;

	if (b < sp->free) {
		/*
		 * New list head. If the freed block ends exactly where the
		 * old head begins, absorb the old head instead of leaving
		 * two adjacent free blocks that could never satisfy a
		 * request spanning both.
		 */
		if (b + units == sp->free) {
			units += slob_units(sp->free);
			sp->free = slob_next(sp->free);
		}
		set_slob(b, units, sp->free);
		sp->free = b;
	} else {
		/* find the free blocks on either side of b */
		prev = sp->free;
		next = slob_next(prev);
		while (b > next) {
			prev = next;
			next = slob_next(prev);
		}

		/* merge with the following free block if they touch */
		if (!slob_last(prev) && b + units == next) {
			units += slob_units(next);
			set_slob(b, units, slob_next(next));
		} else
			set_slob(b, units, next);

		/* merge with the preceding free block if they touch */
		if (prev + slob_units(prev) == b) {
			units = slob_units(b) + slob_units(prev);
			set_slob(prev, units, slob_next(b));
		} else
			set_slob(prev, slob_units(prev), b);
	}
out:
	spin_unlock_irqrestore(&slob_lock, flags);
}

/*
 * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
 */

/*
 * Minimum alignments for kmalloc and slab-cache objects. An architecture
 * may define its own values before this point; otherwise both default to
 * the alignment of unsigned long.
 */
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
#endif

/*
 * __kmalloc_node: kmalloc with an optional NUMA node (-1 for "any").
 *
 * Requests smaller than PAGE_SIZE - align are served from the slob heap.
 * 'align' extra bytes are reserved in front of the returned pointer and the
 * requested size is stored at the start of them, where kfree() and ksize()
 * look it up. Larger requests go straight to the page allocator as compound
 * pages, with the exact size recorded in page->private.
 *
 * Returns ZERO_SIZE_PTR for a zero-byte request and NULL when memory cannot
 * be obtained.
 */
void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
	unsigned int *m;
	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);

	if (size < PAGE_SIZE - align) {
		if (!size)
			return ZERO_SIZE_PTR;

		m = slob_alloc(size + align, gfp, align, node);
		/*
		 * Bail out before adding the header offset: NULL + align
		 * would look like a valid pointer to the caller.
		 */
		if (!m)
			return NULL;
		*m = size;
		return (void *)m + align;
	} else {
		void *ret;

		ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
		if (ret) {
			struct page *page;
			page = virt_to_page(ret);
			page->private = size;
		}
		return ret;
	}
}
EXPORT_SYMBOL(__kmalloc_node);

/*
 * kfree: release memory obtained from kmalloc. NULL and ZERO_SIZE_PTR are
 * accepted and ignored.
 *
 * Blocks living in a slob page are handed back to the slob heap together
 * with their size header; anything else is a page-allocator allocation and
 * is dropped with put_page().
 */
void kfree(const void *block)
{
	struct slob_page *sp;
	unsigned int *hdr;
	int align;

	if (ZERO_OR_NULL_PTR(block))
		return;

	sp = (struct slob_page *)virt_to_page(block);
	if (!slob_page(sp)) {
		put_page(&sp->page);
		return;
	}

	/* the size header sits 'align' bytes in front of the user pointer */
	align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	hdr = (unsigned int *)(block - align);
	slob_free(hdr, *hdr + align);
}
EXPORT_SYMBOL(kfree);

/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t ksize(const void *block)
{
	struct slob_page *sp;

	if (ZERO_OR_NULL_PTR(block))
		return 0;

	sp = (struct slob_page *)virt_to_page(block);
	if (slob_page(sp))
		return ((slob_t *)block - 1)->units + SLOB_UNIT;
	else
		return sp->page.private;
}

/*
 * Descriptor for an emulated slab cache, filled in by kmem_cache_create().
 */
struct kmem_cache {
	unsigned int size, align;	/* object size (incl. any RCU footer) and alignment, in bytes */
	unsigned long flags;		/* SLAB_* flags passed at creation */
	const char *name;		/* caller's name pointer; the string is not copied */
	void (*ctor)(void *, struct kmem_cache *, unsigned long);	/* optional constructor */
};

/*
 * kmem_cache_create: allocate and initialise a cache descriptor.
 *
 * @name:  cache name; the pointer is stored, the string is not copied
 * @size:  object size in bytes
 * @align: minimum object alignment requested by the caller
 * @flags: SLAB_* flags; SLAB_HWCACHE_ALIGN, SLAB_DESTROY_BY_RCU and
 *         SLAB_PANIC are acted upon here
 * @ctor:  optional constructor run on every allocated object
 *
 * Returns the new cache, or NULL if the descriptor could not be allocated
 * (panics instead when SLAB_PANIC is set).
 */
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
	size_t align, unsigned long flags,
	void (*ctor)(void*, struct kmem_cache *, unsigned long))
{
	struct kmem_cache *c;

	/*
	 * 'flags' holds SLAB_* cache flags, not gfp bits, so it must not be
	 * used as the gfp mask. The descriptor contains pointers and longs,
	 * so it also needs a real alignment rather than 0, which would only
	 * guarantee SLOB_UNIT alignment.
	 */
	c = slob_alloc(sizeof(struct kmem_cache), GFP_KERNEL,
		ARCH_KMALLOC_MINALIGN, -1);

	if (c) {
		c->name = name;
		c->size = size;
		if (flags & SLAB_DESTROY_BY_RCU) {
			/* leave room for rcu footer at the end of object */
			c->size += sizeof(struct slob_rcu);
		}
		c->flags = flags;
		c->ctor = ctor;
		/* ignore alignment unless it's forced */
		c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
		if (c->align < ARCH_SLAB_MINALIGN)
			c->align = ARCH_SLAB_MINALIGN;
		if (c->align < align)
			c->align = align;
	} else if (flags & SLAB_PANIC)
		panic("Cannot create slab cache %s\n", name);

	return c;
}
EXPORT_SYMBOL(kmem_cache_create);

/*
 * kmem_cache_destroy: release a cache descriptor back to the slob heap.
 *
 * Only the descriptor itself is freed; nothing here checks for or releases
 * objects that were allocated from the cache.
 */
void kmem_cache_destroy(struct kmem_cache *c)
{
	slob_free(c, sizeof(struct kmem_cache));
}
EXPORT_SYMBOL(kmem_cache_destroy);

/*
 * kmem_cache_alloc_node: allocate one object from cache c, optionally on a
 * given NUMA node (-1 for "any").
 *
 * Objects smaller than PAGE_SIZE come from the slob heap with the cache's
 * alignment; larger ones are taken directly from the page allocator. The
 * cache's constructor, if any, is run on the new object.
 *
 * Returns the object, or NULL if the allocation failed.
 */
void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
	void *b;

	if (c->size < PAGE_SIZE)
		b = slob_alloc(c->size, flags, c->align, node);
	else
		b = slob_new_page(flags, get_order(c->size), node);

	/* never hand a failed allocation to the constructor */
	if (b && c->ctor)
		c->ctor(b, c, 0);

	return b;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);

/*
 * Release a cache object of the given size: page-sized and larger objects
 * go back to the page allocator, everything else to the slob heap. This
 * mirrors the size split made in kmem_cache_alloc_node().
 */
static void __kmem_cache_free(void *b, int size)
{
	if (size >= PAGE_SIZE) {
		free_pages((unsigned long)b, get_order(size));
		return;
	}

	slob_free(b, size);
}

/*
 * RCU callback queued by kmem_cache_free(). 'head' is the first member of
 * the slob_rcu footer at the end of the object; the footer records the full
 * object size, from which the start of the object is recovered.
 */
static void kmem_rcu_free(struct rcu_head *head)
{
	struct slob_rcu *footer = (struct slob_rcu *)head;
	int obj_size = footer->size;
	void *obj = (void *)footer - (obj_size - sizeof(struct slob_rcu));

	__kmem_cache_free(obj, obj_size);
}

/*
 * kmem_cache_free: return object b to cache c.
 *
 * Ordinary caches free the object at once. For SLAB_DESTROY_BY_RCU caches
 * the footer at the end of the object is initialised and the actual free is
 * deferred to kmem_rcu_free() through call_rcu().
 */
void kmem_cache_free(struct kmem_cache *c, void *b)
{
	struct slob_rcu *footer;

	if (likely(!(c->flags & SLAB_DESTROY_BY_RCU))) {
		__kmem_cache_free(b, c->size);
		return;
	}

	footer = b + (c->size - sizeof(struct slob_rcu));
	INIT_RCU_HEAD(&footer->head);
	footer->size = c->size;
	call_rcu(&footer->head, kmem_rcu_free);
}
EXPORT_SYMBOL(kmem_cache_free);

/*
 * kmem_cache_size: object size recorded for cache c. For caches created
 * with SLAB_DESTROY_BY_RCU this includes the slob_rcu footer that
 * kmem_cache_create() added to the requested size.
 */
unsigned int kmem_cache_size(struct kmem_cache *c)
{
	return c->size;
}
EXPORT_SYMBOL(kmem_cache_size);

/*
 * kmem_cache_name: the name pointer that was passed to kmem_cache_create().
 */
const char *kmem_cache_name(struct kmem_cache *c)
{
	return c->name;
}
EXPORT_SYMBOL(kmem_cache_name);

/*
 * kmem_cache_shrink: no-op in this allocator; ignores its argument and
 * always returns 0.
 */
int kmem_cache_shrink(struct kmem_cache *d)
{
	return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);

/*
 * kmem_ptr_validate: performs no check in this allocator; ignores both
 * arguments and always returns 0.
 */
int kmem_ptr_validate(struct kmem_cache *a, const void *b)
{
	return 0;
}

/* Set to 1 by kmem_cache_init() and reported by slab_is_available(). */
static unsigned int slob_ready __read_mostly;

/*
 * slab_is_available: non-zero once kmem_cache_init() has run.
 */
int slab_is_available(void)
{
	return slob_ready;
}

/*
 * kmem_cache_init: the only initialisation done here is flagging the
 * allocator as ready.
 */
void __init kmem_cache_init(void)
{
	slob_ready = 1;
}
Commit	Line	Data
10cef602 MM	1	/*
	2	* SLOB Allocator: Simple List Of Blocks
	3	*
	4	* Matt Mackall <mpm@selenic.com> 12/30/03
	5	*
6193a2ff PM	6	* NUMA support by Paul Mundt, 2007.
6193a2ff PM	7	*
10cef602 MM	8	* How SLOB works:
	9	*
	10	* The core of SLOB is a traditional K&R style heap allocator, with
	11	* support for returning aligned objects. The granularity of this
55394849 NP	12	* allocator is as little as 2 bytes, however typically most architectures
55394849 NP	13	* will require 4 bytes on 32-bit and 8 bytes on 64-bit.
95b35127	14	*
6193a2ff	15	* The slob heap is a linked list of pages from alloc_pages(), and
95b35127 NP	16	* within each page, there is a singly-linked list of free blocks (slob_t).
	17	* The heap is grown on demand and allocation from the heap is currently
	18	* first-fit.
10cef602 MM	19	*
10cef602 MM	20	* Above this is an implementation of kmalloc/kfree. Blocks returned
55394849	21	* from kmalloc are prepended with a 4-byte header with the kmalloc size.
10cef602	22	* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
6193a2ff	23	* alloc_pages() directly, allocating compound pages so the page order
d87a133f NP	24	* does not have to be separately tracked, and also stores the exact
	25	* allocation size in page->private so that it can be used to accurately
	26	* provide ksize(). These objects are detected in kfree() because slob_page()
	27	* is false for them.
10cef602 MM	28	*
10cef602 MM	29	* SLAB is emulated on top of SLOB by simply calling constructors and
95b35127 NP	30	* destructors for every SLAB allocation. Objects are returned with the
	31	* 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
	32	* case the low-level allocator will fragment blocks to create the proper
	33	* alignment. Again, objects of page-size or greater are allocated by
6193a2ff	34	* calling alloc_pages(). As SLAB objects know their size, no separate
95b35127	35	* size bookkeeping is necessary and there is essentially no allocation
d87a133f NP	36	* space overhead, and compound pages aren't needed for multi-page
d87a133f NP	37	* allocations.
6193a2ff PM	38	*
	39	* NUMA support in SLOB is fairly simplistic, pushing most of the real
	40	* logic down to the page allocator, and simply doing the node accounting
	41	* on the upper levels. In the event that a node id is explicitly
	42	* provided, alloc_pages_node() with the specified node id is used
	43	* instead. The common case (or when the node id isn't explicitly provided)
	44	* will default to the current node, as per numa_node_id().
	45	*
	46	* Node aware pages are still inserted in to the global freelist, and
	47	* these are scanned for by matching against the node id encoded in the
	48	* page flags. As a result, block allocations that can be satisfied from
	49	* the freelist will only be done so on pages residing on the same node,
	50	* in order to prevent random node placement.
10cef602 MM	51	*/
10cef602 MM	52
95b35127	53	#include <linux/kernel.h>
10cef602 MM	54	#include <linux/slab.h>
	55	#include <linux/mm.h>
	56	#include <linux/cache.h>
	57	#include <linux/init.h>
	58	#include <linux/module.h>
afc0cedb	59	#include <linux/rcupdate.h>
95b35127 NP	60	#include <linux/list.h>
	61	#include <asm/atomic.h>
	62
95b35127 NP	63	/*
	64	* slob_block has a field 'units', which indicates size of block if +ve,
	65	* or offset of next block if -ve (in SLOB_UNITs).
	66	*
	67	* Free blocks of size 1 unit simply contain the offset of the next block.
	68	* Those with larger size contain their size in the first SLOB_UNIT of
	69	* memory, and the offset of the next free block in the second SLOB_UNIT.
	70	*/
55394849	71	#if PAGE_SIZE <= (32767 * 2)
95b35127 NP	72	typedef s16 slobidx_t;
	73	#else
	74	typedef s32 slobidx_t;
	75	#endif
	76
10cef602	77	struct slob_block {
95b35127	78	slobidx_t units;
55394849	79	};
10cef602 MM	80	typedef struct slob_block slob_t;
10cef602 MM	81
95b35127 NP	82	/*
	83	* We use struct page fields to manage some slob allocation aspects,
	84	* however to avoid the horrible mess in include/linux/mm_types.h, we'll
	85	* just define our own struct page type variant here.
	86	*/
	87	struct slob_page {
	88	union {
	89	struct {
	90	unsigned long flags; /* mandatory */
	91	atomic_t _count; /* mandatory */
	92	slobidx_t units; /* free units left in page */
	93	unsigned long pad[2];
	94	slob_t free; / first free slob_t in page */
	95	struct list_head list; /* linked list of free pages */
	96	};
	97	struct page page;
	98	};
	99	};
	100	static inline void struct_slob_page_wrong_size(void)
	101	{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }
	102
	103	/*
	104	* free_slob_page: call before a slob_page is returned to the page allocator.
	105	*/
	106	static inline void free_slob_page(struct slob_page *sp)
	107	{
	108	reset_page_mapcount(&sp->page);
	109	sp->page.mapping = NULL;
	110	}
	111
	112	/*
	113	* All (partially) free slob pages go on this list.
	114	*/
	115	static LIST_HEAD(free_slob_pages);
	116
	117	/*
	118	* slob_page: True for all slob pages (false for bigblock pages)
	119	*/
	120	static inline int slob_page(struct slob_page *sp)
	121	{
	122	return test_bit(PG_active, &sp->flags);
	123	}
	124
	125	static inline void set_slob_page(struct slob_page *sp)
	126	{
	127	__set_bit(PG_active, &sp->flags);
	128	}
	129
	130	static inline void clear_slob_page(struct slob_page *sp)
	131	{
	132	__clear_bit(PG_active, &sp->flags);
	133	}
	134
	135	/*
	136	* slob_page_free: true for pages on free_slob_pages list.
	137	*/
	138	static inline int slob_page_free(struct slob_page *sp)
	139	{
	140	return test_bit(PG_private, &sp->flags);
	141	}
	142
	143	static inline void set_slob_page_free(struct slob_page *sp)
	144	{
	145	list_add(&sp->list, &free_slob_pages);
146	__set_bit(PG_private, &sp->flags);
147	}
148
149	static inline void clear_slob_page_free(struct slob_page *sp)
150	{
151	list_del(&sp->list);
152	__clear_bit(PG_private, &sp->flags);
153	}
154
10cef602 MM	155	#define SLOB_UNIT sizeof(slob_t)
	156	#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
	157	#define SLOB_ALIGN L1_CACHE_BYTES
	158
afc0cedb NP	159	/*
	160	* struct slob_rcu is inserted at the tail of allocated slob blocks, which
	161	* were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
	162	* the block using call_rcu.
	163	*/
	164	struct slob_rcu {
	165	struct rcu_head head;
	166	int size;
	167	};
	168
95b35127 NP	169	/*
	170	* slob_lock protects all slob allocator structures.
	171	*/
10cef602	172	static DEFINE_SPINLOCK(slob_lock);
10cef602	173
95b35127 NP	174	/*
	175	* Encode the given size and next info into a free slob block s.
	176	*/
	177	static void set_slob(slob_t s, slobidx_t size, slob_t next)
	178	{
	179	slob_t base = (slob_t )((unsigned long)s & PAGE_MASK);
	180	slobidx_t offset = next - base;
bcb4ddb4	181
95b35127 NP	182	if (size > 1) {
	183	s[0].units = size;
	184	s[1].units = offset;
	185	} else
	186	s[0].units = -offset;
	187	}
10cef602	188
95b35127 NP	189	/*
	190	* Return the size of a slob block.
	191	*/
	192	static slobidx_t slob_units(slob_t *s)
	193	{
	194	if (s->units > 0)
	195	return s->units;
	196	return 1;
	197	}
	198
	199	/*
	200	* Return the next free slob block pointer after this one.
	201	*/
	202	static slob_t slob_next(slob_t s)
	203	{
	204	slob_t base = (slob_t )((unsigned long)s & PAGE_MASK);
	205	slobidx_t next;
	206
	207	if (s[0].units < 0)
	208	next = -s[0].units;
	209	else
	210	next = s[1].units;
	211	return base+next;
	212	}
	213
	214	/*
	215	* Returns true if s is the last free block in its page.
	216	*/
	217	static int slob_last(slob_t *s)
	218	{
	219	return !((unsigned long)slob_next(s) & ~PAGE_MASK);
	220	}
	221
6193a2ff PM	222	static void *slob_new_page(gfp_t gfp, int order, int node)
	223	{
	224	void *page;
	225
	226	#ifdef CONFIG_NUMA
	227	if (node != -1)
	228	page = alloc_pages_node(node, gfp, order);
	229	else
	230	#endif
	231	page = alloc_pages(gfp, order);
	232
	233	if (!page)
	234	return NULL;
	235
	236	return page_address(page);
	237	}
	238
95b35127 NP	239	/*
	240	* Allocate a slob block within a given slob_page sp.
	241	*/
	242	static void slob_page_alloc(struct slob_page sp, size_t size, int align)
10cef602 MM	243	{
	244	slob_t prev, cur, *aligned = 0;
	245	int delta = 0, units = SLOB_UNITS(size);
10cef602	246
95b35127 NP	247	for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
	248	slobidx_t avail = slob_units(cur);
	249
10cef602 MM	250	if (align) {
	251	aligned = (slob_t *)ALIGN((unsigned long)cur, align);
	252	delta = aligned - cur;
	253	}
95b35127 NP	254	if (avail >= units + delta) { /* room enough? */
	255	slob_t *next;
	256
10cef602	257	if (delta) { /* need to fragment head to align? */
95b35127 NP	258	next = slob_next(cur);
	259	set_slob(aligned, avail - delta, next);
	260	set_slob(cur, delta, aligned);
10cef602 MM	261	prev = cur;
10cef602 MM	262	cur = aligned;
95b35127	263	avail = slob_units(cur);
10cef602 MM	264	}
10cef602 MM	265
95b35127 NP	266	next = slob_next(cur);
	267	if (avail == units) { /* exact fit? unlink. */
	268	if (prev)
	269	set_slob(prev, slob_units(prev), next);
	270	else
	271	sp->free = next;
	272	} else { /* fragment */
	273	if (prev)
	274	set_slob(prev, slob_units(prev), cur + units);
	275	else
	276	sp->free = cur + units;
	277	set_slob(cur + units, avail - units, next);
10cef602 MM	278	}
10cef602 MM	279
95b35127 NP	280	sp->units -= units;
	281	if (!sp->units)
	282	clear_slob_page_free(sp);
10cef602 MM	283	return cur;
10cef602 MM	284	}
95b35127 NP	285	if (slob_last(cur))
	286	return NULL;
	287	}
	288	}
10cef602	289
95b35127 NP	290	/*
	291	* slob_alloc: entry point into the slob allocator.
	292	*/
6193a2ff	293	static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
95b35127 NP	294	{
	295	struct slob_page *sp;
	296	slob_t *b = NULL;
	297	unsigned long flags;
10cef602	298
95b35127 NP	299	spin_lock_irqsave(&slob_lock, flags);
	300	/* Iterate through each partially free page, try to find room */
	301	list_for_each_entry(sp, &free_slob_pages, list) {
6193a2ff PM	302	#ifdef CONFIG_NUMA
	303	/*
	304	* If there's a node specification, search for a partial
	305	* page with a matching node id in the freelist.
	306	*/
	307	if (node != -1 && page_to_nid(&sp->page) != node)
	308	continue;
	309	#endif
	310
95b35127 NP	311	if (sp->units >= SLOB_UNITS(size)) {
	312	b = slob_page_alloc(sp, size, align);
	313	if (b)
	314	break;
10cef602 MM	315	}
10cef602 MM	316	}
95b35127 NP	317	spin_unlock_irqrestore(&slob_lock, flags);
	318
	319	/* Not enough space: must allocate a new page */
	320	if (!b) {
6193a2ff	321	b = slob_new_page(gfp, 0, node);
95b35127 NP	322	if (!b)
	323	return 0;
	324	sp = (struct slob_page *)virt_to_page(b);
	325	set_slob_page(sp);
	326
	327	spin_lock_irqsave(&slob_lock, flags);
	328	sp->units = SLOB_UNITS(PAGE_SIZE);
	329	sp->free = b;
	330	INIT_LIST_HEAD(&sp->list);
	331	set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
	332	set_slob_page_free(sp);
	333	b = slob_page_alloc(sp, size, align);
	334	BUG_ON(!b);
	335	spin_unlock_irqrestore(&slob_lock, flags);
	336	}
d07dbea4 CL	337	if (unlikely((gfp & __GFP_ZERO) && b))
d07dbea4 CL	338	memset(b, 0, size);
95b35127	339	return b;
10cef602 MM	340	}
10cef602 MM	341
95b35127 NP	342	/*
	343	* slob_free: entry point into the slob allocator.
	344	*/
10cef602 MM	345	static void slob_free(void *block, int size)
10cef602 MM	346	{
95b35127 NP	347	struct slob_page *sp;
	348	slob_t prev, next, b = (slob_t )block;
	349	slobidx_t units;
10cef602 MM	350	unsigned long flags;
10cef602 MM	351
6cb8f913	352	if (ZERO_OR_NULL_PTR(block))
10cef602	353	return;
95b35127	354	BUG_ON(!size);
10cef602	355
95b35127 NP	356	sp = (struct slob_page *)virt_to_page(block);
95b35127 NP	357	units = SLOB_UNITS(size);
10cef602	358
10cef602	359	spin_lock_irqsave(&slob_lock, flags);
10cef602	360
95b35127 NP	361	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
	362	/* Go directly to page allocator. Do not pass slob allocator */
	363	if (slob_page_free(sp))
	364	clear_slob_page_free(sp);
	365	clear_slob_page(sp);
	366	free_slob_page(sp);
	367	free_page((unsigned long)b);
	368	goto out;
	369	}
10cef602	370
95b35127 NP	371	if (!slob_page_free(sp)) {
	372	/* This slob page is about to become partially free. Easy! */
	373	sp->units = units;
	374	sp->free = b;
	375	set_slob(b, units,
	376	(void *)((unsigned long)(b +
	377	SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
	378	set_slob_page_free(sp);
	379	goto out;
	380	}
	381
	382	/*
	383	* Otherwise the page is already partially free, so find reinsertion
	384	* point.
	385	*/
	386	sp->units += units;
10cef602	387
95b35127 NP	388	if (b < sp->free) {
	389	set_slob(b, units, sp->free);
	390	sp->free = b;
	391	} else {
	392	prev = sp->free;
	393	next = slob_next(prev);
	394	while (b > next) {
	395	prev = next;
	396	next = slob_next(prev);
	397	}
10cef602	398
95b35127 NP	399	if (!slob_last(prev) && b + units == next) {
	400	units += slob_units(next);
	401	set_slob(b, units, slob_next(next));
	402	} else
	403	set_slob(b, units, next);
	404
	405	if (prev + slob_units(prev) == b) {
	406	units = slob_units(b) + slob_units(prev);
	407	set_slob(prev, units, slob_next(b));
	408	} else
	409	set_slob(prev, slob_units(prev), b);
	410	}
	411	out:
10cef602 MM	412	spin_unlock_irqrestore(&slob_lock, flags);
	413	}
	414
95b35127 NP	415	/*
	416	* End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
	417	*/
	418
55394849 NP	419	#ifndef ARCH_KMALLOC_MINALIGN
	420	#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
	421	#endif
	422
	423	#ifndef ARCH_SLAB_MINALIGN
	424	#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
	425	#endif
	426
6193a2ff	427	void *__kmalloc_node(size_t size, gfp_t gfp, int node)
10cef602	428	{
6cb8f913	429	unsigned int *m;
55394849 NP	430	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	431
	432	if (size < PAGE_SIZE - align) {
6cb8f913 CL	433	if (!size)
	434	return ZERO_SIZE_PTR;
	435
6193a2ff	436	m = slob_alloc(size + align, gfp, align, node);
95b35127	437	if (m)
55394849 NP	438	*m = size;
55394849 NP	439	return (void *)m + align;
d87a133f NP	440	} else {
	441	void *ret;
	442
6193a2ff	443	ret = slob_new_page(gfp \| __GFP_COMP, get_order(size), node);
d87a133f NP	444	if (ret) {
	445	struct page *page;
	446	page = virt_to_page(ret);
	447	page->private = size;
	448	}
	449	return ret;
10cef602	450	}
10cef602	451	}
6193a2ff	452	EXPORT_SYMBOL(__kmalloc_node);
10cef602 MM	453
	454	void kfree(const void *block)
	455	{
95b35127	456	struct slob_page *sp;
10cef602	457
6cb8f913	458	if (ZERO_OR_NULL_PTR(block))
10cef602 MM	459	return;
10cef602 MM	460
95b35127	461	sp = (struct slob_page *)virt_to_page(block);
d87a133f	462	if (slob_page(sp)) {
55394849 NP	463	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	464	unsigned int m = (unsigned int )(block - align);
	465	slob_free(m, *m + align);
d87a133f NP	466	} else
d87a133f NP	467	put_page(&sp->page);
10cef602	468	}
10cef602 MM	469	EXPORT_SYMBOL(kfree);
10cef602 MM	470
d87a133f	471	/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
fd76bab2	472	size_t ksize(const void *block)
10cef602	473	{
95b35127	474	struct slob_page *sp;
10cef602	475
6cb8f913	476	if (ZERO_OR_NULL_PTR(block))
10cef602 MM	477	return 0;
10cef602 MM	478
95b35127	479	sp = (struct slob_page *)virt_to_page(block);
d87a133f NP	480	if (slob_page(sp))
	481	return ((slob_t *)block - 1)->units + SLOB_UNIT;
	482	else
	483	return sp->page.private;
10cef602 MM	484	}
	485
	486	struct kmem_cache {
	487	unsigned int size, align;
afc0cedb	488	unsigned long flags;
10cef602 MM	489	const char *name;
10cef602 MM	490	void (ctor)(void , struct kmem_cache *, unsigned long);
10cef602 MM	491	};
	492
	493	struct kmem_cache kmem_cache_create(const char name, size_t size,
	494	size_t align, unsigned long flags,
20c2df83	495	void (ctor)(void, struct kmem_cache *, unsigned long))
10cef602 MM	496	{
	497	struct kmem_cache *c;
	498
6193a2ff	499	c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1);
10cef602 MM	500
	501	if (c) {
	502	c->name = name;
	503	c->size = size;
afc0cedb	504	if (flags & SLAB_DESTROY_BY_RCU) {
afc0cedb NP	505	/* leave room for rcu footer at the end of object */
	506	c->size += sizeof(struct slob_rcu);
	507	}
	508	c->flags = flags;
10cef602	509	c->ctor = ctor;
10cef602	510	/* ignore alignment unless it's forced */
5af60839	511	c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
55394849 NP	512	if (c->align < ARCH_SLAB_MINALIGN)
55394849 NP	513	c->align = ARCH_SLAB_MINALIGN;
10cef602 MM	514	if (c->align < align)
10cef602 MM	515	c->align = align;
bc0055ae AM	516	} else if (flags & SLAB_PANIC)
bc0055ae AM	517	panic("Cannot create slab cache %s\n", name);
10cef602 MM	518
	519	return c;
	520	}
	521	EXPORT_SYMBOL(kmem_cache_create);
	522
133d205a	523	void kmem_cache_destroy(struct kmem_cache *c)
10cef602 MM	524	{
10cef602 MM	525	slob_free(c, sizeof(struct kmem_cache));
10cef602 MM	526	}
	527	EXPORT_SYMBOL(kmem_cache_destroy);
	528
6193a2ff	529	void kmem_cache_alloc_node(struct kmem_cache c, gfp_t flags, int node)
10cef602 MM	530	{
	531	void *b;
	532
	533	if (c->size < PAGE_SIZE)
6193a2ff	534	b = slob_alloc(c->size, flags, c->align, node);
10cef602	535	else
6193a2ff	536	b = slob_new_page(flags, get_order(c->size), node);
10cef602 MM	537
10cef602 MM	538	if (c->ctor)
a35afb83	539	c->ctor(b, c, 0);
10cef602 MM	540
	541	return b;
	542	}
6193a2ff	543	EXPORT_SYMBOL(kmem_cache_alloc_node);
10cef602	544
afc0cedb	545	static void __kmem_cache_free(void *b, int size)
10cef602	546	{
afc0cedb NP	547	if (size < PAGE_SIZE)
afc0cedb NP	548	slob_free(b, size);
10cef602	549	else
afc0cedb NP	550	free_pages((unsigned long)b, get_order(size));
	551	}
	552
	553	static void kmem_rcu_free(struct rcu_head *head)
	554	{
	555	struct slob_rcu slob_rcu = (struct slob_rcu )head;
	556	void b = (void )slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));
	557
	558	__kmem_cache_free(b, slob_rcu->size);
	559	}
	560
	561	void kmem_cache_free(struct kmem_cache c, void b)
	562	{
	563	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
	564	struct slob_rcu *slob_rcu;
	565	slob_rcu = b + (c->size - sizeof(struct slob_rcu));
	566	INIT_RCU_HEAD(&slob_rcu->head);
	567	slob_rcu->size = c->size;
	568	call_rcu(&slob_rcu->head, kmem_rcu_free);
	569	} else {
afc0cedb NP	570	__kmem_cache_free(b, c->size);
afc0cedb NP	571	}
10cef602 MM	572	}
	573	EXPORT_SYMBOL(kmem_cache_free);
	574
	575	unsigned int kmem_cache_size(struct kmem_cache *c)
	576	{
	577	return c->size;
	578	}
	579	EXPORT_SYMBOL(kmem_cache_size);
	580
	581	const char kmem_cache_name(struct kmem_cache c)
	582	{
	583	return c->name;
	584	}
	585	EXPORT_SYMBOL(kmem_cache_name);
	586
2e892f43 CL	587	int kmem_cache_shrink(struct kmem_cache *d)
	588	{
	589	return 0;
	590	}
	591	EXPORT_SYMBOL(kmem_cache_shrink);
	592
55935a34	593	int kmem_ptr_validate(struct kmem_cache a, const void b)
2e892f43 CL	594	{
	595	return 0;
	596	}
	597
84a01c2f PM	598	static unsigned int slob_ready __read_mostly;
	599
	600	int slab_is_available(void)
	601	{
	602	return slob_ready;
	603	}
	604
bcb4ddb4 DG	605	void __init kmem_cache_init(void)
bcb4ddb4 DG	606	{
84a01c2f	607	slob_ready = 1;
10cef602	608	}