[net-next-2.6.git] / mm / slob.c

/*
 * SLOB Allocator: Simple List Of Blocks
 *
 * Matt Mackall <mpm@selenic.com> 12/30/03
 *
 * NUMA support by Paul Mundt, 2007.
 *
 * How SLOB works:
 *
 * The core of SLOB is a traditional K&R style heap allocator, with
 * support for returning aligned objects. The granularity of this
 * allocator is as little as 2 bytes, however typically most architectures
 * will require 4 bytes on 32-bit and 8 bytes on 64-bit.
 *
 * The slob heap is a set of linked lists of pages from alloc_pages(),
 * and within each page, there is a singly-linked list of free blocks
 * (slob_t). The heap is grown on demand. To reduce fragmentation,
 * heap pages are segregated into three lists, with objects less than
 * 256 bytes, objects less than 1024 bytes, and all other objects.
 *
 * Allocation from heap involves first searching for a page with
 * sufficient free blocks (using a next-fit-like approach) followed by
 * a first-fit scan of the page. Deallocation inserts objects back
 * into the free list in address order, so this is effectively an
 * address-ordered first fit.
 *
 * Above this is an implementation of kmalloc/kfree. Blocks returned
 * from kmalloc are prepended with a header holding the kmalloc size (an
 * unsigned int, padded out to the minimum kmalloc/slab alignment).
 * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
 * alloc_pages() directly, allocating compound pages so the page order
 * does not have to be separately tracked, and also stores the exact
 * allocation size in page->private so that it can be used to accurately
 * provide ksize(). These objects are detected in kfree() because slob_page()
 * is false for them.
 *
 * SLAB is emulated on top of SLOB by simply calling constructors and
 * destructors for every SLAB allocation. Objects are returned with the
 * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
 * case the low-level allocator will fragment blocks to create the proper
 * alignment. Again, objects of page-size or greater are allocated by
 * calling alloc_pages(). As SLAB objects know their size, no separate
 * size bookkeeping is necessary and there is essentially no allocation
 * space overhead, and compound pages aren't needed for multi-page
 * allocations.
 *
 * NUMA support in SLOB is fairly simplistic, pushing most of the real
 * logic down to the page allocator, and simply doing the node accounting
 * on the upper levels. In the event that a node id is explicitly
 * provided, alloc_pages_node() with the specified node id is used
 * instead. The common case (or when the node id isn't explicitly provided)
 * will default to the current node, as per numa_node_id().
 *
 * Node aware pages are still inserted into the global freelist, and
 * these are scanned for by matching against the node id encoded in the
 * page flags. As a result, block allocations that can be satisfied from
 * the freelist will only be done so on pages residing on the same node,
 * in order to prevent random node placement.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <asm/atomic.h>

/*
 * slob_block has a field 'units', which indicates size of block if +ve,
 * or offset of next block if -ve (in SLOB_UNITs).
 *
 * Free blocks of size 1 unit simply contain the offset of the next block.
 * Those with larger size contain their size in the first SLOB_UNIT of
 * memory, and the offset of the next free block in the second SLOB_UNIT.
 *
 * slobidx_t is the integer type stored in each unit. The 16-bit variant is
 * chosen when PAGE_SIZE <= 32767 * 2, i.e. when every in-page offset
 * counted in 2-byte units fits in a signed 16-bit value; otherwise a
 * 32-bit index is used.
 */
#if PAGE_SIZE <= (32767 * 2)
typedef s16 slobidx_t;
#else
typedef s32 slobidx_t;
#endif

/* One allocation unit; SLOB_UNIT is defined as sizeof(slob_t). */
struct slob_block {
	slobidx_t units;
};
typedef struct slob_block slob_t;

/*
 * We use struct page fields to manage some slob allocation aspects,
 * however to avoid the horrible mess in include/linux/mm_types.h, we'll
 * just define our own struct page type variant here.
 *
 * The anonymous struct shares storage with a real struct page through the
 * union, so a struct page pointer can be cast to a struct slob_page
 * pointer (and back) to reach the slob bookkeeping fields.
 */
struct slob_page {
	union {
		struct {
			unsigned long flags;	/* mandatory */
			atomic_t _count;	/* mandatory */
			slobidx_t units;	/* free units left in page */
			/* NOTE(review): presumably skips struct page fields slob
			 * must leave alone - confirm against mm_types.h */
			unsigned long pad[2];
			slob_t *free;		/* first free slob_t in page */
			struct list_head list;	/* linked list of free pages */
		};
		struct page page;	/* the struct page being overlaid */
	};
};
/*
 * Compile-time check: the build breaks if struct slob_page and struct page
 * differ in size, since one is overlaid on the other.
 */
static inline void struct_slob_page_wrong_size(void)
{
	BUILD_BUG_ON(sizeof(struct page) != sizeof(struct slob_page));
}

/*
 * free_slob_page: call before a slob_page is returned to the page allocator.
 */
static inline void free_slob_page(struct slob_page *sp)
{
	reset_page_mapcount(&sp->page);
	sp->page.mapping = NULL;
}

/*
 * All partially free slob pages go on these lists.
 *
 * SLOB_BREAK1 and SLOB_BREAK2 are the request-size thresholds (in bytes)
 * slob_alloc() uses to pick a list: requests smaller than SLOB_BREAK1 use
 * free_slob_small, requests smaller than SLOB_BREAK2 use free_slob_medium,
 * and everything else uses free_slob_large.
 */
#define SLOB_BREAK1 256
#define SLOB_BREAK2 1024
static LIST_HEAD(free_slob_small);
static LIST_HEAD(free_slob_medium);
static LIST_HEAD(free_slob_large);

/*
 * slob_page: True for all slob pages (false for bigblock pages)
 */
static inline int slob_page(struct slob_page *sp)
{
	return PageSlobPage((struct page *)sp);
}

static inline void set_slob_page(struct slob_page *sp)
{
	__SetPageSlobPage((struct page *)sp);
}

static inline void clear_slob_page(struct slob_page *sp)
{
	__ClearPageSlobPage((struct page *)sp);
}

/*
 * slob_page_free: true for pages on free_slob_pages list.
 */
static inline int slob_page_free(struct slob_page *sp)
{
	return PageSlobFree((struct page *)sp);
}

/*
 * set_slob_page_free: link the page at the head of the given free list and
 * flag it as being on a free list.
 */
static void set_slob_page_free(struct slob_page *sp, struct list_head *list)
{
	struct page *page = &sp->page;

	list_add(&sp->list, list);
	__SetPageSlobFree(page);
}

/*
 * clear_slob_page_free: unlink the page from whichever free list it is on
 * and drop its on-free-list flag.
 */
static inline void clear_slob_page_free(struct slob_page *sp)
{
	struct page *page = &sp->page;

	list_del(&sp->list);
	__ClearPageSlobFree(page);
}

/* Size in bytes of one allocation unit. */
#define SLOB_UNIT sizeof(slob_t)
/* Number of units needed to hold 'size' bytes, rounded up. */
#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
/* Alignment applied to caches created with SLAB_HWCACHE_ALIGN. */
#define SLOB_ALIGN L1_CACHE_BYTES

/*
 * struct slob_rcu is inserted at the tail of allocated slob blocks, which
 * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
 * the block using call_rcu.
 *
 * kmem_cache_create() grows the object size by sizeof(struct slob_rcu) for
 * such caches, and kmem_cache_free() fills this footer in just before
 * queueing the RCU callback.
 */
struct slob_rcu {
	struct rcu_head head;	/* handed to call_rcu(); must stay first, the
				 * callback casts the rcu_head back to slob_rcu */
	int size;		/* full object size, footer included */
};

/*
 * slob_lock protects all slob allocator structures.
 *
 * slob_alloc() and slob_free() take it with spin_lock_irqsave() around
 * every access to the free lists and per-page free-block chains.
 */
static DEFINE_SPINLOCK(slob_lock);

/*
 * Write the free-block header for s: record that the block is 'size' units
 * long and that 'next' follows it on the free chain. 'next' is stored as
 * an offset in units from the start of the page containing s.
 *
 * A one-unit block has room for a single value, so only the negated offset
 * is stored; larger blocks keep the size in unit 0 and the offset in
 * unit 1.
 */
static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
{
	slob_t *page_start = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t next_idx = next - page_start;

	if (size <= 1) {
		s->units = -next_idx;
		return;
	}

	s->units = size;
	(s + 1)->units = next_idx;
}

/*
 * Size of free block s in units: the stored value when it is positive,
 * otherwise 1 (the first unit then holds a negated next-offset).
 */
static slobidx_t slob_units(slob_t *s)
{
	return s->units > 0 ? s->units : 1;
}

/*
 * Follow the free chain: decode the next-offset stored in s and turn it
 * back into a pointer relative to the start of the page containing s.
 */
static slob_t *slob_next(slob_t *s)
{
	slob_t *page_start = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t idx = s->units;

	if (idx < 0)
		idx = -idx;		/* one-unit block: negated offset */
	else
		idx = s[1].units;	/* larger block: offset in unit 1 */

	return page_start + idx;
}

/*
 * Non-zero when s terminates its page's free chain, which is encoded as a
 * next pointer that lands exactly on a page boundary.
 */
static int slob_last(slob_t *s)
{
	unsigned long next_addr = (unsigned long)slob_next(s);

	return (next_addr & ~PAGE_MASK) == 0;
}

/*
 * Get 2^order pages from the page allocator and return their kernel
 * virtual address, or NULL on failure. With CONFIG_NUMA, a node other
 * than -1 requests the pages from that node.
 */
static void *slob_new_page(gfp_t gfp, int order, int node)
{
	void *pg;

#ifdef CONFIG_NUMA
	if (node != -1)
		pg = alloc_pages_node(node, gfp, order);
	else
#endif
		pg = alloc_pages(gfp, order);

	return pg ? page_address(pg) : NULL;
}

/*
 * Allocate a slob block within a given slob_page sp.
 *
 * Walks the page's free chain from sp->free and takes the first block that
 * can hold 'size' bytes (rounded up to units) at the requested alignment.
 * 'align' is in bytes; 0 means no alignment requirement.
 *
 * Returns the block, or NULL if no free block on this page is big enough.
 * Both call sites in slob_alloc() hold slob_lock.
 * NOTE(review): assumes sp->free points at a valid free block on entry -
 * confirm that callers never pass a page with an empty chain.
 */
static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
{
	slob_t *prev, *cur, *aligned = 0;
	int delta = 0, units = SLOB_UNITS(size);

	for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
		slobidx_t avail = slob_units(cur);

		if (align) {
			/* delta = units to skip at the head of cur to reach alignment */
			aligned = (slob_t *)ALIGN((unsigned long)cur, align);
			delta = aligned - cur;
		}
		if (avail >= units + delta) { /* room enough? */
			slob_t *next;

			if (delta) { /* need to fragment head to align? */
				/*
				 * Split cur in two: the unaligned head stays on
				 * the free chain and becomes prev; the aligned
				 * tail becomes the candidate block.
				 */
				next = slob_next(cur);
				set_slob(aligned, avail - delta, next);
				set_slob(cur, delta, aligned);
				prev = cur;
				cur = aligned;
				avail = slob_units(cur);
			}

			next = slob_next(cur);
			if (avail == units) { /* exact fit? unlink. */
				if (prev)
					set_slob(prev, slob_units(prev), next);
				else
					sp->free = next;
			} else { /* fragment */
				/* hand out the front, keep the remainder on the chain */
				if (prev)
					set_slob(prev, slob_units(prev), cur + units);
				else
					sp->free = cur + units;
				set_slob(cur + units, avail - units, next);
			}

			sp->units -= units;
			/* page is now completely used: take it off its free list */
			if (!sp->units)
				clear_slob_page_free(sp);
			return cur;
		}
		if (slob_last(cur))
			return NULL;
	}
}

/*
 * slob_alloc: entry point into the slob allocator.
 *
 * Returns a block of at least 'size' bytes aligned to 'align' bytes
 * (0 = no requirement), or NULL if a fresh page was needed and could not
 * be obtained.
 *
 * The free list to search is picked from 'size' via SLOB_BREAK1 and
 * SLOB_BREAK2. With CONFIG_NUMA, a 'node' other than -1 restricts the
 * search to pages on that node. If no listed page can satisfy the request,
 * one new page is allocated, added to the chosen list and carved up.
 * __GFP_ZERO is honoured by zeroing just the returned block.
 */
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
{
	struct slob_page *sp;
	struct list_head *prev;
	struct list_head *slob_list;
	slob_t *b = NULL;
	unsigned long flags;

	if (size < SLOB_BREAK1)
		slob_list = &free_slob_small;
	else if (size < SLOB_BREAK2)
		slob_list = &free_slob_medium;
	else
		slob_list = &free_slob_large;

	spin_lock_irqsave(&slob_lock, flags);
	/* Iterate through each partially free page, try to find room */
	list_for_each_entry(sp, slob_list, list) {
#ifdef CONFIG_NUMA
		/*
		 * If there's a node specification, search for a partial
		 * page with a matching node id in the freelist.
		 */
		if (node != -1 && page_to_nid(&sp->page) != node)
			continue;
#endif
		/* Enough room on this page? */
		if (sp->units < SLOB_UNITS(size))
			continue;

		/*
		 * Attempt to alloc. Remember the predecessor first: a
		 * successful allocation that fills the page unlinks sp from
		 * the list, after which sp->list can no longer be used.
		 */
		prev = sp->list.prev;
		b = slob_page_alloc(sp, size, align);
		if (!b)
			continue;

		/* Improve fragment distribution and reduce our average
		 * search time by starting our next search here. (see
		 * Knuth vol 1, sec 2.5, pg 449) */
		if (prev != slob_list->prev &&
				slob_list->next != prev->next)
			list_move_tail(slob_list, prev->next);
		break;
	}
	spin_unlock_irqrestore(&slob_lock, flags);

	/* Not enough space: must allocate a new page */
	if (!b) {
		/* zeroing, if requested, is done on the block only (below) */
		b = slob_new_page(gfp & ~__GFP_ZERO, 0, node);
		if (!b)
			return 0;
		sp = (struct slob_page *)virt_to_page(b);
		set_slob_page(sp);

		spin_lock_irqsave(&slob_lock, flags);
		/* the whole page starts out as one free block ending the chain */
		sp->units = SLOB_UNITS(PAGE_SIZE);
		sp->free = b;
		INIT_LIST_HEAD(&sp->list);
		set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
		set_slob_page_free(sp, slob_list);
		b = slob_page_alloc(sp, size, align);
		BUG_ON(!b);
		spin_unlock_irqrestore(&slob_lock, flags);
	}
	if (unlikely((gfp & __GFP_ZERO) && b))
		memset(b, 0, size);
	return b;
}

/*
 * slob_free: entry point into the slob allocator.
 */
static void slob_free(void *block, int size)
{
	struct slob_page *sp;
	slob_t *prev, *next, *b = (slob_t *)block;
	slobidx_t units;
	unsigned long flags;

	if (unlikely(ZERO_OR_NULL_PTR(block)))
		return;
	BUG_ON(!size);

	sp = (struct slob_page *)virt_to_page(block);
	units = SLOB_UNITS(size);

	spin_lock_irqsave(&slob_lock, flags);

	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
		/* Go directly to page allocator. Do not pass slob allocator */
		if (slob_page_free(sp))
			clear_slob_page_free(sp);
		clear_slob_page(sp);
		free_slob_page(sp);
		free_page((unsigned long)b);
		goto out;
	}

	if (!slob_page_free(sp)) {
		/* This slob page is about to become partially free. Easy! */
		sp->units = units;
		sp->free = b;
		set_slob(b, units,
			(void *)((unsigned long)(b +
					SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
		set_slob_page_free(sp, &free_slob_small);
		goto out;
	}

	/*
	 * Otherwise the page is already partially free, so find reinsertion
	 * point.
	 */
	sp->units += units;

	if (b < sp->free) {
		if (b + units == sp->free) {
			units += slob_units(sp->free);
			sp->free = slob_next(sp->free);
		}
		set_slob(b, units, sp->free);
		sp->free = b;
	} else {
		prev = sp->free;
		next = slob_next(prev);
		while (b > next) {
			prev = next;
			next = slob_next(prev);
		}

		if (!slob_last(prev) && b + units == next) {
			units += slob_units(next);
			set_slob(b, units, slob_next(next));
		} else
			set_slob(b, units, next);

		if (prev + slob_units(prev) == b) {
			units = slob_units(b) + slob_units(prev);
			set_slob(prev, units, slob_next(b));
		} else
			set_slob(prev, slob_units(prev), b);
	}
out:
	spin_unlock_irqrestore(&slob_lock, flags);
}

/*
 * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
 */

/*
 * Fallback minimum alignments, used only when these macros are not already
 * defined: the alignment of an unsigned long.
 */
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
#endif

/*
 * __kmalloc_node: kmalloc on top of slob.
 *
 * Requests smaller than PAGE_SIZE - align are served from the slob heap
 * with an 'align'-byte header in front of the returned pointer; the header
 * records the requested size. Larger requests get compound pages directly
 * from the page allocator, with the size recorded in page->private.
 * A zero-size request returns ZERO_SIZE_PTR. Returns NULL on failure.
 */
void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	unsigned int *hdr;
	void *pages;

	if (size >= PAGE_SIZE - align) {
		pages = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
		if (pages) {
			struct page *page = virt_to_page(pages);

			page->private = size;
		}
		return pages;
	}

	if (!size)
		return ZERO_SIZE_PTR;

	hdr = slob_alloc(size + align, gfp, align, node);
	if (!hdr)
		return NULL;

	*hdr = size;
	return (void *)hdr + align;
}
EXPORT_SYMBOL(__kmalloc_node);

/*
 * kfree: release memory obtained from __kmalloc_node().
 *
 * NULL and ZERO_SIZE_PTR are ignored. A block living on a slob page is
 * returned to the slob heap using the size stored in the header in front
 * of it; anything else is treated as a page allocation and dropped with
 * put_page().
 */
void kfree(const void *block)
{
	struct slob_page *sp;
	unsigned int *hdr;
	int align;

	if (unlikely(ZERO_OR_NULL_PTR(block)))
		return;

	sp = (struct slob_page *)virt_to_page(block);
	if (!slob_page(sp)) {
		put_page(&sp->page);
		return;
	}

	align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	hdr = (unsigned int *)(block - align);
	slob_free(hdr, *hdr + align);
}
EXPORT_SYMBOL(kfree);

/*
 * ksize: usable size of a kmalloc'ed block. Not valid for memory obtained
 * from kmem_cache_alloc.
 *
 * For slob-heap blocks this is the recorded request size rounded up to a
 * whole number of SLOB_UNITs; for page allocations it is the size saved in
 * page->private. ZERO_SIZE_PTR yields 0; NULL is a bug.
 */
size_t ksize(const void *block)
{
	struct slob_page *sp;
	unsigned int *hdr;
	int align;

	BUG_ON(!block);
	if (unlikely(block == ZERO_SIZE_PTR))
		return 0;

	sp = (struct slob_page *)virt_to_page(block);
	if (!slob_page(sp))
		return sp->page.private;

	align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	hdr = (unsigned int *)(block - align);
	return SLOB_UNITS(*hdr) * SLOB_UNIT;
}

/*
 * Cache descriptor for the SLAB emulation. It records only the parameters
 * given to kmem_cache_create().
 */
struct kmem_cache {
	/* object size in bytes (RCU footer included when SLAB_DESTROY_BY_RCU
	 * is set) and the alignment passed to slob_alloc() */
	unsigned int size, align;
	unsigned long flags;	/* SLAB_* flags given at creation */
	const char *name;	/* caller's pointer, stored as-is (not copied) */
	void (*ctor)(void *);	/* optional; run on each object at allocation */
};

/*
 * kmem_cache_create: set up a cache descriptor.
 *
 * @name:  stored by pointer, not copied
 * @size:  object size in bytes; grown by sizeof(struct slob_rcu) when
 *         SLAB_DESTROY_BY_RCU is set, to make room for the RCU footer
 * @align: requested alignment; the effective alignment is the largest of
 *         this, ARCH_SLAB_MINALIGN and (with SLAB_HWCACHE_ALIGN) SLOB_ALIGN
 * @flags: SLAB_* cache flags
 * @ctor:  optional constructor run on every allocated object
 *
 * Returns the new cache, or NULL if the descriptor could not be allocated.
 * With SLAB_PANIC the failure panics instead of returning.
 */
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
	size_t align, unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *c;

	/*
	 * 'flags' carries SLAB_* cache flags and must not be used as the
	 * allocation mask: those bits alias unrelated gfp bits. The
	 * descriptor itself is an ordinary GFP_KERNEL allocation.
	 */
	c = slob_alloc(sizeof(struct kmem_cache),
		GFP_KERNEL, ARCH_KMALLOC_MINALIGN, -1);

	if (c) {
		c->name = name;
		c->size = size;
		if (flags & SLAB_DESTROY_BY_RCU) {
			/* leave room for rcu footer at the end of object */
			c->size += sizeof(struct slob_rcu);
		}
		c->flags = flags;
		c->ctor = ctor;
		/* ignore alignment unless it's forced */
		c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
		if (c->align < ARCH_SLAB_MINALIGN)
			c->align = ARCH_SLAB_MINALIGN;
		if (c->align < align)
			c->align = align;
	} else if (flags & SLAB_PANIC)
		panic("Cannot create slab cache %s\n", name);

	return c;
}
EXPORT_SYMBOL(kmem_cache_create);

/*
 * kmem_cache_destroy: give the cache descriptor back to the slob heap.
 * Objects allocated from the cache are not touched here.
 */
void kmem_cache_destroy(struct kmem_cache *c)
{
	const int descriptor_bytes = sizeof(struct kmem_cache);

	slob_free(c, descriptor_bytes);
}
EXPORT_SYMBOL(kmem_cache_destroy);

/*
 * kmem_cache_alloc_node: allocate one object from cache c.
 *
 * Objects smaller than PAGE_SIZE come from the slob heap with the cache's
 * alignment; larger ones get whole pages from the page allocator. 'node'
 * is passed through to the underlying allocator (-1 = no preference).
 *
 * Returns the object, constructed with c->ctor if the cache has one, or
 * NULL if the allocation failed.
 */
void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
	void *b;

	if (c->size < PAGE_SIZE)
		b = slob_alloc(c->size, flags, c->align, node);
	else
		b = slob_new_page(flags, get_order(c->size), node);

	/* both paths return NULL on failure; never run the ctor on NULL */
	if (b && c->ctor)
		c->ctor(b);

	return b;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);

/*
 * Release a cache object of 'size' bytes: objects of PAGE_SIZE or more go
 * back to the page allocator, smaller ones to the slob heap. This mirrors
 * the size split in kmem_cache_alloc_node().
 */
static void __kmem_cache_free(void *b, int size)
{
	if (size >= PAGE_SIZE) {
		free_pages((unsigned long)b, get_order(size));
		return;
	}

	slob_free(b, size);
}

/*
 * RCU callback queued by kmem_cache_free(). 'head' is the first member of
 * the slob_rcu footer at the tail of the object; the object's start is
 * recovered from the size stored in the footer, then the object is freed.
 */
static void kmem_rcu_free(struct rcu_head *head)
{
	struct slob_rcu *footer = (struct slob_rcu *)head;
	int obj_size = footer->size;
	void *obj = (void *)footer - (obj_size - sizeof(struct slob_rcu));

	__kmem_cache_free(obj, obj_size);
}

/*
 * kmem_cache_free: release object b back to cache c.
 *
 * For SLAB_DESTROY_BY_RCU caches the free is deferred: the footer at the
 * end of the object is filled in and handed to call_rcu(), and
 * kmem_rcu_free() does the actual release later. All other caches free
 * immediately.
 */
void kmem_cache_free(struct kmem_cache *c, void *b)
{
	struct slob_rcu *footer;

	if (!unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
		__kmem_cache_free(b, c->size);
		return;
	}

	footer = b + (c->size - sizeof(struct slob_rcu));
	INIT_RCU_HEAD(&footer->head);
	footer->size = c->size;
	call_rcu(&footer->head, kmem_rcu_free);
}
EXPORT_SYMBOL(kmem_cache_free);

/*
 * kmem_cache_size: the cache's stored object size in bytes (this includes
 * the RCU footer for SLAB_DESTROY_BY_RCU caches).
 */
unsigned int kmem_cache_size(struct kmem_cache *c)
{
	unsigned int object_bytes = c->size;

	return object_bytes;
}
EXPORT_SYMBOL(kmem_cache_size);

/*
 * kmem_cache_name: the name pointer that was given to kmem_cache_create().
 */
const char *kmem_cache_name(struct kmem_cache *c)
{
	const char *label = c->name;

	return label;
}
EXPORT_SYMBOL(kmem_cache_name);

/*
 * kmem_cache_shrink: no-op in this allocator. The cache is not touched and
 * 0 is always returned.
 */
int kmem_cache_shrink(struct kmem_cache *d)
{
	int status = 0;

	return status;
}
EXPORT_SYMBOL(kmem_cache_shrink);

/*
 * kmem_ptr_validate: stub. Neither argument is examined and 0 is always
 * returned.
 */
int kmem_ptr_validate(struct kmem_cache *a, const void *b)
{
	int verdict = 0;

	return verdict;
}

/* Set to 1 by kmem_cache_init(); reported by slab_is_available(). */
static unsigned int slob_ready __read_mostly;

/*
 * slab_is_available: non-zero once kmem_cache_init() has run.
 */
int slab_is_available(void)
{
	int ready = slob_ready;

	return ready;
}

/*
 * kmem_cache_init: the only setup done here is flagging the allocator as
 * ready, which is what slab_is_available() reports.
 */
void __init kmem_cache_init(void)
{
	slob_ready = 1U;
}
Commit	Line	Data
10cef602 MM	1	/*
	2	* SLOB Allocator: Simple List Of Blocks
	3	*
	4	* Matt Mackall <mpm@selenic.com> 12/30/03
	5	*
6193a2ff PM	6	* NUMA support by Paul Mundt, 2007.
6193a2ff PM	7	*
10cef602 MM	8	* How SLOB works:
	9	*
	10	* The core of SLOB is a traditional K&R style heap allocator, with
	11	* support for returning aligned objects. The granularity of this
55394849 NP	12	* allocator is as little as 2 bytes, however typically most architectures
55394849 NP	13	* will require 4 bytes on 32-bit and 8 bytes on 64-bit.
95b35127	14	*
20cecbae MM	15	* The slob heap is a set of linked list of pages from alloc_pages(),
	16	* and within each page, there is a singly-linked list of free blocks
	17	* (slob_t). The heap is grown on demand. To reduce fragmentation,
	18	* heap pages are segregated into three lists, with objects less than
	19	* 256 bytes, objects less than 1024 bytes, and all other objects.
	20	*
	21	* Allocation from heap involves first searching for a page with
	22	* sufficient free blocks (using a next-fit-like approach) followed by
	23	* a first-fit scan of the page. Deallocation inserts objects back
	24	* into the free list in address order, so this is effectively an
	25	* address-ordered first fit.
10cef602 MM	26	*
10cef602 MM	27	* Above this is an implementation of kmalloc/kfree. Blocks returned
55394849	28	* from kmalloc are prepended with a 4-byte header with the kmalloc size.
10cef602	29	* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
6193a2ff	30	* alloc_pages() directly, allocating compound pages so the page order
d87a133f NP	31	* does not have to be separately tracked, and also stores the exact
	32	* allocation size in page->private so that it can be used to accurately
	33	* provide ksize(). These objects are detected in kfree() because slob_page()
	34	* is false for them.
10cef602 MM	35	*
10cef602 MM	36	* SLAB is emulated on top of SLOB by simply calling constructors and
95b35127 NP	37	* destructors for every SLAB allocation. Objects are returned with the
	38	* 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
	39	* case the low-level allocator will fragment blocks to create the proper
	40	* alignment. Again, objects of page-size or greater are allocated by
6193a2ff	41	* calling alloc_pages(). As SLAB objects know their size, no separate
95b35127	42	* size bookkeeping is necessary and there is essentially no allocation
d87a133f NP	43	* space overhead, and compound pages aren't needed for multi-page
d87a133f NP	44	* allocations.
6193a2ff PM	45	*
	46	* NUMA support in SLOB is fairly simplistic, pushing most of the real
	47	* logic down to the page allocator, and simply doing the node accounting
	48	* on the upper levels. In the event that a node id is explicitly
	49	* provided, alloc_pages_node() with the specified node id is used
	50	* instead. The common case (or when the node id isn't explicitly provided)
	51	* will default to the current node, as per numa_node_id().
	52	*
	53	* Node aware pages are still inserted in to the global freelist, and
	54	* these are scanned for by matching against the node id encoded in the
	55	* page flags. As a result, block allocations that can be satisfied from
	56	* the freelist will only be done so on pages residing on the same node,
	57	* in order to prevent random node placement.
10cef602 MM	58	*/
10cef602 MM	59
95b35127	60	#include <linux/kernel.h>
10cef602 MM	61	#include <linux/slab.h>
	62	#include <linux/mm.h>
	63	#include <linux/cache.h>
	64	#include <linux/init.h>
	65	#include <linux/module.h>
afc0cedb	66	#include <linux/rcupdate.h>
95b35127 NP	67	#include <linux/list.h>
	68	#include <asm/atomic.h>
	69
95b35127 NP	70	/*
	71	* slob_block has a field 'units', which indicates size of block if +ve,
	72	* or offset of next block if -ve (in SLOB_UNITs).
	73	*
	74	* Free blocks of size 1 unit simply contain the offset of the next block.
	75	* Those with larger size contain their size in the first SLOB_UNIT of
	76	* memory, and the offset of the next free block in the second SLOB_UNIT.
	77	*/
55394849	78	#if PAGE_SIZE <= (32767 * 2)
95b35127 NP	79	typedef s16 slobidx_t;
	80	#else
	81	typedef s32 slobidx_t;
	82	#endif
	83
10cef602	84	struct slob_block {
95b35127	85	slobidx_t units;
55394849	86	};
10cef602 MM	87	typedef struct slob_block slob_t;
10cef602 MM	88
95b35127 NP	89	/*
	90	* We use struct page fields to manage some slob allocation aspects,
	91	* however to avoid the horrible mess in include/linux/mm_types.h, we'll
	92	* just define our own struct page type variant here.
	93	*/
	94	struct slob_page {
	95	union {
	96	struct {
	97	unsigned long flags; /* mandatory */
	98	atomic_t _count; /* mandatory */
	99	slobidx_t units; /* free units left in page */
	100	unsigned long pad[2];
	101	slob_t free; / first free slob_t in page */
	102	struct list_head list; /* linked list of free pages */
	103	};
	104	struct page page;
	105	};
	106	};
	107	static inline void struct_slob_page_wrong_size(void)
	108	{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }
	109
	110	/*
	111	* free_slob_page: call before a slob_page is returned to the page allocator.
	112	*/
	113	static inline void free_slob_page(struct slob_page *sp)
	114	{
	115	reset_page_mapcount(&sp->page);
	116	sp->page.mapping = NULL;
	117	}
	118
	119	/*
20cecbae	120	* All partially free slob pages go on these lists.
95b35127	121	*/
20cecbae MM	122	#define SLOB_BREAK1 256
	123	#define SLOB_BREAK2 1024
	124	static LIST_HEAD(free_slob_small);
	125	static LIST_HEAD(free_slob_medium);
	126	static LIST_HEAD(free_slob_large);
95b35127 NP	127
	128	/*
	129	* slob_page: True for all slob pages (false for bigblock pages)
	130	*/
	131	static inline int slob_page(struct slob_page *sp)
	132	{
9023cb7e	133	return PageSlobPage((struct page *)sp);
95b35127 NP	134	}
	135
	136	static inline void set_slob_page(struct slob_page *sp)
	137	{
9023cb7e	138	__SetPageSlobPage((struct page *)sp);
95b35127 NP	139	}
	140
	141	static inline void clear_slob_page(struct slob_page *sp)
	142	{
9023cb7e	143	__ClearPageSlobPage((struct page *)sp);
95b35127 NP	144	}
	145
	146	/*
	147	* slob_page_free: true for pages on free_slob_pages list.
	148	*/
	149	static inline int slob_page_free(struct slob_page *sp)
	150	{
9023cb7e	151	return PageSlobFree((struct page *)sp);
95b35127 NP	152	}
95b35127 NP	153
20cecbae	154	static void set_slob_page_free(struct slob_page sp, struct list_head list)
95b35127	155	{
20cecbae	156	list_add(&sp->list, list);
9023cb7e	157	__SetPageSlobFree((struct page *)sp);
95b35127 NP	158	}
	159
	160	static inline void clear_slob_page_free(struct slob_page *sp)
	161	{
	162	list_del(&sp->list);
9023cb7e	163	__ClearPageSlobFree((struct page *)sp);
95b35127 NP	164	}
95b35127 NP	165
10cef602 MM	166	#define SLOB_UNIT sizeof(slob_t)
	167	#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
	168	#define SLOB_ALIGN L1_CACHE_BYTES
	169
afc0cedb NP	170	/*
	171	* struct slob_rcu is inserted at the tail of allocated slob blocks, which
	172	* were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
	173	* the block using call_rcu.
	174	*/
	175	struct slob_rcu {
	176	struct rcu_head head;
	177	int size;
	178	};
	179
95b35127 NP	180	/*
	181	* slob_lock protects all slob allocator structures.
	182	*/
10cef602	183	static DEFINE_SPINLOCK(slob_lock);
10cef602	184
95b35127 NP	185	/*
	186	* Encode the given size and next info into a free slob block s.
	187	*/
	188	static void set_slob(slob_t s, slobidx_t size, slob_t next)
	189	{
	190	slob_t base = (slob_t )((unsigned long)s & PAGE_MASK);
	191	slobidx_t offset = next - base;
bcb4ddb4	192
95b35127 NP	193	if (size > 1) {
	194	s[0].units = size;
	195	s[1].units = offset;
	196	} else
	197	s[0].units = -offset;
	198	}
10cef602	199
95b35127 NP	200	/*
	201	* Return the size of a slob block.
	202	*/
	203	static slobidx_t slob_units(slob_t *s)
	204	{
	205	if (s->units > 0)
	206	return s->units;
	207	return 1;
	208	}
	209
	210	/*
	211	* Return the next free slob block pointer after this one.
	212	*/
	213	static slob_t slob_next(slob_t s)
	214	{
	215	slob_t base = (slob_t )((unsigned long)s & PAGE_MASK);
	216	slobidx_t next;
	217
	218	if (s[0].units < 0)
	219	next = -s[0].units;
	220	else
	221	next = s[1].units;
	222	return base+next;
	223	}
	224
	225	/*
	226	* Returns true if s is the last free block in its page.
	227	*/
	228	static int slob_last(slob_t *s)
	229	{
	230	return !((unsigned long)slob_next(s) & ~PAGE_MASK);
	231	}
	232
6193a2ff PM	233	static void *slob_new_page(gfp_t gfp, int order, int node)
	234	{
	235	void *page;
	236
	237	#ifdef CONFIG_NUMA
	238	if (node != -1)
	239	page = alloc_pages_node(node, gfp, order);
	240	else
	241	#endif
	242	page = alloc_pages(gfp, order);
	243
	244	if (!page)
	245	return NULL;
	246
	247	return page_address(page);
	248	}
	249
95b35127 NP	250	/*
	251	* Allocate a slob block within a given slob_page sp.
	252	*/
	253	static void slob_page_alloc(struct slob_page sp, size_t size, int align)
10cef602 MM	254	{
	255	slob_t prev, cur, *aligned = 0;
	256	int delta = 0, units = SLOB_UNITS(size);
10cef602	257
95b35127 NP	258	for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
	259	slobidx_t avail = slob_units(cur);
	260
10cef602 MM	261	if (align) {
	262	aligned = (slob_t *)ALIGN((unsigned long)cur, align);
	263	delta = aligned - cur;
	264	}
95b35127 NP	265	if (avail >= units + delta) { /* room enough? */
	266	slob_t *next;
	267
10cef602	268	if (delta) { /* need to fragment head to align? */
95b35127 NP	269	next = slob_next(cur);
	270	set_slob(aligned, avail - delta, next);
	271	set_slob(cur, delta, aligned);
10cef602 MM	272	prev = cur;
10cef602 MM	273	cur = aligned;
95b35127	274	avail = slob_units(cur);
10cef602 MM	275	}
10cef602 MM	276
95b35127 NP	277	next = slob_next(cur);
	278	if (avail == units) { /* exact fit? unlink. */
	279	if (prev)
	280	set_slob(prev, slob_units(prev), next);
	281	else
	282	sp->free = next;
	283	} else { /* fragment */
	284	if (prev)
	285	set_slob(prev, slob_units(prev), cur + units);
	286	else
	287	sp->free = cur + units;
	288	set_slob(cur + units, avail - units, next);
10cef602 MM	289	}
10cef602 MM	290
95b35127 NP	291	sp->units -= units;
	292	if (!sp->units)
	293	clear_slob_page_free(sp);
10cef602 MM	294	return cur;
10cef602 MM	295	}
95b35127 NP	296	if (slob_last(cur))
	297	return NULL;
	298	}
	299	}
10cef602	300
95b35127 NP	301	/*
	302	* slob_alloc: entry point into the slob allocator.
	303	*/
6193a2ff	304	static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
95b35127 NP	305	{
95b35127 NP	306	struct slob_page *sp;
d6269543	307	struct list_head *prev;
20cecbae	308	struct list_head *slob_list;
95b35127 NP	309	slob_t *b = NULL;
95b35127 NP	310	unsigned long flags;
10cef602	311
20cecbae MM	312	if (size < SLOB_BREAK1)
	313	slob_list = &free_slob_small;
	314	else if (size < SLOB_BREAK2)
	315	slob_list = &free_slob_medium;
	316	else
	317	slob_list = &free_slob_large;
	318
95b35127 NP	319	spin_lock_irqsave(&slob_lock, flags);
95b35127 NP	320	/* Iterate through each partially free page, try to find room */
20cecbae	321	list_for_each_entry(sp, slob_list, list) {
6193a2ff PM	322	#ifdef CONFIG_NUMA
	323	/*
	324	* If there's a node specification, search for a partial
	325	* page with a matching node id in the freelist.
	326	*/
	327	if (node != -1 && page_to_nid(&sp->page) != node)
	328	continue;
	329	#endif
d6269543 MM	330	/* Enough room on this page? */
	331	if (sp->units < SLOB_UNITS(size))
	332	continue;
6193a2ff	333
d6269543 MM	334	/* Attempt to alloc */
	335	prev = sp->list.prev;
	336	b = slob_page_alloc(sp, size, align);
	337	if (!b)
	338	continue;
	339
	340	/* Improve fragment distribution and reduce our average
	341	* search time by starting our next search here. (see
	342	* Knuth vol 1, sec 2.5, pg 449) */
20cecbae MM	343	if (prev != slob_list->prev &&
	344	slob_list->next != prev->next)
	345	list_move_tail(slob_list, prev->next);
d6269543	346	break;
10cef602	347	}
95b35127 NP	348	spin_unlock_irqrestore(&slob_lock, flags);
	349
	350	/* Not enough space: must allocate a new page */
	351	if (!b) {
7fd27255	352	b = slob_new_page(gfp & ~__GFP_ZERO, 0, node);
95b35127 NP	353	if (!b)
	354	return 0;
	355	sp = (struct slob_page *)virt_to_page(b);
	356	set_slob_page(sp);
	357
	358	spin_lock_irqsave(&slob_lock, flags);
	359	sp->units = SLOB_UNITS(PAGE_SIZE);
	360	sp->free = b;
	361	INIT_LIST_HEAD(&sp->list);
	362	set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
20cecbae	363	set_slob_page_free(sp, slob_list);
95b35127 NP	364	b = slob_page_alloc(sp, size, align);
	365	BUG_ON(!b);
	366	spin_unlock_irqrestore(&slob_lock, flags);
	367	}
d07dbea4 CL	368	if (unlikely((gfp & __GFP_ZERO) && b))
d07dbea4 CL	369	memset(b, 0, size);
95b35127	370	return b;
10cef602 MM	371	}
10cef602 MM	372
95b35127 NP	373	/*
	374	* slob_free: entry point into the slob allocator.
	375	*/
10cef602 MM	376	static void slob_free(void *block, int size)
10cef602 MM	377	{
95b35127 NP	378	struct slob_page *sp;
	379	slob_t prev, next, b = (slob_t )block;
	380	slobidx_t units;
10cef602 MM	381	unsigned long flags;
10cef602 MM	382
2408c550	383	if (unlikely(ZERO_OR_NULL_PTR(block)))
10cef602	384	return;
95b35127	385	BUG_ON(!size);
10cef602	386
95b35127 NP	387	sp = (struct slob_page *)virt_to_page(block);
95b35127 NP	388	units = SLOB_UNITS(size);
10cef602	389
10cef602	390	spin_lock_irqsave(&slob_lock, flags);
10cef602	391
95b35127 NP	392	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
	393	/* Go directly to page allocator. Do not pass slob allocator */
	394	if (slob_page_free(sp))
	395	clear_slob_page_free(sp);
	396	clear_slob_page(sp);
	397	free_slob_page(sp);
	398	free_page((unsigned long)b);
	399	goto out;
	400	}
10cef602	401
95b35127 NP	402	if (!slob_page_free(sp)) {
	403	/* This slob page is about to become partially free. Easy! */
	404	sp->units = units;
	405	sp->free = b;
	406	set_slob(b, units,
	407	(void *)((unsigned long)(b +
	408	SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
20cecbae	409	set_slob_page_free(sp, &free_slob_small);
95b35127 NP	410	goto out;
	411	}
	412
	413	/*
	414	* Otherwise the page is already partially free, so find reinsertion
	415	* point.
	416	*/
	417	sp->units += units;
10cef602	418
95b35127	419	if (b < sp->free) {
679299b3 MM	420	if (b + units == sp->free) {
	421	units += slob_units(sp->free);
	422	sp->free = slob_next(sp->free);
	423	}
95b35127 NP	424	set_slob(b, units, sp->free);
	425	sp->free = b;
	426	} else {
	427	prev = sp->free;
	428	next = slob_next(prev);
	429	while (b > next) {
	430	prev = next;
	431	next = slob_next(prev);
	432	}
10cef602	433
95b35127 NP	434	if (!slob_last(prev) && b + units == next) {
	435	units += slob_units(next);
	436	set_slob(b, units, slob_next(next));
	437	} else
	438	set_slob(b, units, next);
	439
	440	if (prev + slob_units(prev) == b) {
	441	units = slob_units(b) + slob_units(prev);
	442	set_slob(prev, units, slob_next(b));
	443	} else
	444	set_slob(prev, slob_units(prev), b);
	445	}
	446	out:
10cef602 MM	447	spin_unlock_irqrestore(&slob_lock, flags);
	448	}
	449
95b35127 NP	450	/*
	451	* End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
	452	*/
	453
55394849 NP	454	#ifndef ARCH_KMALLOC_MINALIGN
	455	#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
	456	#endif
	457
	458	#ifndef ARCH_SLAB_MINALIGN
	459	#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
	460	#endif
	461
6193a2ff	462	void *__kmalloc_node(size_t size, gfp_t gfp, int node)
10cef602	463	{
6cb8f913	464	unsigned int *m;
55394849 NP	465	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	466
	467	if (size < PAGE_SIZE - align) {
6cb8f913 CL	468	if (!size)
	469	return ZERO_SIZE_PTR;
	470
6193a2ff	471	m = slob_alloc(size + align, gfp, align, node);
239f49c0 MK	472	if (!m)
	473	return NULL;
	474	*m = size;
55394849	475	return (void *)m + align;
d87a133f NP	476	} else {
	477	void *ret;
	478
6193a2ff	479	ret = slob_new_page(gfp \| __GFP_COMP, get_order(size), node);
d87a133f NP	480	if (ret) {
	481	struct page *page;
	482	page = virt_to_page(ret);
	483	page->private = size;
	484	}
	485	return ret;
10cef602	486	}
10cef602	487	}
6193a2ff	488	EXPORT_SYMBOL(__kmalloc_node);
10cef602 MM	489
	490	void kfree(const void *block)
	491	{
95b35127	492	struct slob_page *sp;
10cef602	493
2408c550	494	if (unlikely(ZERO_OR_NULL_PTR(block)))
10cef602 MM	495	return;
10cef602 MM	496
95b35127	497	sp = (struct slob_page *)virt_to_page(block);
d87a133f	498	if (slob_page(sp)) {
55394849 NP	499	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	500	unsigned int m = (unsigned int )(block - align);
	501	slob_free(m, *m + align);
d87a133f NP	502	} else
d87a133f NP	503	put_page(&sp->page);
10cef602	504	}
10cef602 MM	505	EXPORT_SYMBOL(kfree);
10cef602 MM	506
d87a133f	507	/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
fd76bab2	508	size_t ksize(const void *block)
10cef602	509	{
95b35127	510	struct slob_page *sp;
10cef602	511
ef8b4520 CL	512	BUG_ON(!block);
ef8b4520 CL	513	if (unlikely(block == ZERO_SIZE_PTR))
10cef602 MM	514	return 0;
10cef602 MM	515
95b35127	516	sp = (struct slob_page *)virt_to_page(block);
70096a56 MM	517	if (slob_page(sp)) {
	518	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	519	unsigned int m = (unsigned int )(block - align);
	520	return SLOB_UNITS(m) SLOB_UNIT;
	521	} else
d87a133f	522	return sp->page.private;
10cef602 MM	523	}
	524
	525	struct kmem_cache {
	526	unsigned int size, align;
afc0cedb	527	unsigned long flags;
10cef602	528	const char *name;
51cc5068	529	void (ctor)(void );
10cef602 MM	530	};
	531
	532	struct kmem_cache kmem_cache_create(const char name, size_t size,
51cc5068	533	size_t align, unsigned long flags, void (ctor)(void ))
10cef602 MM	534	{
	535	struct kmem_cache *c;
	536
0701a9e6 YL	537	c = slob_alloc(sizeof(struct kmem_cache),
0701a9e6 YL	538	flags, ARCH_KMALLOC_MINALIGN, -1);
10cef602 MM	539
	540	if (c) {
	541	c->name = name;
	542	c->size = size;
afc0cedb	543	if (flags & SLAB_DESTROY_BY_RCU) {
afc0cedb NP	544	/* leave room for rcu footer at the end of object */
	545	c->size += sizeof(struct slob_rcu);
	546	}
	547	c->flags = flags;
10cef602	548	c->ctor = ctor;
10cef602	549	/* ignore alignment unless it's forced */
5af60839	550	c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
55394849 NP	551	if (c->align < ARCH_SLAB_MINALIGN)
55394849 NP	552	c->align = ARCH_SLAB_MINALIGN;
10cef602 MM	553	if (c->align < align)
10cef602 MM	554	c->align = align;
bc0055ae AM	555	} else if (flags & SLAB_PANIC)
bc0055ae AM	556	panic("Cannot create slab cache %s\n", name);
10cef602 MM	557
	558	return c;
	559	}
	560	EXPORT_SYMBOL(kmem_cache_create);
	561
133d205a	562	void kmem_cache_destroy(struct kmem_cache *c)
10cef602 MM	563	{
10cef602 MM	564	slob_free(c, sizeof(struct kmem_cache));
10cef602 MM	565	}
	566	EXPORT_SYMBOL(kmem_cache_destroy);
	567
6193a2ff	568	void kmem_cache_alloc_node(struct kmem_cache c, gfp_t flags, int node)
10cef602 MM	569	{
	570	void *b;
	571
	572	if (c->size < PAGE_SIZE)
6193a2ff	573	b = slob_alloc(c->size, flags, c->align, node);
10cef602	574	else
6193a2ff	575	b = slob_new_page(flags, get_order(c->size), node);
10cef602 MM	576
10cef602 MM	577	if (c->ctor)
51cc5068	578	c->ctor(b);
10cef602 MM	579
	580	return b;
	581	}
6193a2ff	582	EXPORT_SYMBOL(kmem_cache_alloc_node);
10cef602	583
afc0cedb	584	static void __kmem_cache_free(void *b, int size)
10cef602	585	{
afc0cedb NP	586	if (size < PAGE_SIZE)
afc0cedb NP	587	slob_free(b, size);
10cef602	588	else
afc0cedb NP	589	free_pages((unsigned long)b, get_order(size));
	590	}
	591
	592	static void kmem_rcu_free(struct rcu_head *head)
	593	{
	594	struct slob_rcu slob_rcu = (struct slob_rcu )head;
	595	void b = (void )slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));
	596
	597	__kmem_cache_free(b, slob_rcu->size);
	598	}
	599
	600	void kmem_cache_free(struct kmem_cache c, void b)
	601	{
	602	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
	603	struct slob_rcu *slob_rcu;
	604	slob_rcu = b + (c->size - sizeof(struct slob_rcu));
	605	INIT_RCU_HEAD(&slob_rcu->head);
	606	slob_rcu->size = c->size;
	607	call_rcu(&slob_rcu->head, kmem_rcu_free);
	608	} else {
afc0cedb NP	609	__kmem_cache_free(b, c->size);
afc0cedb NP	610	}
10cef602 MM	611	}
	612	EXPORT_SYMBOL(kmem_cache_free);
	613
	614	unsigned int kmem_cache_size(struct kmem_cache *c)
	615	{
	616	return c->size;
	617	}
	618	EXPORT_SYMBOL(kmem_cache_size);
	619
	620	const char kmem_cache_name(struct kmem_cache c)
	621	{
	622	return c->name;
	623	}
	624	EXPORT_SYMBOL(kmem_cache_name);
	625
/*
 * kmem_cache_shrink - no-op in this allocator; @d is unused and the
 * function always returns 0.
 */
int kmem_cache_shrink(struct kmem_cache *d)
{
	return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);
	631
/*
 * kmem_ptr_validate - no validation is performed here; both arguments are
 * unused and the function always returns 0.
 */
int kmem_ptr_validate(struct kmem_cache *a, const void *b)
{
	return 0;
}
	636
84a01c2f PM	637	static unsigned int slob_ready __read_mostly;
	638
	639	int slab_is_available(void)
	640	{
	641	return slob_ready;
	642	}
	643
bcb4ddb4 DG	644	void __init kmem_cache_init(void)
bcb4ddb4 DG	645	{
84a01c2f	646	slob_ready = 1;
10cef602	647	}