[net-next-2.6.git] / mm / slob.c

/*
 * SLOB Allocator: Simple List Of Blocks
 *
 * Matt Mackall <mpm@selenic.com> 12/30/03
 *
 * How SLOB works:
 *
 * The core of SLOB is a traditional K&R style heap allocator, with
 * support for returning aligned objects. The granularity of this
 * allocator is 4 bytes on 32-bit and 8 bytes on 64-bit, though it
 * could be as low as 2 if the compiler alignment requirements allow.
 *
 * The slob heap is a linked list of pages from __get_free_page, and
 * within each page, there is a singly-linked list of free blocks (slob_t).
 * The heap is grown on demand and allocation from the heap is currently
 * first-fit.
 *
 * Above this is an implementation of kmalloc/kfree. Blocks returned
 * from kmalloc are SLOB_UNIT aligned and prepended with a one-unit
 * header that records the requested size. If kmalloc is asked for
 * objects of PAGE_SIZE - SLOB_UNIT bytes or larger, it calls
 * __get_free_pages directly so that it can return page-aligned blocks
 * and keeps a linked list of such pages and their orders. These
 * objects are detected in kfree() by their struct page not being
 * marked as a slob page.
 *
 * SLAB is emulated on top of SLOB by simply calling constructors and
 * destructors for every SLAB allocation. Objects are returned with the
 * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
 * case the low-level allocator will fragment blocks to create the proper
 * alignment. Again, objects of page-size or greater are allocated by
 * calling __get_free_pages. As SLAB objects know their size, no separate
 * size bookkeeping is necessary and there is essentially no allocation
 * space overhead.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <asm/atomic.h>

/*
 * SLOB_MIN_ALIGN == sizeof(long): 4 bytes on 32-bit, 8 bytes on 64-bit.
 *
 * The selection has to be made on BITS_PER_LONG. BITS_PER_BYTE is 8, so
 * comparing it against 32 would pick the 8-byte alignment everywhere.
 */
#if BITS_PER_LONG == 32
#define SLOB_MIN_ALIGN	4
#else
#define SLOB_MIN_ALIGN	8
#endif

/*
 * slob_block has a field 'units', which indicates size of block if +ve,
 * or offset of next block if -ve (in SLOB_UNITs).
 *
 * Free blocks of size 1 unit simply contain the offset of the next block.
 * Those with larger size contain their size in the first SLOB_UNIT of
 * memory, and the offset of the next free block in the second SLOB_UNIT.
 *
 * slobidx_t is the signed integer type used for those sizes and offsets:
 * a 16-bit type when PAGE_SIZE is at most 32767 * SLOB_MIN_ALIGN, and a
 * 32-bit type otherwise.
 */
#if PAGE_SIZE <= (32767 * SLOB_MIN_ALIGN)
typedef s16 slobidx_t;
#else
typedef s32 slobidx_t;
#endif

/*
 * One allocation unit of the slob heap. sizeof(slob_t) is SLOB_UNIT, and
 * the aligned attribute forces it to SLOB_MIN_ALIGN bytes even though the
 * only member is a slobidx_t.
 *
 * Align struct slob_block to long for now, but can some embedded
 * architectures get away with less?
 */
struct slob_block {
	slobidx_t units;
} __attribute__((aligned(SLOB_MIN_ALIGN)));
typedef struct slob_block slob_t;

/*
 * We use struct page fields to manage some slob allocation aspects,
 * however to avoid the horrible mess in include/linux/mm_types.h, we'll
 * just define our own struct page type variant here.
 *
 * The anonymous struct shares storage with 'page' through the union, so a
 * struct page pointer from virt_to_page() can be cast to struct slob_page.
 */
struct slob_page {
	union {
		struct {
			unsigned long flags;	/* mandatory */
			atomic_t _count;	/* mandatory */
			slobidx_t units;	/* free units left in page */
			unsigned long pad[2];	/* NOTE(review): presumably skips struct page fields slob must not reuse - confirm against mm_types.h */
			slob_t *free;		/* first free slob_t in page */
			struct list_head list;	/* linked list of free pages */
		};
		struct page page;
	};
};
/*
 * Compile-time guard: the build breaks if struct slob_page and struct page
 * ever differ in size. Nothing in this file calls the function.
 */
static inline void struct_slob_page_wrong_size(void)
{
	BUILD_BUG_ON(sizeof(struct page) != sizeof(struct slob_page));
}

/*
 * free_slob_page: call before a slob_page is returned to the page allocator.
 */
static inline void free_slob_page(struct slob_page *sp)
{
	reset_page_mapcount(&sp->page);
	sp->page.mapping = NULL;
}

/*
 * All (partially) free slob pages go on this list.
 * Pages are linked in by set_slob_page_free() and unlinked by
 * clear_slob_page_free().
 */
static LIST_HEAD(free_slob_pages);

/*
 * slob_page: True for all slob pages (false for bigblock pages).
 * The marker is the PG_active bit in the page flags.
 */
static inline int slob_page(struct slob_page *sp)
{
	unsigned long *page_flags = &sp->flags;

	return test_bit(PG_active, page_flags);
}

/*
 * set_slob_page: mark a page as belonging to the slob heap (sets PG_active
 * with the non-atomic bit operation).
 */
static inline void set_slob_page(struct slob_page *sp)
{
	unsigned long *page_flags = &sp->flags;

	__set_bit(PG_active, page_flags);
}

/*
 * clear_slob_page: remove the slob marker (PG_active) from a page, using the
 * non-atomic bit operation.
 */
static inline void clear_slob_page(struct slob_page *sp)
{
	unsigned long *page_flags = &sp->flags;

	__clear_bit(PG_active, page_flags);
}

/*
 * slob_page_free: true for pages on free_slob_pages list.
 * List membership is tracked with the PG_private bit.
 */
static inline int slob_page_free(struct slob_page *sp)
{
	unsigned long *page_flags = &sp->flags;

	return test_bit(PG_private, page_flags);
}

/*
 * set_slob_page_free: flag the page as having free space (PG_private) and
 * link it at the head of free_slob_pages.
 */
static inline void set_slob_page_free(struct slob_page *sp)
{
	struct list_head *entry = &sp->list;

	__set_bit(PG_private, &sp->flags);
	list_add(entry, &free_slob_pages);
}

/*
 * clear_slob_page_free: drop the PG_private marker and unlink the page from
 * free_slob_pages.
 */
static inline void clear_slob_page_free(struct slob_page *sp)
{
	struct list_head *entry = &sp->list;

	__clear_bit(PG_private, &sp->flags);
	list_del(entry);
}

/* Size in bytes of one allocation unit. */
#define SLOB_UNIT sizeof(slob_t)
/* Number of units needed to hold 'size' bytes, rounded up. */
#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
/* Alignment applied to caches created with SLAB_HWCACHE_ALIGN. */
#define SLOB_ALIGN L1_CACHE_BYTES

/*
 * struct slob_rcu is inserted at the tail of allocated slob blocks, which
 * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
 * the block using call_rcu.
 *
 * 'head' must stay the first member: kmem_rcu_free() casts the rcu_head
 * pointer it receives straight back to a struct slob_rcu pointer.
 */
struct slob_rcu {
	struct rcu_head head;
	int size;	/* full object size, footer included (set from c->size) */
};

/*
 * slob_lock protects all slob allocator structures.
 * slob_alloc() and slob_free() take it with spin_lock_irqsave().
 */
static DEFINE_SPINLOCK(slob_lock);

/*
 * Encode the given size and next info into a free slob block s.
 *
 * 'next' is stored as a unit offset from the start of the page containing
 * s. A block of more than one unit keeps its size in s[0] and the offset in
 * s[1]; a one-unit block only has room for the negated offset in s[0].
 */
static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
{
	unsigned long page_start = (unsigned long)s & PAGE_MASK;
	slob_t *base = (slob_t *)page_start;
	slobidx_t offset = next - base;

	if (size <= 1) {
		s[0].units = -offset;
		return;
	}

	s[0].units = size;
	s[1].units = offset;
}

/*
 * Return the size of a slob block, in units. A non-positive 'units' field
 * denotes a one-unit block (the field then holds the negated next offset).
 */
static slobidx_t slob_units(slob_t *s)
{
	return (s->units > 0) ? s->units : 1;
}

/*
 * Return the next free slob block pointer after this one.
 *
 * The stored offset is relative to the start of the page holding s. It is
 * read from s[0] (negated) for one-unit blocks and from s[1] otherwise.
 */
static slob_t *slob_next(slob_t *s)
{
	unsigned long page_start = (unsigned long)s & PAGE_MASK;
	slob_t *base = (slob_t *)page_start;
	slobidx_t next = (s[0].units < 0) ? -s[0].units : s[1].units;

	return base + next;
}

/*
 * Returns true if s is the last free block in its page, i.e. its next
 * pointer lands exactly on a page boundary.
 */
static int slob_last(slob_t *s)
{
	unsigned long offset_in_page = (unsigned long)slob_next(s) & ~PAGE_MASK;

	return offset_in_page == 0;
}

/*
 * Allocate a slob block within a given slob_page sp.
 *
 * Walks the page's free list from sp->free and takes the first block that
 * can hold 'size' bytes (rounded up to units) at the requested alignment.
 * 'align' is a byte alignment; 0 means no alignment beyond the unit size.
 *
 * Returns the allocated block, or NULL when no free block in this page is
 * large enough. Both callers in this file hold slob_lock.
 */
static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
{
	slob_t *prev, *cur, *aligned = 0;
	int delta = 0, units = SLOB_UNITS(size);

	for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
		slobidx_t avail = slob_units(cur);

		if (align) {
			/* delta = units to skip at the head of cur to reach alignment */
			aligned = (slob_t *)ALIGN((unsigned long)cur, align);
			delta = aligned - cur;
		}
		if (avail >= units + delta) { /* room enough? */
			slob_t *next;

			if (delta) { /* need to fragment head to align? */
				/*
				 * Split cur in two: the unaligned head stays on the
				 * free list and becomes prev, the aligned remainder
				 * becomes the block we allocate from.
				 */
				next = slob_next(cur);
				set_slob(aligned, avail - delta, next);
				set_slob(cur, delta, aligned);
				prev = cur;
				cur = aligned;
				avail = slob_units(cur);
			}

			next = slob_next(cur);
			if (avail == units) { /* exact fit? unlink. */
				if (prev)
					set_slob(prev, slob_units(prev), next);
				else
					sp->free = next;
			} else { /* fragment */
				/* the tail left over after 'units' replaces cur in the list */
				if (prev)
					set_slob(prev, slob_units(prev), cur + units);
				else
					sp->free = cur + units;
				set_slob(cur + units, avail - units, next);
			}

			sp->units -= units;
			/* a page with no free units left leaves free_slob_pages */
			if (!sp->units)
				clear_slob_page_free(sp);
			return cur;
		}
		if (slob_last(cur))
			return NULL;
	}
}

/*
 * slob_alloc: entry point into the slob allocator.
 *
 * Tries every page on free_slob_pages that reports enough free units. If
 * none of them can satisfy the request, a fresh page is obtained with
 * __get_free_page(gfp), set up as one page-sized free block and allocated
 * from. Returns the block, or 0 if no new page could be obtained.
 */
static void *slob_alloc(size_t size, gfp_t gfp, int align)
{
	struct slob_page *sp;
	slob_t *b = NULL;
	unsigned long flags;

	/* First pass: look for room in the partially free pages. */
	spin_lock_irqsave(&slob_lock, flags);
	list_for_each_entry(sp, &free_slob_pages, list) {
		if (sp->units < SLOB_UNITS(size))
			continue;
		b = slob_page_alloc(sp, size, align);
		if (b)
			break;
	}
	spin_unlock_irqrestore(&slob_lock, flags);

	if (b)
		return b;

	/* Nothing fitted: grow the heap by one page (lock is not held here). */
	b = (slob_t *)__get_free_page(gfp);
	if (!b)
		return 0;
	sp = (struct slob_page *)virt_to_page(b);
	set_slob_page(sp);

	spin_lock_irqsave(&slob_lock, flags);
	sp->free = b;
	sp->units = SLOB_UNITS(PAGE_SIZE);
	INIT_LIST_HEAD(&sp->list);
	/* the whole page is one free block whose next pointer is the page end */
	set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
	set_slob_page_free(sp);
	b = slob_page_alloc(sp, size, align);
	BUG_ON(!b);
	spin_unlock_irqrestore(&slob_lock, flags);

	return b;
}

/*
 * slob_free: entry point into the slob allocator.
 *
 * block: start of the block to release; NULL is ignored.
 * size:  size of the block in bytes; must be non-zero (BUG otherwise).
 *
 * Three cases are handled, all under slob_lock:
 *  - freeing the block makes the whole page free: the page is handed back
 *    to the page allocator;
 *  - the page had no free space: the block becomes its only free block and
 *    the page is put on free_slob_pages;
 *  - otherwise the block is inserted into the page's address-ordered free
 *    list and merged with any free neighbour it touches.
 */
static void slob_free(void *block, int size)
{
	struct slob_page *sp;
	slob_t *prev, *next, *b = (slob_t *)block;
	slobidx_t units;
	unsigned long flags;

	if (!block)
		return;
	BUG_ON(!size);

	sp = (struct slob_page *)virt_to_page(block);
	units = SLOB_UNITS(size);

	spin_lock_irqsave(&slob_lock, flags);

	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
		/* Go directly to page allocator. Do not pass slob allocator */
		if (slob_page_free(sp))
			clear_slob_page_free(sp);
		clear_slob_page(sp);
		free_slob_page(sp);
		free_page((unsigned long)b);
		goto out;
	}

	if (!slob_page_free(sp)) {
		/* This slob page is about to become partially free. Easy! */
		sp->units = units;
		sp->free = b;
		/* sole free block: its next pointer is the end of the page */
		set_slob(b, units,
			(void *)((unsigned long)(b +
					SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
		set_slob_page_free(sp);
		goto out;
	}

	/*
	 * Otherwise the page is already partially free, so find reinsertion
	 * point.
	 */
	sp->units += units;

	if (b < sp->free) {
		/*
		 * New head of the free list. If it ends exactly where the old
		 * head starts, absorb the old head so the two do not remain
		 * as separate fragments.
		 */
		if (b + units == sp->free) {
			units += slob_units(sp->free);
			sp->free = slob_next(sp->free);
		}
		set_slob(b, units, sp->free);
		sp->free = b;
	} else {
		/* find prev/next such that prev < b < next in address order */
		prev = sp->free;
		next = slob_next(prev);
		while (b > next) {
			prev = next;
			next = slob_next(prev);
		}

		/* merge with the following free block when adjacent */
		if (!slob_last(prev) && b + units == next) {
			units += slob_units(next);
			set_slob(b, units, slob_next(next));
		} else
			set_slob(b, units, next);

		/* merge with the preceding free block when adjacent */
		if (prev + slob_units(prev) == b) {
			units = slob_units(b) + slob_units(prev);
			set_slob(prev, units, slob_next(b));
		} else
			set_slob(prev, slob_units(prev), b);
	}
out:
	spin_unlock_irqrestore(&slob_lock, flags);
}

/*
 * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
 */

/*
 * Bookkeeping record for one large kmalloc allocation that was taken
 * straight from the page allocator. The record itself is allocated with
 * slob_alloc().
 */
struct bigblock {
	int order;		/* page order passed to __get_free_pages() */
	void *pages;		/* address returned to the kmalloc caller */
	struct bigblock *next;	/* next record on the bigblocks list */
};
typedef struct bigblock bigblock_t;

/* Singly-linked list of all live big blocks, newest first. */
static bigblock_t *bigblocks;

/* Protects the bigblocks list. */
static DEFINE_SPINLOCK(block_lock);


/*
 * __kmalloc: allocate 'size' bytes using the allocation flags 'gfp'.
 *
 * Requests smaller than PAGE_SIZE - SLOB_UNIT come from the slob heap, with
 * one extra unit in front of the returned pointer that records 'size'.
 * Larger requests are served by __get_free_pages() and tracked with a
 * bigblock_t record on the bigblocks list.
 *
 * Returns the new object, or NULL if memory could not be obtained.
 */
void *__kmalloc(size_t size, gfp_t gfp)
{
	slob_t *m;
	bigblock_t *bb;
	unsigned long flags;

	if (size < PAGE_SIZE - SLOB_UNIT) {
		m = slob_alloc(size + SLOB_UNIT, gfp, 0);
		/* never return m + 1 for a failed allocation: it is not NULL */
		if (!m)
			return NULL;
		m->units = size;
		return m + 1;
	}

	bb = slob_alloc(sizeof(bigblock_t), gfp, 0);
	if (!bb)
		return NULL;

	bb->order = get_order(size);
	bb->pages = (void *)__get_free_pages(gfp, bb->order);

	if (bb->pages) {
		spin_lock_irqsave(&block_lock, flags);
		bb->next = bigblocks;
		bigblocks = bb;
		spin_unlock_irqrestore(&block_lock, flags);
		return bb->pages;
	}

	/* page allocation failed: give the tracking record back */
	slob_free(bb, sizeof(bigblock_t));
	return NULL;
}
EXPORT_SYMBOL(__kmalloc);

/**
 * krealloc - reallocate memory. The contents will remain unchanged.
 *
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * A new object is always allocated; the old contents are copied into it up
 * to the lesser of @new_size and ksize(@p), and the old object is freed.
 * If @p is %NULL, krealloc() behaves exactly like kmalloc().  If @new_size
 * is 0 and @p is not a %NULL pointer, the object pointed to is freed and
 * %NULL is returned.  If the new allocation fails, %NULL is returned and
 * @p is left untouched.
 */
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
	void *ret;
	size_t copy_len;

	if (unlikely(!p))
		return kmalloc_track_caller(new_size, flags);

	if (unlikely(!new_size)) {
		kfree(p);
		return NULL;
	}

	ret = kmalloc_track_caller(new_size, flags);
	if (!ret)
		return ret;

	copy_len = ksize(p);
	if (new_size < copy_len)
		copy_len = new_size;
	memcpy(ret, p, copy_len);
	kfree(p);

	return ret;
}
EXPORT_SYMBOL(krealloc);

void kfree(const void *block)
{
	struct slob_page *sp;
	slob_t *m;
	bigblock_t *bb, **last = &bigblocks;
	unsigned long flags;

	if (!block)
		return;

	sp = (struct slob_page *)virt_to_page(block);
	if (!slob_page(sp)) {
		/* on the big block list */
		spin_lock_irqsave(&block_lock, flags);
		for (bb = bigblocks; bb; last = &bb->next, bb = bb->next) {
			if (bb->pages == block) {
				*last = bb->next;
				spin_unlock_irqrestore(&block_lock, flags);
				free_pages((unsigned long)block, bb->order);
				slob_free(bb, sizeof(bigblock_t));
				return;
			}
		}
		spin_unlock_irqrestore(&block_lock, flags);
		WARN_ON(1);
		return;
	}

	m = (slob_t *)block - 1;
	slob_free(m, m->units + SLOB_UNIT);
	return;
}

EXPORT_SYMBOL(kfree);

/*
 * ksize: report the size associated with a kmalloc'ed object.
 *
 * Returns 0 for NULL. For a big block the result is PAGE_SIZE << order of
 * its pages. For a slob object it is the size recorded in the unit in front
 * of the object plus SLOB_UNIT.
 *
 * NOTE(review): a pointer into a non-slob page that is not on the bigblocks
 * list falls through to the slob header read below - confirm callers never
 * pass such a pointer.
 */
size_t ksize(const void *block)
{
	struct slob_page *sp;
	bigblock_t *bb;
	unsigned long flags;

	if (!block)
		return 0;

	sp = (struct slob_page *)virt_to_page(block);
	if (!slob_page(sp)) {
		spin_lock_irqsave(&block_lock, flags);
		for (bb = bigblocks; bb; bb = bb->next)
			if (bb->pages == block) {
				/* read the order while the list is still locked */
				size_t bytes = PAGE_SIZE << bb->order;

				/* release the lock we took: block_lock, not slob_lock */
				spin_unlock_irqrestore(&block_lock, flags);
				return bytes;
			}
		spin_unlock_irqrestore(&block_lock, flags);
	}

	return ((slob_t *)block - 1)->units + SLOB_UNIT;
}

/*
 * Descriptor of an emulated slab cache, filled in by kmem_cache_create().
 */
struct kmem_cache {
	unsigned int size, align;	/* object size (incl. RCU footer) and alignment, in bytes */
	unsigned long flags;		/* SLAB_* flags given at creation */
	const char *name;		/* caller's name pointer, stored as-is (not copied) */
	void (*ctor)(void *, struct kmem_cache *, unsigned long);	/* optional constructor */
};

/*
 * kmem_cache_create: set up a cache descriptor for objects of 'size' bytes.
 *
 * name:  stored by pointer, not copied.
 * size:  object size; grown by sizeof(struct slob_rcu) for
 *        SLAB_DESTROY_BY_RCU caches so the RCU footer fits behind the object.
 * align: minimum alignment; SLAB_HWCACHE_ALIGN raises it to SLOB_ALIGN.
 * flags: SLAB_* cache flags. These are not gfp flags, so the descriptor
 *        itself is allocated with GFP_KERNEL rather than with 'flags'.
 * ctor:  optional constructor, run by kmem_cache_alloc().
 * dtor:  accepted for interface compatibility, otherwise ignored.
 *
 * Returns the new cache, or NULL on allocation failure (panics instead if
 * SLAB_PANIC is set).
 */
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
	size_t align, unsigned long flags,
	void (*ctor)(void*, struct kmem_cache *, unsigned long),
	void (*dtor)(void*, struct kmem_cache *, unsigned long))
{
	struct kmem_cache *c;

	c = slob_alloc(sizeof(struct kmem_cache), GFP_KERNEL, 0);

	if (c) {
		c->name = name;
		c->size = size;
		if (flags & SLAB_DESTROY_BY_RCU) {
			/* leave room for rcu footer at the end of object */
			c->size += sizeof(struct slob_rcu);
		}
		c->flags = flags;
		c->ctor = ctor;
		/* ignore alignment unless it's forced */
		c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
		if (c->align < align)
			c->align = align;
	} else if (flags & SLAB_PANIC)
		panic("Cannot create slab cache %s\n", name);

	return c;
}
EXPORT_SYMBOL(kmem_cache_create);

/*
 * kmem_cache_destroy: give the cache descriptor back to the slob heap.
 * Objects allocated from the cache are not tracked and not touched here.
 */
void kmem_cache_destroy(struct kmem_cache *c)
{
	slob_free(c, sizeof(*c));
}
EXPORT_SYMBOL(kmem_cache_destroy);

/*
 * kmem_cache_alloc: allocate one object from cache 'c'.
 *
 * Objects smaller than PAGE_SIZE come from the slob heap with the cache's
 * alignment; larger ones come straight from __get_free_pages(). The
 * constructor, if any, runs only on a successfully allocated object.
 *
 * Returns the object, or NULL when the allocation failed.
 */
void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags)
{
	void *b;

	if (c->size < PAGE_SIZE)
		b = slob_alloc(c->size, flags, c->align);
	else
		b = (void *)__get_free_pages(flags, get_order(c->size));

	/* do not hand a NULL object to the constructor */
	if (b && c->ctor)
		c->ctor(b, c, 0);

	return b;
}
EXPORT_SYMBOL(kmem_cache_alloc);

/*
 * kmem_cache_zalloc: like kmem_cache_alloc(), but the c->size bytes of the
 * returned object are zeroed. Returns NULL if the allocation failed.
 */
void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags)
{
	void *ret = kmem_cache_alloc(c, flags);

	if (!ret)
		return ret;

	memset(ret, 0, c->size);
	return ret;
}
EXPORT_SYMBOL(kmem_cache_zalloc);

/*
 * __kmem_cache_free: release a cache object of 'size' bytes. Objects of
 * PAGE_SIZE or more go back to the page allocator, smaller ones to the
 * slob heap - the same split kmem_cache_alloc() uses.
 */
static void __kmem_cache_free(void *b, int size)
{
	if (size >= PAGE_SIZE) {
		free_pages((unsigned long)b, get_order(size));
		return;
	}

	slob_free(b, size);
}

/*
 * kmem_rcu_free: RCU callback queued by kmem_cache_free(). 'head' is the
 * first member of the slob_rcu footer at the tail of the object, so the
 * object start is recovered by stepping back from the footer.
 */
static void kmem_rcu_free(struct rcu_head *head)
{
	struct slob_rcu *footer = (struct slob_rcu *)head;
	int obj_size = footer->size;
	void *obj = (void *)footer - (obj_size - sizeof(struct slob_rcu));

	__kmem_cache_free(obj, obj_size);
}

/*
 * kmem_cache_free: release object 'b' of cache 'c'.
 *
 * For SLAB_DESTROY_BY_RCU caches the release is deferred through call_rcu(),
 * using the slob_rcu footer reserved at the end of the object. All other
 * caches free the object immediately.
 */
void kmem_cache_free(struct kmem_cache *c, void *b)
{
	struct slob_rcu *slob_rcu;

	if (!unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
		__kmem_cache_free(b, c->size);
		return;
	}

	/* the footer occupies the last sizeof(struct slob_rcu) bytes */
	slob_rcu = b + (c->size - sizeof(struct slob_rcu));
	INIT_RCU_HEAD(&slob_rcu->head);
	slob_rcu->size = c->size;
	call_rcu(&slob_rcu->head, kmem_rcu_free);
}
EXPORT_SYMBOL(kmem_cache_free);

/*
 * kmem_cache_size: object size recorded for the cache (includes the RCU
 * footer for SLAB_DESTROY_BY_RCU caches).
 */
unsigned int kmem_cache_size(struct kmem_cache *c)
{
	unsigned int object_size = c->size;

	return object_size;
}
EXPORT_SYMBOL(kmem_cache_size);

/*
 * kmem_cache_name: the name pointer that was passed to kmem_cache_create().
 */
const char *kmem_cache_name(struct kmem_cache *c)
{
	const char *cache_name = c->name;

	return cache_name;
}
EXPORT_SYMBOL(kmem_cache_name);

/*
 * kmem_cache_shrink: stub. Does nothing with the cache and always
 * returns 0.
 */
int kmem_cache_shrink(struct kmem_cache *d)
{
	return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);

/*
 * kmem_ptr_validate: stub. Performs no check and always returns 0,
 * whatever cache and pointer are passed in.
 */
int kmem_ptr_validate(struct kmem_cache *a, const void *b)
{
	return 0;
}

/*
 * kmem_cache_init: intentionally empty; this allocator performs no work
 * in its init hook.
 */
void __init kmem_cache_init(void)
{
}
Commit	Line	Data
10cef602 MM	1	/*
	2	* SLOB Allocator: Simple List Of Blocks
	3	*
	4	* Matt Mackall <mpm@selenic.com> 12/30/03
	5	*
	6	* How SLOB works:
	7	*
	8	* The core of SLOB is a traditional K&R style heap allocator, with
	9	* support for returning aligned objects. The granularity of this
95b35127 NP	10	* allocator is 4 bytes on 32-bit and 8 bytes on 64-bit, though it
	11	* could be as low as 2 if the compiler alignment requirements allow.
	12	*
	13	* The slob heap is a linked list of pages from __get_free_page, and
	14	* within each page, there is a singly-linked list of free blocks (slob_t).
	15	* The heap is grown on demand and allocation from the heap is currently
	16	* first-fit.
10cef602 MM	17	*
10cef602 MM	18	* Above this is an implementation of kmalloc/kfree. Blocks returned
95b35127	19	* from kmalloc are 4-byte aligned and prepended with a 4-byte header.
10cef602 MM	20	* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
	21	* __get_free_pages directly so that it can return page-aligned blocks
	22	* and keeps a linked list of such pages and their orders. These
	23	* objects are detected in kfree() by their page alignment.
	24	*
	25	* SLAB is emulated on top of SLOB by simply calling constructors and
95b35127 NP	26	* destructors for every SLAB allocation. Objects are returned with the
	27	* 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
	28	* case the low-level allocator will fragment blocks to create the proper
	29	* alignment. Again, objects of page-size or greater are allocated by
	30	* calling __get_free_pages. As SLAB objects know their size, no separate
	31	* size bookkeeping is necessary and there is essentially no allocation
	32	* space overhead.
10cef602 MM	33	*/
10cef602 MM	34
95b35127	35	#include <linux/kernel.h>
10cef602 MM	36	#include <linux/slab.h>
	37	#include <linux/mm.h>
	38	#include <linux/cache.h>
	39	#include <linux/init.h>
	40	#include <linux/module.h>
afc0cedb	41	#include <linux/rcupdate.h>
95b35127 NP	42	#include <linux/list.h>
	43	#include <asm/atomic.h>
	44
	45	/* SLOB_MIN_ALIGN == sizeof(long) */
	46	#if BITS_PER_BYTE == 32
	47	#define SLOB_MIN_ALIGN 4
	48	#else
	49	#define SLOB_MIN_ALIGN 8
	50	#endif
10cef602	51
95b35127 NP	52	/*
	53	* slob_block has a field 'units', which indicates size of block if +ve,
	54	* or offset of next block if -ve (in SLOB_UNITs).
	55	*
	56	* Free blocks of size 1 unit simply contain the offset of the next block.
	57	* Those with larger size contain their size in the first SLOB_UNIT of
	58	* memory, and the offset of the next free block in the second SLOB_UNIT.
	59	*/
	60	#if PAGE_SIZE <= (32767 * SLOB_MIN_ALIGN)
	61	typedef s16 slobidx_t;
	62	#else
	63	typedef s32 slobidx_t;
	64	#endif
	65
	66	/*
	67	* Align struct slob_block to long for now, but can some embedded
	68	* architectures get away with less?
	69	*/
10cef602	70	struct slob_block {
95b35127 NP	71	slobidx_t units;
95b35127 NP	72	} __attribute__((aligned(SLOB_MIN_ALIGN)));
10cef602 MM	73	typedef struct slob_block slob_t;
10cef602 MM	74
95b35127 NP	75	/*
	76	* We use struct page fields to manage some slob allocation aspects,
	77	* however to avoid the horrible mess in include/linux/mm_types.h, we'll
	78	* just define our own struct page type variant here.
	79	*/
	80	struct slob_page {
	81	union {
	82	struct {
	83	unsigned long flags; /* mandatory */
	84	atomic_t _count; /* mandatory */
	85	slobidx_t units; /* free units left in page */
	86	unsigned long pad[2];
	87	slob_t free; / first free slob_t in page */
	88	struct list_head list; /* linked list of free pages */
	89	};
	90	struct page page;
	91	};
	92	};
	93	static inline void struct_slob_page_wrong_size(void)
	94	{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }
	95
	96	/*
	97	* free_slob_page: call before a slob_page is returned to the page allocator.
	98	*/
	99	static inline void free_slob_page(struct slob_page *sp)
	100	{
	101	reset_page_mapcount(&sp->page);
	102	sp->page.mapping = NULL;
	103	}
	104
	105	/*
	106	* All (partially) free slob pages go on this list.
	107	*/
	108	static LIST_HEAD(free_slob_pages);
	109
	110	/*
	111	* slob_page: True for all slob pages (false for bigblock pages)
	112	*/
	113	static inline int slob_page(struct slob_page *sp)
	114	{
	115	return test_bit(PG_active, &sp->flags);
	116	}
	117
	118	static inline void set_slob_page(struct slob_page *sp)
	119	{
	120	__set_bit(PG_active, &sp->flags);
	121	}
	122
	123	static inline void clear_slob_page(struct slob_page *sp)
	124	{
	125	__clear_bit(PG_active, &sp->flags);
	126	}
	127
	128	/*
	129	* slob_page_free: true for pages on free_slob_pages list.
	130	*/
	131	static inline int slob_page_free(struct slob_page *sp)
	132	{
	133	return test_bit(PG_private, &sp->flags);
	134	}
	135
	136	static inline void set_slob_page_free(struct slob_page *sp)
	137	{
	138	list_add(&sp->list, &free_slob_pages);
139	__set_bit(PG_private, &sp->flags);
140	}
141
142	static inline void clear_slob_page_free(struct slob_page *sp)
143	{
144	list_del(&sp->list);
145	__clear_bit(PG_private, &sp->flags);
146	}
147
10cef602 MM	148	#define SLOB_UNIT sizeof(slob_t)
	149	#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
	150	#define SLOB_ALIGN L1_CACHE_BYTES
	151
afc0cedb NP	152	/*
	153	* struct slob_rcu is inserted at the tail of allocated slob blocks, which
	154	* were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
	155	* the block using call_rcu.
	156	*/
	157	struct slob_rcu {
	158	struct rcu_head head;
	159	int size;
	160	};
	161
95b35127 NP	162	/*
	163	* slob_lock protects all slob allocator structures.
	164	*/
10cef602	165	static DEFINE_SPINLOCK(slob_lock);
10cef602	166
95b35127 NP	167	/*
	168	* Encode the given size and next info into a free slob block s.
	169	*/
	170	static void set_slob(slob_t s, slobidx_t size, slob_t next)
	171	{
	172	slob_t base = (slob_t )((unsigned long)s & PAGE_MASK);
	173	slobidx_t offset = next - base;
bcb4ddb4	174
95b35127 NP	175	if (size > 1) {
	176	s[0].units = size;
	177	s[1].units = offset;
	178	} else
	179	s[0].units = -offset;
	180	}
10cef602	181
95b35127 NP	182	/*
	183	* Return the size of a slob block.
	184	*/
	185	static slobidx_t slob_units(slob_t *s)
	186	{
	187	if (s->units > 0)
	188	return s->units;
	189	return 1;
	190	}
	191
	192	/*
	193	* Return the next free slob block pointer after this one.
	194	*/
	195	static slob_t slob_next(slob_t s)
	196	{
	197	slob_t base = (slob_t )((unsigned long)s & PAGE_MASK);
	198	slobidx_t next;
	199
	200	if (s[0].units < 0)
	201	next = -s[0].units;
	202	else
	203	next = s[1].units;
	204	return base+next;
	205	}
	206
	207	/*
	208	* Returns true if s is the last free block in its page.
	209	*/
	210	static int slob_last(slob_t *s)
	211	{
	212	return !((unsigned long)slob_next(s) & ~PAGE_MASK);
	213	}
	214
	215	/*
	216	* Allocate a slob block within a given slob_page sp.
	217	*/
	218	static void slob_page_alloc(struct slob_page sp, size_t size, int align)
10cef602 MM	219	{
	220	slob_t prev, cur, *aligned = 0;
	221	int delta = 0, units = SLOB_UNITS(size);
10cef602	222
95b35127 NP	223	for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
	224	slobidx_t avail = slob_units(cur);
	225
10cef602 MM	226	if (align) {
	227	aligned = (slob_t *)ALIGN((unsigned long)cur, align);
	228	delta = aligned - cur;
	229	}
95b35127 NP	230	if (avail >= units + delta) { /* room enough? */
	231	slob_t *next;
	232
10cef602	233	if (delta) { /* need to fragment head to align? */
95b35127 NP	234	next = slob_next(cur);
	235	set_slob(aligned, avail - delta, next);
	236	set_slob(cur, delta, aligned);
10cef602 MM	237	prev = cur;
10cef602 MM	238	cur = aligned;
95b35127	239	avail = slob_units(cur);
10cef602 MM	240	}
10cef602 MM	241
95b35127 NP	242	next = slob_next(cur);
	243	if (avail == units) { /* exact fit? unlink. */
	244	if (prev)
	245	set_slob(prev, slob_units(prev), next);
	246	else
	247	sp->free = next;
	248	} else { /* fragment */
	249	if (prev)
	250	set_slob(prev, slob_units(prev), cur + units);
	251	else
	252	sp->free = cur + units;
	253	set_slob(cur + units, avail - units, next);
10cef602 MM	254	}
10cef602 MM	255
95b35127 NP	256	sp->units -= units;
	257	if (!sp->units)
	258	clear_slob_page_free(sp);
10cef602 MM	259	return cur;
10cef602 MM	260	}
95b35127 NP	261	if (slob_last(cur))
	262	return NULL;
	263	}
	264	}
10cef602	265
95b35127 NP	266	/*
	267	* slob_alloc: entry point into the slob allocator.
	268	*/
	269	static void *slob_alloc(size_t size, gfp_t gfp, int align)
	270	{
	271	struct slob_page *sp;
	272	slob_t *b = NULL;
	273	unsigned long flags;
10cef602	274
95b35127 NP	275	spin_lock_irqsave(&slob_lock, flags);
	276	/* Iterate through each partially free page, try to find room */
	277	list_for_each_entry(sp, &free_slob_pages, list) {
	278	if (sp->units >= SLOB_UNITS(size)) {
	279	b = slob_page_alloc(sp, size, align);
	280	if (b)
	281	break;
10cef602 MM	282	}
10cef602 MM	283	}
95b35127 NP	284	spin_unlock_irqrestore(&slob_lock, flags);
	285
	286	/* Not enough space: must allocate a new page */
	287	if (!b) {
	288	b = (slob_t *)__get_free_page(gfp);
	289	if (!b)
	290	return 0;
	291	sp = (struct slob_page *)virt_to_page(b);
	292	set_slob_page(sp);
	293
	294	spin_lock_irqsave(&slob_lock, flags);
	295	sp->units = SLOB_UNITS(PAGE_SIZE);
	296	sp->free = b;
	297	INIT_LIST_HEAD(&sp->list);
	298	set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
	299	set_slob_page_free(sp);
	300	b = slob_page_alloc(sp, size, align);
	301	BUG_ON(!b);
	302	spin_unlock_irqrestore(&slob_lock, flags);
	303	}
	304	return b;
10cef602 MM	305	}
10cef602 MM	306
95b35127 NP	307	/*
	308	* slob_free: entry point into the slob allocator.
	309	*/
10cef602 MM	310	static void slob_free(void *block, int size)
10cef602 MM	311	{
95b35127 NP	312	struct slob_page *sp;
	313	slob_t prev, next, b = (slob_t )block;
	314	slobidx_t units;
10cef602 MM	315	unsigned long flags;
	316
	317	if (!block)
	318	return;
95b35127	319	BUG_ON(!size);
10cef602	320
95b35127 NP	321	sp = (struct slob_page *)virt_to_page(block);
95b35127 NP	322	units = SLOB_UNITS(size);
10cef602	323
10cef602	324	spin_lock_irqsave(&slob_lock, flags);
10cef602	325
95b35127 NP	326	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
	327	/* Go directly to page allocator. Do not pass slob allocator */
	328	if (slob_page_free(sp))
	329	clear_slob_page_free(sp);
	330	clear_slob_page(sp);
	331	free_slob_page(sp);
	332	free_page((unsigned long)b);
	333	goto out;
	334	}
10cef602	335
95b35127 NP	336	if (!slob_page_free(sp)) {
	337	/* This slob page is about to become partially free. Easy! */
	338	sp->units = units;
	339	sp->free = b;
	340	set_slob(b, units,
	341	(void *)((unsigned long)(b +
	342	SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
	343	set_slob_page_free(sp);
	344	goto out;
	345	}
	346
	347	/*
	348	* Otherwise the page is already partially free, so find reinsertion
	349	* point.
	350	*/
	351	sp->units += units;
10cef602	352
95b35127 NP	353	if (b < sp->free) {
	354	set_slob(b, units, sp->free);
	355	sp->free = b;
	356	} else {
	357	prev = sp->free;
	358	next = slob_next(prev);
	359	while (b > next) {
	360	prev = next;
	361	next = slob_next(prev);
	362	}
10cef602	363
95b35127 NP	364	if (!slob_last(prev) && b + units == next) {
	365	units += slob_units(next);
	366	set_slob(b, units, slob_next(next));
	367	} else
	368	set_slob(b, units, next);
	369
	370	if (prev + slob_units(prev) == b) {
	371	units = slob_units(b) + slob_units(prev);
	372	set_slob(prev, units, slob_next(b));
	373	} else
	374	set_slob(prev, slob_units(prev), b);
	375	}
	376	out:
10cef602 MM	377	spin_unlock_irqrestore(&slob_lock, flags);
	378	}
	379
95b35127 NP	380	/*
	381	* End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
	382	*/
	383
	384	struct bigblock {
	385	int order;
	386	void *pages;
	387	struct bigblock *next;
	388	};
	389	typedef struct bigblock bigblock_t;
	390
	391	static bigblock_t *bigblocks;
	392
	393	static DEFINE_SPINLOCK(block_lock);
	394
	395
2e892f43	396	void *__kmalloc(size_t size, gfp_t gfp)
10cef602 MM	397	{
	398	slob_t *m;
	399	bigblock_t *bb;
	400	unsigned long flags;
	401
	402	if (size < PAGE_SIZE - SLOB_UNIT) {
	403	m = slob_alloc(size + SLOB_UNIT, gfp, 0);
95b35127 NP	404	if (m)
	405	m->units = size;
	406	return m+1;
10cef602 MM	407	}
	408
	409	bb = slob_alloc(sizeof(bigblock_t), gfp, 0);
	410	if (!bb)
	411	return 0;
	412
4ab688c5	413	bb->order = get_order(size);
10cef602 MM	414	bb->pages = (void *)__get_free_pages(gfp, bb->order);
	415
	416	if (bb->pages) {
	417	spin_lock_irqsave(&block_lock, flags);
	418	bb->next = bigblocks;
	419	bigblocks = bb;
	420	spin_unlock_irqrestore(&block_lock, flags);
	421	return bb->pages;
	422	}
	423
	424	slob_free(bb, sizeof(bigblock_t));
	425	return 0;
	426	}
2e892f43	427	EXPORT_SYMBOL(__kmalloc);
10cef602	428
fd76bab2 PE	429	/**
	430	* krealloc - reallocate memory. The contents will remain unchanged.
	431	*
	432	* @p: object to reallocate memory for.
	433	* @new_size: how many bytes of memory are required.
	434	* @flags: the type of memory to allocate.
	435	*
	436	* The contents of the object pointed to are preserved up to the
	437	* lesser of the new and old sizes. If @p is %NULL, krealloc()
	438	* behaves exactly like kmalloc(). If @size is 0 and @p is not a
	439	* %NULL pointer, the object pointed to is freed.
	440	*/
	441	void krealloc(const void p, size_t new_size, gfp_t flags)
	442	{
	443	void *ret;
	444
	445	if (unlikely(!p))
	446	return kmalloc_track_caller(new_size, flags);
	447
	448	if (unlikely(!new_size)) {
	449	kfree(p);
	450	return NULL;
	451	}
	452
	453	ret = kmalloc_track_caller(new_size, flags);
	454	if (ret) {
	455	memcpy(ret, p, min(new_size, ksize(p)));
	456	kfree(p);
	457	}
	458	return ret;
	459	}
	460	EXPORT_SYMBOL(krealloc);
	461
10cef602 MM	462	void kfree(const void *block)
10cef602 MM	463	{
95b35127 NP	464	struct slob_page *sp;
95b35127 NP	465	slob_t *m;
10cef602 MM	466	bigblock_t bb, *last = &bigblocks;
	467	unsigned long flags;
	468
	469	if (!block)
	470	return;
	471
95b35127 NP	472	sp = (struct slob_page *)virt_to_page(block);
	473	if (!slob_page(sp)) {
	474	/* on the big block list */
10cef602 MM	475	spin_lock_irqsave(&block_lock, flags);
	476	for (bb = bigblocks; bb; last = &bb->next, bb = bb->next) {
	477	if (bb->pages == block) {
	478	*last = bb->next;
	479	spin_unlock_irqrestore(&block_lock, flags);
	480	free_pages((unsigned long)block, bb->order);
	481	slob_free(bb, sizeof(bigblock_t));
	482	return;
	483	}
	484	}
	485	spin_unlock_irqrestore(&block_lock, flags);
95b35127 NP	486	WARN_ON(1);
95b35127 NP	487	return;
10cef602 MM	488	}
10cef602 MM	489
95b35127 NP	490	m = (slob_t *)block - 1;
95b35127 NP	491	slob_free(m, m->units + SLOB_UNIT);
10cef602 MM	492	return;
	493	}
	494
	495	EXPORT_SYMBOL(kfree);
	496
fd76bab2	497	size_t ksize(const void *block)
10cef602	498	{
95b35127	499	struct slob_page *sp;
10cef602 MM	500	bigblock_t *bb;
	501	unsigned long flags;
	502
	503	if (!block)
	504	return 0;
	505
95b35127 NP	506	sp = (struct slob_page *)virt_to_page(block);
95b35127 NP	507	if (!slob_page(sp)) {
10cef602 MM	508	spin_lock_irqsave(&block_lock, flags);
	509	for (bb = bigblocks; bb; bb = bb->next)
	510	if (bb->pages == block) {
	511	spin_unlock_irqrestore(&slob_lock, flags);
	512	return PAGE_SIZE << bb->order;
	513	}
	514	spin_unlock_irqrestore(&block_lock, flags);
	515	}
	516
95b35127	517	return ((slob_t *)block - 1)->units + SLOB_UNIT;
10cef602 MM	518	}
	519
	520	struct kmem_cache {
	521	unsigned int size, align;
afc0cedb	522	unsigned long flags;
10cef602 MM	523	const char *name;
10cef602 MM	524	void (ctor)(void , struct kmem_cache *, unsigned long);
10cef602 MM	525	};
	526
	527	struct kmem_cache kmem_cache_create(const char name, size_t size,
	528	size_t align, unsigned long flags,
	529	void (ctor)(void, struct kmem_cache *, unsigned long),
	530	void (dtor)(void, struct kmem_cache *, unsigned long))
	531	{
	532	struct kmem_cache *c;
	533
	534	c = slob_alloc(sizeof(struct kmem_cache), flags, 0);
	535
	536	if (c) {
	537	c->name = name;
	538	c->size = size;
afc0cedb	539	if (flags & SLAB_DESTROY_BY_RCU) {
afc0cedb NP	540	/* leave room for rcu footer at the end of object */
	541	c->size += sizeof(struct slob_rcu);
	542	}
	543	c->flags = flags;
10cef602	544	c->ctor = ctor;
10cef602	545	/* ignore alignment unless it's forced */
5af60839	546	c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
10cef602 MM	547	if (c->align < align)
10cef602 MM	548	c->align = align;
bc0055ae AM	549	} else if (flags & SLAB_PANIC)
bc0055ae AM	550	panic("Cannot create slab cache %s\n", name);
10cef602 MM	551
	552	return c;
	553	}
	554	EXPORT_SYMBOL(kmem_cache_create);
	555
133d205a	556	void kmem_cache_destroy(struct kmem_cache *c)
10cef602 MM	557	{
10cef602 MM	558	slob_free(c, sizeof(struct kmem_cache));
10cef602 MM	559	}
	560	EXPORT_SYMBOL(kmem_cache_destroy);
	561
	562	void kmem_cache_alloc(struct kmem_cache c, gfp_t flags)
	563	{
	564	void *b;
	565
	566	if (c->size < PAGE_SIZE)
	567	b = slob_alloc(c->size, flags, c->align);
	568	else
4ab688c5	569	b = (void *)__get_free_pages(flags, get_order(c->size));
10cef602 MM	570
10cef602 MM	571	if (c->ctor)
a35afb83	572	c->ctor(b, c, 0);
10cef602 MM	573
	574	return b;
	575	}
	576	EXPORT_SYMBOL(kmem_cache_alloc);
	577
a8c0f9a4 PE	578	void kmem_cache_zalloc(struct kmem_cache c, gfp_t flags)
	579	{
	580	void *ret = kmem_cache_alloc(c, flags);
	581	if (ret)
	582	memset(ret, 0, c->size);
	583
	584	return ret;
	585	}
	586	EXPORT_SYMBOL(kmem_cache_zalloc);
	587
afc0cedb	588	static void __kmem_cache_free(void *b, int size)
10cef602	589	{
afc0cedb NP	590	if (size < PAGE_SIZE)
afc0cedb NP	591	slob_free(b, size);
10cef602	592	else
afc0cedb NP	593	free_pages((unsigned long)b, get_order(size));
	594	}
	595
	596	static void kmem_rcu_free(struct rcu_head *head)
	597	{
	598	struct slob_rcu slob_rcu = (struct slob_rcu )head;
	599	void b = (void )slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));
	600
	601	__kmem_cache_free(b, slob_rcu->size);
	602	}
	603
	604	void kmem_cache_free(struct kmem_cache c, void b)
	605	{
	606	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
	607	struct slob_rcu *slob_rcu;
	608	slob_rcu = b + (c->size - sizeof(struct slob_rcu));
	609	INIT_RCU_HEAD(&slob_rcu->head);
	610	slob_rcu->size = c->size;
	611	call_rcu(&slob_rcu->head, kmem_rcu_free);
	612	} else {
afc0cedb NP	613	__kmem_cache_free(b, c->size);
afc0cedb NP	614	}
10cef602 MM	615	}
	616	EXPORT_SYMBOL(kmem_cache_free);
	617
	618	unsigned int kmem_cache_size(struct kmem_cache *c)
	619	{
	620	return c->size;
	621	}
	622	EXPORT_SYMBOL(kmem_cache_size);
	623
	624	const char kmem_cache_name(struct kmem_cache c)
	625	{
	626	return c->name;
	627	}
	628	EXPORT_SYMBOL(kmem_cache_name);
	629
2e892f43 CL	630	int kmem_cache_shrink(struct kmem_cache *d)
	631	{
	632	return 0;
	633	}
	634	EXPORT_SYMBOL(kmem_cache_shrink);
	635
55935a34	636	int kmem_ptr_validate(struct kmem_cache a, const void b)
2e892f43 CL	637	{
	638	return 0;
	639	}
	640
bcb4ddb4 DG	641	void __init kmem_cache_init(void)
bcb4ddb4 DG	642	{
10cef602	643	}