[net-next-2.6.git] / mm / page_cgroup.c

#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/bit_spinlock.h>
#include <linux/page_cgroup.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/cgroup.h>

/*
 * Put one page_cgroup entry into its pristine state: point it at the
 * struct page that backs @pfn, with no owning mem_cgroup and no flags.
 */
static void __meminit
__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
{
	pc->page = pfn_to_page(pfn);
	pc->mem_cgroup = NULL;
	pc->flags = 0;
}
/* Running total, in bytes, of the page_cgroup tables allocated by this file. */
static unsigned long total_usage;

#if !defined(CONFIG_SPARSEMEM)


/*
 * !CONFIG_SPARSEMEM variant: mark @pgdat as having no page_cgroup table
 * yet.  The table pointer is filled in later by alloc_node_page_cgroup().
 */
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
	pgdat->node_page_cgroup = NULL;
}

struct page_cgroup *lookup_page_cgroup(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);
	unsigned long offset;
	struct page_cgroup *base;

	base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
	if (unlikely(!base))
		return NULL;

	offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
	return base + offset;
}

/*
 * Allocate and initialise the page_cgroup table of node @nid from bootmem.
 *
 * One entry is created for every pfn in the node's spanned range.  A node
 * that spans no pages is skipped successfully.
 *
 * Returns 0 on success or -ENOMEM when the bootmem allocation fails.
 */
static int __init alloc_node_page_cgroup(int nid)
{
	struct pglist_data *pgdat = NODE_DATA(nid);
	unsigned long first_pfn = pgdat->node_start_pfn;
	unsigned long count = pgdat->node_spanned_pages;
	unsigned long bytes, i;
	struct page_cgroup *table;

	if (!count)
		return 0;

	bytes = sizeof(struct page_cgroup) * count;
	table = __alloc_bootmem_node_nopanic(pgdat, bytes, PAGE_SIZE,
					     __pa(MAX_DMA_ADDRESS));
	if (!table)
		return -ENOMEM;

	for (i = 0; i < count; i++)
		__init_page_cgroup(&table[i], first_pfn + i);

	/* Publish the table only once every entry has been set up. */
	pgdat->node_page_cgroup = table;
	total_usage += bytes;
	return 0;
}

/*
 * Boot-time setup for the !CONFIG_SPARSEMEM case: allocate one page_cgroup
 * table per online node.
 *
 * Does nothing when the memory cgroup subsystem is disabled.  If any node's
 * table cannot be allocated the kernel panics after printing a hint about
 * the cgroup_disable=memory boot option.
 */
void __init page_cgroup_init(void)
{
	int nid;

	if (mem_cgroup_subsys.disabled)
		return;

	for_each_online_node(nid)  {
		if (alloc_node_page_cgroup(nid))
			goto fail;
	}
	/* total_usage is unsigned long, so it must be printed with %lu. */
	printk(KERN_INFO "allocated %lu bytes of page_cgroup\n", total_usage);
	printk(KERN_INFO "please try cgroup_disable=memory option if you"
	" don't want\n");
	return;
fail:
	printk(KERN_CRIT "allocation of page_cgroup was failed.\n");
	printk(KERN_CRIT "please try cgroup_disable=memory boot option\n");
	panic("Out of memory");
}

#else /* CONFIG_SPARSEMEM */

struct page_cgroup *lookup_page_cgroup(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);
	struct mem_section *section = __pfn_to_section(pfn);

	return section->page_cgroup + pfn;
}

/*
 * Allocate (when needed) and initialise the page_cgroup table of the memory
 * section that starts at @pfn.  PAGES_PER_SECTION entries are initialised
 * beginning at @pfn; the callers in this file pass section-aligned pfns.
 *
 * The bootmem allocator is only called while slab_is_available() returns
 * false; once slab is up, kmalloc_node() is tried first with vmalloc_node()
 * as the fallback.
 *
 * Returns 0 on success or -ENOMEM when no table could be allocated.
 */
static int __init_refok init_section_page_cgroup(unsigned long pfn)
{
	struct mem_section *section;
	struct page_cgroup *base, *pc;
	unsigned long table_size;
	unsigned long index;
	int nid;

	section = __pfn_to_section(pfn);

	if (!section->page_cgroup) {
		nid = page_to_nid(pfn_to_page(pfn));
		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
		if (slab_is_available()) {
			/*
			 * A kmalloc failure is not fatal because vmalloc is
			 * tried next, so do not warn about it.
			 */
			base = kmalloc_node(table_size,
					GFP_KERNEL | __GFP_NOWARN, nid);
			if (!base)
				base = vmalloc_node(table_size, nid);
		} else {
			base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
				table_size,
				PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
		}
	} else {
		/*
		 * We don't have to allocate page_cgroup again, but
		 * address of memmap may be changed. So, we have to initialize
		 * again.
		 */
		base = section->page_cgroup + pfn;
		/* Nothing new was allocated, so do not account it again. */
		table_size = 0;
		/* check address of memmap is changed or not. */
		if (base->page == pfn_to_page(pfn))
			return 0;
	}

	if (!base) {
		printk(KERN_ERR "page cgroup allocation failure\n");
		return -ENOMEM;
	}

	for (index = 0; index < PAGES_PER_SECTION; index++) {
		pc = base + index;
		__init_page_cgroup(pc, pfn + index);
	}

	section = __pfn_to_section(pfn);
	/* Store the table biased by -pfn so lookups can simply add the pfn. */
	section->page_cgroup = base - pfn;
	total_usage += table_size;
	return 0;
}
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Release the page_cgroup table of the section containing @pfn, if any.
 *
 * vmalloc'ed tables go back through vfree(), other tables through kfree().
 * A table whose backing page is PageReserved (treated here as a bootmem
 * allocation) is neither freed nor detached from the section.
 */
void __free_page_cgroup(unsigned long pfn)
{
	struct mem_section *sec = __pfn_to_section(pfn);
	struct page_cgroup *table;

	if (!sec || !sec->page_cgroup)
		return;

	/* Undo the -pfn bias to recover the address that was allocated. */
	table = sec->page_cgroup + pfn;

	if (is_vmalloc_addr(table))
		vfree(table);
	else if (!PageReserved(virt_to_page(table)))
		kfree(table);
	else
		return;	/* Is bootmem ? Then keep the table in place. */

	sec->page_cgroup = NULL;
}

/*
 * Set up page_cgroup tables for every present section overlapping the pfn
 * range [@start_pfn, @start_pfn + @nr_pages).  The range is widened to
 * whole sections.  @nid is not used here.
 *
 * Returns 0 on success.  On any failure, the tables of all sections in the
 * range are released via __free_page_cgroup() and -ENOMEM is returned.
 */
int __meminit online_page_cgroup(unsigned long start_pfn,
			unsigned long nr_pages,
			int nid)
{
	const unsigned long first = start_pfn & ~(PAGES_PER_SECTION - 1);
	const unsigned long limit = ALIGN(start_pfn + nr_pages,
					  PAGES_PER_SECTION);
	unsigned long pfn;

	for (pfn = first; pfn < limit; pfn += PAGES_PER_SECTION) {
		if (pfn_present(pfn) && init_section_page_cgroup(pfn))
			goto rollback;
	}
	return 0;

rollback:
	for (pfn = first; pfn < limit; pfn += PAGES_PER_SECTION)
		__free_page_cgroup(pfn);

	return -ENOMEM;
}

/*
 * Release the page_cgroup tables of every section overlapping the pfn
 * range [@start_pfn, @start_pfn + @nr_pages), widened to whole sections.
 * @nid is not used here.  Always returns 0.
 */
int __meminit offline_page_cgroup(unsigned long start_pfn,
		unsigned long nr_pages, int nid)
{
	unsigned long pfn = start_pfn & ~(PAGES_PER_SECTION - 1);
	const unsigned long limit = ALIGN(start_pfn + nr_pages,
					  PAGES_PER_SECTION);

	while (pfn < limit) {
		__free_page_cgroup(pfn);
		pfn += PAGES_PER_SECTION;
	}

	return 0;
}

/*
 * Memory hotplug notifier callback.
 *
 * MEM_GOING_ONLINE allocates page_cgroup tables for the range described by
 * the struct memory_notify in @arg; MEM_OFFLINE releases them.  Every other
 * action is ignored.
 *
 * Returns NOTIFY_OK, or the error from online_page_cgroup() converted with
 * notifier_from_errno().
 */
static int __meminit page_cgroup_callback(struct notifier_block *self,
			       unsigned long action, void *arg)
{
	struct memory_notify *notify = arg;
	int err = 0;

	if (action == MEM_GOING_ONLINE)
		err = online_page_cgroup(notify->start_pfn, notify->nr_pages,
					 notify->status_change_nid);
	else if (action == MEM_OFFLINE)
		offline_page_cgroup(notify->start_pfn, notify->nr_pages,
				    notify->status_change_nid);

	return err ? notifier_from_errno(err) : NOTIFY_OK;
}

#endif

/*
 * Boot-time setup for the CONFIG_SPARSEMEM case: walk every section below
 * max_pfn and set up a page_cgroup table for each present one, then
 * register page_cgroup_callback() as a memory hotplug notifier.
 *
 * Does nothing when the memory cgroup subsystem is disabled.  If a table
 * cannot be allocated the kernel panics after printing a hint about the
 * cgroup_disable=memory boot option.
 */
void __init page_cgroup_init(void)
{
	unsigned long pfn;
	int fail = 0;

	if (mem_cgroup_subsys.disabled)
		return;

	for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
		if (!pfn_present(pfn))
			continue;
		fail = init_section_page_cgroup(pfn);
	}
	if (fail) {
		printk(KERN_CRIT "try cgroup_disable=memory boot option\n");
		panic("Out of memory");
	} else {
		hotplug_memory_notifier(page_cgroup_callback, 0);
	}
	/* total_usage is unsigned long, so it must be printed with %lu. */
	printk(KERN_INFO "allocated %lu bytes of page_cgroup\n", total_usage);
	printk(KERN_INFO "please try cgroup_disable=memory option if you don't"
	" want\n");
}

/*
 * CONFIG_SPARSEMEM variant: tables hang off each mem_section rather than
 * the node, so there is nothing to set up for @pgdat.
 */
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
}

#endif
Commit	Line	Data
52d4b9ac KH	1	#include <linux/mm.h>
	2	#include <linux/mmzone.h>
	3	#include <linux/bootmem.h>
	4	#include <linux/bit_spinlock.h>
	5	#include <linux/page_cgroup.h>
	6	#include <linux/hash.h>
94b6da5a	7	#include <linux/slab.h>
52d4b9ac	8	#include <linux/memory.h>
4c821042	9	#include <linux/vmalloc.h>
94b6da5a	10	#include <linux/cgroup.h>
52d4b9ac KH	11
	12	static void __meminit
	13	__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
	14	{
	15	pc->flags = 0;
	16	pc->mem_cgroup = NULL;
	17	pc->page = pfn_to_page(pfn);
	18	}
	19	static unsigned long total_usage;
	20
	21	#if !defined(CONFIG_SPARSEMEM)
	22
	23
31168481	24	void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
52d4b9ac KH	25	{
	26	pgdat->node_page_cgroup = NULL;
	27	}
	28
	29	struct page_cgroup lookup_page_cgroup(struct page page)
	30	{
	31	unsigned long pfn = page_to_pfn(page);
	32	unsigned long offset;
	33	struct page_cgroup *base;
	34
	35	base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
	36	if (unlikely(!base))
	37	return NULL;
	38
	39	offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
	40	return base + offset;
	41	}
	42
	43	static int __init alloc_node_page_cgroup(int nid)
	44	{
	45	struct page_cgroup base, pc;
	46	unsigned long table_size;
	47	unsigned long start_pfn, nr_pages, index;
	48
	49	start_pfn = NODE_DATA(nid)->node_start_pfn;
	50	nr_pages = NODE_DATA(nid)->node_spanned_pages;
	51
653d22c0 KH	52	if (!nr_pages)
	53	return 0;
	54
52d4b9ac KH	55	table_size = sizeof(struct page_cgroup) * nr_pages;
	56
	57	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
	58	table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
	59	if (!base)
	60	return -ENOMEM;
	61	for (index = 0; index < nr_pages; index++) {
	62	pc = base + index;
	63	__init_page_cgroup(pc, start_pfn + index);
	64	}
	65	NODE_DATA(nid)->node_page_cgroup = base;
	66	total_usage += table_size;
	67	return 0;
	68	}
	69
	70	void __init page_cgroup_init(void)
	71	{
	72
	73	int nid, fail;
	74
94b6da5a KH	75	if (mem_cgroup_subsys.disabled)
	76	return;
	77
52d4b9ac KH	78	for_each_online_node(nid) {
	79	fail = alloc_node_page_cgroup(nid);
	80	if (fail)
	81	goto fail;
	82	}
	83	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
	84	printk(KERN_INFO "please try cgroup_disable=memory option if you"
	85	" don't want\n");
	86	return;
	87	fail:
	88	printk(KERN_CRIT "allocation of page_cgroup was failed.\n");
	89	printk(KERN_CRIT "please try cgroup_disable=memory boot option\n");
	90	panic("Out of memory");
	91	}
	92
	93	#else /* CONFIG_FLAT_NODE_MEM_MAP */
	94
	95	struct page_cgroup lookup_page_cgroup(struct page page)
	96	{
	97	unsigned long pfn = page_to_pfn(page);
	98	struct mem_section *section = __pfn_to_section(pfn);
	99
	100	return section->page_cgroup + pfn;
	101	}
	102
31168481	103	/* __alloc_bootmem...() is protected by !slab_available() */
feb16694	104	static int __init_refok init_section_page_cgroup(unsigned long pfn)
52d4b9ac KH	105	{
	106	struct mem_section *section;
	107	struct page_cgroup base, pc;
	108	unsigned long table_size;
	109	int nid, index;
	110
	111	section = __pfn_to_section(pfn);
	112
dc19f9db KH	113	if (!section->page_cgroup) {
	114	nid = page_to_nid(pfn_to_page(pfn));
	115	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
	116	if (slab_is_available()) {
	117	base = kmalloc_node(table_size, GFP_KERNEL, nid);
	118	if (!base)
	119	base = vmalloc_node(table_size, nid);
	120	} else {
	121	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
	122	table_size,
94b6da5a	123	PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
dc19f9db KH	124	}
	125	} else {
	126	/*
	127	* We don't have to allocate page_cgroup again, but
	128	* address of memmap may be changed. So, we have to initialize
	129	* again.
	130	*/
	131	base = section->page_cgroup + pfn;
	132	table_size = 0;
	133	/* check address of memmap is changed or not. */
	134	if (base->page == pfn_to_page(pfn))
	135	return 0;
94b6da5a	136	}
52d4b9ac KH	137
	138	if (!base) {
	139	printk(KERN_ERR "page cgroup allocation failure\n");
	140	return -ENOMEM;
	141	}
	142
	143	for (index = 0; index < PAGES_PER_SECTION; index++) {
	144	pc = base + index;
	145	__init_page_cgroup(pc, pfn + index);
	146	}
	147
	148	section = __pfn_to_section(pfn);
	149	section->page_cgroup = base - pfn;
	150	total_usage += table_size;
	151	return 0;
	152	}
	153	#ifdef CONFIG_MEMORY_HOTPLUG
	154	void __free_page_cgroup(unsigned long pfn)
	155	{
	156	struct mem_section *ms;
	157	struct page_cgroup *base;
	158
	159	ms = __pfn_to_section(pfn);
	160	if (!ms \|\| !ms->page_cgroup)
	161	return;
	162	base = ms->page_cgroup + pfn;
94b6da5a	163	if (is_vmalloc_addr(base)) {
52d4b9ac	164	vfree(base);
94b6da5a KH	165	ms->page_cgroup = NULL;
	166	} else {
	167	struct page *page = virt_to_page(base);
	168	if (!PageReserved(page)) { /* Is bootmem ? */
	169	kfree(base);
	170	ms->page_cgroup = NULL;
	171	}
	172	}
52d4b9ac KH	173	}
52d4b9ac KH	174
31168481	175	int __meminit online_page_cgroup(unsigned long start_pfn,
52d4b9ac KH	176	unsigned long nr_pages,
	177	int nid)
	178	{
	179	unsigned long start, end, pfn;
	180	int fail = 0;
	181
33c5d3d6	182	start = start_pfn & ~(PAGES_PER_SECTION - 1);
52d4b9ac KH	183	end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
	184
	185	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
	186	if (!pfn_present(pfn))
	187	continue;
	188	fail = init_section_page_cgroup(pfn);
	189	}
	190	if (!fail)
	191	return 0;
	192
	193	/* rollback */
	194	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
	195	__free_page_cgroup(pfn);
	196
	197	return -ENOMEM;
	198	}
	199
31168481	200	int __meminit offline_page_cgroup(unsigned long start_pfn,
52d4b9ac KH	201	unsigned long nr_pages, int nid)
	202	{
	203	unsigned long start, end, pfn;
	204
33c5d3d6	205	start = start_pfn & ~(PAGES_PER_SECTION - 1);
52d4b9ac KH	206	end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
	207
	208	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
	209	__free_page_cgroup(pfn);
	210	return 0;
	211
	212	}
	213
31168481	214	static int __meminit page_cgroup_callback(struct notifier_block *self,
52d4b9ac KH	215	unsigned long action, void *arg)
	216	{
	217	struct memory_notify *mn = arg;
	218	int ret = 0;
	219	switch (action) {
	220	case MEM_GOING_ONLINE:
	221	ret = online_page_cgroup(mn->start_pfn,
	222	mn->nr_pages, mn->status_change_nid);
	223	break;
52d4b9ac KH	224	case MEM_OFFLINE:
	225	offline_page_cgroup(mn->start_pfn,
	226	mn->nr_pages, mn->status_change_nid);
	227	break;
dc19f9db	228	case MEM_CANCEL_ONLINE:
52d4b9ac KH	229	case MEM_GOING_OFFLINE:
	230	break;
	231	case MEM_ONLINE:
	232	case MEM_CANCEL_OFFLINE:
	233	break;
	234	}
dc19f9db KH	235
	236	if (ret)
	237	ret = notifier_from_errno(ret);
	238	else
	239	ret = NOTIFY_OK;
	240
52d4b9ac KH	241	return ret;
	242	}
	243
	244	#endif
	245
	246	void __init page_cgroup_init(void)
	247	{
	248	unsigned long pfn;
	249	int fail = 0;
	250
94b6da5a KH	251	if (mem_cgroup_subsys.disabled)
	252	return;
	253
52d4b9ac KH	254	for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
	255	if (!pfn_present(pfn))
	256	continue;
	257	fail = init_section_page_cgroup(pfn);
	258	}
	259	if (fail) {
	260	printk(KERN_CRIT "try cgroup_disable=memory boot option\n");
	261	panic("Out of memory");
	262	} else {
	263	hotplug_memory_notifier(page_cgroup_callback, 0);
	264	}
	265	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
	266	printk(KERN_INFO "please try cgroup_disable=memory option if you don't"
	267	" want\n");
	268	}
	269
31168481	270	void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
52d4b9ac KH	271	{
	272	return;
	273	}
	274
	275	#endif