]>
Commit | Line | Data |
---|---|---|
8cdea7c0 BS |
1 | /* memcontrol.c - Memory Controller |
2 | * | |
3 | * Copyright IBM Corporation, 2007 | |
4 | * Author Balbir Singh <balbir@linux.vnet.ibm.com> | |
5 | * | |
78fb7466 PE |
6 | * Copyright 2007 OpenVZ SWsoft Inc |
7 | * Author: Pavel Emelianov <xemul@openvz.org> | |
8 | * | |
8cdea7c0 BS |
9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of the GNU General Public License as published by | |
11 | * the Free Software Foundation; either version 2 of the License, or | |
12 | * (at your option) any later version. | |
13 | * | |
14 | * This program is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | * GNU General Public License for more details. | |
18 | */ | |
19 | ||
#include <linux/res_counter.h>
#include <linux/memcontrol.h>
#include <linux/cgroup.h>
#include <linux/mm.h>
#include <linux/err.h>
8cdea7c0 BS |
24 | |
25 | struct cgroup_subsys mem_cgroup_subsys; | |
26 | ||
27 | /* | |
28 | * The memory controller data structure. The memory controller controls both | |
29 | * page cache and RSS per cgroup. We would eventually like to provide | |
30 | * statistics based on the statistics developed by Rik Van Riel for clock-pro, | |
31 | * to help the administrator determine what knobs to tune. | |
32 | * | |
33 | * TODO: Add a water mark for the memory controller. Reclaim will begin when | |
34 | * we hit the water mark. | |
35 | */ | |
36 | struct mem_cgroup { | |
37 | struct cgroup_subsys_state css; | |
38 | /* | |
39 | * the counter to account for memory usage | |
40 | */ | |
41 | struct res_counter res; | |
78fb7466 PE |
42 | /* |
43 | * Per cgroup active and inactive list, similar to the | |
44 | * per zone LRU lists. | |
45 | * TODO: Consider making these lists per zone | |
46 | */ | |
47 | struct list_head active_list; | |
48 | struct list_head inactive_list; | |
8cdea7c0 BS |
49 | }; |
50 | ||
51 | /* | |
52 | * A page_cgroup page is associated with every page descriptor. The | |
53 | * page_cgroup helps us identify information about the cgroup | |
54 | */ | |
55 | struct page_cgroup { | |
56 | struct list_head lru; /* per cgroup LRU list */ | |
57 | struct page *page; | |
58 | struct mem_cgroup *mem_cgroup; | |
59 | }; | |
60 | ||
61 | ||
62 | static inline | |
63 | struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) | |
64 | { | |
65 | return container_of(cgroup_subsys_state(cont, | |
66 | mem_cgroup_subsys_id), struct mem_cgroup, | |
67 | css); | |
68 | } | |
69 | ||
78fb7466 PE |
70 | static inline |
71 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) | |
72 | { | |
73 | return container_of(task_subsys_state(p, mem_cgroup_subsys_id), | |
74 | struct mem_cgroup, css); | |
75 | } | |
76 | ||
77 | void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) | |
78 | { | |
79 | struct mem_cgroup *mem; | |
80 | ||
81 | mem = mem_cgroup_from_task(p); | |
82 | css_get(&mem->css); | |
83 | mm->mem_cgroup = mem; | |
84 | } | |
85 | ||
86 | void mm_free_cgroup(struct mm_struct *mm) | |
87 | { | |
88 | css_put(&mm->mem_cgroup->css); | |
89 | } | |
90 | ||
91 | void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) | |
92 | { | |
93 | page->page_cgroup = (unsigned long)pc; | |
94 | } | |
95 | ||
96 | struct page_cgroup *page_get_page_cgroup(struct page *page) | |
97 | { | |
98 | return page->page_cgroup; | |
99 | } | |
100 | ||
8cdea7c0 BS |
101 | static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, |
102 | struct file *file, char __user *userbuf, size_t nbytes, | |
103 | loff_t *ppos) | |
104 | { | |
105 | return res_counter_read(&mem_cgroup_from_cont(cont)->res, | |
106 | cft->private, userbuf, nbytes, ppos); | |
107 | } | |
108 | ||
109 | static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | |
110 | struct file *file, const char __user *userbuf, | |
111 | size_t nbytes, loff_t *ppos) | |
112 | { | |
113 | return res_counter_write(&mem_cgroup_from_cont(cont)->res, | |
114 | cft->private, userbuf, nbytes, ppos); | |
115 | } | |
116 | ||
117 | static struct cftype mem_cgroup_files[] = { | |
118 | { | |
119 | .name = "usage", | |
120 | .private = RES_USAGE, | |
121 | .read = mem_cgroup_read, | |
122 | }, | |
123 | { | |
124 | .name = "limit", | |
125 | .private = RES_LIMIT, | |
126 | .write = mem_cgroup_write, | |
127 | .read = mem_cgroup_read, | |
128 | }, | |
129 | { | |
130 | .name = "failcnt", | |
131 | .private = RES_FAILCNT, | |
132 | .read = mem_cgroup_read, | |
133 | }, | |
134 | }; | |
135 | ||
78fb7466 PE |
136 | static struct mem_cgroup init_mem_cgroup; |
137 | ||
8cdea7c0 BS |
138 | static struct cgroup_subsys_state * |
139 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |
140 | { | |
141 | struct mem_cgroup *mem; | |
142 | ||
78fb7466 PE |
143 | if (unlikely((cont->parent) == NULL)) { |
144 | mem = &init_mem_cgroup; | |
145 | init_mm.mem_cgroup = mem; | |
146 | } else | |
147 | mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL); | |
148 | ||
149 | if (mem == NULL) | |
150 | return NULL; | |
8cdea7c0 BS |
151 | |
152 | res_counter_init(&mem->res); | |
153 | return &mem->css; | |
154 | } | |
155 | ||
/*
 * Release the memory controller state belonging to @cont by passing the
 * enclosing struct mem_cgroup to kfree(). @ss is unused.
 */
static void mem_cgroup_destroy(struct cgroup_subsys *ss,
				struct cgroup *cont)
{
	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);

	kfree(mem);
}
161 | ||
162 | static int mem_cgroup_populate(struct cgroup_subsys *ss, | |
163 | struct cgroup *cont) | |
164 | { | |
165 | return cgroup_add_files(cont, ss, mem_cgroup_files, | |
166 | ARRAY_SIZE(mem_cgroup_files)); | |
167 | } | |
168 | ||
169 | struct cgroup_subsys mem_cgroup_subsys = { | |
170 | .name = "memory", | |
171 | .subsys_id = mem_cgroup_subsys_id, | |
172 | .create = mem_cgroup_create, | |
173 | .destroy = mem_cgroup_destroy, | |
174 | .populate = mem_cgroup_populate, | |
78fb7466 | 175 | .early_init = 1, |
8cdea7c0 | 176 | }; |