]>
Commit | Line | Data |
---|---|---|
3610cce8 | 1 | /* |
239a6425 | 2 | * Copyright IBM Corp. 2007,2009 |
3610cce8 MS |
3 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> |
4 | */ | |
5 | ||
6 | #include <linux/sched.h> | |
7 | #include <linux/kernel.h> | |
8 | #include <linux/errno.h> | |
9 | #include <linux/mm.h> | |
10 | #include <linux/swap.h> | |
11 | #include <linux/smp.h> | |
12 | #include <linux/highmem.h> | |
13 | #include <linux/slab.h> | |
14 | #include <linux/pagemap.h> | |
15 | #include <linux/spinlock.h> | |
16 | #include <linux/module.h> | |
17 | #include <linux/quicklist.h> | |
18 | ||
19 | #include <asm/system.h> | |
20 | #include <asm/pgtable.h> | |
21 | #include <asm/pgalloc.h> | |
22 | #include <asm/tlb.h> | |
23 | #include <asm/tlbflush.h> | |
6252d702 | 24 | #include <asm/mmu_context.h> |
3610cce8 MS |
25 | |
26 | #ifndef CONFIG_64BIT | |
27 | #define ALLOC_ORDER 1 | |
146e4b3c MS |
28 | #define TABLES_PER_PAGE 4 |
29 | #define FRAG_MASK 15UL | |
30 | #define SECOND_HALVES 10UL | |
402b0862 CO |
31 | |
32 | void clear_table_pgstes(unsigned long *table) | |
33 | { | |
34 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | |
35 | memset(table + 256, 0, PAGE_SIZE/4); | |
36 | clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | |
37 | memset(table + 768, 0, PAGE_SIZE/4); | |
38 | } | |
39 | ||
3610cce8 MS |
40 | #else |
41 | #define ALLOC_ORDER 2 | |
146e4b3c MS |
42 | #define TABLES_PER_PAGE 2 |
43 | #define FRAG_MASK 3UL | |
44 | #define SECOND_HALVES 2UL | |
402b0862 CO |
45 | |
46 | void clear_table_pgstes(unsigned long *table) | |
47 | { | |
48 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); | |
49 | memset(table + 256, 0, PAGE_SIZE/2); | |
50 | } | |
51 | ||
3610cce8 MS |
52 | #endif |
53 | ||
239a6425 HC |
/* Start of the vmalloc area; defaults to VMALLOC_SIZE bytes below
 * VMALLOC_END and may be moved down by the "vmalloc=" early parameter. */
unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
EXPORT_SYMBOL(VMALLOC_START);
56 | ||
57 | static int __init parse_vmalloc(char *arg) | |
58 | { | |
59 | if (!arg) | |
60 | return -EINVAL; | |
61 | VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK; | |
62 | return 0; | |
63 | } | |
64 | early_param("vmalloc", parse_vmalloc); | |
65 | ||
3610cce8 MS |
/*
 * Allocate a crst (region/segment) table of ALLOC_ORDER pages for @mm.
 *
 * If @noexec is set, a second (shadow) table is allocated as well; its
 * physical address is stashed in page->index of the primary table's
 * struct page (page->index == 0 means "no shadow").  The backing page
 * is linked into mm->context.crst_list under context.list_lock.
 *
 * Returns the physical address of the table, or NULL on allocation
 * failure.  NOTE(review): the phys address is later fed to
 * virt_to_page() by crst_table_free(), i.e. the code relies on the
 * kernel's identity mapping of physical memory.
 */
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	page->index = 0;	/* no shadow table yet */
	if (noexec) {
		struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
		if (!shadow) {
			__free_pages(page, ALLOC_ORDER);
			return NULL;
		}
		page->index = page_to_phys(shadow);
	}
	spin_lock(&mm->context.list_lock);
	list_add(&page->lru, &mm->context.crst_list);
	spin_unlock(&mm->context.list_lock);
	return (unsigned long *) page_to_phys(page);
}
86 | ||
/*
 * Free a crst table previously obtained from crst_table_alloc().
 *
 * Unlinks the backing page from mm->context.crst_list and releases
 * both the table and, if present, its shadow table (looked up via
 * get_shadow_table()).
 */
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	unsigned long *shadow = get_shadow_table(table);
	struct page *page = virt_to_page(table);

	spin_lock(&mm->context.list_lock);
	list_del(&page->lru);
	spin_unlock(&mm->context.list_lock);
	if (shadow)
		free_pages((unsigned long) shadow, ALLOC_ORDER);
	free_pages((unsigned long) table, ALLOC_ORDER);
}
99 | ||
6252d702 MS |
100 | #ifdef CONFIG_64BIT |
/*
 * Extend the address space of @mm up to @limit by stacking additional
 * region tables on top of the current pgd (2G -> 4T -> 8P, per the
 * 1UL << 31/42/53 limits below).
 *
 * The new top-level table is allocated without holding
 * page_table_lock; the lock only protects the switch of mm->pgd, so a
 * concurrent upgrade may win the race, in which case the freshly
 * allocated table is freed again and the limit check is retried
 * ("repeat").
 *
 * Returns 0 on success, -ENOMEM if a table allocation failed.
 */
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));	/* beyond the maximum asce limit */
repeat:
	table = crst_table_alloc(mm, mm->context.noexec);
	if (!table)
		return -ENOMEM;
	spin_lock(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			/* segment table -> region-third table */
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			/* region-third table -> region-second table */
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;	/* consumed as the new top level */
	}
	spin_unlock(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);	/* lost the race, discard */
	if (mm->context.asce_limit < limit)
		goto repeat;	/* may need more than one extra level */
	/* Activate the new asce for the current task. */
	update_mm(mm, current);
	return 0;
}
141 | ||
/*
 * Shrink the address space of @mm back down to @limit by removing
 * top-level region tables until asce_limit no longer exceeds it.
 *
 * The TLB is flushed once up front; each loop iteration inspects the
 * type of the current top-level entry, drops one table level and frees
 * the old top table.  BUG()s on an unexpected region-entry type.
 */
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;		/* nothing to do */
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			/* region-second -> region-third table */
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			/* region-third -> segment table */
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		/* The origin of the old top entry becomes the new pgd. */
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
173 | #endif | |
174 | ||
3610cce8 MS |
175 | /* |
176 | * page table entry allocation/free routines. | |
177 | */ | |
/*
 * Allocate one page-table fragment for @mm.
 *
 * A 4K page holds TABLES_PER_PAGE page-table fragments; the low
 * FRAG_MASK bits of page->flags track which fragments are in use.  A
 * fragment normally occupies one bit, or two adjacent bits (3UL) when
 * the mm needs a second half per table (noexec shadow tables or
 * extended pgstes for KVM).
 *
 * Partially used pages sit at the front of mm->context.pgtable_list;
 * fully used ones are moved to the tail.  A new page is allocated and
 * initialized with empty entries only when no partially used page is
 * available.
 *
 * Returns the physical address of the fragment, or NULL on allocation
 * failure.
 */
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	struct page *page;
	unsigned long *table;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	spin_lock(&mm->context.list_lock);
	page = NULL;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		/* First page already full: need a fresh one. */
		if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
			page = NULL;
	}
	if (!page) {
		/* Drop the lock across the allocation; re-taken below. */
		spin_unlock(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		page->flags &= ~FRAG_MASK;	/* all fragments free */
		table = (unsigned long *) page_to_phys(page);
		if (mm->context.has_pgste)
			clear_table_pgstes(table);
		else
			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	}
	table = (unsigned long *) page_to_phys(page);
	/* Walk to the first free fragment within the page. */
	while (page->flags & bits) {
		table += 256;
		bits <<= 1;
	}
	page->flags |= bits;	/* mark fragment(s) allocated */
	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
		list_move_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock(&mm->context.list_lock);
	return table;
}
219 | ||
/*
 * Free a page-table fragment previously returned by page_table_alloc().
 *
 * Clears the fragment's usage bit(s) in page->flags.  If the page
 * still holds other fragments it is moved to the front of
 * mm->context.pgtable_list so it is reused first; once all fragments
 * are free the page itself is released.
 */
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	/* Recompute the usage bit(s) from the fragment's byte offset. */
	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock(&mm->context.list_lock);
	page->flags ^= bits;	/* toggle allocated -> free */
	if (page->flags & FRAG_MASK) {
		/* Page now has some free pgtable fragments. */
		list_move(&page->lru, &mm->context.pgtable_list);
		page = NULL;	/* keep the page, do not free it below */
	} else
		/* All fragments of the 4K page have been freed. */
		list_del(&page->lru);
	spin_unlock(&mm->context.list_lock);
	if (page) {
		pgtable_page_dtor(page);
		__free_page(page);
	}
}
3610cce8 | 243 | |
146e4b3c MS |
244 | void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) |
245 | { | |
246 | struct page *page; | |
247 | ||
50aa98ba | 248 | spin_lock(&mm->context.list_lock); |
146e4b3c MS |
249 | /* Free shadow region and segment tables. */ |
250 | list_for_each_entry(page, &mm->context.crst_list, lru) | |
251 | if (page->index) { | |
252 | free_pages((unsigned long) page->index, ALLOC_ORDER); | |
253 | page->index = 0; | |
254 | } | |
255 | /* "Free" second halves of page tables. */ | |
256 | list_for_each_entry(page, &mm->context.pgtable_list, lru) | |
257 | page->flags &= ~SECOND_HALVES; | |
50aa98ba | 258 | spin_unlock(&mm->context.list_lock); |
146e4b3c MS |
259 | mm->context.noexec = 0; |
260 | update_mm(mm, tsk); | |
3610cce8 | 261 | } |
402b0862 CO |
262 | |
/*
 * s390_enable_sie - switch on pgstes for the current userspace process
 * (required to run KVM guests).
 *
 * Page tables cannot gain pgstes after the fact, so the whole mm is
 * replaced: dup_mm() is called with context.alloc_pgste temporarily
 * set so the copy is built with pgste-capable page tables, then the
 * new mm is installed in place of the old one.
 *
 * This is only legal while the process owns its mm exclusively (single
 * mm user, mm == active_mm, no outstanding AIO contexts); the
 * condition is checked both before and after the potentially sleeping
 * dup_mm() call.
 *
 * Returns 0 on success (or if pgstes are already enabled), -EINVAL if
 * the mode or exclusivity checks fail, -ENOMEM if dup_mm() fails.
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm, *old_mm;

	/* Do we have switched amode? If no, we cannot do sie */
	if (user_mode == HOME_SPACE_MODE)
		return -EINVAL;

	/* Do we have pgstes? if yes, we are done */
	if (tsk->mm->context.has_pgste)
		return 0;

	/* lets check if we are allowed to replace the mm */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		task_unlock(tsk);
		return -EINVAL;
	}
	task_unlock(tsk);

	/* we copy the mm and let dup_mm create the page tables with_pgstes */
	tsk->mm->context.alloc_pgste = 1;
	mm = dup_mm(tsk);
	tsk->mm->context.alloc_pgste = 0;
	if (!mm)
		return -ENOMEM;

	/* Now lets check again if something happened */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		mmput(mm);
		task_unlock(tsk);
		return -EINVAL;
	}

	/* ok, we are alone. No ptrace, no threads, etc. */
	old_mm = tsk->mm;
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
	preempt_enable();
	task_unlock(tsk);
	mmput(old_mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
7db11a36 | 322 | |
#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
/*
 * Check whether @page is currently mapped in the kernel address space.
 *
 * Executes "load real address" (lra) on the page's physical address
 * and extracts the resulting condition code via ipm/srl; cc == 0
 * means the address translated successfully, i.e. the page is present.
 */
bool kernel_page_present(struct page *page)
{
	unsigned long addr;
	int cc;

	addr = page_to_phys(page);
	asm volatile(
		"	lra	%1,0(%1)\n"
		"	ipm	%0\n"	/* insert program mask (holds cc) */
		"	srl	%0,28"	/* shift cc into the low bits */
		: "=d" (cc), "+a" (addr) : : "cc");
	return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */