]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * PPC64 (POWER4) Huge TLB Page Support for Kernel. | |
3 | * | |
4 | * Copyright (C) 2003 David Gibson, IBM Corporation. | |
5 | * | |
6 | * Based on the IA-32 version: | |
7 | * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> | |
8 | */ | |
9 | ||
10 | #include <linux/init.h> | |
11 | #include <linux/fs.h> | |
12 | #include <linux/mm.h> | |
13 | #include <linux/hugetlb.h> | |
14 | #include <linux/pagemap.h> | |
15 | #include <linux/smp_lock.h> | |
16 | #include <linux/slab.h> | |
17 | #include <linux/err.h> | |
18 | #include <linux/sysctl.h> | |
19 | #include <asm/mman.h> | |
20 | #include <asm/pgalloc.h> | |
21 | #include <asm/tlb.h> | |
22 | #include <asm/tlbflush.h> | |
23 | #include <asm/mmu_context.h> | |
24 | #include <asm/machdep.h> | |
25 | #include <asm/cputable.h> | |
26 | #include <asm/tlb.h> | |
27 | ||
28 | #include <linux/sysctl.h> | |
29 | ||
/*
 * Geometry of the private hugepage page table used by this file.
 *
 * HUGEPGDIR_SHIFT/SIZE/MASK describe the span of address space covered
 * by one hugepage directory entry.  The "PAGE_SHIFT - 3" term presumably
 * reflects one page worth of 8-byte huge ptes per directory entry --
 * NOTE(review): confirm against sizeof(pte_t).
 *
 * The *_INDEX_SIZE values are the number of index bits at each level and
 * PTRS_PER_* the resulting entry counts.
 */
#define HUGEPGDIR_SHIFT		(HPAGE_SHIFT + PAGE_SHIFT - 3)
#define HUGEPGDIR_SIZE		(1UL << HUGEPGDIR_SHIFT)
#define HUGEPGDIR_MASK		(~(HUGEPGDIR_SIZE-1))

#define HUGEPTE_INDEX_SIZE	9
#define HUGEPGD_INDEX_SIZE	10

#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
#define PTRS_PER_HUGEPGD	(1 << HUGEPGD_INDEX_SIZE)
39 | ||
40 | static inline int hugepgd_index(unsigned long addr) | |
41 | { | |
42 | return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT; | |
43 | } | |
44 | ||
58366af5 | 45 | static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr) |
1da177e4 LT |
46 | { |
47 | int index; | |
48 | ||
49 | if (! mm->context.huge_pgdir) | |
50 | return NULL; | |
51 | ||
52 | ||
53 | index = hugepgd_index(addr); | |
54 | BUG_ON(index >= PTRS_PER_HUGEPGD); | |
58366af5 | 55 | return (pud_t *)(mm->context.huge_pgdir + index); |
1da177e4 LT |
56 | } |
57 | ||
58366af5 | 58 | static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr) |
1da177e4 LT |
59 | { |
60 | int index; | |
61 | ||
58366af5 | 62 | if (pud_none(*dir)) |
1da177e4 LT |
63 | return NULL; |
64 | ||
65 | index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE; | |
58366af5 | 66 | return (pte_t *)pud_page(*dir) + index; |
1da177e4 LT |
67 | } |
68 | ||
58366af5 | 69 | static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr) |
1da177e4 LT |
70 | { |
71 | BUG_ON(! in_hugepage_area(mm->context, addr)); | |
72 | ||
73 | if (! mm->context.huge_pgdir) { | |
74 | pgd_t *new; | |
75 | spin_unlock(&mm->page_table_lock); | |
76 | /* Don't use pgd_alloc(), because we want __GFP_REPEAT */ | |
77 | new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); | |
78 | BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); | |
79 | spin_lock(&mm->page_table_lock); | |
80 | ||
81 | /* | |
82 | * Because we dropped the lock, we should re-check the | |
83 | * entry, as somebody else could have populated it.. | |
84 | */ | |
85 | if (mm->context.huge_pgdir) | |
86 | pgd_free(new); | |
87 | else | |
88 | mm->context.huge_pgdir = new; | |
89 | } | |
90 | return hugepgd_offset(mm, addr); | |
91 | } | |
92 | ||
58366af5 | 93 | static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr) |
1da177e4 | 94 | { |
58366af5 | 95 | if (! pud_present(*dir)) { |
1da177e4 LT |
96 | pte_t *new; |
97 | ||
98 | spin_unlock(&mm->page_table_lock); | |
99 | new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); | |
100 | BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); | |
101 | spin_lock(&mm->page_table_lock); | |
102 | /* | |
103 | * Because we dropped the lock, we should re-check the | |
104 | * entry, as somebody else could have populated it.. | |
105 | */ | |
58366af5 | 106 | if (pud_present(*dir)) { |
1da177e4 LT |
107 | if (new) |
108 | kmem_cache_free(zero_cache, new); | |
109 | } else { | |
110 | struct page *ptepage; | |
111 | ||
112 | if (! new) | |
113 | return NULL; | |
114 | ptepage = virt_to_page(new); | |
115 | ptepage->mapping = (void *) mm; | |
116 | ptepage->index = addr & HUGEPGDIR_MASK; | |
58366af5 | 117 | pud_populate(mm, dir, new); |
1da177e4 LT |
118 | } |
119 | } | |
120 | ||
121 | return hugepte_offset(dir, addr); | |
122 | } | |
123 | ||
63551ae0 | 124 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
1da177e4 | 125 | { |
58366af5 | 126 | pud_t *pud; |
1da177e4 LT |
127 | |
128 | BUG_ON(! in_hugepage_area(mm->context, addr)); | |
129 | ||
58366af5 BH |
130 | pud = hugepgd_offset(mm, addr); |
131 | if (! pud) | |
1da177e4 LT |
132 | return NULL; |
133 | ||
58366af5 | 134 | return hugepte_offset(pud, addr); |
1da177e4 LT |
135 | } |
136 | ||
63551ae0 | 137 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) |
1da177e4 | 138 | { |
58366af5 | 139 | pud_t *pud; |
1da177e4 LT |
140 | |
141 | BUG_ON(! in_hugepage_area(mm->context, addr)); | |
142 | ||
58366af5 BH |
143 | pud = hugepgd_alloc(mm, addr); |
144 | if (! pud) | |
1da177e4 LT |
145 | return NULL; |
146 | ||
58366af5 | 147 | return hugepte_alloc(mm, pud, addr); |
1da177e4 LT |
148 | } |
149 | ||
1da177e4 LT |
150 | /* |
151 | * This function checks for proper alignment of input addr and len parameters. | |
152 | */ | |
153 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | |
154 | { | |
155 | if (len & ~HPAGE_MASK) | |
156 | return -EINVAL; | |
157 | if (addr & ~HPAGE_MASK) | |
158 | return -EINVAL; | |
159 | if (! (within_hugepage_low_range(addr, len) | |
160 | || within_hugepage_high_range(addr, len)) ) | |
161 | return -EINVAL; | |
162 | return 0; | |
163 | } | |
164 | ||
165 | static void flush_segments(void *parm) | |
166 | { | |
167 | u16 segs = (unsigned long) parm; | |
168 | unsigned long i; | |
169 | ||
170 | asm volatile("isync" : : : "memory"); | |
171 | ||
172 | for (i = 0; i < 16; i++) { | |
173 | if (! (segs & (1U << i))) | |
174 | continue; | |
175 | asm volatile("slbie %0" : : "r" (i << SID_SHIFT)); | |
176 | } | |
177 | ||
178 | asm volatile("isync" : : : "memory"); | |
179 | } | |
180 | ||
181 | static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) | |
182 | { | |
183 | unsigned long start = seg << SID_SHIFT; | |
184 | unsigned long end = (seg+1) << SID_SHIFT; | |
185 | struct vm_area_struct *vma; | |
1da177e4 LT |
186 | |
187 | BUG_ON(seg >= 16); | |
188 | ||
189 | /* Check no VMAs are in the region */ | |
190 | vma = find_vma(mm, start); | |
191 | if (vma && (vma->vm_start < end)) | |
192 | return -EBUSY; | |
193 | ||
1da177e4 LT |
194 | return 0; |
195 | } | |
196 | ||
197 | static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) | |
198 | { | |
199 | unsigned long i; | |
200 | ||
201 | newsegs &= ~(mm->context.htlb_segs); | |
202 | if (! newsegs) | |
203 | return 0; /* The segments we want are already open */ | |
204 | ||
205 | for (i = 0; i < 16; i++) | |
206 | if ((1 << i) & newsegs) | |
207 | if (prepare_low_seg_for_htlb(mm, i) != 0) | |
208 | return -EBUSY; | |
209 | ||
210 | mm->context.htlb_segs |= newsegs; | |
211 | ||
212 | /* update the paca copy of the context struct */ | |
213 | get_paca()->context = mm->context; | |
214 | ||
215 | /* the context change must make it to memory before the flush, | |
216 | * so that further SLB misses do the right thing. */ | |
217 | mb(); | |
218 | on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1); | |
219 | ||
220 | return 0; | |
221 | } | |
222 | ||
223 | int prepare_hugepage_range(unsigned long addr, unsigned long len) | |
224 | { | |
225 | if (within_hugepage_high_range(addr, len)) | |
226 | return 0; | |
227 | else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) { | |
228 | int err; | |
229 | /* Yes, we need both tests, in case addr+len overflows | |
230 | * 64-bit arithmetic */ | |
231 | err = open_low_hpage_segs(current->mm, | |
232 | LOW_ESID_MASK(addr, len)); | |
233 | if (err) | |
234 | printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" | |
235 | " failed (segs: 0x%04hx)\n", addr, len, | |
236 | LOW_ESID_MASK(addr, len)); | |
237 | return err; | |
238 | } | |
239 | ||
240 | return -EINVAL; | |
241 | } | |
242 | ||
1da177e4 LT |
243 | struct page * |
244 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | |
245 | { | |
246 | pte_t *ptep; | |
247 | struct page *page; | |
248 | ||
249 | if (! in_hugepage_area(mm->context, address)) | |
250 | return ERR_PTR(-EINVAL); | |
251 | ||
252 | ptep = huge_pte_offset(mm, address); | |
253 | page = pte_page(*ptep); | |
254 | if (page) | |
255 | page += (address % HPAGE_SIZE) / PAGE_SIZE; | |
256 | ||
257 | return page; | |
258 | } | |
259 | ||
260 | int pmd_huge(pmd_t pmd) | |
261 | { | |
262 | return 0; | |
263 | } | |
264 | ||
265 | struct page * | |
266 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |
267 | pmd_t *pmd, int write) | |
268 | { | |
269 | BUG(); | |
270 | return NULL; | |
271 | } | |
272 | ||
1da177e4 LT |
273 | /* Because we have an exclusive hugepage region which lies within the |
274 | * normal user address space, we have to take special measures to make | |
275 | * non-huge mmap()s evade the hugepage reserved regions. */ | |
276 | unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, | |
277 | unsigned long len, unsigned long pgoff, | |
278 | unsigned long flags) | |
279 | { | |
280 | struct mm_struct *mm = current->mm; | |
281 | struct vm_area_struct *vma; | |
282 | unsigned long start_addr; | |
283 | ||
284 | if (len > TASK_SIZE) | |
285 | return -ENOMEM; | |
286 | ||
287 | if (addr) { | |
288 | addr = PAGE_ALIGN(addr); | |
289 | vma = find_vma(mm, addr); | |
290 | if (((TASK_SIZE - len) >= addr) | |
291 | && (!vma || (addr+len) <= vma->vm_start) | |
292 | && !is_hugepage_only_range(mm, addr,len)) | |
293 | return addr; | |
294 | } | |
1363c3cd WW |
295 | if (len > mm->cached_hole_size) { |
296 | start_addr = addr = mm->free_area_cache; | |
297 | } else { | |
298 | start_addr = addr = TASK_UNMAPPED_BASE; | |
299 | mm->cached_hole_size = 0; | |
300 | } | |
1da177e4 LT |
301 | |
302 | full_search: | |
303 | vma = find_vma(mm, addr); | |
304 | while (TASK_SIZE - len >= addr) { | |
305 | BUG_ON(vma && (addr >= vma->vm_end)); | |
306 | ||
307 | if (touches_hugepage_low_range(mm, addr, len)) { | |
308 | addr = ALIGN(addr+1, 1<<SID_SHIFT); | |
309 | vma = find_vma(mm, addr); | |
310 | continue; | |
311 | } | |
312 | if (touches_hugepage_high_range(addr, len)) { | |
313 | addr = TASK_HPAGE_END; | |
314 | vma = find_vma(mm, addr); | |
315 | continue; | |
316 | } | |
317 | if (!vma || addr + len <= vma->vm_start) { | |
318 | /* | |
319 | * Remember the place where we stopped the search: | |
320 | */ | |
321 | mm->free_area_cache = addr + len; | |
322 | return addr; | |
323 | } | |
1363c3cd WW |
324 | if (addr + mm->cached_hole_size < vma->vm_start) |
325 | mm->cached_hole_size = vma->vm_start - addr; | |
1da177e4 LT |
326 | addr = vma->vm_end; |
327 | vma = vma->vm_next; | |
328 | } | |
329 | ||
330 | /* Make sure we didn't miss any holes */ | |
331 | if (start_addr != TASK_UNMAPPED_BASE) { | |
332 | start_addr = addr = TASK_UNMAPPED_BASE; | |
1363c3cd | 333 | mm->cached_hole_size = 0; |
1da177e4 LT |
334 | goto full_search; |
335 | } | |
336 | return -ENOMEM; | |
337 | } | |
338 | ||
339 | /* | |
340 | * This mmap-allocator allocates new areas top-down from below the | |
341 | * stack's low limit (the base): | |
342 | * | |
343 | * Because we have an exclusive hugepage region which lies within the | |
344 | * normal user address space, we have to take special measures to make | |
345 | * non-huge mmap()s evade the hugepage reserved regions. | |
346 | */ | |
347 | unsigned long | |
348 | arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |
349 | const unsigned long len, const unsigned long pgoff, | |
350 | const unsigned long flags) | |
351 | { | |
352 | struct vm_area_struct *vma, *prev_vma; | |
353 | struct mm_struct *mm = current->mm; | |
354 | unsigned long base = mm->mmap_base, addr = addr0; | |
1363c3cd | 355 | unsigned long largest_hole = mm->cached_hole_size; |
1da177e4 LT |
356 | int first_time = 1; |
357 | ||
358 | /* requested length too big for entire address space */ | |
359 | if (len > TASK_SIZE) | |
360 | return -ENOMEM; | |
361 | ||
362 | /* dont allow allocations above current base */ | |
363 | if (mm->free_area_cache > base) | |
364 | mm->free_area_cache = base; | |
365 | ||
366 | /* requesting a specific address */ | |
367 | if (addr) { | |
368 | addr = PAGE_ALIGN(addr); | |
369 | vma = find_vma(mm, addr); | |
370 | if (TASK_SIZE - len >= addr && | |
371 | (!vma || addr + len <= vma->vm_start) | |
372 | && !is_hugepage_only_range(mm, addr,len)) | |
373 | return addr; | |
374 | } | |
375 | ||
1363c3cd WW |
376 | if (len <= largest_hole) { |
377 | largest_hole = 0; | |
378 | mm->free_area_cache = base; | |
379 | } | |
1da177e4 LT |
380 | try_again: |
381 | /* make sure it can fit in the remaining address space */ | |
382 | if (mm->free_area_cache < len) | |
383 | goto fail; | |
384 | ||
385 | /* either no address requested or cant fit in requested address hole */ | |
386 | addr = (mm->free_area_cache - len) & PAGE_MASK; | |
387 | do { | |
388 | hugepage_recheck: | |
389 | if (touches_hugepage_low_range(mm, addr, len)) { | |
390 | addr = (addr & ((~0) << SID_SHIFT)) - len; | |
391 | goto hugepage_recheck; | |
392 | } else if (touches_hugepage_high_range(addr, len)) { | |
393 | addr = TASK_HPAGE_BASE - len; | |
394 | } | |
395 | ||
396 | /* | |
397 | * Lookup failure means no vma is above this address, | |
398 | * i.e. return with success: | |
399 | */ | |
400 | if (!(vma = find_vma_prev(mm, addr, &prev_vma))) | |
401 | return addr; | |
402 | ||
403 | /* | |
404 | * new region fits between prev_vma->vm_end and | |
405 | * vma->vm_start, use it: | |
406 | */ | |
407 | if (addr+len <= vma->vm_start && | |
1363c3cd | 408 | (!prev_vma || (addr >= prev_vma->vm_end))) { |
1da177e4 | 409 | /* remember the address as a hint for next time */ |
1363c3cd WW |
410 | mm->cached_hole_size = largest_hole; |
411 | return (mm->free_area_cache = addr); | |
412 | } else { | |
1da177e4 | 413 | /* pull free_area_cache down to the first hole */ |
1363c3cd | 414 | if (mm->free_area_cache == vma->vm_end) { |
1da177e4 | 415 | mm->free_area_cache = vma->vm_start; |
1363c3cd WW |
416 | mm->cached_hole_size = largest_hole; |
417 | } | |
418 | } | |
419 | ||
420 | /* remember the largest hole we saw so far */ | |
421 | if (addr + largest_hole < vma->vm_start) | |
422 | largest_hole = vma->vm_start - addr; | |
1da177e4 LT |
423 | |
424 | /* try just below the current vma->vm_start */ | |
425 | addr = vma->vm_start-len; | |
426 | } while (len <= vma->vm_start); | |
427 | ||
428 | fail: | |
429 | /* | |
430 | * if hint left us with no space for the requested | |
431 | * mapping then try again: | |
432 | */ | |
433 | if (first_time) { | |
434 | mm->free_area_cache = base; | |
1363c3cd | 435 | largest_hole = 0; |
1da177e4 LT |
436 | first_time = 0; |
437 | goto try_again; | |
438 | } | |
439 | /* | |
440 | * A failed mmap() very likely causes application failure, | |
441 | * so fall back to the bottom-up function here. This scenario | |
442 | * can happen with large stack limits and large mmap() | |
443 | * allocations. | |
444 | */ | |
445 | mm->free_area_cache = TASK_UNMAPPED_BASE; | |
1363c3cd | 446 | mm->cached_hole_size = ~0UL; |
1da177e4 LT |
447 | addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); |
448 | /* | |
449 | * Restore the topdown base: | |
450 | */ | |
451 | mm->free_area_cache = base; | |
1363c3cd | 452 | mm->cached_hole_size = ~0UL; |
1da177e4 LT |
453 | |
454 | return addr; | |
455 | } | |
456 | ||
457 | static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) | |
458 | { | |
459 | unsigned long addr = 0; | |
460 | struct vm_area_struct *vma; | |
461 | ||
462 | vma = find_vma(current->mm, addr); | |
463 | while (addr + len <= 0x100000000UL) { | |
464 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ | |
465 | ||
466 | if (! __within_hugepage_low_range(addr, len, segmask)) { | |
467 | addr = ALIGN(addr+1, 1<<SID_SHIFT); | |
468 | vma = find_vma(current->mm, addr); | |
469 | continue; | |
470 | } | |
471 | ||
472 | if (!vma || (addr + len) <= vma->vm_start) | |
473 | return addr; | |
474 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | |
475 | /* Depending on segmask this might not be a confirmed | |
476 | * hugepage region, so the ALIGN could have skipped | |
477 | * some VMAs */ | |
478 | vma = find_vma(current->mm, addr); | |
479 | } | |
480 | ||
481 | return -ENOMEM; | |
482 | } | |
483 | ||
484 | static unsigned long htlb_get_high_area(unsigned long len) | |
485 | { | |
486 | unsigned long addr = TASK_HPAGE_BASE; | |
487 | struct vm_area_struct *vma; | |
488 | ||
489 | vma = find_vma(current->mm, addr); | |
490 | for (vma = find_vma(current->mm, addr); | |
491 | addr + len <= TASK_HPAGE_END; | |
492 | vma = vma->vm_next) { | |
493 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ | |
494 | BUG_ON(! within_hugepage_high_range(addr, len)); | |
495 | ||
496 | if (!vma || (addr + len) <= vma->vm_start) | |
497 | return addr; | |
498 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | |
499 | /* Because we're in a hugepage region, this alignment | |
500 | * should not skip us over any VMAs */ | |
501 | } | |
502 | ||
503 | return -ENOMEM; | |
504 | } | |
505 | ||
506 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |
507 | unsigned long len, unsigned long pgoff, | |
508 | unsigned long flags) | |
509 | { | |
510 | if (len & ~HPAGE_MASK) | |
511 | return -EINVAL; | |
512 | ||
513 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) | |
514 | return -EINVAL; | |
515 | ||
516 | if (test_thread_flag(TIF_32BIT)) { | |
517 | int lastshift = 0; | |
518 | u16 segmask, cursegs = current->mm->context.htlb_segs; | |
519 | ||
520 | /* First see if we can do the mapping in the existing | |
521 | * low hpage segments */ | |
522 | addr = htlb_get_low_area(len, cursegs); | |
523 | if (addr != -ENOMEM) | |
524 | return addr; | |
525 | ||
526 | for (segmask = LOW_ESID_MASK(0x100000000UL-len, len); | |
527 | ! lastshift; segmask >>=1) { | |
528 | if (segmask & 1) | |
529 | lastshift = 1; | |
530 | ||
531 | addr = htlb_get_low_area(len, cursegs | segmask); | |
532 | if ((addr != -ENOMEM) | |
533 | && open_low_hpage_segs(current->mm, segmask) == 0) | |
534 | return addr; | |
535 | } | |
536 | printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" | |
537 | " enough segments\n"); | |
538 | return -ENOMEM; | |
539 | } else { | |
540 | return htlb_get_high_area(len); | |
541 | } | |
542 | } | |
543 | ||
544 | void hugetlb_mm_free_pgd(struct mm_struct *mm) | |
545 | { | |
546 | int i; | |
547 | pgd_t *pgdir; | |
548 | ||
549 | spin_lock(&mm->page_table_lock); | |
550 | ||
551 | pgdir = mm->context.huge_pgdir; | |
552 | if (! pgdir) | |
553 | goto out; | |
554 | ||
555 | mm->context.huge_pgdir = NULL; | |
556 | ||
557 | /* cleanup any hugepte pages leftover */ | |
558 | for (i = 0; i < PTRS_PER_HUGEPGD; i++) { | |
58366af5 | 559 | pud_t *pud = (pud_t *)(pgdir + i); |
1da177e4 | 560 | |
58366af5 BH |
561 | if (! pud_none(*pud)) { |
562 | pte_t *pte = (pte_t *)pud_page(*pud); | |
1da177e4 LT |
563 | struct page *ptepage = virt_to_page(pte); |
564 | ||
565 | ptepage->mapping = NULL; | |
566 | ||
567 | BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE)); | |
568 | kmem_cache_free(zero_cache, pte); | |
569 | } | |
58366af5 | 570 | pud_clear(pud); |
1da177e4 LT |
571 | } |
572 | ||
573 | BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE)); | |
574 | kmem_cache_free(zero_cache, pgdir); | |
575 | ||
576 | out: | |
577 | spin_unlock(&mm->page_table_lock); | |
578 | } | |
579 | ||
580 | int hash_huge_page(struct mm_struct *mm, unsigned long access, | |
581 | unsigned long ea, unsigned long vsid, int local) | |
582 | { | |
583 | pte_t *ptep; | |
584 | unsigned long va, vpn; | |
585 | pte_t old_pte, new_pte; | |
96e28449 | 586 | unsigned long rflags, prpn; |
1da177e4 LT |
587 | long slot; |
588 | int err = 1; | |
589 | ||
590 | spin_lock(&mm->page_table_lock); | |
591 | ||
592 | ptep = huge_pte_offset(mm, ea); | |
593 | ||
594 | /* Search the Linux page table for a match with va */ | |
595 | va = (vsid << 28) | (ea & 0x0fffffff); | |
596 | vpn = va >> HPAGE_SHIFT; | |
597 | ||
598 | /* | |
599 | * If no pte found or not present, send the problem up to | |
600 | * do_page_fault | |
601 | */ | |
602 | if (unlikely(!ptep || pte_none(*ptep))) | |
603 | goto out; | |
604 | ||
605 | /* BUG_ON(pte_bad(*ptep)); */ | |
606 | ||
607 | /* | |
608 | * Check the user's access rights to the page. If access should be | |
609 | * prevented then send the problem up to do_page_fault. | |
610 | */ | |
611 | if (unlikely(access & ~pte_val(*ptep))) | |
612 | goto out; | |
613 | /* | |
614 | * At this point, we have a pte (old_pte) which can be used to build | |
615 | * or update an HPTE. There are 2 cases: | |
616 | * | |
617 | * 1. There is a valid (present) pte with no associated HPTE (this is | |
618 | * the most common case) | |
619 | * 2. There is a valid (present) pte with an associated HPTE. The | |
620 | * current values of the pp bits in the HPTE prevent access | |
621 | * because we are doing software DIRTY bit management and the | |
622 | * page is currently not DIRTY. | |
623 | */ | |
624 | ||
625 | ||
626 | old_pte = *ptep; | |
627 | new_pte = old_pte; | |
628 | ||
96e28449 | 629 | rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); |
1da177e4 | 630 | /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ |
96e28449 | 631 | rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); |
1da177e4 LT |
632 | |
633 | /* Check if pte already has an hpte (case 2) */ | |
634 | if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { | |
635 | /* There MIGHT be an HPTE for this pte */ | |
636 | unsigned long hash, slot; | |
637 | ||
638 | hash = hpt_hash(vpn, 1); | |
639 | if (pte_val(old_pte) & _PAGE_SECONDARY) | |
640 | hash = ~hash; | |
641 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | |
642 | slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; | |
643 | ||
96e28449 | 644 | if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) |
1da177e4 LT |
645 | pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; |
646 | } | |
647 | ||
648 | if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { | |
649 | unsigned long hash = hpt_hash(vpn, 1); | |
650 | unsigned long hpte_group; | |
651 | ||
652 | prpn = pte_pfn(old_pte); | |
653 | ||
654 | repeat: | |
655 | hpte_group = ((hash & htab_hash_mask) * | |
656 | HPTES_PER_GROUP) & ~0x7UL; | |
657 | ||
658 | /* Update the linux pte with the HPTE slot */ | |
659 | pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; | |
660 | pte_val(new_pte) |= _PAGE_HASHPTE; | |
661 | ||
662 | /* Add in WIMG bits */ | |
663 | /* XXX We should store these in the pte */ | |
96e28449 | 664 | rflags |= _PAGE_COHERENT; |
1da177e4 | 665 | |
96e28449 DG |
666 | slot = ppc_md.hpte_insert(hpte_group, va, prpn, |
667 | HPTE_V_LARGE, rflags); | |
1da177e4 LT |
668 | |
669 | /* Primary is full, try the secondary */ | |
670 | if (unlikely(slot == -1)) { | |
671 | pte_val(new_pte) |= _PAGE_SECONDARY; | |
672 | hpte_group = ((~hash & htab_hash_mask) * | |
673 | HPTES_PER_GROUP) & ~0x7UL; | |
674 | slot = ppc_md.hpte_insert(hpte_group, va, prpn, | |
96e28449 | 675 | HPTE_V_LARGE, rflags); |
1da177e4 LT |
676 | if (slot == -1) { |
677 | if (mftb() & 0x1) | |
678 | hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; | |
679 | ||
680 | ppc_md.hpte_remove(hpte_group); | |
681 | goto repeat; | |
682 | } | |
683 | } | |
684 | ||
685 | if (unlikely(slot == -2)) | |
686 | panic("hash_huge_page: pte_insert failed\n"); | |
687 | ||
688 | pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; | |
689 | ||
690 | /* | |
691 | * No need to use ldarx/stdcx here because all who | |
692 | * might be updating the pte will hold the | |
693 | * page_table_lock | |
694 | */ | |
695 | *ptep = new_pte; | |
696 | } | |
697 | ||
698 | err = 0; | |
699 | ||
700 | out: | |
701 | spin_unlock(&mm->page_table_lock); | |
702 | ||
703 | return err; | |
704 | } |