]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/x86/mm/init_64.c
x86: clean up arch/x86/mm/pageattr_64.c
[net-next-2.6.git] / arch / x86 / mm / init_64.c
CommitLineData
1da177e4
LT
1/*
2 * linux/arch/x86_64/mm/init.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
6 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
7 */
8
1da177e4
LT
9#include <linux/signal.h>
10#include <linux/sched.h>
11#include <linux/kernel.h>
12#include <linux/errno.h>
13#include <linux/string.h>
14#include <linux/types.h>
15#include <linux/ptrace.h>
16#include <linux/mman.h>
17#include <linux/mm.h>
18#include <linux/swap.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/pagemap.h>
22#include <linux/bootmem.h>
23#include <linux/proc_fs.h>
59170891 24#include <linux/pci.h>
6fb14755 25#include <linux/pfn.h>
c9cf5528 26#include <linux/poison.h>
17a941d8 27#include <linux/dma-mapping.h>
44df75e6
MT
28#include <linux/module.h>
29#include <linux/memory_hotplug.h>
ae32b129 30#include <linux/nmi.h>
1da177e4
LT
31
32#include <asm/processor.h>
33#include <asm/system.h>
34#include <asm/uaccess.h>
35#include <asm/pgtable.h>
36#include <asm/pgalloc.h>
37#include <asm/dma.h>
38#include <asm/fixmap.h>
39#include <asm/e820.h>
40#include <asm/apic.h>
41#include <asm/tlb.h>
42#include <asm/mmu_context.h>
43#include <asm/proto.h>
44#include <asm/smp.h>
2bc0414e 45#include <asm/sections.h>
718fc13b 46#include <asm/kdebug.h>
aaa64e04 47#include <asm/numa.h>
1da177e4
LT
48
49#ifndef Dprintk
50#define Dprintk(x...)
51#endif
52
e6584504 53const struct dma_mapping_ops* dma_ops;
17a941d8
MBY
54EXPORT_SYMBOL(dma_ops);
55
e18c6874
AK
56static unsigned long dma_reserve __initdata;
57
1da177e4
LT
58DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
59
/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical space so we can cache the location of the first one and move
 * around without checking the pgd every time.
 */
65
66void show_mem(void)
67{
e92343cc
AK
68 long i, total = 0, reserved = 0;
69 long shared = 0, cached = 0;
1da177e4
LT
70 pg_data_t *pgdat;
71 struct page *page;
72
e92343cc 73 printk(KERN_INFO "Mem-info:\n");
1da177e4 74 show_free_areas();
e92343cc 75 printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
1da177e4 76
ec936fc5 77 for_each_online_pgdat(pgdat) {
1da177e4 78 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
ae32b129
KR
79 /* this loop can take a while with 256 GB and 4k pages
80 so update the NMI watchdog */
81 if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
82 touch_nmi_watchdog();
83 }
12710a56
BP
84 if (!pfn_valid(pgdat->node_start_pfn + i))
85 continue;
1da177e4
LT
86 page = pfn_to_page(pgdat->node_start_pfn + i);
87 total++;
e92343cc
AK
88 if (PageReserved(page))
89 reserved++;
90 else if (PageSwapCache(page))
91 cached++;
92 else if (page_count(page))
93 shared += page_count(page) - 1;
1da177e4
LT
94 }
95 }
e92343cc
AK
96 printk(KERN_INFO "%lu pages of RAM\n", total);
97 printk(KERN_INFO "%lu reserved pages\n",reserved);
98 printk(KERN_INFO "%lu pages shared\n",shared);
99 printk(KERN_INFO "%lu pages swap cached\n",cached);
1da177e4
LT
100}
101
1da177e4
LT
/* Zero during early boot; mem_init() sets it to 1 after releasing bootmem. */
int after_bootmem;
103
5f44a669 104static __init void *spp_getpage(void)
1da177e4
LT
105{
106 void *ptr;
107 if (after_bootmem)
108 ptr = (void *) get_zeroed_page(GFP_ATOMIC);
109 else
110 ptr = alloc_bootmem_pages(PAGE_SIZE);
111 if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
112 panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
113
114 Dprintk("spp_getpage %p\n", ptr);
115 return ptr;
116}
117
5f44a669 118static __init void set_pte_phys(unsigned long vaddr,
1da177e4
LT
119 unsigned long phys, pgprot_t prot)
120{
121 pgd_t *pgd;
122 pud_t *pud;
123 pmd_t *pmd;
124 pte_t *pte, new_pte;
125
126 Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
127
128 pgd = pgd_offset_k(vaddr);
129 if (pgd_none(*pgd)) {
130 printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
131 return;
132 }
133 pud = pud_offset(pgd, vaddr);
134 if (pud_none(*pud)) {
135 pmd = (pmd_t *) spp_getpage();
136 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
137 if (pmd != pmd_offset(pud, 0)) {
138 printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
139 return;
140 }
141 }
142 pmd = pmd_offset(pud, vaddr);
143 if (pmd_none(*pmd)) {
144 pte = (pte_t *) spp_getpage();
145 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
146 if (pte != pte_offset_kernel(pmd, 0)) {
147 printk("PAGETABLE BUG #02!\n");
148 return;
149 }
150 }
151 new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
152
153 pte = pte_offset_kernel(pmd, vaddr);
154 if (!pte_none(*pte) &&
155 pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
156 pte_ERROR(*pte);
157 set_pte(pte, new_pte);
158
159 /*
160 * It's enough to flush this one mapping.
161 * (PGE mappings get flushed as well)
162 */
163 __flush_tlb_one(vaddr);
164}
165
166/* NOTE: this is meant to be run only at boot */
5f44a669
AK
167void __init
168__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
1da177e4
LT
169{
170 unsigned long address = __fix_to_virt(idx);
171
172 if (idx >= __end_of_fixed_addresses) {
173 printk("Invalid __set_fixmap\n");
174 return;
175 }
176 set_pte_phys(address, phys, prot);
177}
178
75175278
AK
179static unsigned long __initdata table_start;
180static unsigned long __meminitdata table_end;
1da177e4 181
dafe41ee 182static __meminit void *alloc_low_page(unsigned long *phys)
1da177e4 183{
dafe41ee 184 unsigned long pfn = table_end++;
1da177e4
LT
185 void *adr;
186
44df75e6
MT
187 if (after_bootmem) {
188 adr = (void *)get_zeroed_page(GFP_ATOMIC);
189 *phys = __pa(adr);
190 return adr;
191 }
192
1da177e4
LT
193 if (pfn >= end_pfn)
194 panic("alloc_low_page: ran out of memory");
dafe41ee
VG
195
196 adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
44df75e6 197 memset(adr, 0, PAGE_SIZE);
dafe41ee
VG
198 *phys = pfn * PAGE_SIZE;
199 return adr;
200}
1da177e4 201
dafe41ee 202static __meminit void unmap_low_page(void *adr)
1da177e4 203{
44df75e6
MT
204
205 if (after_bootmem)
206 return;
207
dafe41ee 208 early_iounmap(adr, PAGE_SIZE);
1da177e4
LT
209}
210
f2d3efed 211/* Must run before zap_low_mappings */
a3142c8e 212__meminit void *early_ioremap(unsigned long addr, unsigned long size)
f2d3efed 213{
dafe41ee
VG
214 unsigned long vaddr;
215 pmd_t *pmd, *last_pmd;
216 int i, pmds;
217
218 pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
219 vaddr = __START_KERNEL_map;
220 pmd = level2_kernel_pgt;
221 last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
222 for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
223 for (i = 0; i < pmds; i++) {
224 if (pmd_present(pmd[i]))
225 goto next;
226 }
227 vaddr += addr & ~PMD_MASK;
228 addr &= PMD_MASK;
229 for (i = 0; i < pmds; i++, addr += PMD_SIZE)
929fd589 230 set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
dafe41ee
VG
231 __flush_tlb();
232 return (void *)vaddr;
233 next:
234 ;
f2d3efed 235 }
dafe41ee
VG
236 printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
237 return NULL;
f2d3efed
AK
238}
239
240/* To avoid virtual aliases later */
a3142c8e 241__meminit void early_iounmap(void *addr, unsigned long size)
f2d3efed 242{
dafe41ee
VG
243 unsigned long vaddr;
244 pmd_t *pmd;
245 int i, pmds;
246
247 vaddr = (unsigned long)addr;
248 pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
249 pmd = level2_kernel_pgt + pmd_index(vaddr);
250 for (i = 0; i < pmds; i++)
251 pmd_clear(pmd + i);
f2d3efed
AK
252 __flush_tlb();
253}
254
44df75e6 255static void __meminit
6ad91658 256phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
44df75e6 257{
6ad91658 258 int i = pmd_index(address);
44df75e6 259
6ad91658 260 for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
44df75e6 261 unsigned long entry;
6ad91658 262 pmd_t *pmd = pmd_page + pmd_index(address);
44df75e6 263
5f51e139
JB
264 if (address >= end) {
265 if (!after_bootmem)
266 for (; i < PTRS_PER_PMD; i++, pmd++)
267 set_pmd(pmd, __pmd(0));
44df75e6
MT
268 break;
269 }
6ad91658
KM
270
271 if (pmd_val(*pmd))
272 continue;
273
40842bf5 274 entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
44df75e6
MT
275 entry &= __supported_pte_mask;
276 set_pmd(pmd, __pmd(entry));
277 }
278}
279
280static void __meminit
281phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
282{
6ad91658
KM
283 pmd_t *pmd = pmd_offset(pud,0);
284 spin_lock(&init_mm.page_table_lock);
285 phys_pmd_init(pmd, address, end);
286 spin_unlock(&init_mm.page_table_lock);
287 __flush_tlb_all();
44df75e6
MT
288}
289
6ad91658 290static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
1da177e4 291{
6ad91658 292 int i = pud_index(addr);
44df75e6 293
44df75e6 294
6ad91658 295 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
6ad91658
KM
296 unsigned long pmd_phys;
297 pud_t *pud = pud_page + pud_index(addr);
1da177e4
LT
298 pmd_t *pmd;
299
6ad91658 300 if (addr >= end)
1da177e4 301 break;
1da177e4 302
6ad91658 303 if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
1da177e4
LT
304 set_pud(pud, __pud(0));
305 continue;
306 }
307
6ad91658
KM
308 if (pud_val(*pud)) {
309 phys_pmd_update(pud, addr, end);
310 continue;
311 }
312
dafe41ee 313 pmd = alloc_low_page(&pmd_phys);
44df75e6 314 spin_lock(&init_mm.page_table_lock);
1da177e4 315 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
6ad91658 316 phys_pmd_init(pmd, addr, end);
44df75e6 317 spin_unlock(&init_mm.page_table_lock);
dafe41ee 318 unmap_low_page(pmd);
1da177e4
LT
319 }
320 __flush_tlb();
321}
322
323static void __init find_early_table_space(unsigned long end)
324{
6c5acd16 325 unsigned long puds, pmds, tables, start;
1da177e4
LT
326
327 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
328 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
329 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
330 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
331
ee408c79
AK
332 /* RED-PEN putting page tables only on node 0 could
333 cause a hotspot and fill up ZONE_DMA. The page tables
334 need roughly 0.5KB per GB. */
335 start = 0x8000;
336 table_start = find_e820_area(start, end, tables);
1da177e4
LT
337 if (table_start == -1UL)
338 panic("Cannot find space for the kernel page tables");
339
340 table_start >>= PAGE_SHIFT;
341 table_end = table_start;
44df75e6
MT
342
343 early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
5f51e139
JB
344 end, table_start << PAGE_SHIFT,
345 (table_start << PAGE_SHIFT) + tables);
1da177e4
LT
346}
347
348/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
349 This runs before bootmem is initialized and gets pages directly from the
350 physical memory. To access them they are temporarily mapped. */
b6fd6ecb 351void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
1da177e4
LT
352{
353 unsigned long next;
354
355 Dprintk("init_memory_mapping\n");
356
357 /*
358 * Find space for the kernel direct mapping tables.
359 * Later we should allocate these tables in the local node of the memory
360 * mapped. Unfortunately this is done currently before the nodes are
361 * discovered.
362 */
44df75e6
MT
363 if (!after_bootmem)
364 find_early_table_space(end);
1da177e4
LT
365
366 start = (unsigned long)__va(start);
367 end = (unsigned long)__va(end);
368
369 for (; start < end; start = next) {
1da177e4 370 unsigned long pud_phys;
44df75e6
MT
371 pgd_t *pgd = pgd_offset_k(start);
372 pud_t *pud;
373
374 if (after_bootmem)
d2ae5b5f 375 pud = pud_offset(pgd, start & PGDIR_MASK);
44df75e6 376 else
dafe41ee 377 pud = alloc_low_page(&pud_phys);
44df75e6 378
1da177e4
LT
379 next = start + PGDIR_SIZE;
380 if (next > end)
381 next = end;
382 phys_pud_init(pud, __pa(start), __pa(next));
44df75e6
MT
383 if (!after_bootmem)
384 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
dafe41ee 385 unmap_low_page(pud);
1da177e4
LT
386 }
387
44df75e6 388 if (!after_bootmem)
f51c9452 389 mmu_cr4_features = read_cr4();
1da177e4 390 __flush_tlb_all();
75175278
AK
391
392 reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
1da177e4
LT
393}
394
2b97690f 395#ifndef CONFIG_NUMA
1da177e4
LT
396void __init paging_init(void)
397{
6391af17
MG
398 unsigned long max_zone_pfns[MAX_NR_ZONES];
399 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
400 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
401 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
402 max_zone_pfns[ZONE_NORMAL] = end_pfn;
403
44df75e6
MT
404 memory_present(0, 0, end_pfn);
405 sparse_init();
5cb248ab 406 free_area_init_nodes(max_zone_pfns);
1da177e4
LT
407}
408#endif
409
410/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
411 from the CPU leading to inconsistent cache lines. address and size
412 must be aligned to 2MB boundaries.
413 Does nothing when the mapping doesn't exist. */
414void __init clear_kernel_mapping(unsigned long address, unsigned long size)
415{
416 unsigned long end = address + size;
417
418 BUG_ON(address & ~LARGE_PAGE_MASK);
419 BUG_ON(size & ~LARGE_PAGE_MASK);
420
421 for (; address < end; address += LARGE_PAGE_SIZE) {
422 pgd_t *pgd = pgd_offset_k(address);
423 pud_t *pud;
424 pmd_t *pmd;
425 if (pgd_none(*pgd))
426 continue;
427 pud = pud_offset(pgd, address);
428 if (pud_none(*pud))
429 continue;
430 pmd = pmd_offset(pud, address);
431 if (!pmd || pmd_none(*pmd))
432 continue;
433 if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
434 /* Could handle this, but it should not happen currently. */
435 printk(KERN_ERR
436 "clear_kernel_mapping: mapping has been split. will leak memory\n");
437 pmd_ERROR(*pmd);
438 }
439 set_pmd(pmd, __pmd(0));
440 }
441 __flush_tlb_all();
442}
443
44df75e6
MT
444/*
445 * Memory hotplug specific functions
44df75e6 446 */
44df75e6
MT
447void online_page(struct page *page)
448{
449 ClearPageReserved(page);
7835e98b 450 init_page_count(page);
44df75e6
MT
451 __free_page(page);
452 totalram_pages++;
453 num_physpages++;
454}
455
bc02af93 456#ifdef CONFIG_MEMORY_HOTPLUG
9d99aaa3
AK
457/*
458 * Memory is added always to NORMAL zone. This means you will never get
459 * additional DMA/DMA32 memory.
460 */
bc02af93 461int arch_add_memory(int nid, u64 start, u64 size)
44df75e6 462{
bc02af93 463 struct pglist_data *pgdat = NODE_DATA(nid);
776ed98b 464 struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
44df75e6
MT
465 unsigned long start_pfn = start >> PAGE_SHIFT;
466 unsigned long nr_pages = size >> PAGE_SHIFT;
467 int ret;
468
45e0b78b
KM
469 init_memory_mapping(start, (start + size -1));
470
44df75e6
MT
471 ret = __add_pages(zone, start_pfn, nr_pages);
472 if (ret)
473 goto error;
474
44df75e6
MT
475 return ret;
476error:
477 printk("%s: Problem encountered in __add_pages!\n", __func__);
478 return ret;
479}
bc02af93 480EXPORT_SYMBOL_GPL(arch_add_memory);
44df75e6 481
8243229f 482#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
4942e998
KM
483int memory_add_physaddr_to_nid(u64 start)
484{
485 return 0;
486}
8c2676a5 487EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
4942e998
KM
488#endif
489
45e0b78b
KM
490#endif /* CONFIG_MEMORY_HOTPLUG */
491
1da177e4
LT
492static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
493 kcore_vsyscall;
494
495void __init mem_init(void)
496{
0a43e4bf 497 long codesize, reservedpages, datasize, initsize;
1da177e4 498
0dc243ae 499 pci_iommu_alloc();
1da177e4 500
48ddb154 501 /* clear_bss() already clear the empty_zero_page */
1da177e4 502
f2633105
IM
503 /* temporary debugging - double check it's true: */
504 {
505 int i;
506
507 for (i = 0; i < 1024; i++)
508 WARN_ON_ONCE(empty_zero_page[i]);
509 }
510
1da177e4
LT
511 reservedpages = 0;
512
513 /* this will put all low memory onto the freelists */
2b97690f 514#ifdef CONFIG_NUMA
0a43e4bf 515 totalram_pages = numa_free_all_bootmem();
1da177e4 516#else
0a43e4bf 517 totalram_pages = free_all_bootmem();
1da177e4 518#endif
5cb248ab
MG
519 reservedpages = end_pfn - totalram_pages -
520 absent_pages_in_range(0, end_pfn);
1da177e4
LT
521
522 after_bootmem = 1;
523
524 codesize = (unsigned long) &_etext - (unsigned long) &_text;
525 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
526 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
527
528 /* Register memory areas for /proc/kcore */
529 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
530 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
531 VMALLOC_END-VMALLOC_START);
532 kclist_add(&kcore_kernel, &_stext, _end - _stext);
533 kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
534 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
535 VSYSCALL_END - VSYSCALL_START);
536
0a43e4bf 537 printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
1da177e4
LT
538 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
539 end_pfn << (PAGE_SHIFT-10),
540 codesize >> 10,
541 reservedpages << (PAGE_SHIFT-10),
542 datasize >> 10,
543 initsize >> 10);
1da177e4
LT
544}
545
d167a518 546void free_init_pages(char *what, unsigned long begin, unsigned long end)
1da177e4
LT
547{
548 unsigned long addr;
549
d167a518
GH
550 if (begin >= end)
551 return;
552
6fb14755 553 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
d167a518 554 for (addr = begin; addr < end; addr += PAGE_SIZE) {
e3ebadd9
LT
555 ClearPageReserved(virt_to_page(addr));
556 init_page_count(virt_to_page(addr));
557 memset((void *)(addr & ~(PAGE_SIZE-1)),
558 POISON_FREE_INITMEM, PAGE_SIZE);
6fb14755
JB
559 if (addr >= __START_KERNEL_map)
560 change_page_attr_addr(addr, 1, __pgprot(0));
e3ebadd9 561 free_page(addr);
1da177e4
LT
562 totalram_pages++;
563 }
6fb14755
JB
564 if (addr > __START_KERNEL_map)
565 global_flush_tlb();
d167a518
GH
566}
567
568void free_initmem(void)
569{
d167a518 570 free_init_pages("unused kernel memory",
e3ebadd9
LT
571 (unsigned long)(&__init_begin),
572 (unsigned long)(&__init_end));
1da177e4
LT
573}
574
67df197b
AV
575#ifdef CONFIG_DEBUG_RODATA
576
67df197b
AV
577void mark_rodata_ro(void)
578{
e3ebadd9 579 unsigned long start = (unsigned long)_stext, end;
67df197b 580
602033ed
LT
581#ifdef CONFIG_HOTPLUG_CPU
582 /* It must still be possible to apply SMP alternatives. */
583 if (num_possible_cpus() > 1)
584 start = (unsigned long)_etext;
585#endif
586
587#ifdef CONFIG_KPROBES
588 start = (unsigned long)__start_rodata;
589#endif
590
e3ebadd9
LT
591 end = (unsigned long)__end_rodata;
592 start = (start + PAGE_SIZE - 1) & PAGE_MASK;
593 end &= PAGE_MASK;
594 if (end <= start)
595 return;
596
597 change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
67df197b 598
6fb14755 599 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
e3ebadd9 600 (end - start) >> 10);
67df197b
AV
601
602 /*
603 * change_page_attr_addr() requires a global_flush_tlb() call after it.
604 * We do this after the printk so that if something went wrong in the
605 * change, the printk gets out at least to give a better debug hint
606 * of who is the culprit.
607 */
608 global_flush_tlb();
609}
610#endif
611
1da177e4
LT
612#ifdef CONFIG_BLK_DEV_INITRD
/* Free the pages of the initrd image occupying [start, end). */
void free_initrd_mem(unsigned long start, unsigned long end)
{
	free_init_pages("initrd memory", start, end);
}
617#endif
618
619void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
620{
2b97690f 621#ifdef CONFIG_NUMA
1da177e4 622 int nid = phys_to_nid(phys);
5e58a02a
AK
623#endif
624 unsigned long pfn = phys >> PAGE_SHIFT;
625 if (pfn >= end_pfn) {
626 /* This can happen with kdump kernels when accessing firmware
627 tables. */
628 if (pfn < end_pfn_map)
629 return;
630 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
631 phys, len);
632 return;
633 }
634
635 /* Should check here against the e820 map to avoid double free */
636#ifdef CONFIG_NUMA
1da177e4
LT
637 reserve_bootmem_node(NODE_DATA(nid), phys, len);
638#else
639 reserve_bootmem(phys, len);
640#endif
0e0b864e 641 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
e18c6874 642 dma_reserve += len / PAGE_SIZE;
0e0b864e
MG
643 set_dma_reserve(dma_reserve);
644 }
1da177e4
LT
645}
646
647int kern_addr_valid(unsigned long addr)
648{
649 unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
650 pgd_t *pgd;
651 pud_t *pud;
652 pmd_t *pmd;
653 pte_t *pte;
654
655 if (above != 0 && above != -1UL)
656 return 0;
657
658 pgd = pgd_offset_k(addr);
659 if (pgd_none(*pgd))
660 return 0;
661
662 pud = pud_offset(pgd, addr);
663 if (pud_none(*pud))
664 return 0;
665
666 pmd = pmd_offset(pud, addr);
667 if (pmd_none(*pmd))
668 return 0;
669 if (pmd_large(*pmd))
670 return pfn_valid(pmd_pfn(*pmd));
671
672 pte = pte_offset_kernel(pmd, addr);
673 if (pte_none(*pte))
674 return 0;
675 return pfn_valid(pte_pfn(*pte));
676}
677
103efcd9 678/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
1e014410
AK
679 covers the 64bit vsyscall page now. 32bit has a real VMA now and does
680 not need special handling anymore. */
1da177e4
LT
681
682static struct vm_area_struct gate_vma = {
683 .vm_start = VSYSCALL_START,
103efcd9
EP
684 .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
685 .vm_page_prot = PAGE_READONLY_EXEC,
686 .vm_flags = VM_READ | VM_EXEC
1da177e4
LT
687};
688
1da177e4
LT
689struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
690{
691#ifdef CONFIG_IA32_EMULATION
1e014410
AK
692 if (test_tsk_thread_flag(tsk, TIF_IA32))
693 return NULL;
1da177e4
LT
694#endif
695 return &gate_vma;
696}
697
698int in_gate_area(struct task_struct *task, unsigned long addr)
699{
700 struct vm_area_struct *vma = get_gate_vma(task);
1e014410
AK
701 if (!vma)
702 return 0;
1da177e4
LT
703 return (addr >= vma->vm_start) && (addr < vma->vm_end);
704}
705
706/* Use this when you have no reliable task/vma, typically from interrupt
707 * context. It is less reliable than using the task's vma and may give
708 * false positives.
709 */
710int in_gate_area_no_task(unsigned long addr)
711{
1e014410 712 return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
1da177e4 713}
2e1c49db 714
2aae950b
AK
715const char *arch_vma_name(struct vm_area_struct *vma)
716{
717 if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
718 return "[vdso]";
719 if (vma == &gate_vma)
720 return "[vsyscall]";
721 return NULL;
722}
0889eba5
CL
723
724#ifdef CONFIG_SPARSEMEM_VMEMMAP
725/*
726 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
727 */
728int __meminit vmemmap_populate(struct page *start_page,
729 unsigned long size, int node)
730{
731 unsigned long addr = (unsigned long)start_page;
732 unsigned long end = (unsigned long)(start_page + size);
733 unsigned long next;
734 pgd_t *pgd;
735 pud_t *pud;
736 pmd_t *pmd;
737
738 for (; addr < end; addr = next) {
739 next = pmd_addr_end(addr, end);
740
741 pgd = vmemmap_pgd_populate(addr, node);
742 if (!pgd)
743 return -ENOMEM;
744 pud = vmemmap_pud_populate(pgd, addr, node);
745 if (!pud)
746 return -ENOMEM;
747
748 pmd = pmd_offset(pud, addr);
749 if (pmd_none(*pmd)) {
750 pte_t entry;
751 void *p = vmemmap_alloc_block(PMD_SIZE, node);
752 if (!p)
753 return -ENOMEM;
754
27ec161f 755 entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);
0889eba5
CL
756 set_pmd(pmd, __pmd(pte_val(entry)));
757
758 printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
759 addr, addr + PMD_SIZE - 1, p, node);
760 } else
761 vmemmap_verify((pte_t *)pmd, node, addr, next);
762 }
763
764 return 0;
765}
766#endif