]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/x86/kernel/cpu/mtrr/cleanup.c
x86: separate mtrr cleanup/mtrr_e820 trim to separate file
[net-next-2.6.git] / arch / x86 / kernel / cpu / mtrr / cleanup.c
CommitLineData
0d890355
YL
1/* MTRR (Memory Type Range Register) cleanup
2
3 Copyright (C) 2009 Yinghai Lu
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with this library; if not, write to the Free
17 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18*/
19
20#include <linux/module.h>
21#include <linux/init.h>
22#include <linux/pci.h>
23#include <linux/smp.h>
24#include <linux/cpu.h>
25#include <linux/mutex.h>
26#include <linux/sort.h>
27
28#include <asm/e820.h>
29#include <asm/mtrr.h>
30#include <asm/uaccess.h>
31#include <asm/processor.h>
32#include <asm/msr.h>
33#include <asm/kvm_para.h>
34#include "mtrr.h"
35
36/* should be related to MTRR_VAR_RANGES nums */
37#define RANGE_NUM 256
38
/*
 * An inclusive [start, end] range in page-frame numbers.
 * An entry with end == 0 marks an unused slot in a range table.
 */
struct res_range {
	unsigned long start;	/* first pfn of the range (inclusive) */
	unsigned long end;	/* last pfn of the range (inclusive); 0 = unused */
};
43
44static int __init
45add_range(struct res_range *range, int nr_range, unsigned long start,
46 unsigned long end)
47{
48 /* out of slots */
49 if (nr_range >= RANGE_NUM)
50 return nr_range;
51
52 range[nr_range].start = start;
53 range[nr_range].end = end;
54
55 nr_range++;
56
57 return nr_range;
58}
59
/*
 * Add [start, end] to @range, merging it into the first existing entry
 * it overlaps or touches.  Only one merge pass is done, so an addition
 * that would bridge two existing entries leaves them separate; callers
 * rely on a later subtract/sort pass to normalise the table.
 */
static int __init
add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
		     unsigned long end)
{
	int i;

	/* try to merge it with old one */
	for (i = 0; i < nr_range; i++) {
		unsigned long final_start, final_end;
		unsigned long common_start, common_end;

		if (!range[i].end)
			continue;

		/* merge when overlapping OR directly adjacent (the +1) */
		common_start = max(range[i].start, start);
		common_end = min(range[i].end, end);
		if (common_start > common_end + 1)
			continue;

		/* grow the existing entry to the union of both ranges */
		final_start = min(range[i].start, start);
		final_end = max(range[i].end, end);

		range[i].start = final_start;
		range[i].end = final_end;
		return nr_range;
	}

	/* need to add that */
	return add_range(range, nr_range, start, end);
}
90
91static void __init
92subtract_range(struct res_range *range, unsigned long start, unsigned long end)
93{
94 int i, j;
95
96 for (j = 0; j < RANGE_NUM; j++) {
97 if (!range[j].end)
98 continue;
99
100 if (start <= range[j].start && end >= range[j].end) {
101 range[j].start = 0;
102 range[j].end = 0;
103 continue;
104 }
105
106 if (start <= range[j].start && end < range[j].end &&
107 range[j].start < end + 1) {
108 range[j].start = end + 1;
109 continue;
110 }
111
112
113 if (start > range[j].start && end >= range[j].end &&
114 range[j].end > start - 1) {
115 range[j].end = start - 1;
116 continue;
117 }
118
119 if (start > range[j].start && end < range[j].end) {
120 /* find the new spare */
121 for (i = 0; i < RANGE_NUM; i++) {
122 if (range[i].end == 0)
123 break;
124 }
125 if (i < RANGE_NUM) {
126 range[i].end = range[j].end;
127 range[i].start = end + 1;
128 } else {
129 printk(KERN_ERR "run of slot in ranges\n");
130 }
131 range[j].end = start - 1;
132 continue;
133 }
134 }
135}
136
137static int __init cmp_range(const void *x1, const void *x2)
138{
139 const struct res_range *r1 = x1;
140 const struct res_range *r2 = x2;
141 long start1, start2;
142
143 start1 = r1->start;
144 start2 = r2->start;
145
146 return start1 - start2;
147}
148
/* Saved contents of one variable-range MTRR, in page-frame units. */
struct var_mtrr_range_state {
	unsigned long base_pfn;	/* first pfn the register covers */
	unsigned long size_pfn;	/* size in pages; 0 = register unused */
	mtrr_type type;		/* memory type (WB/UC/WP/...) */
};

/* Shadow copy of all variable MTRRs; candidate layouts are tried here. */
static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
/* Verbose output; set by the "mtrr_cleanup_debug" early param. */
static int __initdata debug_print;
157
/*
 * Build the list of pfn ranges that end up write-back cacheable under
 * the current shadow MTRR state (range_state[]): union of all WB
 * registers, minus all UC/WP registers, minus an optional extra
 * [extra_remove_base, +extra_remove_size) region (used for the
 * above-4G TOM2 area).  The result in @range is sorted by start pfn,
 * unused tail slots are zeroed, and the entry count is returned.
 */
static int __init
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
		       unsigned long extra_remove_base,
		       unsigned long extra_remove_size)
{
	unsigned long i, base, size;
	mtrr_type type;

	/* collect the union of all write-back ranges */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		nr_range = add_range_with_merge(range, nr_range, base,
						base + size - 1);
	}
	if (debug_print) {
		printk(KERN_DEBUG "After WB checking\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
	}

	/* take out UC ranges */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_UNCACHABLE &&
		    type != MTRR_TYPE_WRPROT)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			continue;
		base = range_state[i].base_pfn;
		subtract_range(range, base, base + size - 1);
	}
	if (extra_remove_size)
		subtract_range(range, extra_remove_base,
				extra_remove_base + extra_remove_size - 1);

	/* get new range num (subtract may have emptied/split entries) */
	nr_range = 0;
	for (i = 0; i < RANGE_NUM; i++) {
		if (!range[i].end)
			continue;
		nr_range++;
	}
	if (debug_print) {
		printk(KERN_DEBUG "After UC checking\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
	}

	/* sort the ranges */
	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
	if (debug_print) {
		printk(KERN_DEBUG "After sorting\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
	}

	/* clear those is not used */
	for (i = nr_range; i < RANGE_NUM; i++)
		memset(&range[i], 0, sizeof(range[i]));

	return nr_range;
}
227
/* Working range table shared by mtrr_cleanup()/mtrr_trim_uncached_memory(). */
static struct res_range __initdata range[RANGE_NUM];
static int __initdata nr_range;	/* number of used entries in range[] */
230
231#ifdef CONFIG_MTRR_SANITIZER
232
233static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
234{
235 unsigned long sum;
236 int i;
237
238 sum = 0;
239 for (i = 0; i < nr_range; i++)
240 sum += range[i].end + 1 - range[i].start;
241
242 return sum;
243}
244
/* Whether the boot-time MTRR cleanup runs; toggled by the params below. */
static int enable_mtrr_cleanup __initdata =
	CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;

/* "disable_mtrr_cleanup": force the cleanup off. */
static int __init disable_mtrr_cleanup_setup(char *str)
{
	enable_mtrr_cleanup = 0;
	return 0;
}
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);

/* "enable_mtrr_cleanup": force the cleanup on. */
static int __init enable_mtrr_cleanup_setup(char *str)
{
	enable_mtrr_cleanup = 1;
	return 0;
}
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);

/* "mtrr_cleanup_debug": verbose output while searching for a layout. */
static int __init mtrr_cleanup_debug_setup(char *str)
{
	debug_print = 1;
	return 0;
}
early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
268
/* Rolling state while converting the range list into variable MTRRs. */
struct var_mtrr_state {
	unsigned long range_startk;	/* pending range start, in KiB */
	unsigned long range_sizek;	/* pending range size, in KiB */
	unsigned long chunk_sizek;	/* min block size that may take a UC hole */
	unsigned long gran_sizek;	/* granularity; smaller pieces are dropped */
	unsigned int reg;		/* next MTRR register index to fill */
};
276
/*
 * Program variable MTRR @reg with a range of @sizek KiB starting at
 * @basek KiB, of memory type @type.  sizek == 0 clears the register.
 * @address_bits is the CPU physical address width, used to build the
 * address mask.
 */
static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
	     unsigned char type, unsigned int address_bits)
{
	u32 base_lo, base_hi, mask_lo, mask_hi;
	u64 base, mask;

	if (!sizek) {
		fill_mtrr_var_range(reg, 0, 0, 0, 0);
		return;
	}

	/* mask covers the full address width minus the range size */
	mask = (1ULL << address_bits) - 1;
	mask &= ~((((u64)sizek) << 10) - 1);

	base = ((u64)basek) << 10;

	base |= type;		/* memory type lives in the low byte of base */
	mask |= 0x800;		/* bit 11: range valid/enable */

	base_lo = base & ((1ULL<<32) - 1);
	base_hi = base >> 32;

	mask_lo = mask & ((1ULL<<32) - 1);
	mask_hi = mask >> 32;

	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
}
305
/*
 * Record a candidate setting for register @reg in the shadow state,
 * converting @basek/@sizek from KiB to page frames.  Nothing is
 * written to hardware here; see set_var_mtrr_all().
 */
static void __init
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
	      unsigned char type)
{
	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
	range_state[reg].type = type;
}
314
/*
 * Commit the whole shadow state (range_state[]) to the hardware
 * variable MTRRs, one register at a time.
 */
static void __init
set_var_mtrr_all(unsigned int address_bits)
{
	unsigned long basek, sizek;
	unsigned char type;
	unsigned int reg;

	for (reg = 0; reg < num_var_ranges; reg++) {
		/* convert back from pfn to KiB units for set_var_mtrr() */
		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
		type = range_state[reg].type;

		set_var_mtrr(reg, basek, sizek, type, address_bits);
	}
}
330
/*
 * Scale @sizek (a size in KiB) to the largest unit that divides it
 * evenly among KiB/MiB/GiB, storing the unit letter ('K', 'M' or 'G')
 * in *factorp.  Returns the scaled value.
 */
static unsigned long to_size_factor(unsigned long sizek, char *factorp)
{
	unsigned long scaled = sizek;
	char unit;

	if (scaled & ((1<<10) - 1)) {
		/* not an exact number of MiB: report in KiB */
		unit = 'K';
	} else if (scaled & ((1<<20) - 1)) {
		/* exact MiB but not exact GiB */
		unit = 'M';
		scaled >>= 10;
	} else {
		unit = 'G';
		scaled >>= 20;
	}

	*factorp = unit;

	return scaled;
}
351
352static unsigned int __init
353range_to_mtrr(unsigned int reg, unsigned long range_startk,
354 unsigned long range_sizek, unsigned char type)
355{
356 if (!range_sizek || (reg >= num_var_ranges))
357 return reg;
358
359 while (range_sizek) {
360 unsigned long max_align, align;
361 unsigned long sizek;
362
363 /* Compute the maximum size I can make a range */
364 if (range_startk)
365 max_align = ffs(range_startk) - 1;
366 else
367 max_align = 32;
368 align = fls(range_sizek) - 1;
369 if (align > max_align)
370 align = max_align;
371
372 sizek = 1 << align;
373 if (debug_print) {
374 char start_factor = 'K', size_factor = 'K';
375 unsigned long start_base, size_base;
376
377 start_base = to_size_factor(range_startk,
378 &start_factor),
379 size_base = to_size_factor(sizek, &size_factor),
380
381 printk(KERN_DEBUG "Setting variable MTRR %d, "
382 "base: %ld%cB, range: %ld%cB, type %s\n",
383 reg, start_base, start_factor,
384 size_base, size_factor,
385 (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
386 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
387 );
388 }
389 save_var_mtrr(reg++, range_startk, sizek, type);
390 range_startk += sizek;
391 range_sizek -= sizek;
392 if (reg >= num_var_ranges)
393 break;
394 }
395 return reg;
396}
397
/*
 * Lay out MTRRs for the pending range in @state, allowing one UC
 * "hole" register to round the WB coverage up to a chunk boundary
 * when that uses fewer registers than exact coverage.
 *
 * @basek/@sizek describe the NEXT range (both 0 when flushing the
 * last one); part of the chunk-rounded coverage may spill into it,
 * and the spilled size ("second_sizek") is returned so the caller can
 * shrink the next range accordingly.  Returns 0 when nothing spills.
 */
static unsigned __init
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
			unsigned long sizek)
{
	unsigned long hole_basek, hole_sizek;
	unsigned long second_basek, second_sizek;
	unsigned long range0_basek, range0_sizek;
	unsigned long range_basek, range_sizek;
	unsigned long chunk_sizek;
	unsigned long gran_sizek;

	hole_basek = 0;
	hole_sizek = 0;
	second_basek = 0;	/* NOTE(review): set but never used below */
	second_sizek = 0;
	chunk_sizek = state->chunk_sizek;
	gran_sizek = state->gran_sizek;

	/* align with gran size, prevent small block used up MTRRs */
	range_basek = ALIGN(state->range_startk, gran_sizek);
	if ((range_basek > basek) && basek)
		return second_sizek;
	/* alignment moved the start up; shrink the size to match */
	state->range_sizek -= (range_basek - state->range_startk);
	range_sizek = ALIGN(state->range_sizek, gran_sizek);

	/* round the size DOWN to gran alignment; drop if nothing is left */
	while (range_sizek > state->range_sizek) {
		range_sizek -= gran_sizek;
		if (!range_sizek)
			return 0;
	}
	state->range_sizek = range_sizek;

	/* try to append some small hole */
	range0_basek = state->range_startk;
	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);

	/* no increase */
	if (range0_sizek == state->range_sizek) {
		if (debug_print)
			printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
				range0_basek<<10,
				(range0_basek + state->range_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				state->range_sizek, MTRR_TYPE_WRBACK);
		return 0;
	}

	/* only cut back, when it is not the last */
	if (sizek) {
		while (range0_basek + range0_sizek > (basek + sizek)) {
			if (range0_sizek >= chunk_sizek)
				range0_sizek -= chunk_sizek;
			else
				range0_sizek = 0;

			if (!range0_sizek)
				break;
		}
	}

second_try:
	range_basek = range0_basek + range0_sizek;

	/* one hole in the middle */
	if (range_basek > basek && range_basek <= (basek + sizek))
		second_sizek = range_basek - basek;

	if (range0_sizek > state->range_sizek) {

		/* one hole in middle or at end */
		hole_sizek = range0_sizek - state->range_sizek - second_sizek;

		/* hole size should be less than half of range0 size */
		if (hole_sizek >= (range0_sizek >> 1) &&
		    range0_sizek >= chunk_sizek) {
			/* too wasteful: retry with one chunk less */
			range0_sizek -= chunk_sizek;
			second_sizek = 0;
			hole_sizek = 0;

			goto second_try;
		}
	}

	/* main chunk-aligned WB block */
	if (range0_sizek) {
		if (debug_print)
			printk(KERN_DEBUG "range0: %016lx - %016lx\n",
				range0_basek<<10,
				(range0_basek + range0_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				range0_sizek, MTRR_TYPE_WRBACK);
	}

	if (range0_sizek < state->range_sizek) {
		/* need to handle left over */
		range_sizek = state->range_sizek - range0_sizek;

		if (debug_print)
			printk(KERN_DEBUG "range: %016lx - %016lx\n",
				range_basek<<10,
				(range_basek + range_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range_basek,
				range_sizek, MTRR_TYPE_WRBACK);
	}

	/* punch the UC hole out of the rounded-up coverage */
	if (hole_sizek) {
		hole_basek = range_basek - hole_sizek - second_sizek;
		if (debug_print)
			printk(KERN_DEBUG "hole: %016lx - %016lx\n",
				hole_basek<<10,
				(hole_basek + hole_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, hole_basek,
				hole_sizek, MTRR_TYPE_UNCACHABLE);
	}

	return second_sizek;
}
514
/*
 * Feed one memory range (pfn units) into the MTRR layout state.
 * Contiguous or low (< 1M-boundary) ranges are merged into the
 * pending range; otherwise the pending range is flushed to registers
 * first and this range becomes the new pending one.
 */
static void __init
set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
		   unsigned long size_pfn)
{
	unsigned long basek, sizek;
	unsigned long second_sizek = 0;

	if (state->reg >= num_var_ranges)
		return;

	basek = base_pfn << (PAGE_SHIFT - 10);
	sizek = size_pfn << (PAGE_SHIFT - 10);

	/* See if I can merge with the last range */
	if ((basek <= 1024) ||
	    (state->range_startk + state->range_sizek == basek)) {
		unsigned long endk = basek + sizek;
		state->range_sizek = endk - state->range_startk;
		return;
	}
	/* Write the range mtrrs */
	if (state->range_sizek != 0)
		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);

	/* Allocate an msr; skip whatever the flush already covered */
	state->range_startk = basek + second_sizek;
	state->range_sizek = sizek - second_sizek;
}
543
/* minimum size of an mtrr block that can take a UC hole */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);

/* "mtrr_chunk_size=<size>": override the chunk size (memparse format). */
static int __init parse_mtrr_chunk_size_opt(char *p)
{
	if (!p)
		return -EINVAL;
	mtrr_chunk_size = memparse(p, &p);
	return 0;
}
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);

/* granularity of an mtrr block; smaller leftovers are dropped */
static u64 mtrr_gran_size __initdata;

/* "mtrr_gran_size=<size>": override the granularity (memparse format). */
static int __init parse_mtrr_gran_size_opt(char *p)
{
	if (!p)
		return -EINVAL;
	mtrr_gran_size = memparse(p, &p);
	return 0;
}
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);

/* How many variable MTRRs to keep free for run-time users. */
static int nr_mtrr_spare_reg __initdata =
	CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;

/* "mtrr_spare_reg_nr=<n>": override the spare register count. */
static int __init parse_mtrr_spare_reg(char *arg)
{
	if (arg)
		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
	return 0;
}

early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
579
/*
 * Convert the sorted range list into a full shadow variable-MTRR
 * layout (range_state[]) using the given chunk/gran sizes.  Unused
 * registers are cleared in the shadow state.  Returns the number of
 * registers the layout uses.
 */
static int __init
x86_setup_var_mtrrs(struct res_range *range, int nr_range,
		    u64 chunk_size, u64 gran_size)
{
	struct var_mtrr_state var_state;
	int i;
	int num_reg;

	var_state.range_startk = 0;
	var_state.range_sizek = 0;
	var_state.reg = 0;
	var_state.chunk_sizek = chunk_size >> 10;	/* bytes -> KiB */
	var_state.gran_sizek = gran_size >> 10;

	memset(range_state, 0, sizeof(range_state));

	/* Write the range etc */
	for (i = 0; i < nr_range; i++)
		set_var_mtrr_range(&var_state, range[i].start,
				   range[i].end - range[i].start + 1);

	/* Write the last range */
	if (var_state.range_sizek != 0)
		range_to_mtrr_with_hole(&var_state, 0, 0);

	num_reg = var_state.reg;
	/* Clear out the extra MTRR's */
	while (var_state.reg < num_var_ranges) {
		save_var_mtrr(var_state.reg, 0, 0, 0);
		var_state.reg++;
	}

	return num_reg;
}
614
/* Outcome of trying one (gran_size, chunk_size) layout candidate. */
struct mtrr_cleanup_result {
	unsigned long gran_sizek;	/* granularity tried, KiB */
	unsigned long chunk_sizek;	/* chunk size tried, KiB */
	unsigned long lose_cover_sizek;	/* RAM coverage lost, KiB */
	unsigned int num_reg;		/* registers the layout needs */
	int bad;			/* nonzero: layout changed the WB map */
};

/*
 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
 * chunk size: gran_size, ..., 2G
 * so we need (1+16)*8
 */
#define NUM_RESULT	136
#define PSHIFT		(PAGE_SHIFT - 10)	/* pages -> KiB shift */

static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
/* min_loss_pfn[n]: best (smallest) coverage loss seen with n registers */
static unsigned long __initdata min_loss_pfn[RANGE_NUM];
633
634static void __init print_out_mtrr_range_state(void)
635{
636 int i;
637 char start_factor = 'K', size_factor = 'K';
638 unsigned long start_base, size_base;
639 mtrr_type type;
640
641 for (i = 0; i < num_var_ranges; i++) {
642
643 size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
644 if (!size_base)
645 continue;
646
647 size_base = to_size_factor(size_base, &size_factor),
648 start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
649 start_base = to_size_factor(start_base, &start_factor),
650 type = range_state[i].type;
651
652 printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
653 i, start_base, start_factor,
654 size_base, size_factor,
655 (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
656 ((type == MTRR_TYPE_WRPROT) ? "WP" :
657 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
658 );
659 }
660}
661
/*
 * Decide whether the cleanup should run: we need at least one UC
 * entry to get rid of, and we only know how to rebuild layouts made
 * purely of WB and UC/WP entries (WP is folded into UC here).
 * Returns 1 if a cleanup is both needed and possible.
 */
static int __init mtrr_need_cleanup(void)
{
	int i;
	mtrr_type type;
	unsigned long size;
	/* extra one for all 0 */
	int num[MTRR_NUM_TYPES + 1];

	/* check entries number */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		size = range_state[i].size_pfn;
		if (type >= MTRR_NUM_TYPES)
			continue;
		if (!size)
			type = MTRR_NUM_TYPES;	/* empty slot bucket */
		if (type == MTRR_TYPE_WRPROT)
			type = MTRR_TYPE_UNCACHABLE;
		num[type]++;
	}

	/* check if we got UC entries */
	if (!num[MTRR_TYPE_UNCACHABLE])
		return 0;

	/* check if we only had WB and UC */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
	    num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	return 1;
}
695
/* Page count covered by the ORIGINAL firmware MTRR layout (baseline). */
static unsigned long __initdata range_sums;

/*
 * Try one (chunk_size, gran_size) candidate: build a shadow layout,
 * re-derive the WB memory map it produces, and record register count
 * and coverage loss in result[i].  A layout that GAINS coverage, or
 * whose map differs from the original despite zero loss, is marked
 * bad.  min_loss_pfn[] tracks the best loss per register count.
 */
static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
					 unsigned long extra_remove_base,
					 unsigned long extra_remove_size,
					 int i)
{
	int num_reg;
	/* static: too big for the early boot stack */
	static struct res_range range_new[RANGE_NUM];
	static int nr_range_new;
	unsigned long range_sums_new;

	/* convert ranges to var ranges state */
	num_reg = x86_setup_var_mtrrs(range, nr_range,
				      chunk_size, gran_size);

	/* we got new setting in range_state, check it */
	memset(range_new, 0, sizeof(range_new));
	nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
			extra_remove_base, extra_remove_size);
	range_sums_new = sum_ranges(range_new, nr_range_new);

	result[i].chunk_sizek = chunk_size >> 10;
	result[i].gran_sizek = gran_size >> 10;
	result[i].num_reg = num_reg;
	if (range_sums < range_sums_new) {
		/* new layout covers MORE than the original: reject */
		result[i].lose_cover_sizek =
			(range_sums_new - range_sums) << PSHIFT;
		result[i].bad = 1;
	} else
		result[i].lose_cover_sizek =
			(range_sums - range_sums_new) << PSHIFT;

	/* double check it */
	if (!result[i].bad && !result[i].lose_cover_sizek) {
		if (nr_range_new != nr_range ||
			memcmp(range, range_new, sizeof(range)))
				result[i].bad = 1;
	}

	if (!result[i].bad && (range_sums - range_sums_new <
				min_loss_pfn[num_reg])) {
		min_loss_pfn[num_reg] =
			range_sums - range_sums_new;
	}
}
741
742static void __init mtrr_print_out_one_result(int i)
743{
744 char gran_factor, chunk_factor, lose_factor;
745 unsigned long gran_base, chunk_base, lose_base;
746
747 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
748 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
749 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
750 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
751 result[i].bad ? "*BAD*" : " ",
752 gran_base, gran_factor, chunk_base, chunk_factor);
753 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
754 result[i].num_reg, result[i].bad ? "-" : "",
755 lose_base, lose_factor);
756}
757
/*
 * Pick the best candidate from result[]: the layout that loses no
 * coverage with the fewest registers, while leaving at least
 * nr_mtrr_spare_reg registers free.  Returns the result[] index, or
 * -1 if no lossless layout exists.
 */
static int __init mtrr_search_optimal_index(void)
{
	int i;
	int num_reg_good;
	int index_good;

	if (nr_mtrr_spare_reg >= num_var_ranges)
		nr_mtrr_spare_reg = num_var_ranges - 1;
	num_reg_good = -1;
	/* downward scan leaves the SMALLEST lossless register count */
	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
		if (!min_loss_pfn[i])
			num_reg_good = i;
	}

	index_good = -1;
	if (num_reg_good != -1) {
		for (i = 0; i < NUM_RESULT; i++) {
			if (!result[i].bad &&
			    result[i].num_reg == num_reg_good &&
			    !result[i].lose_cover_sizek) {
				index_good = i;
				break;
			}
		}
	}

	return index_good;
}
786
787
/*
 * Boot-time MTRR sanitizer.  Reads the firmware's variable MTRRs,
 * derives the WB memory map they describe, then rebuilds an
 * equivalent map with fewer/cleaner registers — either from the
 * user-supplied mtrr_chunk_size/mtrr_gran_size, or by searching all
 * candidate combinations for the optimal one.
 *
 * @address_bits: CPU physical address width, for building MTRR masks.
 * Returns 1 when the MTRRs were rewritten, 0 when left untouched.
 */
int __init mtrr_cleanup(unsigned address_bits)
{
	unsigned long extra_remove_base, extra_remove_size;
	unsigned long base, size, def, dummy;
	mtrr_type type;
	u64 chunk_size, gran_size;
	int index_good;
	int i;

	/* only the Intel MTRR scheme with default type UC is handled */
	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
		return 0;
	rdmsr(MTRRdefType_MSR, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* get it and store it aside */
	memset(range_state, 0, sizeof(range_state));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		range_state[i].base_pfn = base;
		range_state[i].size_pfn = size;
		range_state[i].type = type;
	}

	/* check if we need handle it and can handle it */
	if (!mtrr_need_cleanup())
		return 0;

	/* print original var MTRRs at first, for debugging: */
	printk(KERN_DEBUG "original variable MTRRs\n");
	print_out_mtrr_range_state();

	memset(range, 0, sizeof(range));
	extra_remove_size = 0;
	/* above 4G, only the TOM2 window (if any) stays WB */
	extra_remove_base = 1 << (32 - PAGE_SHIFT);
	if (mtrr_tom2)
		extra_remove_size =
			(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
					  extra_remove_size);
	/*
	 * [0, 1M) should always be covered by var mtrr with WB
	 * and fixed mtrrs should take effect before var mtrr for it
	 */
	nr_range = add_range_with_merge(range, nr_range, 0,
					(1ULL<<(20 - PAGE_SHIFT)) - 1);
	/* sort the ranges */
	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);

	range_sums = sum_ranges(range, nr_range);
	printk(KERN_INFO "total RAM coverred: %ldM\n",
	       range_sums >> (20 - PAGE_SHIFT));

	/* user forced a specific layout: try only that one */
	if (mtrr_chunk_size && mtrr_gran_size) {
		i = 0;
		mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
				      extra_remove_base, extra_remove_size, i);

		mtrr_print_out_one_result(i);

		if (!result[i].bad) {
			set_var_mtrr_all(address_bits);
			printk(KERN_DEBUG "New variable MTRRs\n");
			print_out_mtrr_range_state();
			return 1;
		}
		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
		       "will find optimal one\n");
	}

	/* exhaustive search over gran/chunk power-of-two combinations */
	i = 0;
	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
	memset(result, 0, sizeof(result));
	for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {

		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
		     chunk_size <<= 1) {

			if (i >= NUM_RESULT)
				continue;

			mtrr_calc_range_state(chunk_size, gran_size,
				      extra_remove_base, extra_remove_size, i);
			if (debug_print) {
				mtrr_print_out_one_result(i);
				printk(KERN_INFO "\n");
			}

			i++;
		}
	}

	/* try to find the optimal index */
	index_good = mtrr_search_optimal_index();

	if (index_good != -1) {
		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
		i = index_good;
		mtrr_print_out_one_result(i);

		/* convert ranges to var ranges state */
		chunk_size = result[i].chunk_sizek;
		chunk_size <<= 10;
		gran_size = result[i].gran_sizek;
		gran_size <<= 10;
		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
		set_var_mtrr_all(address_bits);
		printk(KERN_DEBUG "New variable MTRRs\n");
		print_out_mtrr_range_state();
		return 1;
	} else {
		/* print out all */
		for (i = 0; i < NUM_RESULT; i++)
			mtrr_print_out_one_result(i);
	}

	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");

	return 0;
}
910#else
/* MTRR sanitizer disabled in Kconfig: never rewrite, report "unchanged". */
int __init mtrr_cleanup(unsigned address_bits)
{
	return 0;
}
915#endif
916
/* Set by "disable_mtrr_trim": skip e820 trimming of MTRR-uncovered RAM. */
static int disable_mtrr_trim;

static int __init disable_mtrr_trim_setup(char *str)
{
	disable_mtrr_trim = 1;
	return 0;
}
early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
925
926/*
927 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
928 * for memory >4GB. Check for that here.
929 * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
930 * apply to are wrong, but so far we don't know of any such case in the wild.
931 */
932#define Tom2Enabled (1U << 21)
933#define Tom2ForceMemTypeWB (1U << 22)
934
/*
 * Detect the AMD K8+ (family 0xf..0x11) SYSCFG magic that forces all
 * memory above 4GB (up to TOM2) to write-back regardless of the
 * variable MTRRs.  Returns 1 when both Tom2 bits are set, else 0.
 */
int __init amd_special_default_mtrr(void)
{
	u32 l, h;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return 0;
	if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
		return 0;
	/* In case some hypervisor doesn't pass SYSCFG through */
	if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
		return 0;
	/*
	 * Memory between 4GB and top of mem is forced WB by this magic bit.
	 * Reserved before K8RevF, but should be zero there.
	 */
	if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
		 (Tom2Enabled | Tom2ForceMemTypeWB))
		return 1;
	return 0;
}
955
956static u64 __init real_trim_memory(unsigned long start_pfn,
957 unsigned long limit_pfn)
958{
959 u64 trim_start, trim_size;
960 trim_start = start_pfn;
961 trim_start <<= PAGE_SHIFT;
962 trim_size = limit_pfn;
963 trim_size <<= PAGE_SHIFT;
964 trim_size -= trim_start;
965
966 return e820_update_range(trim_start, trim_size, E820_RAM,
967 E820_RESERVED);
968}
/**
 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
 * @end_pfn: ending page frame number
 *
 * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
 * memory configurations.  This routine checks that the highest MTRR matches
 * the end of memory, to make sure the MTRRs having a write back type cover
 * all of the memory the kernel is intending to use.  If not, it'll trim any
 * memory off the end by adjusting end_pfn, removing it from the kernel's
 * allocation pools, warning the user with an obnoxious message.
 *
 * Returns 1 when the e820 map was changed (caller must re-read it), 0
 * when nothing was trimmed.
 */
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
	unsigned long i, base, size, highest_pfn = 0, def, dummy;
	mtrr_type type;
	u64 total_trim_size;

	/* extra one for all 0 */
	int num[MTRR_NUM_TYPES + 1];
	/*
	 * Make sure we only trim uncachable memory on machines that
	 * support the Intel MTRR architecture:
	 */
	if (!is_cpu(INTEL) || disable_mtrr_trim)
		return 0;
	rdmsr(MTRRdefType_MSR, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* get it and store it aside */
	memset(range_state, 0, sizeof(range_state));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		range_state[i].base_pfn = base;
		range_state[i].size_pfn = size;
		range_state[i].type = type;
	}

	/* Find highest cached pfn */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		if (highest_pfn < base + size)
			highest_pfn = base + size;
	}

	/* kvm/qemu doesn't have mtrr set right, don't trim them all */
	if (!highest_pfn) {
		printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
		return 0;
	}

	/* check entries number */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type >= MTRR_NUM_TYPES)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			type = MTRR_NUM_TYPES;	/* empty slot bucket */
		num[type]++;
	}

	/* no entry for WB? */
	if (!num[MTRR_TYPE_WRBACK])
		return 0;

	/* check if we only had WB and UC */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
	    num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	memset(range, 0, sizeof(range));
	nr_range = 0;
	/* the TOM2 window above 4G counts as WB-covered too */
	if (mtrr_tom2) {
		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
		range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
		if (highest_pfn < range[nr_range].end + 1)
			highest_pfn = range[nr_range].end + 1;
		nr_range++;
	}
	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);

	total_trim_size = 0;
	/* check the head */
	if (range[0].start)
		total_trim_size += real_trim_memory(0, range[0].start);
	/* check the holes */
	for (i = 0; i < nr_range - 1; i++) {
		if (range[i].end + 1 < range[i+1].start)
			total_trim_size += real_trim_memory(range[i].end + 1,
							    range[i+1].start);
	}
	/* check the top */
	i = nr_range - 1;
	if (range[i].end + 1 < end_pfn)
		total_trim_size += real_trim_memory(range[i].end + 1,
							 end_pfn);

	if (total_trim_size) {
		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
			" all of memory, losing %lluMB of RAM.\n",
			total_trim_size >> 20);

		/* expected after our own cleanup; otherwise it's a BIOS bug */
		if (!changed_by_mtrr_cleanup)
			WARN_ON(1);

		printk(KERN_INFO "update e820 for mtrr\n");
		update_e820();

		return 1;
	}

	return 0;
}
1089