/* MTRR (Memory Type Range Register) cleanup

   Copyright (C) 2009 Yinghai Lu

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library; if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/kvm_para.h>
#include "mtrr.h"

/* should be related to the number of variable MTRR ranges (MTRR_VAR_RANGES) */
#define RANGE_NUM 256

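/*
 * Each res_range describes an inclusive PFN range [start, end].  A slot
 * whose .end is zero is treated as unused throughout this file.
 */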
struct res_range {
        unsigned long start;
        unsigned long end;
};

static int __init
add_range(struct res_range *range, int nr_range, unsigned long start,
          unsigned long end)
{
        /* out of slots */
        if (nr_range >= RANGE_NUM)
                return nr_range;

        range[nr_range].start = start;
        range[nr_range].end = end;

        nr_range++;

        return nr_range;
}

static int __init
add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
                     unsigned long end)
{
        int i;

        /* try to merge with an existing range */
        for (i = 0; i < nr_range; i++) {
                unsigned long final_start, final_end;
                unsigned long common_start, common_end;

                if (!range[i].end)
                        continue;

                common_start = max(range[i].start, start);
                common_end = min(range[i].end, end);
                if (common_start > common_end + 1)
                        continue;

                final_start = min(range[i].start, start);
                final_end = max(range[i].end, end);

                range[i].start = final_start;
                range[i].end = final_end;
                return nr_range;
        }

        /* nothing to merge with, so add a new range */
        return add_range(range, nr_range, start, end);
}

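/*
 * Remove [start, end] from every range it overlaps.  There are four cases
 * per slot: the subtracted span covers the whole slot (drop it), clips its
 * head, clips its tail, or punches a hole in the middle, which needs a
 * spare slot for the upper piece.
 */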
static void __init
subtract_range(struct res_range *range, unsigned long start, unsigned long end)
{
        int i, j;

        for (j = 0; j < RANGE_NUM; j++) {
                if (!range[j].end)
                        continue;

                if (start <= range[j].start && end >= range[j].end) {
                        range[j].start = 0;
                        range[j].end = 0;
                        continue;
                }

                if (start <= range[j].start && end < range[j].end &&
                    range[j].start < end + 1) {
                        range[j].start = end + 1;
                        continue;
                }

                if (start > range[j].start && end >= range[j].end &&
                    range[j].end > start - 1) {
                        range[j].end = start - 1;
                        continue;
                }

                if (start > range[j].start && end < range[j].end) {
                        /* find a spare slot for the upper piece */
                        for (i = 0; i < RANGE_NUM; i++) {
                                if (range[i].end == 0)
                                        break;
                        }
                        if (i < RANGE_NUM) {
                                range[i].end = range[j].end;
                                range[i].start = end + 1;
                        } else {
                                printk(KERN_ERR "run out of slots in ranges\n");
                        }
                        range[j].end = start - 1;
                        continue;
                }
        }
}

static int __init cmp_range(const void *x1, const void *x2)
{
        const struct res_range *r1 = x1;
        const struct res_range *r2 = x2;

        /* compare instead of subtracting, so large starts cannot truncate */
        if (r1->start < r2->start)
                return -1;
        if (r1->start > r2->start)
                return 1;

        return 0;
}

struct var_mtrr_range_state {
        unsigned long base_pfn;
        unsigned long size_pfn;
        mtrr_type type;
};

static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;

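/*
 * Build a sorted list of RAM ranges from the variable MTRRs stored in
 * range_state[]: collect the WB ranges, subtract the UC/WP ranges and an
 * optional extra range, then sort what is left and return the count.
 */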
static int __init
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
                       unsigned long extra_remove_base,
                       unsigned long extra_remove_size)
{
        unsigned long i, base, size;
        mtrr_type type;

        for (i = 0; i < num_var_ranges; i++) {
                type = range_state[i].type;
                if (type != MTRR_TYPE_WRBACK)
                        continue;
                base = range_state[i].base_pfn;
                size = range_state[i].size_pfn;
                nr_range = add_range_with_merge(range, nr_range, base,
                                                base + size - 1);
        }
        if (debug_print) {
                printk(KERN_DEBUG "After WB checking\n");
                for (i = 0; i < nr_range; i++)
                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
                               range[i].start, range[i].end + 1);
        }

        /* take out UC and WP ranges */
        for (i = 0; i < num_var_ranges; i++) {
                type = range_state[i].type;
                if (type != MTRR_TYPE_UNCACHABLE &&
                    type != MTRR_TYPE_WRPROT)
                        continue;
                size = range_state[i].size_pfn;
                if (!size)
                        continue;
                base = range_state[i].base_pfn;
                subtract_range(range, base, base + size - 1);
        }
        if (extra_remove_size)
                subtract_range(range, extra_remove_base,
                               extra_remove_base + extra_remove_size - 1);

        /* count the remaining ranges */
        nr_range = 0;
        for (i = 0; i < RANGE_NUM; i++) {
                if (!range[i].end)
                        continue;
                nr_range++;
        }
        if (debug_print) {
                printk(KERN_DEBUG "After UC checking\n");
                for (i = 0; i < nr_range; i++)
                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
                               range[i].start, range[i].end + 1);
        }

        /* sort the ranges */
        sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
        if (debug_print) {
                printk(KERN_DEBUG "After sorting\n");
                for (i = 0; i < nr_range; i++)
                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
                               range[i].start, range[i].end + 1);
        }

        /* clear the slots that are no longer used */
        for (i = nr_range; i < RANGE_NUM; i++)
                memset(&range[i], 0, sizeof(range[i]));

        return nr_range;
}

static struct res_range __initdata range[RANGE_NUM];
static int __initdata nr_range;

#ifdef CONFIG_MTRR_SANITIZER

static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
{
        unsigned long sum;
        int i;

        sum = 0;
        for (i = 0; i < nr_range; i++)
                sum += range[i].end + 1 - range[i].start;

        return sum;
}

static int enable_mtrr_cleanup __initdata =
        CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;

static int __init disable_mtrr_cleanup_setup(char *str)
{
        enable_mtrr_cleanup = 0;
        return 0;
}
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);

static int __init enable_mtrr_cleanup_setup(char *str)
{
        enable_mtrr_cleanup = 1;
        return 0;
}
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);

static int __init mtrr_cleanup_debug_setup(char *str)
{
        debug_print = 1;
        return 0;
}
early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
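
/*
 * For example, booting with "enable_mtrr_cleanup mtrr_cleanup_debug" on
 * the kernel command line turns the cleanup on and makes it print each step.
 */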

struct var_mtrr_state {
        unsigned long range_startk;
        unsigned long range_sizek;
        unsigned long chunk_sizek;
        unsigned long gran_sizek;
        unsigned int reg;
};

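/*
 * Encode a base/size pair (in KB) into MTRRphysBase/MTRRphysMask values
 * for fill_mtrr_var_range().  Per the Intel SDM, the base register carries
 * the memory type in its low byte and the mask register carries the valid
 * bit in bit 11 (the 0x800 below).
 */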
static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
             unsigned char type, unsigned int address_bits)
{
        u32 base_lo, base_hi, mask_lo, mask_hi;
        u64 base, mask;

        if (!sizek) {
                fill_mtrr_var_range(reg, 0, 0, 0, 0);
                return;
        }

        mask = (1ULL << address_bits) - 1;
        mask &= ~((((u64)sizek) << 10) - 1);

        base = ((u64)basek) << 10;

        base |= type;
        mask |= 0x800;

        base_lo = base & ((1ULL<<32) - 1);
        base_hi = base >> 32;

        mask_lo = mask & ((1ULL<<32) - 1);
        mask_hi = mask >> 32;

        fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
}

static void __init
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
              unsigned char type)
{
        range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
        range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
        range_state[reg].type = type;
}

static void __init
set_var_mtrr_all(unsigned int address_bits)
{
        unsigned long basek, sizek;
        unsigned char type;
        unsigned int reg;

        for (reg = 0; reg < num_var_ranges; reg++) {
                basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
                sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
                type = range_state[reg].type;

                set_var_mtrr(reg, basek, sizek, type, address_bits);
        }
}

static unsigned long to_size_factor(unsigned long sizek, char *factorp)
{
        char factor;
        unsigned long base = sizek;

        if (base & ((1<<10) - 1)) {
                /* not MB aligned */
                factor = 'K';
        } else if (base & ((1<<20) - 1)) {
                factor = 'M';
                base >>= 10;
        } else {
                factor = 'G';
                base >>= 20;
        }

        *factorp = factor;

        return base;
}

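/*
 * Split a range into the fewest naturally aligned power-of-two MTRRs.
 * Each step takes the largest chunk allowed by both the base alignment
 * and the remaining size.  For example, 5M at base 4M becomes a 4M MTRR
 * at 4M followed by a 1M MTRR at 8M.
 */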
static unsigned int __init
range_to_mtrr(unsigned int reg, unsigned long range_startk,
              unsigned long range_sizek, unsigned char type)
{
        if (!range_sizek || (reg >= num_var_ranges))
                return reg;

        while (range_sizek) {
                unsigned long max_align, align;
                unsigned long sizek;

                /* Compute the maximum size I can make a range */
                if (range_startk)
                        max_align = ffs(range_startk) - 1;
                else
                        max_align = 32;
                align = fls(range_sizek) - 1;
                if (align > max_align)
                        align = max_align;

                sizek = 1 << align;
                if (debug_print) {
                        char start_factor = 'K', size_factor = 'K';
                        unsigned long start_base, size_base;

                        start_base = to_size_factor(range_startk,
                                                    &start_factor);
                        size_base = to_size_factor(sizek, &size_factor);

                        printk(KERN_DEBUG "Setting variable MTRR %d, "
                               "base: %ld%cB, range: %ld%cB, type %s\n",
                               reg, start_base, start_factor,
                               size_base, size_factor,
                               (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
                               ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
                               );
                }
                save_var_mtrr(reg++, range_startk, sizek, type);
                range_startk += sizek;
                range_sizek -= sizek;
                if (reg >= num_var_ranges)
                        break;
        }
        return reg;
}

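/*
 * Lay out one WB range, honoring the two tunables: gran_sizek drops
 * pieces smaller than the granularity so they cannot eat up MTRRs, and
 * chunk_sizek lets a range be rounded up to a whole chunk, with the
 * overshoot punched back out as a UC "hole" MTRR when that is cheaper.
 */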
static unsigned __init
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
                        unsigned long sizek)
{
        unsigned long hole_basek, hole_sizek;
        unsigned long second_basek, second_sizek;
        unsigned long range0_basek, range0_sizek;
        unsigned long range_basek, range_sizek;
        unsigned long chunk_sizek;
        unsigned long gran_sizek;

        hole_basek = 0;
        hole_sizek = 0;
        second_basek = 0;
        second_sizek = 0;
        chunk_sizek = state->chunk_sizek;
        gran_sizek = state->gran_sizek;

        /* align to gran size so small blocks don't use up MTRRs */
        range_basek = ALIGN(state->range_startk, gran_sizek);
        if ((range_basek > basek) && basek)
                return second_sizek;
        state->range_sizek -= (range_basek - state->range_startk);
        range_sizek = ALIGN(state->range_sizek, gran_sizek);

        while (range_sizek > state->range_sizek) {
                range_sizek -= gran_sizek;
                if (!range_sizek)
                        return 0;
        }
        state->range_sizek = range_sizek;

        /* try to append some small hole */
        range0_basek = state->range_startk;
        range0_sizek = ALIGN(state->range_sizek, chunk_sizek);

        /* no increase */
        if (range0_sizek == state->range_sizek) {
                if (debug_print)
                        printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
                               range0_basek<<10,
                               (range0_basek + state->range_sizek)<<10);
                state->reg = range_to_mtrr(state->reg, range0_basek,
                                           state->range_sizek, MTRR_TYPE_WRBACK);
                return 0;
        }

        /* only cut back when it is not the last range */
        if (sizek) {
                while (range0_basek + range0_sizek > (basek + sizek)) {
                        if (range0_sizek >= chunk_sizek)
                                range0_sizek -= chunk_sizek;
                        else
                                range0_sizek = 0;

                        if (!range0_sizek)
                                break;
                }
        }

second_try:
        range_basek = range0_basek + range0_sizek;

        /* one hole in the middle */
        if (range_basek > basek && range_basek <= (basek + sizek))
                second_sizek = range_basek - basek;

        if (range0_sizek > state->range_sizek) {

                /* one hole in the middle or at the end */
                hole_sizek = range0_sizek - state->range_sizek - second_sizek;

                /* the hole should be less than half of range0's size */
                if (hole_sizek >= (range0_sizek >> 1) &&
                    range0_sizek >= chunk_sizek) {
                        range0_sizek -= chunk_sizek;
                        second_sizek = 0;
                        hole_sizek = 0;

                        goto second_try;
                }
        }

        if (range0_sizek) {
                if (debug_print)
                        printk(KERN_DEBUG "range0: %016lx - %016lx\n",
                               range0_basek<<10,
                               (range0_basek + range0_sizek)<<10);
                state->reg = range_to_mtrr(state->reg, range0_basek,
                                           range0_sizek, MTRR_TYPE_WRBACK);
        }

        if (range0_sizek < state->range_sizek) {
                /* handle the leftover piece */
                range_sizek = state->range_sizek - range0_sizek;

                if (debug_print)
                        printk(KERN_DEBUG "range: %016lx - %016lx\n",
                               range_basek<<10,
                               (range_basek + range_sizek)<<10);
                state->reg = range_to_mtrr(state->reg, range_basek,
                                           range_sizek, MTRR_TYPE_WRBACK);
        }

        if (hole_sizek) {
                hole_basek = range_basek - hole_sizek - second_sizek;
                if (debug_print)
                        printk(KERN_DEBUG "hole: %016lx - %016lx\n",
                               hole_basek<<10,
                               (hole_basek + hole_sizek)<<10);
                state->reg = range_to_mtrr(state->reg, hole_basek,
                                           hole_sizek, MTRR_TYPE_UNCACHABLE);
        }

        return second_sizek;
}

static void __init
set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
                   unsigned long size_pfn)
{
        unsigned long basek, sizek;
        unsigned long second_sizek = 0;

        if (state->reg >= num_var_ranges)
                return;

        basek = base_pfn << (PAGE_SHIFT - 10);
        sizek = size_pfn << (PAGE_SHIFT - 10);

        /* See if I can merge with the last range */
        if ((basek <= 1024) ||
            (state->range_startk + state->range_sizek == basek)) {
                unsigned long endk = basek + sizek;
                state->range_sizek = endk - state->range_startk;
                return;
        }
        /* Write the range MTRRs */
        if (state->range_sizek != 0)
                second_sizek = range_to_mtrr_with_hole(state, basek, sizek);

        /* Allocate an MSR */
        state->range_startk = basek + second_sizek;
        state->range_sizek = sizek - second_sizek;
}

/* minimum size of an MTRR block that can take a hole */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);

static int __init parse_mtrr_chunk_size_opt(char *p)
{
        if (!p)
                return -EINVAL;
        mtrr_chunk_size = memparse(p, &p);
        return 0;
}
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);

/* granularity of an MTRR block */
static u64 mtrr_gran_size __initdata;

static int __init parse_mtrr_gran_size_opt(char *p)
{
        if (!p)
                return -EINVAL;
        mtrr_gran_size = memparse(p, &p);
        return 0;
}
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
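
/*
 * Both options go through memparse(), so the usual suffixes work, e.g.
 * "mtrr_gran_size=64M mtrr_chunk_size=256M" on the kernel command line.
 */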

static int nr_mtrr_spare_reg __initdata =
        CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;

static int __init parse_mtrr_spare_reg(char *arg)
{
        if (arg)
                nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
        return 0;
}
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);

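/*
 * Convert the sorted RAM range list into variable-MTRR settings in
 * range_state[], using the given chunk and gran sizes; returns the
 * number of registers actually used.
 */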
static int __init
x86_setup_var_mtrrs(struct res_range *range, int nr_range,
                    u64 chunk_size, u64 gran_size)
{
        struct var_mtrr_state var_state;
        int i;
        int num_reg;

        var_state.range_startk = 0;
        var_state.range_sizek = 0;
        var_state.reg = 0;
        var_state.chunk_sizek = chunk_size >> 10;
        var_state.gran_sizek = gran_size >> 10;

        memset(range_state, 0, sizeof(range_state));

        /* Write the ranges */
        for (i = 0; i < nr_range; i++)
                set_var_mtrr_range(&var_state, range[i].start,
                                   range[i].end - range[i].start + 1);

        /* Write the last range */
        if (var_state.range_sizek != 0)
                range_to_mtrr_with_hole(&var_state, 0, 0);

        num_reg = var_state.reg;
        /* Clear out the extra MTRRs */
        while (var_state.reg < num_var_ranges) {
                save_var_mtrr(var_state.reg, 0, 0, 0);
                var_state.reg++;
        }

        return num_reg;
}

struct mtrr_cleanup_result {
        unsigned long gran_sizek;
        unsigned long chunk_sizek;
        unsigned long lose_cover_sizek;
        unsigned int num_reg;
        int bad;
};

/*
 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G (16 sizes)
 * chunk_size: gran_size, ..., 2G
 * so we need (1 + 16) * 16 / 2 = 136 result slots
 */
#define NUM_RESULT 136
#define PSHIFT (PAGE_SHIFT - 10)

static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
static unsigned long __initdata min_loss_pfn[RANGE_NUM];

static void __init print_out_mtrr_range_state(void)
{
        int i;
        char start_factor = 'K', size_factor = 'K';
        unsigned long start_base, size_base;
        mtrr_type type;

        for (i = 0; i < num_var_ranges; i++) {

                size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
                if (!size_base)
                        continue;

                size_base = to_size_factor(size_base, &size_factor);
                start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
                start_base = to_size_factor(start_base, &start_factor);
                type = range_state[i].type;

                printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
                       i, start_base, start_factor,
                       size_base, size_factor,
                       (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
                       ((type == MTRR_TYPE_WRPROT) ? "WP" :
                        ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
                       );
        }
}

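/*
 * The cleanup only makes sense when the BIOS left at least one UC entry
 * and every in-use entry is WB or UC (WP is counted as UC here): that is
 * the continuous-WB-plus-holes layout this code knows how to rebuild.
 */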
static int __init mtrr_need_cleanup(void)
{
        int i;
        mtrr_type type;
        unsigned long size;
        /* one extra slot for entries with size 0 */
        int num[MTRR_NUM_TYPES + 1];

        /* count the entries of each type */
        memset(num, 0, sizeof(num));
        for (i = 0; i < num_var_ranges; i++) {
                type = range_state[i].type;
                size = range_state[i].size_pfn;
                if (type >= MTRR_NUM_TYPES)
                        continue;
                if (!size)
                        type = MTRR_NUM_TYPES;
                if (type == MTRR_TYPE_WRPROT)
                        type = MTRR_TYPE_UNCACHABLE;
                num[type]++;
        }

        /* check if we got UC entries */
        if (!num[MTRR_TYPE_UNCACHABLE])
                return 0;

        /* check if we only had WB and UC */
        if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
            num_var_ranges - num[MTRR_NUM_TYPES])
                return 0;

        return 1;
}

static unsigned long __initdata range_sums;
static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
                                         unsigned long extra_remove_base,
                                         unsigned long extra_remove_size,
                                         int i)
{
        int num_reg;
        static struct res_range range_new[RANGE_NUM];
        static int nr_range_new;
        unsigned long range_sums_new;

        /* convert the ranges to a var-MTRR layout in range_state */
        num_reg = x86_setup_var_mtrrs(range, nr_range,
                                      chunk_size, gran_size);

        /* we got a new layout in range_state, check it */
        memset(range_new, 0, sizeof(range_new));
        nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
                                              extra_remove_base, extra_remove_size);
        range_sums_new = sum_ranges(range_new, nr_range_new);

        result[i].chunk_sizek = chunk_size >> 10;
        result[i].gran_sizek = gran_size >> 10;
        result[i].num_reg = num_reg;
        if (range_sums < range_sums_new) {
                result[i].lose_cover_sizek =
                        (range_sums_new - range_sums) << PSHIFT;
                result[i].bad = 1;
        } else
                result[i].lose_cover_sizek =
                        (range_sums - range_sums_new) << PSHIFT;

        /* double check it */
        if (!result[i].bad && !result[i].lose_cover_sizek) {
                if (nr_range_new != nr_range ||
                    memcmp(range, range_new, sizeof(range)))
                        result[i].bad = 1;
        }

        if (!result[i].bad && (range_sums - range_sums_new <
                               min_loss_pfn[num_reg])) {
                min_loss_pfn[num_reg] =
                        range_sums - range_sums_new;
        }
}

static void __init mtrr_print_out_one_result(int i)
{
        char gran_factor, chunk_factor, lose_factor;
        unsigned long gran_base, chunk_base, lose_base;

        gran_base = to_size_factor(result[i].gran_sizek, &gran_factor);
        chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor);
        lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor);

        printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
               result[i].bad ? "*BAD*" : " ",
               gran_base, gran_factor, chunk_base, chunk_factor);
        printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
               result[i].num_reg, result[i].bad ? "-" : "",
               lose_base, lose_factor);
}

static int __init mtrr_search_optimal_index(void)
{
        int i;
        int num_reg_good;
        int index_good;

        if (nr_mtrr_spare_reg >= num_var_ranges)
                nr_mtrr_spare_reg = num_var_ranges - 1;
        num_reg_good = -1;
        for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
                if (!min_loss_pfn[i])
                        num_reg_good = i;
        }

        index_good = -1;
        if (num_reg_good != -1) {
                for (i = 0; i < NUM_RESULT; i++) {
                        if (!result[i].bad &&
                            result[i].num_reg == num_reg_good &&
                            !result[i].lose_cover_sizek) {
                                index_good = i;
                                break;
                        }
                }
        }

        return index_good;
}

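/*
 * Re-pack the variable MTRRs: read the BIOS layout, then either use the
 * user-supplied chunk/gran sizes or sweep all 136 combinations, keeping
 * a result only if it loses no WB coverage, and finally write back the
 * layout that leaves the most spare registers.
 */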
int __init mtrr_cleanup(unsigned address_bits)
{
        unsigned long extra_remove_base, extra_remove_size;
        unsigned long base, size, def, dummy;
        mtrr_type type;
        u64 chunk_size, gran_size;
        int index_good;
        int i;

        if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
                return 0;
        rdmsr(MTRRdefType_MSR, def, dummy);
        def &= 0xff;
        if (def != MTRR_TYPE_UNCACHABLE)
                return 0;

        /* get it and store it aside */
        memset(range_state, 0, sizeof(range_state));
        for (i = 0; i < num_var_ranges; i++) {
                mtrr_if->get(i, &base, &size, &type);
                range_state[i].base_pfn = base;
                range_state[i].size_pfn = size;
                range_state[i].type = type;
        }

        /* check if we need to handle it and can handle it */
        if (!mtrr_need_cleanup())
                return 0;

        /* print original var MTRRs at first, for debugging: */
        printk(KERN_DEBUG "original variable MTRRs\n");
        print_out_mtrr_range_state();

        memset(range, 0, sizeof(range));
        extra_remove_size = 0;
        extra_remove_base = 1 << (32 - PAGE_SHIFT);
        if (mtrr_tom2)
                extra_remove_size =
                        (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
        nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
                                          extra_remove_size);
        /*
         * [0, 1M) should always be covered by a var MTRR with WB,
         * and the fixed MTRRs take effect before the var MTRRs there:
         */
        nr_range = add_range_with_merge(range, nr_range, 0,
                                        (1ULL<<(20 - PAGE_SHIFT)) - 1);
        /* sort the ranges */
        sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);

        range_sums = sum_ranges(range, nr_range);
        printk(KERN_INFO "total RAM covered: %ldM\n",
               range_sums >> (20 - PAGE_SHIFT));

        if (mtrr_chunk_size && mtrr_gran_size) {
                i = 0;
                mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
                                      extra_remove_base, extra_remove_size, i);

                mtrr_print_out_one_result(i);

                if (!result[i].bad) {
                        set_var_mtrr_all(address_bits);
                        printk(KERN_DEBUG "New variable MTRRs\n");
                        print_out_mtrr_range_state();
                        return 1;
                }
                printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
                       "will find optimal one\n");
        }

        i = 0;
        memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
        memset(result, 0, sizeof(result));
        for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {

                for (chunk_size = gran_size; chunk_size < (1ULL<<32);
                     chunk_size <<= 1) {

                        if (i >= NUM_RESULT)
                                continue;

                        mtrr_calc_range_state(chunk_size, gran_size,
                                              extra_remove_base, extra_remove_size, i);
                        if (debug_print) {
                                mtrr_print_out_one_result(i);
                                printk(KERN_INFO "\n");
                        }

                        i++;
                }
        }

        /* try to find the optimal index */
        index_good = mtrr_search_optimal_index();

        if (index_good != -1) {
                printk(KERN_INFO "Found optimal setting for mtrr cleanup\n");
                i = index_good;
                mtrr_print_out_one_result(i);

                /* convert the ranges to a var-MTRR layout in range_state */
                chunk_size = result[i].chunk_sizek;
                chunk_size <<= 10;
                gran_size = result[i].gran_sizek;
                gran_size <<= 10;
                x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
                set_var_mtrr_all(address_bits);
                printk(KERN_DEBUG "New variable MTRRs\n");
                print_out_mtrr_range_state();
                return 1;
        } else {
                /* print out all */
                for (i = 0; i < NUM_RESULT; i++)
                        mtrr_print_out_one_result(i);
        }

        printk(KERN_INFO "mtrr_cleanup: cannot find optimal value\n");
        printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");

        return 0;
}
#else
int __init mtrr_cleanup(unsigned address_bits)
{
        return 0;
}
#endif

static int disable_mtrr_trim;

static int __init disable_mtrr_trim_setup(char *str)
{
        disable_mtrr_trim = 1;
        return 0;
}
early_param("disable_mtrr_trim", disable_mtrr_trim_setup);

/*
 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
 * for memory >4GB. Check for that here.
 * Note this won't check whether the MTRRs below 4GB (where the magic bit
 * doesn't apply) are wrong, but so far we don't know of any such case in
 * the wild.
 */
#define Tom2Enabled (1U << 21)
#define Tom2ForceMemTypeWB (1U << 22)

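/*
 * Both bits live in the K8 SYSCFG MSR (MSR_K8_SYSCFG, 0xc0010010); per
 * AMD's BKDG, bit 21 enables TOM2 and bit 22 forces WB above 4GB.
 */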
int __init amd_special_default_mtrr(void)
{
        u32 l, h;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
                return 0;
        if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
                return 0;
        /* In case some hypervisor doesn't pass SYSCFG through */
        if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
                return 0;
        /*
         * Memory between 4GB and top of mem is forced WB by this magic bit.
         * Reserved before K8RevF, but should be zero there.
         */
        if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
            (Tom2Enabled | Tom2ForceMemTypeWB))
                return 1;
        return 0;
}

static u64 __init real_trim_memory(unsigned long start_pfn,
                                   unsigned long limit_pfn)
{
        u64 trim_start, trim_size;

        trim_start = start_pfn;
        trim_start <<= PAGE_SHIFT;
        trim_size = limit_pfn;
        trim_size <<= PAGE_SHIFT;
        trim_size -= trim_start;

        return e820_update_range(trim_start, trim_size, E820_RAM,
                                 E820_RESERVED);
}

/**
 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
 * @end_pfn: ending page frame number
 *
 * Some buggy BIOSes don't set up the MTRRs properly for systems with certain
 * memory configurations.  This routine checks that the highest MTRR matches
 * the end of memory, to make sure the MTRRs having a write back type cover
 * all of the memory the kernel is intending to use.  If not, it'll trim any
 * memory off the end by adjusting end_pfn, removing it from the kernel's
 * allocation pools, and warning the user with an obnoxious message.
 */
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
        unsigned long i, base, size, highest_pfn = 0, def, dummy;
        mtrr_type type;
        u64 total_trim_size;

        /* one extra slot for entries with size 0 */
        int num[MTRR_NUM_TYPES + 1];
        /*
         * Make sure we only trim uncachable memory on machines that
         * support the Intel MTRR architecture:
         */
        if (!is_cpu(INTEL) || disable_mtrr_trim)
                return 0;
        rdmsr(MTRRdefType_MSR, def, dummy);
        def &= 0xff;
        if (def != MTRR_TYPE_UNCACHABLE)
                return 0;

        /* get it and store it aside */
        memset(range_state, 0, sizeof(range_state));
        for (i = 0; i < num_var_ranges; i++) {
                mtrr_if->get(i, &base, &size, &type);
                range_state[i].base_pfn = base;
                range_state[i].size_pfn = size;
                range_state[i].type = type;
        }

        /* Find the highest cached pfn */
        for (i = 0; i < num_var_ranges; i++) {
                type = range_state[i].type;
                if (type != MTRR_TYPE_WRBACK)
                        continue;
                base = range_state[i].base_pfn;
                size = range_state[i].size_pfn;
                if (highest_pfn < base + size)
                        highest_pfn = base + size;
        }

        /* kvm/qemu doesn't set the MTRRs up right, so don't trim it all away */
        if (!highest_pfn) {
                printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
                return 0;
        }

        /* count the entries of each type */
        memset(num, 0, sizeof(num));
        for (i = 0; i < num_var_ranges; i++) {
                type = range_state[i].type;
                if (type >= MTRR_NUM_TYPES)
                        continue;
                size = range_state[i].size_pfn;
                if (!size)
                        type = MTRR_NUM_TYPES;
                num[type]++;
        }

        /* no entry for WB? */
        if (!num[MTRR_TYPE_WRBACK])
                return 0;

        /* check if we only had WB and UC */
        if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
            num_var_ranges - num[MTRR_NUM_TYPES])
                return 0;

        memset(range, 0, sizeof(range));
        nr_range = 0;
        if (mtrr_tom2) {
                range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
                range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
                if (highest_pfn < range[nr_range].end + 1)
                        highest_pfn = range[nr_range].end + 1;
                nr_range++;
        }
        nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);

        total_trim_size = 0;
        /* check the head */
        if (range[0].start)
                total_trim_size += real_trim_memory(0, range[0].start);
        /* check the holes */
        for (i = 0; i < nr_range - 1; i++) {
                if (range[i].end + 1 < range[i+1].start)
                        total_trim_size += real_trim_memory(range[i].end + 1,
                                                            range[i+1].start);
        }
        /* check the top */
        i = nr_range - 1;
        if (range[i].end + 1 < end_pfn)
                total_trim_size += real_trim_memory(range[i].end + 1,
                                                    end_pfn);

        if (total_trim_size) {
                printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
                       " all of memory, losing %lluMB of RAM.\n",
                       total_trim_size >> 20);

                if (!changed_by_mtrr_cleanup)
                        WARN_ON(1);

                printk(KERN_INFO "update e820 for mtrr\n");
                update_e820();

                return 1;
        }

        return 0;
}