/* arch/x86/kernel/alternative.c */
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/vsyscall.h>

#define MAX_PATCH_LEN (255-1)

#ifdef CONFIG_HOTPLUG_CPU
static int smp_alt_once;

static int __init bootonly(char *str)
{
        smp_alt_once = 1;
        return 1;
}
__setup("smp-alt-boot", bootonly);
#else
#define smp_alt_once 1
#endif

static int debug_alternative;

static int __init debug_alt(char *str)
{
        debug_alternative = 1;
        return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
        noreplace_smp = 1;
        return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
        noreplace_paravirt = 1;
        return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, args...) if (debug_alternative) \
        printk(KERN_DEBUG fmt, args)

#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
   as inline assembly strings in the include files and we cannot
   get them easily into strings. */
asm("\t.section .rodata, \"a\"\nintelnops: "
        GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
        GENERIC_NOP7 GENERIC_NOP8);
extern const unsigned char intelnops[];
static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
        NULL,
        intelnops,
        intelnops + 1,
        intelnops + 1 + 2,
        intelnops + 1 + 2 + 3,
        intelnops + 1 + 2 + 3 + 4,
        intelnops + 1 + 2 + 3 + 4 + 5,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K8_NOP1
asm("\t.section .rodata, \"a\"\nk8nops: "
        K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
        K8_NOP7 K8_NOP8);
extern const unsigned char k8nops[];
static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
        NULL,
        k8nops,
        k8nops + 1,
        k8nops + 1 + 2,
        k8nops + 1 + 2 + 3,
        k8nops + 1 + 2 + 3 + 4,
        k8nops + 1 + 2 + 3 + 4 + 5,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K7_NOP1
asm("\t.section .rodata, \"a\"\nk7nops: "
        K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
        K7_NOP7 K7_NOP8);
extern const unsigned char k7nops[];
static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
        NULL,
        k7nops,
        k7nops + 1,
        k7nops + 1 + 2,
        k7nops + 1 + 2 + 3,
        k7nops + 1 + 2 + 3 + 4,
        k7nops + 1 + 2 + 3 + 4 + 5,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef P6_NOP1
asm("\t.section .rodata, \"a\"\np6nops: "
        P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
        P6_NOP7 P6_NOP8);
extern const unsigned char p6nops[];
static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
        NULL,
        p6nops,
        p6nops + 1,
        p6nops + 1 + 2,
        p6nops + 1 + 2 + 3,
        p6nops + 1 + 2 + 3 + 4,
        p6nops + 1 + 2 + 3 + 4 + 5,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef CONFIG_X86_64

extern char __vsyscall_0;
static inline const unsigned char*const * find_nop_table(void)
{
        return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
               boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
}

#else /* CONFIG_X86_64 */

static const struct nop {
        int cpuid;
        const unsigned char *const *noptable;
} noptypes[] = {
        { X86_FEATURE_K8, k8_nops },
        { X86_FEATURE_K7, k7_nops },
        { X86_FEATURE_P4, p6_nops },
        { X86_FEATURE_P3, p6_nops },
        { -1, NULL }
};

static const unsigned char*const * find_nop_table(void)
{
        const unsigned char *const *noptable = intel_nops;
        int i;

        for (i = 0; noptypes[i].cpuid >= 0; i++) {
                if (boot_cpu_has(noptypes[i].cpuid)) {
                        noptable = noptypes[i].noptable;
                        break;
                }
        }
        return noptable;
}

#endif /* CONFIG_X86_64 */

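/*
 * Note (added for clarity): each nop table above is indexed by the desired
 * nop length.  nops[len] points at a len-byte nop; the n-byte entry starts
 * right after the concatenated 1..(n-1)-byte nops, hence the cumulative
 * offsets.  find_nop_table() simply picks whichever flavour runs best on
 * the boot CPU.
 */
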
/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void add_nops(void *insns, unsigned int len)
{
        const unsigned char *const *noptable = find_nop_table();

        while (len > 0) {
                unsigned int noplen = len;
                if (noplen > ASM_NOP_MAX)
                        noplen = ASM_NOP_MAX;
                memcpy(insns, noptable[noplen], noplen);
                insns += noplen;
                len -= noplen;
        }
}

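/*
 * Illustrative example (added, not part of the original code): with
 * ASM_NOP_MAX == 8, padding an 11-byte hole emits one 8-byte nop followed
 * by one 3-byte nop:
 *
 *      char buf[11];
 *      add_nops(buf, sizeof(buf));     -> 8-byte nop, then 3-byte nop
 */
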
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self-modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */

void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
        struct alt_instr *a;
        char insnbuf[MAX_PATCH_LEN];

        DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
        for (a = start; a < end; a++) {
                u8 *instr = a->instr;
                BUG_ON(a->replacementlen > a->instrlen);
                BUG_ON(a->instrlen > sizeof(insnbuf));
                if (!boot_cpu_has(a->cpuid))
                        continue;
#ifdef CONFIG_X86_64
                /* vsyscall code is not mapped yet. resolve it manually. */
                if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
                        instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
                        DPRINTK("%s: vsyscall fixup: %p => %p\n",
                                __FUNCTION__, a->instr, instr);
                }
#endif
                memcpy(insnbuf, a->replacement, a->replacementlen);
                add_nops(insnbuf + a->replacementlen,
                         a->instrlen - a->replacementlen);
                text_poke(instr, insnbuf, a->instrlen);
        }
}

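/*
 * Background (added; the authoritative definitions live in
 * <asm/alternative.h>): entries in the __alt_instructions table are
 * normally generated by the alternative*() macros.  A representative,
 * illustrative use swaps a universally safe sequence for a faster one
 * once the CPU advertises the needed feature, roughly:
 *
 *      alternative("lock; addl $0,0(%%esp)",   - old: works everywhere
 *                  "mfence",                   - new: needs SSE2
 *                  X86_FEATURE_XMM2);
 *
 * The macro records the original instruction's address and length plus a
 * pointer to the replacement in the .altinstructions section, which is
 * exactly what apply_alternatives() walks above.
 */
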
#ifdef CONFIG_SMP

static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
        u8 **ptr;

        for (ptr = start; ptr < end; ptr++) {
                if (*ptr < text)
                        continue;
                if (*ptr > text_end)
                        continue;
                text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
        };
}

static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
        u8 **ptr;
        char insn[1];

        if (noreplace_smp)
                return;

        add_nops(insn, 1);
        for (ptr = start; ptr < end; ptr++) {
                if (*ptr < text)
                        continue;
                if (*ptr > text_end)
                        continue;
                text_poke(*ptr, insn, 1);
        };
}

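/*
 * Mechanism note (added; the exact macro text is in <asm/alternative.h>):
 * on SMP builds LOCK_PREFIX does more than emit "lock;" - it also records
 * the address of the 0xf0 lock-prefix byte it just emitted in the
 * .smp_locks section, roughly:
 *
 *      .section .smp_locks,"a"
 *        .long 661f                    (.quad on 64-bit)
 *      .previous
 *      661:    lock; ...
 *
 * __smp_locks[] is that section, which is how the two helpers above can
 * turn every lock prefix in a text range into a 1-byte nop for UP and
 * back into 0xf0 for SMP.
 */
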
struct smp_alt_module {
        /* the module these lock-prefix pointers belong to (NULL for the core kernel) */
        struct module *mod;
        char *name;

        /* ptrs to lock prefixes */
        u8 **locks;
        u8 **locks_end;

        /* .text segment, needed to avoid patching init code ;) */
        u8 *text;
        u8 *text_end;

        struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);
static int smp_mode = 1;        /* protected by smp_alt */

void alternatives_smp_module_add(struct module *mod, char *name,
                                 void *locks, void *locks_end,
                                 void *text,  void *text_end)
{
        struct smp_alt_module *smp;
        unsigned long flags;

        if (noreplace_smp)
                return;

        if (smp_alt_once) {
                if (boot_cpu_has(X86_FEATURE_UP))
                        alternatives_smp_unlock(locks, locks_end,
                                                text, text_end);
                return;
        }

        smp = kzalloc(sizeof(*smp), GFP_KERNEL);
        if (NULL == smp)
                return; /* we'll run the (safe but slow) SMP code then ... */

        smp->mod = mod;
        smp->name = name;
        smp->locks = locks;
        smp->locks_end = locks_end;
        smp->text = text;
        smp->text_end = text_end;
        DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
                __FUNCTION__, smp->locks, smp->locks_end,
                smp->text, smp->text_end, smp->name);

        spin_lock_irqsave(&smp_alt, flags);
        list_add_tail(&smp->next, &smp_alt_modules);
        if (boot_cpu_has(X86_FEATURE_UP))
                alternatives_smp_unlock(smp->locks, smp->locks_end,
                                        smp->text, smp->text_end);
        spin_unlock_irqrestore(&smp_alt, flags);
}

void alternatives_smp_module_del(struct module *mod)
{
        struct smp_alt_module *item;
        unsigned long flags;

        if (smp_alt_once || noreplace_smp)
                return;

        spin_lock_irqsave(&smp_alt, flags);
        list_for_each_entry(item, &smp_alt_modules, next) {
                if (mod != item->mod)
                        continue;
                list_del(&item->next);
                spin_unlock_irqrestore(&smp_alt, flags);
                DPRINTK("%s: %s\n", __FUNCTION__, item->name);
                kfree(item);
                return;
        }
        spin_unlock_irqrestore(&smp_alt, flags);
}

void alternatives_smp_switch(int smp)
{
        struct smp_alt_module *mod;
        unsigned long flags;

#ifdef CONFIG_LOCKDEP
        /*
         * A not yet fixed binutils section handling bug prevents
         * alternatives-replacement from working reliably, so turn
         * it off:
         */
        printk("lockdep: not fixing up alternatives.\n");
        return;
#endif

        if (noreplace_smp || smp_alt_once)
                return;
        BUG_ON(!smp && (num_online_cpus() > 1));

        spin_lock_irqsave(&smp_alt, flags);

        /*
         * Avoid unnecessary switches because it forces JIT based VMs to
         * throw away all cached translations, which can be quite costly.
         */
        if (smp == smp_mode) {
                /* nothing */
        } else if (smp) {
                printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
                clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
                list_for_each_entry(mod, &smp_alt_modules, next)
                        alternatives_smp_lock(mod->locks, mod->locks_end,
                                              mod->text, mod->text_end);
        } else {
                printk(KERN_INFO "SMP alternatives: switching to UP code\n");
                set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
                set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
                list_for_each_entry(mod, &smp_alt_modules, next)
                        alternatives_smp_unlock(mod->locks, mod->locks_end,
                                                mod->text, mod->text_end);
        }
        smp_mode = smp;
        spin_unlock_irqrestore(&smp_alt, flags);
}

#endif

#ifdef CONFIG_PARAVIRT
void apply_paravirt(struct paravirt_patch_site *start,
                    struct paravirt_patch_site *end)
{
        struct paravirt_patch_site *p;
        char insnbuf[MAX_PATCH_LEN];

        if (noreplace_paravirt)
                return;

        for (p = start; p < end; p++) {
                unsigned int used;

                BUG_ON(p->len > MAX_PATCH_LEN);
                /* prep the buffer with the original instructions */
                memcpy(insnbuf, p->instr, p->len);
                used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
                                         (unsigned long)p->instr, p->len);

                BUG_ON(used > p->len);

                /* Pad the rest with nops */
                add_nops(insnbuf + used, p->len - used);
                text_poke(p->instr, insnbuf, p->len);
        }
}
extern struct paravirt_patch_site __start_parainstructions[],
        __stop_parainstructions[];
#endif /* CONFIG_PARAVIRT */

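/*
 * Note (added; see <asm/paravirt.h> and arch/x86/kernel/paravirt.c for the
 * authoritative code): pv_init_ops.patch points at the patcher installed by
 * the active backend - native_patch() on bare metal - which rewrites the
 * paravirt call site in insnbuf (inlining a native sequence or emitting a
 * direct call) and returns how many bytes it produced, so the loop above
 * only has to nop-pad the remainder.
 */
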
void __init alternative_instructions(void)
{
        unsigned long flags;

        /* The patching is not fully atomic, so try to avoid local
           interruptions that might execute the code being patched.
           Other CPUs are not running. */
        stop_nmi();
#ifdef CONFIG_X86_MCE
        stop_mce();
#endif

        local_irq_save(flags);
        apply_alternatives(__alt_instructions, __alt_instructions_end);

        /* switch to patch-once-at-boottime-only mode and free the
         * tables in case we know the number of CPUs will never ever
         * change */
#ifdef CONFIG_HOTPLUG_CPU
        if (num_possible_cpus() < 2)
                smp_alt_once = 1;
#endif

#ifdef CONFIG_SMP
        if (smp_alt_once) {
                if (1 == num_possible_cpus()) {
                        printk(KERN_INFO "SMP alternatives: switching to UP code\n");
                        set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
                        set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);

                        alternatives_smp_unlock(__smp_locks, __smp_locks_end,
                                                _text, _etext);
                }
        } else {
                alternatives_smp_module_add(NULL, "core kernel",
                                            __smp_locks, __smp_locks_end,
                                            _text, _etext);

                /* Only switch to UP mode if we don't immediately boot others */
                if (num_possible_cpus() == 1 || setup_max_cpus <= 1)
                        alternatives_smp_switch(0);
        }
#endif
        apply_paravirt(__parainstructions, __parainstructions_end);
        local_irq_restore(flags);

        if (smp_alt_once)
                free_init_pages("SMP alternatives",
                                (unsigned long)__smp_locks,
                                (unsigned long)__smp_locks_end);

        restart_nmi();
#ifdef CONFIG_X86_MCE
        restart_mce();
#endif
}
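
/*
 * Note (added): alternative_instructions() is intended to run exactly once,
 * late in boot-CPU setup while the other CPUs are still offline; in this
 * kernel generation it is called from check_bugs().
 */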

/*
 * Warning:
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these instructions.
 * And on the local CPU you need to be protected against NMI or MCE handlers
 * seeing an inconsistent instruction while you patch.
 */
void __kprobes text_poke(void *addr, unsigned char *opcode, int len)
{
        memcpy(addr, opcode, len);
        sync_core();
        /* Could also do a CLFLUSH here to speed up CPU recovery; but
           that causes hangs on some VIA CPUs. */
}
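
/*
 * Illustrative use (added, hypothetical - not from this file), subject to
 * the constraints in the warning above, e.g. planting a one-byte int3
 * breakpoint at an address nothing is currently executing:
 *
 *      unsigned char brk = 0xcc;       (int3 opcode)
 *      text_poke(addr, &brk, 1);       (sync_core() inside serializes the CPU)
 */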