/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002-2008 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Robert Richter <robert.richter@amd.com>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>

#include "op_counter.h"
#include "op_x86_model.h"

static struct op_x86_model_spec const *model;
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);

/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;

/* common functions */

u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
		    struct op_counter_config *counter_config)
{
	u64 val = 0;
	u16 event = (u16)counter_config->event;

	val |= ARCH_PERFMON_EVENTSEL_INT;
	val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
	val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
	val |= (counter_config->unit_mask & 0xFF) << 8;
	event &= model->event_mask ? model->event_mask : 0xFF;
	val |= event & 0xFF;
	val |= (event & 0x0F00) << 24;

	return val;
}

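/*
 * For reference: the value built by op_x86_get_ctrl() follows the x86
 * event select register layout: bits 0-7 event select (low part),
 * bits 8-15 unit mask, bit 16 USR, bit 17 OS, bit 20 INT (raise an
 * interrupt, here an NMI, on counter overflow). The final
 * (event & 0x0F00) << 24 term moves event bits 8-11 up to bits 32-35,
 * the extended event select field of AMD and recent Intel CPUs.
 * Example: event 0x76 counted in both kernel and user mode gives
 * 0x130076 (INT | OS | USR | 0x76).
 */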
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
{
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

	switch (val) {
	case DIE_NMI:
		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
			ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
}

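/*
 * profile_exceptions_notify() returns NOTIFY_STOP to tell the die
 * notifier chain that the NMI came from a profiling counter and has
 * been consumed; NOTIFY_DONE lets the NMI fall through to the other
 * registered handlers.
 */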
static void nmi_cpu_save_registers(struct op_msrs *msrs)
{
	unsigned int const nr_ctrs = model->num_counters;
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
	unsigned int i;

	for (i = 0; i < nr_ctrs; ++i) {
		if (counters[i].addr)
			rdmsrl(counters[i].addr, counters[i].saved);
	}

	for (i = 0; i < nr_ctrls; ++i) {
		if (controls[i].addr)
			rdmsrl(controls[i].addr, controls[i].saved);
	}
}

static void nmi_save_registers(void *dummy)
{
	int cpu = smp_processor_id();
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
	nmi_cpu_save_registers(msrs);
}

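/*
 * rdmsrl() reads the whole 64-bit MSR in one go, so the full counter
 * and control state ends up in op_msr.saved (assumed here to be a
 * u64, as declared in op_x86_model.h) instead of being split into
 * 32-bit halves.
 */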
static void free_msrs(void)
{
	int i;
	for_each_possible_cpu(i) {
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
	}
}

static int allocate_msrs(void)
{
	int success = 1;
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

	int i;
	for_each_possible_cpu(i) {
		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
							GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters) {
			success = 0;
			break;
		}
		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
							GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).controls) {
			success = 0;
			break;
		}
	}

	if (!success)
		free_msrs();

	return success;
}

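/*
 * Allocation covers every possible CPU, not only the ones currently
 * online, so a CPU that comes up later (see the CONFIG_SMP hotplug
 * notifier below) already has its MSR shadow buffers in place.
 */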
static void nmi_cpu_setup(void *dummy)
{
	int cpu = smp_processor_id();
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
	spin_lock(&oprofilefs_lock);
	model->setup_ctrs(model, msrs);
	spin_unlock(&oprofilefs_lock);
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

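/*
 * nmi_cpu_setup() runs on every CPU via on_each_cpu(). Taking
 * oprofilefs_lock presumably keeps userspace writes to the counter
 * configuration files from racing with model->setup_ctrs(). Writing
 * APIC_DM_NMI into APIC_LVTPC routes counter overflow interrupts to
 * the NMI vector, which is what makes this profiler NMI-based.
 */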
static struct notifier_block profile_exceptions_nb = {
	.notifier_call = profile_exceptions_notify,
	.next = NULL,
	.priority = 0
};

static int nmi_setup(void)
{
	int err = 0;
	int cpu;

	if (!allocate_msrs())
		return -ENOMEM;

	err = register_die_notifier(&profile_exceptions_nb);
	if (err) {
		free_msrs();
		return err;
	}

	/* We need to serialize save and setup for HT because the subsets
	 * of MSRs touched by the save and the setup operations are
	 * distinct.
	 */

	/* Assume saved/restored counters are the same on all CPUs */
	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
	for_each_possible_cpu(cpu) {
		if (cpu != 0) {
			memcpy(per_cpu(cpu_msrs, cpu).counters,
			       per_cpu(cpu_msrs, 0).counters,
			       sizeof(struct op_msr) * model->num_counters);

			memcpy(per_cpu(cpu_msrs, cpu).controls,
			       per_cpu(cpu_msrs, 0).controls,
			       sizeof(struct op_msr) * model->num_controls);
		}
	}
	on_each_cpu(nmi_save_registers, NULL, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 1);
	nmi_enabled = 1;
	return 0;
}

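/*
 * Note that model->fill_in_addresses() runs only for CPU 0 and the
 * resulting MSR address tables are then copied to all other CPUs:
 * the counter/control MSR numbers are identical everywhere, only the
 * register contents are per CPU.
 */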
static void nmi_restore_registers(struct op_msrs *msrs)
{
	unsigned int const nr_ctrs = model->num_counters;
	unsigned int const nr_ctrls = model->num_controls;
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
	unsigned int i;

	for (i = 0; i < nr_ctrls; ++i) {
		if (controls[i].addr)
			wrmsrl(controls[i].addr, controls[i].saved);
	}

	for (i = 0; i < nr_ctrs; ++i) {
		if (counters[i].addr)
			wrmsrl(counters[i].addr, counters[i].saved);
	}
}

static void nmi_cpu_shutdown(void *dummy)
{
	unsigned int v;
	int cpu = smp_processor_id();
	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);

	/* restoring APIC_LVTPC can trigger an apic error because the
	 * delivery mode and vector number combination can be illegal.
	 * That's by design: on power on the apic lvt contains a zero
	 * vector number, which is legal only for NMI delivery mode. So
	 * inhibit apic errors before restoring lvtpc.
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
	apic_write(APIC_LVTERR, v);
	nmi_restore_registers(msrs);
}

static void nmi_shutdown(void)
{
	struct op_msrs *msrs;

	nmi_enabled = 0;
	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
	unregister_die_notifier(&profile_exceptions_nb);
	msrs = &get_cpu_var(cpu_msrs);
	model->shutdown(msrs);
	free_msrs();
	put_cpu_var(cpu_msrs);
}

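/*
 * get_cpu_var()/put_cpu_var() disable and re-enable preemption, so
 * model->shutdown() operates on the MSR shadow of the CPU we stay
 * pinned to while it runs.
 */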
static void nmi_cpu_start(void *dummy)
{
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
	model->start(msrs);
}

static int nmi_start(void)
{
	on_each_cpu(nmi_cpu_start, NULL, 1);
	return 0;
}

static void nmi_cpu_stop(void *dummy)
{
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
	model->stop(msrs);
}

static void nmi_stop(void)
{
	on_each_cpu(nmi_cpu_stop, NULL, 1);
}

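/*
 * The last argument to on_each_cpu() is the wait flag: with 1, start
 * and stop do not return until every CPU has executed the callback.
 */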
struct op_counter_config counter_config[OP_MAX_COUNTER];

static int nmi_create_files(struct super_block *sb, struct dentry *root)
{
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
		struct dentry *dir;
		char buf[4];

		/* quick little hack to _not_ expose a counter if it is not
		 * available for use. This should protect userspace apps.
		 * NOTE: assumes 1:1 mapping here (that counters are organized
		 * sequentially in their struct assignment).
		 */
		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
			continue;

		snprintf(buf, sizeof(buf), "%d", i);
		dir = oprofilefs_mkdir(sb, root, buf);
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
	}

	return 0;
}

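/*
 * The resulting oprofilefs tree (typically mounted at /dev/oprofile)
 * contains one directory per usable counter, e.g.:
 *
 *	/dev/oprofile/0/enabled
 *	/dev/oprofile/0/event
 *	/dev/oprofile/0/count
 *	/dev/oprofile/0/unit_mask
 *	/dev/oprofile/0/kernel
 *	/dev/oprofile/0/user
 */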
#ifdef CONFIG_SMP
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
				 void *data)
{
	int cpu = (unsigned long)data;
	switch (action) {
	case CPU_DOWN_FAILED:
	case CPU_ONLINE:
		smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
		break;
	case CPU_DOWN_PREPARE:
		smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block oprofile_cpu_nb = {
	.notifier_call = oprofile_cpu_notifier
};
#endif

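/*
 * The wait flags above differ on purpose: a CPU about to go offline
 * is stopped synchronously (wait == 1) so its counters are quiet
 * before CPU_DOWN_PREPARE completes, while a freshly onlined CPU may
 * be started asynchronously (wait == 0).
 */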
#ifdef CONFIG_PM

static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* Only one CPU left, just stop that one */
	if (nmi_enabled == 1)
		nmi_cpu_stop(NULL);
	return 0;
}

static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled == 1)
		nmi_cpu_start(NULL);
	return 0;
}

static struct sysdev_class oprofile_sysclass = {
	.name = "oprofile",
	.resume = nmi_resume,
	.suspend = nmi_suspend,
};

static struct sys_device device_oprofile = {
	.id = 0,
	.cls = &oprofile_sysclass,
};

static int __init init_sysfs(void)
{
	int error;

	error = sysdev_class_register(&oprofile_sysclass);
	if (!error)
		error = sysdev_register(&device_oprofile);
	return error;
}

static void exit_sysfs(void)
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
#endif /* CONFIG_PM */

static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;

	if (cpu_model > 6 || cpu_model == 5)
		return 0;

#ifndef CONFIG_SMP
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#else
	switch (smp_num_siblings) {
	case 1:
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;

	case 2:
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}
#endif

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
}

static int force_arch_perfmon;
static int force_cpu_type(const char *str, struct kernel_param *kp)
{
	if (!strcmp(str, "archperfmon")) {
		force_arch_perfmon = 1;
		printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
	}

	return 0;
}
module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);

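/*
 * Usage sketch, assuming oprofile is built as a module: loading it
 * with "modprobe oprofile cpu_type=archperfmon" (or booting with
 * oprofile.cpu_type=archperfmon when built in) sets
 * force_arch_perfmon, which makes ppro_init() below skip its
 * family/model table on arch-perfmon-capable CPUs so that
 * op_nmi_init() falls back to the architectural perfmon interface.
 */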
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	struct op_x86_model_spec const *spec = &op_ppro_spec;	/* default */

	if (force_arch_perfmon && cpu_has_arch_perfmon)
		return 0;

	switch (cpu_model) {
	case 0 ... 2:
		*cpu_type = "i386/ppro";
		break;
	case 3 ... 5:
		*cpu_type = "i386/pii";
		break;
	case 6 ... 8:
	case 10 ... 11:
		*cpu_type = "i386/piii";
		break;
	case 9:
	case 13:
		*cpu_type = "i386/p6_mobile";
		break;
	case 14:
		*cpu_type = "i386/core";
		break;
	case 15: case 23:
		*cpu_type = "i386/core_2";
		break;
	case 26:
		spec = &op_arch_perfmon_spec;
		*cpu_type = "i386/core_i7";
		break;
	case 28:
		*cpu_type = "i386/atom";
		break;
	default:
		/* Unknown */
		return 0;
	}

	model = spec;
	return 1;
}

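/*
 * Family 6 model numbers map to the familiar product names: model 14
 * is Core (Yonah), 15 and 23 are Core 2, 26 is Nehalem (Core i7,
 * which gets the architectural perfmon model instead of
 * op_ppro_spec), and 28 is Atom.
 */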
/* bookkeeping so sysfs is only torn down on exit if it was set up */
static int using_nmi;

int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type = NULL;
	int ret = 0;

	if (!cpu_has_apic)
		return -ENODEV;

	switch (vendor) {
	case X86_VENDOR_AMD:
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */

		switch (family) {
		case 6:
			cpu_type = "i386/athlon";
			break;
		case 0xf:
			/*
			 * Actually it could be i386/hammer too, but
			 * give user space a consistent name.
			 */
			cpu_type = "x86-64/hammer";
			break;
		case 0x10:
			cpu_type = "x86-64/family10";
			break;
		case 0x11:
			cpu_type = "x86-64/family11h";
			break;
		default:
			return -ENODEV;
		}
		model = &op_amd_spec;
		break;

	case X86_VENDOR_INTEL:
		switch (family) {
		/* Pentium IV */
		case 0xf:
			p4_init(&cpu_type);
			break;

		/* A P6-class processor */
		case 6:
			ppro_init(&cpu_type);
			break;

		default:
			break;
		}

		if (cpu_type)
			break;

		if (!cpu_has_arch_perfmon)
			return -ENODEV;

		/* use arch perfmon as fallback */
		cpu_type = "i386/arch_perfmon";
		model = &op_arch_perfmon_spec;
		break;

	default:
		return -ENODEV;
	}

#ifdef CONFIG_SMP
	register_cpu_notifier(&oprofile_cpu_nb);
#endif
	/* default values, can be overwritten by model */
	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;
	ops->cpu_type = cpu_type;

	if (model->init)
		ret = model->init(ops);
	if (ret)
		return ret;

	init_sysfs();
	using_nmi = 1;
	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

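/*
 * model->init() runs after the defaults above have been filled in,
 * so a model may override any of the ops (its own create_files,
 * setup, and so on) before oprofile starts using them.
 */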
void op_nmi_exit(void)
{
	if (using_nmi) {
		exit_sysfs();
#ifdef CONFIG_SMP
		unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
	}
	if (model->exit)
		model->exit();
}