net-next-2.6.git: arch/x86/oprofile/nmi_int.c
x86/oprofile: remove some local variables in MSR save/restore functions
/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002-2008 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Robert Richter <robert.richter@amd.com>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>

#include "op_counter.h"
#include "op_x86_model.h"

static struct op_x86_model_spec const *model;
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);

/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;

/* common functions */

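/*
 * Build the event-select MSR value for one counter from its oprofilefs
 * configuration.  Per the layout assumed here: bits 0-7 event select,
 * bits 8-15 unit mask, bit 16 user mode, bit 17 kernel mode, bit 20 APIC
 * interrupt enable; the high nibble of the event code is shifted into
 * bits 32-35 (AMD's extended event-select field).
 */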
u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
                    struct op_counter_config *counter_config)
{
        u64 val = 0;
        u16 event = (u16)counter_config->event;

        val |= ARCH_PERFMON_EVENTSEL_INT;
        val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
        val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
        val |= (counter_config->unit_mask & 0xFF) << 8;
        event &= model->event_mask ? model->event_mask : 0xFF;
        val |= event & 0xFF;
        val |= (event & 0x0F00) << 24;

        return val;
}


static int profile_exceptions_notify(struct notifier_block *self,
                                     unsigned long val, void *data)
{
        struct die_args *args = (struct die_args *)data;
        int ret = NOTIFY_DONE;
        int cpu = smp_processor_id();

        switch (val) {
        case DIE_NMI:
                if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
                        ret = NOTIFY_STOP;
                break;
        default:
                break;
        }
        return ret;
}

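/*
 * Shadow the current counter/control MSR contents so that
 * nmi_restore_registers() can put the original values back at shutdown;
 * presumably this is what lets oprofile hand the MSRs back to any other
 * user (e.g. the NMI watchdog) in the state it found them.
 */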
static void nmi_cpu_save_registers(struct op_msrs *msrs)
{
        struct op_msr *counters = msrs->counters;
        struct op_msr *controls = msrs->controls;
        unsigned int i;

        for (i = 0; i < model->num_counters; ++i) {
                if (counters[i].addr)
                        rdmsrl(counters[i].addr, counters[i].saved);
        }

        for (i = 0; i < model->num_controls; ++i) {
                if (controls[i].addr)
                        rdmsrl(controls[i].addr, controls[i].saved);
        }
}

static void nmi_save_registers(void *dummy)
{
        int cpu = smp_processor_id();
        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
        nmi_cpu_save_registers(msrs);
}

static void free_msrs(void)
{
        int i;
        for_each_possible_cpu(i) {
                kfree(per_cpu(cpu_msrs, i).counters);
                per_cpu(cpu_msrs, i).counters = NULL;
                kfree(per_cpu(cpu_msrs, i).controls);
                per_cpu(cpu_msrs, i).controls = NULL;
        }
}

static int allocate_msrs(void)
{
        int success = 1;
        size_t controls_size = sizeof(struct op_msr) * model->num_controls;
        size_t counters_size = sizeof(struct op_msr) * model->num_counters;

        int i;
        for_each_possible_cpu(i) {
                per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
                                                                GFP_KERNEL);
                if (!per_cpu(cpu_msrs, i).counters) {
                        success = 0;
                        break;
                }
                per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
                                                                GFP_KERNEL);
                if (!per_cpu(cpu_msrs, i).controls) {
                        success = 0;
                        break;
                }
        }

        if (!success)
                free_msrs();

        return success;
}

static void nmi_cpu_setup(void *dummy)
{
        int cpu = smp_processor_id();
        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
        spin_lock(&oprofilefs_lock);
        model->setup_ctrs(model, msrs);
        spin_unlock(&oprofilefs_lock);
        per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
}

static struct notifier_block profile_exceptions_nb = {
        .notifier_call = profile_exceptions_notify,
        .next = NULL,
        .priority = 0
};

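/*
 * Bring NMI profiling up: allocate the per-CPU MSR shadow arrays, hook
 * the NMI die notifier, resolve the MSR addresses once on CPU 0 (then
 * copy them to the other CPUs), save the current MSR contents and
 * finally program the counters on every CPU.
 */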
static int nmi_setup(void)
{
        int err = 0;
        int cpu;

        if (!allocate_msrs())
                return -ENOMEM;

        err = register_die_notifier(&profile_exceptions_nb);
        if (err) {
                free_msrs();
                return err;
        }

        /* We need to serialize save and setup for HT because the subsets
         * of MSRs are distinct for the save and setup operations.
         */

        /* Assume saved/restored counters are the same on all CPUs */
        model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
        for_each_possible_cpu(cpu) {
                if (cpu != 0) {
                        memcpy(per_cpu(cpu_msrs, cpu).counters,
                                per_cpu(cpu_msrs, 0).counters,
                                sizeof(struct op_msr) * model->num_counters);

                        memcpy(per_cpu(cpu_msrs, cpu).controls,
                                per_cpu(cpu_msrs, 0).controls,
                                sizeof(struct op_msr) * model->num_controls);
                }
        }
        on_each_cpu(nmi_save_registers, NULL, 1);
        on_each_cpu(nmi_cpu_setup, NULL, 1);
        nmi_enabled = 1;
        return 0;
}

static void nmi_restore_registers(struct op_msrs *msrs)
{
        struct op_msr *counters = msrs->counters;
        struct op_msr *controls = msrs->controls;
        unsigned int i;

        for (i = 0; i < model->num_controls; ++i) {
                if (controls[i].addr)
                        wrmsrl(controls[i].addr, controls[i].saved);
        }

        for (i = 0; i < model->num_counters; ++i) {
                if (counters[i].addr)
                        wrmsrl(counters[i].addr, counters[i].saved);
        }
}

static void nmi_cpu_shutdown(void *dummy)
{
        unsigned int v;
        int cpu = smp_processor_id();
        struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);

        /* Restoring APIC_LVTPC can trigger an APIC error because the
         * delivery mode and vector number combination can be illegal. That's
         * by design: at power-on the APIC LVT entries contain a zero vector
         * number, which is legal only for NMI delivery mode. So inhibit APIC
         * errors before restoring the LVTPC.
         */
        v = apic_read(APIC_LVTERR);
        apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
        apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
        apic_write(APIC_LVTERR, v);
        nmi_restore_registers(msrs);
}

static void nmi_shutdown(void)
{
        struct op_msrs *msrs;

        nmi_enabled = 0;
        on_each_cpu(nmi_cpu_shutdown, NULL, 1);
        unregister_die_notifier(&profile_exceptions_nb);
        msrs = &get_cpu_var(cpu_msrs);
        model->shutdown(msrs);
        free_msrs();
        put_cpu_var(cpu_msrs);
}

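/*
 * nmi_start()/nmi_stop() run on every CPU and defer to the model's
 * start/stop handlers; the counter configuration written by
 * nmi_cpu_setup() is presumably left in place, so sampling can be paused
 * and resumed without a full setup/shutdown cycle.
 */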
static void nmi_cpu_start(void *dummy)
{
        struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
        model->start(msrs);
}

static int nmi_start(void)
{
        on_each_cpu(nmi_cpu_start, NULL, 1);
        return 0;
}

static void nmi_cpu_stop(void *dummy)
{
        struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
        model->stop(msrs);
}

static void nmi_stop(void)
{
        on_each_cpu(nmi_cpu_stop, NULL, 1);
}

struct op_counter_config counter_config[OP_MAX_COUNTER];

static int nmi_create_files(struct super_block *sb, struct dentry *root)
{
        unsigned int i;

        for (i = 0; i < model->num_counters; ++i) {
                struct dentry *dir;
                char buf[4];

                /* quick little hack to _not_ expose a counter if it is not
                 * available for use.  This should protect the userspace app.
                 * NOTE:  assumes a 1:1 mapping here (that counters are
                 *        organized sequentially in their struct assignment).
                 */
                if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
                        continue;

                snprintf(buf, sizeof(buf), "%d", i);
                dir = oprofilefs_mkdir(sb, root, buf);
                oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
                oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
                oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
                oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
                oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
                oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
        }

        return 0;
}

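/*
 * CPU hotplug: start the counters on a CPU that comes (back) online and
 * stop them on a CPU that is about to go down, so hotplugged CPUs take
 * part in profiling without a full restart.
 */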
#ifdef CONFIG_SMP
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
                                 void *data)
{
        int cpu = (unsigned long)data;
        switch (action) {
        case CPU_DOWN_FAILED:
        case CPU_ONLINE:
                smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
                break;
        case CPU_DOWN_PREPARE:
                smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block oprofile_cpu_nb = {
        .notifier_call = oprofile_cpu_notifier
};
#endif

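/*
 * Suspend/resume: by the time the sysdev suspend callback runs, the
 * nonboot CPUs are already offline, so only the remaining CPU needs its
 * counters stopped and later restarted.
 */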
#ifdef CONFIG_PM

static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
        /* Only one CPU left, just stop that one */
        if (nmi_enabled == 1)
                nmi_cpu_stop(NULL);
        return 0;
}

static int nmi_resume(struct sys_device *dev)
{
        if (nmi_enabled == 1)
                nmi_cpu_start(NULL);
        return 0;
}

static struct sysdev_class oprofile_sysclass = {
        .name           = "oprofile",
        .resume         = nmi_resume,
        .suspend        = nmi_suspend,
};

static struct sys_device device_oprofile = {
        .id     = 0,
        .cls    = &oprofile_sysclass,
};

static int __init init_sysfs(void)
{
        int error;

        error = sysdev_class_register(&oprofile_sysclass);
        if (!error)
                error = sysdev_register(&device_oprofile);
        return error;
}

static void exit_sysfs(void)
{
        sysdev_unregister(&device_oprofile);
        sysdev_class_unregister(&oprofile_sysclass);
}

#else
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
#endif /* CONFIG_PM */

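/*
 * CPU detection helpers: each *_init() picks the model-specific
 * op_x86_model_spec and the user-visible cpu_type string, returning 1 on
 * success.  A return of 0 leaves cpu_type NULL, and op_nmi_init() then
 * either falls back to the architectural perfmon driver or fails with
 * -ENODEV.
 */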
static int __init p4_init(char **cpu_type)
{
        __u8 cpu_model = boot_cpu_data.x86_model;

        if (cpu_model > 6 || cpu_model == 5)
                return 0;

#ifndef CONFIG_SMP
        *cpu_type = "i386/p4";
        model = &op_p4_spec;
        return 1;
#else
        switch (smp_num_siblings) {
        case 1:
                *cpu_type = "i386/p4";
                model = &op_p4_spec;
                return 1;

        case 2:
                *cpu_type = "i386/p4-ht";
                model = &op_p4_ht2_spec;
                return 1;
        }
#endif

        printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
        printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
        return 0;
}

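/*
 * Passing cpu_type=archperfmon to the oprofile module forces the generic
 * architectural perfmon driver on P6-class CPUs that support it, instead
 * of the family/model-specific setup chosen by ppro_init().
 */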
static int force_arch_perfmon;
static int force_cpu_type(const char *str, struct kernel_param *kp)
{
        if (!strcmp(str, "archperfmon")) {
                force_arch_perfmon = 1;
                printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
        }

        return 0;
}
module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);

static int __init ppro_init(char **cpu_type)
{
        __u8 cpu_model = boot_cpu_data.x86_model;
        struct op_x86_model_spec const *spec = &op_ppro_spec;  /* default */

        if (force_arch_perfmon && cpu_has_arch_perfmon)
                return 0;

        switch (cpu_model) {
        case 0 ... 2:
                *cpu_type = "i386/ppro";
                break;
        case 3 ... 5:
                *cpu_type = "i386/pii";
                break;
        case 6 ... 8:
        case 10 ... 11:
                *cpu_type = "i386/piii";
                break;
        case 9:
        case 13:
                *cpu_type = "i386/p6_mobile";
                break;
        case 14:
                *cpu_type = "i386/core";
                break;
        case 15: case 23:
                *cpu_type = "i386/core_2";
                break;
        case 26:
                spec = &op_arch_perfmon_spec;
                *cpu_type = "i386/core_i7";
                break;
        case 28:
                *cpu_type = "i386/atom";
                break;
        default:
                /* Unknown */
                return 0;
        }

        model = spec;
        return 1;
}

/* set once sysfs is registered, so op_nmi_exit() only tears down what was set up */
static int using_nmi;

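/*
 * Top-level init: pick the model-specific driver from the CPU vendor and
 * family, fill in the default oprofile_operations, and let the model
 * override them via model->init() where needed.
 */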
int __init op_nmi_init(struct oprofile_operations *ops)
{
        __u8 vendor = boot_cpu_data.x86_vendor;
        __u8 family = boot_cpu_data.x86;
        char *cpu_type = NULL;
        int ret = 0;

        if (!cpu_has_apic)
                return -ENODEV;

        switch (vendor) {
        case X86_VENDOR_AMD:
                /* Needs to be at least an Athlon (or hammer in 32bit mode) */

                switch (family) {
                case 6:
                        cpu_type = "i386/athlon";
                        break;
                case 0xf:
                        /*
                         * Actually it could be i386/hammer too, but
                         * give user space a consistent name.
                         */
                        cpu_type = "x86-64/hammer";
                        break;
                case 0x10:
                        cpu_type = "x86-64/family10";
                        break;
                case 0x11:
                        cpu_type = "x86-64/family11h";
                        break;
                default:
                        return -ENODEV;
                }
                model = &op_amd_spec;
                break;

        case X86_VENDOR_INTEL:
                switch (family) {
                        /* Pentium IV */
                case 0xf:
                        p4_init(&cpu_type);
                        break;

                        /* A P6-class processor */
                case 6:
                        ppro_init(&cpu_type);
                        break;

                default:
                        break;
                }

                if (cpu_type)
                        break;

                if (!cpu_has_arch_perfmon)
                        return -ENODEV;

                /* use arch perfmon as fallback */
                cpu_type = "i386/arch_perfmon";
                model = &op_arch_perfmon_spec;
                break;

        default:
                return -ENODEV;
        }

#ifdef CONFIG_SMP
        register_cpu_notifier(&oprofile_cpu_nb);
#endif
        /* default values, can be overwritten by model */
        ops->create_files = nmi_create_files;
        ops->setup = nmi_setup;
        ops->shutdown = nmi_shutdown;
        ops->start = nmi_start;
        ops->stop = nmi_stop;
        ops->cpu_type = cpu_type;

        if (model->init)
                ret = model->init(ops);
        if (ret)
                return ret;

        init_sysfs();
        using_nmi = 1;
        printk(KERN_INFO "oprofile: using NMI interrupt.\n");
        return 0;
}

void op_nmi_exit(void)
{
        if (using_nmi) {
                exit_sysfs();
#ifdef CONFIG_SMP
                unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
        }
        if (model->exit)
                model->exit();
}