/*
 * local apic based NMI watchdog for various CPUs.
 *
 * This file also handles reservation of performance counters for coordination
 * with other users (like oprofile).
 *
 * Note that these events normally don't tick when the CPU idles. This means
 * the frequency varies with CPU load.
 *
 * Original code for K7/P6 written by Keith Owens
 */

#include <linux/percpu.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/kprobes.h>

#include <asm/apic.h>
#include <asm/perf_event.h>

struct nmi_watchdog_ctlblk {
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};

/* Interface defining a CPU specific perfctr watchdog */
struct wd_ops {
	int (*reserve)(void);
	void (*unreserve)(void);
	int (*setup)(unsigned nmi_hz);
	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
	void (*stop)(void);
	unsigned perfctr;
	unsigned evntsel;
	u64 checkbit;
};
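
/*
 * Each supported CPU family provides one instance of this interface
 * (k7_wd_ops, p6_wd_ops, p4_wd_ops and intel_arch_wd_ops below);
 * probe_nmi_watchdog() picks the matching one at init time.
 */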

static const struct wd_ops *wd_ops;

/*
 * This number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.
 *
 * It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66
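/*
 * (MSR_P4_CRU_ESCR5 sits 65 MSRs above MSR_P4_BSU_ESCR0, so bit offsets
 * 0..65 are possible and 66 bits are needed to track them.)
 */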

/*
 * perfctr_nmi_owner tracks the ownership of the perfctr registers;
 * evntsel_nmi_owner tracks the ownership of the event selection registers.
 *
 * Different performance counters/event selection registers may be reserved
 * by different subsystems; this reservation system just tries to coordinate
 * things a little.
 */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return msr - MSR_K7_PERFCTR0;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return msr - MSR_ARCH_PERFMON_PERFCTR0;

		switch (boot_cpu_data.x86) {
		case 6:
			return msr - MSR_P6_PERFCTR0;
		case 15:
			return msr - MSR_P4_BPU_PERFCTR0;
		}
	}
	return 0;
}

/*
 * converts an msr to an appropriate reservation bit
 * returns the bit offset of the event selection register
 */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return msr - MSR_K7_EVNTSEL0;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return msr - MSR_ARCH_PERFMON_EVENTSEL0;

		switch (boot_cpu_data.x86) {
		case 6:
			return msr - MSR_P6_EVNTSEL0;
		case 15:
			return msr - MSR_P4_BSU_ESCR0;
		}
	}
	return 0;
}
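
/*
 * Example: the AMD counter/event-select MSRs are laid out contiguously,
 * so MSR_K7_PERFCTR1 and MSR_K7_EVNTSEL1 both map to reservation bit 1.
 */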

/* checks whether a bit is available (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return !test_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);

int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, perfctr_nmi_owner))
		return 1;
	return 0;
}
EXPORT_SYMBOL(reserve_perfctr_nmi);

void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(release_perfctr_nmi);

int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, evntsel_nmi_owner))
		return 1;
	return 0;
}
EXPORT_SYMBOL(reserve_evntsel_nmi);

void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, evntsel_nmi_owner);
}
EXPORT_SYMBOL(release_evntsel_nmi);
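
/*
 * A typical user reserves a counter and its matching event select
 * together and backs out on failure; see single_msr_reserve() and
 * single_msr_unreserve() below for the canonical pattern.
 */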

void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);

	if (wd_ops)
		wd_ops->unreserve();

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled? */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware? */
	if (!wd_ops)
		return;
	if (!wd_ops->reserve()) {
		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
		return;
	}

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
	touch_nmi_watchdog();
}

/*
 * Activate the NMI watchdog via the local APIC.
 */

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only the low 32 bits of the
	 * counter are writable, and the hardware sign-extends bit 31 into
	 * the upper bits.  We can therefore only program the counter with
	 * 31-bit (negative) values; bit 31 must be 1 so that bits 32..63
	 * end up as 1 too.  Find an nmi_hz whose period still fits.
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;
		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}
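
/*
 * Worked example (illustrative numbers): on a 4 GHz CPU with nmi_hz == 1
 * the period would be 4,000,000,000 cycles, which does not fit in 31 bits
 * (max 2,147,483,647), so the rate is bumped to 4e9 / 0x7fffffff + 1 = 2 Hz.
 */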

static void write_watchdog_counter(unsigned int perfctr_msr,
				   const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		pr_debug("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
				     const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		pr_debug("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}
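
/*
 * The counter is programmed to -(cpu_khz * 1000 / nmi_hz) and counts up,
 * so it overflows and raises the watchdog NMI roughly nmi_hz times per
 * second of unhalted cycles (e.g. -2,000,000,000 on a 2 GHz CPU with
 * nmi_hz == 1).
 */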

/*
 * AMD K7/K8/Family10h/Family11h support.
 * AMD keeps this interface nicely stable so there is not much variety
 */
#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);

	/* initialize the wd struct before enabling */
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	return 1;
}

static void single_msr_stop_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);
}

static int single_msr_reserve(void)
{
	if (!reserve_perfctr_nmi(wd_ops->perfctr))
		return 0;

	if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
		release_perfctr_nmi(wd_ops->perfctr);
		return 0;
	}
	return 1;
}

static void single_msr_unreserve(void)
{
	release_evntsel_nmi(wd_ops->evntsel);
	release_perfctr_nmi(wd_ops->perfctr);
}

static void __kprobes
single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops k7_wd_ops = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_k7_watchdog,
	.rearm		= single_msr_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_K7_PERFCTR0,
	.evntsel	= MSR_K7_EVNTSEL0,
	.checkbit	= 1ULL << 47,
};
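
/*
 * .checkbit is the top bit of the counter (AMD counters are 48 bits wide):
 * while it is still set the counter has not yet overflowed, so an NMI seen
 * at that point did not come from the watchdog (see lapic_wd_event()).
 */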

/*
 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
 */
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	/* KVM doesn't implement this MSR */
	if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
		return 0;

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);

	/* initialize the wd struct before enabling */
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	return 1;
}

static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/*
	 * P6-based Pentium M needs to re-unmask the apic vector, but it
	 * doesn't hurt other P6 variants.
	 * ArchPerfmon/Core Duo also needs this.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	/* P6/ARCH_PERFMON has 32 bit counter write */
	write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p6_wd_ops = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_p6_watchdog,
	.rearm		= p6_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_P6_PERFCTR0,
	.evntsel	= MSR_P6_EVNTSEL0,
	.checkbit	= 1ULL << 39,
};
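
/*
 * P6-family counters are 40 bits wide, hence the bit-39 checkbit, but only
 * the low 32 bits can be written; that is why the setup and rearm paths go
 * through adjust_for_32bit_ctr() and write_watchdog_counter32().
 */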

/*
 * Intel P4 performance counters.
 * By far the most complicated of all.
 */
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1 << 7)
#define P4_ESCR_EVENT_SELECT(N)	((N) << 25)
#define P4_ESCR_OS		(1 << 3)
#define P4_ESCR_USR		(1 << 2)
#define P4_CCCR_OVF_PMI0	(1 << 26)
#define P4_CCCR_OVF_PMI1	(1 << 27)
#define P4_CCCR_THRESHOLD(N)	((N) << 20)
#define P4_CCCR_COMPLEMENT	(1 << 19)
#define P4_CCCR_COMPARE		(1 << 18)
#define P4_CCCR_REQUIRED	(3 << 16)
#define P4_CCCR_ESCR_SELECT(N)	((N) << 13)
#define P4_CCCR_ENABLE		(1 << 12)
#define P4_CCCR_OVF		(1 << 31)

#define P4_CONTROLS	18
static unsigned int p4_controls[18] = {
	MSR_P4_BPU_CCCR0,
	MSR_P4_BPU_CCCR1,
	MSR_P4_BPU_CCCR2,
	MSR_P4_BPU_CCCR3,
	MSR_P4_MS_CCCR0,
	MSR_P4_MS_CCCR1,
	MSR_P4_MS_CCCR2,
	MSR_P4_MS_CCCR3,
	MSR_P4_FLAME_CCCR0,
	MSR_P4_FLAME_CCCR1,
	MSR_P4_FLAME_CCCR2,
	MSR_P4_FLAME_CCCR3,
	MSR_P4_IQ_CCCR0,
	MSR_P4_IQ_CCCR1,
	MSR_P4_IQ_CCCR2,
	MSR_P4_IQ_CCCR3,
	MSR_P4_IQ_CCCR4,
	MSR_P4_IQ_CCCR5,
};

/*
 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
 * CRU_ESCR0 (with any non-null event selector) through a complemented
 * max threshold. [IA32-Vol3, Section 14.9.9]
 */
static int setup_p4_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/*
	 * performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);

		/*
		 * If we're on the kdump kernel or other situation, we may
		 * still have other performance counter registers set to
		 * interrupt and they'll keep interrupting forever because
		 * of the P4_CCCR_OVF quirk. So we need to ACK all the
		 * pending interrupts and disable all the registers here,
		 * before reenabling the NMI delivery. Refer to p4_rearm()
		 * about the P4_CCCR_OVF quirk.
		 */
		if (reset_devices) {
			unsigned int low, high;
			int i;

			for (i = 0; i < P4_CONTROLS; i++) {
				rdmsr(p4_controls[i], low, high);
				low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
				wrmsr(p4_controls[i], low, high);
			}
		}
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;

		/* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
		if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
			cccr_val = P4_CCCR_OVF_PMI0;
		else
			cccr_val = P4_CCCR_OVF_PMI1;
		cccr_val |= P4_CCCR_ESCR_SELECT(4);
	}

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		| P4_CCCR_COMPLEMENT
		| P4_CCCR_COMPARE
		| P4_CCCR_REQUIRED;
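
	/*
	 * The COMPARE/COMPLEMENT/THRESHOLD(15) combination makes the CCCR
	 * compare the per-cycle event count against a complemented maximum
	 * threshold, a comparison that should hold on every cycle, so the
	 * counter increments once per clock and behaves like a clock, as
	 * described in the SDM section referenced above.
	 */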

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	return 1;
}

static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);
}

static int p4_reserve(void)
{
	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
		return 0;
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
		goto fail1;
#endif
	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
		goto fail2;
	/* RED-PEN why is ESCR1 not reserved here? */
	return 1;
fail2:
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
fail1:
#endif
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
	return 0;
}

static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}

static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	unsigned dummy;
	/*
	 * P4 quirks:
	 * - An overflown perfctr will assert its interrupt
	 *   until the OVF flag in its CCCR is cleared.
	 * - LVTPC is masked on interrupt and must be
	 *   unmasked by the LVTPC handler.
	 */
	rdmsrl(wd->cccr_msr, dummy);
	dummy &= ~P4_CCCR_OVF;
	wrmsrl(wd->cccr_msr, dummy);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p4_wd_ops = {
	.reserve	= p4_reserve,
	.unreserve	= p4_unreserve,
	.setup		= setup_p4_watchdog,
	.rearm		= p4_rearm,
	.stop		= stop_p4_watchdog,
	/* RED-PEN this is wrong for the other sibling */
	.perfctr	= MSR_P4_BPU_PERFCTR0,
	.evntsel	= MSR_P4_BSU_ESCR0,
	.checkbit	= 1ULL << 39,
};

/*
 * Watchdog using the Intel architected PerfMon.
 * Used for Core2 and hopefully all future Intel CPUs.
 */
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static struct wd_ops intel_arch_wd_ops;

static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length <
	     (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return 0;

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);
	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
	return 1;
}

static struct wd_ops intel_arch_wd_ops __read_mostly = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_intel_arch_watchdog,
	.rearm		= p6_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_ARCH_PERFMON_PERFCTR1,
	.evntsel	= MSR_ARCH_PERFMON_EVENTSEL1,
};
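
/*
 * Note that .checkbit is deliberately left zero here: it is filled in by
 * setup_intel_arch_watchdog() from the counter bit width that CPUID leaf
 * 0xA reports (1ULL << (bit_width - 1)).
 */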

static void probe_nmi_watchdog(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 == 6 ||
		    (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
			wd_ops = &k7_wd_ops;
		return;
	case X86_VENDOR_INTEL:
		/*
		 * Work around cases where perfctr1 doesn't have a working
		 * enable bit, as described in the following errata:
		 * AE49	Core Duo and Intel Core Solo 65 nm
		 * AN49	Intel Pentium Dual-Core
		 * AF49	Dual-Core Intel Xeon Processor LV
		 */
		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
		    ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
		      boot_cpu_data.x86_mask == 4))) {
			intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
			intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
		}
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
			wd_ops = &intel_arch_wd_ops;
			break;
		}
		switch (boot_cpu_data.x86) {
		case 6:
			if (boot_cpu_data.x86_model > 13)
				return;

			wd_ops = &p6_wd_ops;
			break;
		case 15:
			wd_ops = &p4_wd_ops;
			break;
		default:
			return;
		}
		break;
	}
}

/* Interface to nmi.c */

int lapic_watchdog_init(unsigned nmi_hz)
{
	if (!wd_ops) {
		probe_nmi_watchdog();
		if (!wd_ops) {
			printk(KERN_INFO "NMI watchdog: CPU not supported\n");
			return -1;
		}

		if (!wd_ops->reserve()) {
			printk(KERN_ERR
				"NMI watchdog: cannot reserve perfctrs\n");
			return -1;
		}
	}

	if (!(wd_ops->setup(nmi_hz))) {
		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
			raw_smp_processor_id());
		return -1;
	}

	return 0;
}

void lapic_watchdog_stop(void)
{
	if (wd_ops)
		wd_ops->stop();
}

unsigned lapic_adjust_nmi_hz(unsigned hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
		hz = adjust_for_32bit_ctr(hz);
	return hz;
}

int __kprobes lapic_wd_event(unsigned nmi_hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 ctr;

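	/*
	 * The counter was programmed with a negative value, so its top bit
	 * (wd_ops->checkbit) stays set until it overflows back to zero; if
	 * the bit is still set, the counter is still ticking and this NMI
	 * was not ours.
	 */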
	rdmsrl(wd->perfctr_msr, ctr);
	if (ctr & wd_ops->checkbit) /* perfctr still running? */
		return 0;

	wd_ops->rearm(wd, nmi_hz);
	return 1;
}