]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/x86/oprofile/op_model_p4.c
oprofile/x86: use kzalloc() instead of kmalloc()
[net-next-2.6.git] / arch / x86 / oprofile / op_model_p4.c
CommitLineData
1da177e4
LT
1/**
2 * @file op_model_p4.c
3 * P4 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Graydon Hoare
9 */
10
11#include <linux/oprofile.h>
12#include <linux/smp.h>
20211e4d
PC
13#include <linux/ptrace.h>
14#include <linux/nmi.h>
1da177e4 15#include <asm/msr.h>
1da177e4
LT
16#include <asm/fixmap.h>
17#include <asm/apic.h>
20211e4d 18
1da177e4
LT
19
20#include "op_x86_model.h"
21#include "op_counter.h"
22
23#define NUM_EVENTS 39
24
25#define NUM_COUNTERS_NON_HT 8
26#define NUM_ESCRS_NON_HT 45
27#define NUM_CCCRS_NON_HT 18
28#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
29
30#define NUM_COUNTERS_HT2 4
31#define NUM_ESCRS_HT2 23
32#define NUM_CCCRS_HT2 9
33#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
34
42399adb
RR
35#define OP_CTR_OVERFLOW (1ULL<<31)
36
1da177e4 37static unsigned int num_counters = NUM_COUNTERS_NON_HT;
cb9c448c 38static unsigned int num_controls = NUM_CONTROLS_NON_HT;
1da177e4
LT
39
40/* this has to be checked dynamically since the
41 hyper-threadedness of a chip is discovered at
42 kernel boot-time. */
/*
 * Decide at runtime how many counters/controls each logical CPU owns.
 * With two HT siblings the physical P4 counter set is split in half;
 * otherwise the compile-time non-HT defaults stay in effect.
 */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings != 2)
		return;
	num_counters = NUM_COUNTERS_HT2;
	num_controls = NUM_CONTROLS_HT2;
#endif
}
52
/*
 * Stride used when walking the ESCR MSR address ranges: with two HT
 * siblings each logical CPU takes every other register (even/odd split,
 * see get_stagger()), so step by 2; otherwise step by 1.
 *
 * Note: keyword order fixed to the conventional "static inline int"
 * (checkpatch: inline belongs between storage class and type).
 */
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
61
62
/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
	int virt_counter;	/* CTR_* bit identifying this virtual counter */
	int counter_address;	/* MSR of the performance counter itself */
	int cccr_address;	/* MSR of the CCCR controlling that counter */
};

struct p4_event_binding {
	int escr_select;	/* value to put in CCCR */
	int event_select;	/* value to put in ESCR */
	struct {
		int virt_counter;	/* for this counter... */
		int escr_address;	/* use this ESCR */
	} bindings[2];		/* at most two counter/ESCR pairs per event */
};
78
79/* nb: these CTR_* defines are a duplicate of defines in
80 event/i386.p4*events. */
81
82
83#define CTR_BPU_0 (1 << 0)
84#define CTR_MS_0 (1 << 1)
85#define CTR_FLAME_0 (1 << 2)
86#define CTR_IQ_4 (1 << 3)
87#define CTR_BPU_2 (1 << 4)
88#define CTR_MS_2 (1 << 5)
89#define CTR_FLAME_2 (1 << 6)
90#define CTR_IQ_5 (1 << 7)
91
/*
 * Virtual-counter to MSR map. Index order must match the CTR_* bit
 * definitions above: entries 0-3 are the "even" (stagger 0) set, 4-7
 * the "odd" (stagger 1) set used by the second HT sibling.
 */
static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};
102
20211e4d 103#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
1da177e4 104
1da177e4
LT
/* p4 event codes in libop/op_event.h are indices into this table. */
/*
 * Each entry gives the CCCR escr_select and ESCR event_select values for
 * one event, plus up to two (virtual counter, ESCR MSR) bindings; a
 * binding of { 0, 0 } means the event is only countable on one counter.
 */

static struct p4_event_binding p4_events[NUM_EVENTS] = {

	{ /* BRANCH_RETIRED */
		0x05, 0x06,
		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* MISPRED_BRANCH_RETIRED */
		0x04, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* TC_DELIVER_MODE */
		0x01, 0x01,
		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
	},

	{ /* BPU_FETCH_REQUEST */
		0x00, 0x03,
		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
	},

	{ /* ITLB_REFERENCE */
		0x03, 0x18,
		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
	},

	{ /* MEMORY_CANCEL */
		0x05, 0x02,
		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
	},

	{ /* MEMORY_COMPLETE */
		0x02, 0x08,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* LOAD_PORT_REPLAY */
		0x02, 0x04,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* STORE_PORT_REPLAY */
		0x02, 0x05,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* MOB_LOAD_REPLAY */
		0x02, 0x03,
		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
	},

	{ /* PAGE_WALK_TYPE */
		0x04, 0x01,
		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
	},

	{ /* BSQ_CACHE_REFERENCE */
		0x07, 0x0c,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
	},

	{ /* IOQ_ALLOCATION */
		0x06, 0x03,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { 0, 0 } }
	},

	{ /* IOQ_ACTIVE_ENTRIES */
		0x06, 0x1a,
		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
		  { 0, 0 } }
	},

	{ /* FSB_DATA_ACTIVITY */
		0x06, 0x17,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* BSQ_ALLOCATION */
		0x07, 0x05,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { 0, 0 } }
	},

	{ /* BSQ_ACTIVE_ENTRIES */
		0x07, 0x06,
		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
		  { 0, 0 } }
	},

	{ /* X87_ASSIST */
		0x05, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* SSE_INPUT_ASSIST */
		0x01, 0x34,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_SP_UOP */
		0x01, 0x08,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_DP_UOP */
		0x01, 0x0c,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_SP_UOP */
		0x01, 0x0a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_DP_UOP */
		0x01, 0x0e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 64BIT_MMX_UOP */
		0x01, 0x02,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 128BIT_MMX_UOP */
		0x01, 0x1a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_FP_UOP */
		0x01, 0x04,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_SIMD_MOVES_UOP */
		0x01, 0x2e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* MACHINE_CLEAR */
		0x05, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* GLOBAL_POWER_EVENTS */
		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* TC_MS_XFER */
		0x00, 0x05,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* UOP_QUEUE_WRITES */
		0x00, 0x09,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* FRONT_END_EVENT */
		0x05, 0x08,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* EXECUTION_EVENT */
		0x05, 0x0c,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* REPLAY_EVENT */
		0x05, 0x09,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* INSTR_RETIRED */
		0x04, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOPS_RETIRED */
		0x04, 0x01,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOP_TYPE */
		0x02, 0x02,
		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
	},

	{ /* RETIRED_MISPRED_BRANCH_TYPE */
		0x02, 0x05,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	},

	{ /* RETIRED_BRANCH_TYPE */
		0x02, 0x04,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	}
};
343
344
345#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
346
347#define ESCR_RESERVED_BITS 0x80000003
348#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
349#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
350#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
351#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
352#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
353#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
354#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
1da177e4
LT
355
356#define CCCR_RESERVED_BITS 0x38030FFF
357#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
358#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
359#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
360#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
361#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
362#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
363#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
1da177e4
LT
364#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
365#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
366
1da177e4
LT
367
/* this assigns a "stagger" to the current CPU, which is used throughout
   the code in this module as an extra array offset, to select the "even"
   or "odd" part of all the divided resources. */
/*
 * Returns 0 on UP kernels and on the first HT sibling of a core; returns
 * 1 only for the second sibling (a CPU that is not the first entry in its
 * own sibling mask).
 */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
	return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
#endif
	return 0;
}
379
380
/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
384#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
385
386static unsigned long reset_value[NUM_COUNTERS_NON_HT];
387
388
/*
 * Reserve the perfctr/CCCR MSRs for our virtual counters and all the ESCR
 * MSRs we may program, recording each successfully reserved address in
 * msrs->counters[]/msrs->controls[]. Entries whose reservation fails are
 * left at their prior value; later code skips zero addresses.
 * The ESCR ranges are walked with the per-HT-sibling stagger/stride so
 * each logical CPU claims only its own half of the register set.
 */
static void p4_fill_in_addresses(struct op_msrs * const msrs)
{
	unsigned int i;
	unsigned int addr, cccraddr, stag;

	setup_num_counters();
	stag = get_stagger();

	/* the counter & cccr registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
		if (reserve_perfctr_nmi(addr)) {
			msrs->counters[i].addr = addr;
			msrs->controls[i].addr = cccraddr;
		}
	}

	/* 43 ESCR registers in three or four discontiguous groups */
	/* note: i keeps counting up from num_counters here, indexing the
	 * controls[] slots that follow the CCCRs */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* no IQ_ESCR0/1 on some models, we save a second time BSU_ESCR0/1
	 * to avoid special case in nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs*/
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread,
		   for the 22nd and 23rd control registers */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		}
	}
}
472
473
/*
 * Program the ESCR and CCCR for one virtual counter according to
 * counter_config[ctr]: event select/mask, user/kernel enables (choosing
 * the T0 or T1 bit set based on the HT stagger), the CCCR's ESCR select,
 * and the PMI routing bit for the right logical CPU. Logs and bails out
 * if the event code has no binding for this counter.
 */
static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;
	unsigned int cccr = 0;
	unsigned int escr = 0;
	unsigned int high = 0;
	unsigned int counter_bit;
	struct p4_event_binding *ev = NULL;
	unsigned int stag;

	stag = get_stagger();

	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);

	/* find our event binding structure. */
	/* events are 1-based in counter_config; index event-1 below */
	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
		printk(KERN_ERR
		       "oprofile: P4 event code 0x%lx out of range\n",
		       counter_config[ctr].event);
		return;
	}

	ev = &(p4_events[counter_config[ctr].event - 1]);

	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR */
			rdmsr(ev->bindings[i].escr_address, escr, high);
			ESCR_CLEAR(escr);
			if (stag == 0) {
				/* thread 0 uses the USR/OS bits at 2/3 */
				ESCR_SET_USR_0(escr, counter_config[ctr].user);
				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
			} else {
				/* thread 1 uses the USR/OS bits at 0/1 */
				ESCR_SET_USR_1(escr, counter_config[ctr].user);
				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
			wrmsr(ev->bindings[i].escr_address, escr, high);

			/* modify CCCR */
			rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
			      cccr, high);
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
			if (stag == 0)
				CCCR_SET_PMI_OVF_0(cccr);
			else
				CCCR_SET_PMI_OVF_1(cccr);
			wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
			      cccr, high);
			return;
		}
	}

	printk(KERN_ERR
	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
	       counter_config[ctr].event, stag, ctr);
}
537
538
ef8828dd
RR
/*
 * Per-CPU counter setup: verify the PMCs are enabled in
 * MSR_IA32_MISC_ENABLE, clear the CCCRs and ESCRs we reserved, then
 * program each enabled counter and preload it with -count so it
 * overflows (and raises the PMI) after 'count' events.
 * reset_value[i] == 0 marks a counter as unused for the other handlers.
 */
static void p4_setup_ctrs(struct op_x86_model_spec const *model,
			  struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int stag;

	stag = get_stagger();

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!MISC_PMC_ENABLED_P(low)) {
		printk(KERN_ERR "oprofile: P4 PMC not available\n");
		return;
	}

	/* clear the cccrs we will use */
	for (i = 0; i < num_counters; i++) {
		if (unlikely(!msrs->controls[i].addr))
			continue;
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear all escrs (including those outside our concern) */
	for (i = num_counters; i < num_controls; i++) {
		if (unlikely(!msrs->controls[i].addr))
			continue;
		wrmsr(msrs->controls[i].addr, 0, 0);
	}

	/* setup all counters */
	for (i = 0; i < num_counters; ++i) {
		if (counter_config[i].enabled && msrs->controls[i].addr) {
			reset_value[i] = counter_config[i].count;
			pmc_setup_one_p4_counter(i);
			/* counters count up; start at -count so overflow
			 * fires after 'count' events */
			wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
			       -(u64)counter_config[i].count);
		} else {
			reset_value[i] = 0;
		}
	}
}
583
584
/*
 * NMI handler body: for every active counter, detect overflow, record a
 * sample, and rearm the counter with its reset value. Returns 1 so the
 * caller treats the NMI as handled (see op_model_ppro.c).
 */
static int p4_check_ctrs(struct pt_regs * const regs,
			 struct op_msrs const * const msrs)
{
	unsigned long ctr, low, high, stag, real;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		if (!reset_value[i])
			continue;

		/*
		 * there is some eccentricity in the hardware which
		 * requires that we perform 2 extra corrections:
		 *
		 * - check both the CCCR:OVF flag for overflow and the
		 *   counter high bit for un-flagged overflows.
		 *
		 * - write the counter back twice to ensure it gets
		 *   updated properly.
		 *
		 * the former seems to be related to extra NMIs happening
		 * during the current NMI; the latter is reported as errata
		 * N15 in intel doc 249199-029, pentium 4 specification
		 * update, though their suggested work-around does not
		 * appear to solve the problem.
		 */

		real = VIRT_CTR(stag, i);

		rdmsr(p4_counters[real].cccr_address, low, high);
		rdmsr(p4_counters[real].counter_address, ctr, high);
		/* high bit clear means the up-counter wrapped past zero */
		if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
			oprofile_add_sample(regs, i);
			wrmsrl(p4_counters[real].counter_address,
			       -(u64)reset_value[i]);
			CCCR_CLEAR_OVF(low);
			wrmsr(p4_counters[real].cccr_address, low, high);
			/* second write: errata work-around, see above */
			wrmsrl(p4_counters[real].counter_address,
			       -(u64)reset_value[i]);
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* See op_model_ppro.c */
	return 1;
}
636
637
638static void p4_start(struct op_msrs const * const msrs)
639{
640 unsigned int low, high, stag;
641 int i;
642
643 stag = get_stagger();
644
645 for (i = 0; i < num_counters; ++i) {
646 if (!reset_value[i])
647 continue;
1131a478 648 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4 649 CCCR_SET_ENABLE(low);
1131a478 650 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4
LT
651 }
652}
653
654
655static void p4_stop(struct op_msrs const * const msrs)
656{
657 unsigned int low, high, stag;
658 int i;
659
660 stag = get_stagger();
661
662 for (i = 0; i < num_counters; ++i) {
cb9c448c
DZ
663 if (!reset_value[i])
664 continue;
1131a478 665 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4 666 CCCR_SET_DISABLE(low);
1131a478 667 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4
LT
668 }
669}
670
cb9c448c
DZ
/*
 * Release every perfctr and event-select MSR reservation taken in
 * p4_fill_in_addresses(). A zero address means the reservation failed
 * (or was never attempted) and is skipped.
 */
static void p4_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (msrs->counters[i].addr)
			release_perfctr_nmi(msrs->counters[i].addr);
	}
	/*
	 * some of the control registers are specially reserved in
	 * conjunction with the counter registers (hence the starting offset).
	 * This saves a few bits.
	 */
	for (i = num_counters; i < num_controls; ++i) {
		if (msrs->controls[i].addr)
			release_evntsel_nmi(msrs->controls[i].addr);
	}
}
689
1da177e4
LT
690
691#ifdef CONFIG_SMP
/* model spec selected when the P4 runs with two HT siblings: the
 * physical counter/control set is split in half between them */
struct op_x86_model_spec op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.fill_in_addresses	= &p4_fill_in_addresses,
	.setup_ctrs		= &p4_setup_ctrs,
	.check_ctrs		= &p4_check_ctrs,
	.start			= &p4_start,
	.stop			= &p4_stop,
	.shutdown		= &p4_shutdown
};
702#endif
703
/* model spec for non-HT P4s: one logical CPU owns all counters/controls */
struct op_x86_model_spec op_p4_spec = {
	.num_counters		= NUM_COUNTERS_NON_HT,
	.num_controls		= NUM_CONTROLS_NON_HT,
	.fill_in_addresses	= &p4_fill_in_addresses,
	.setup_ctrs		= &p4_setup_ctrs,
	.check_ctrs		= &p4_check_ctrs,
	.start			= &p4_start,
	.stop			= &p4_stop,
	.shutdown		= &p4_shutdown
};