]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/x86/oprofile/op_model_p4.c
x86/oprofile: Remove unused num_virt_controls from struct op_x86_model_spec
[net-next-2.6.git] / arch / x86 / oprofile / op_model_p4.c
CommitLineData
1da177e4
LT
/**
 * @file op_model_p4.c
 * P4 model-specific MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Graydon Hoare
 */
10
11#include <linux/oprofile.h>
12#include <linux/smp.h>
20211e4d
PC
13#include <linux/ptrace.h>
14#include <linux/nmi.h>
1da177e4 15#include <asm/msr.h>
1da177e4
LT
16#include <asm/fixmap.h>
17#include <asm/apic.h>
20211e4d 18
1da177e4
LT
19
20#include "op_x86_model.h"
21#include "op_counter.h"
22
/* Number of entries in the p4_events binding table below. */
#define NUM_EVENTS 39

/* Hardware resource counts when hyper-threading is off (one thread). */
#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* Per-thread resource counts when two hyper-threads share the PMU. */
#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

/* High bit of the 40-bit counter as seen in the low 32-bit MSR word. */
#define OP_CTR_OVERFLOW (1ULL<<31)
36
1da177e4 37static unsigned int num_counters = NUM_COUNTERS_NON_HT;
cb9c448c 38static unsigned int num_controls = NUM_CONTROLS_NON_HT;
1da177e4
LT
39
/*
 * This has to be checked dynamically since the hyper-threadedness of a
 * chip is discovered at kernel boot-time.
 */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings == 2) {
		num_counters = NUM_COUNTERS_HT2;
		num_controls = NUM_CONTROLS_HT2;
	}
#endif
}
52
/*
 * Step between consecutive MSRs owned by the same logical CPU: with two
 * hyper-threads the even/odd registers interleave, so step by two.
 *
 * Fix: the original declared this "static int inline"; the conventional
 * (and kernel-wide) specifier order is "static inline int".
 */
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
61
62
/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
	int virt_counter;	/* CTR_* bit identifying this counter */
	int counter_address;	/* MSR of the performance counter */
	int cccr_address;	/* MSR of its paired CCCR control register */
};
69
/* One hardware event and the (up to two) counter/ESCR pairs it can use. */
struct p4_event_binding {
	int escr_select;	/* value to put in CCCR */
	int event_select;	/* value to put in ESCR */
	struct {
		int virt_counter;	/* for this counter... */
		int escr_address;	/* use this ESCR */
	} bindings[2];
};
78
/*
 * nb: these CTR_* defines are a duplicate of defines in
 * event/i386.p4*events.
 */
#define CTR_BPU_0	(1 << 0)
#define CTR_MS_0	(1 << 1)
#define CTR_FLAME_0	(1 << 2)
#define CTR_IQ_4	(1 << 3)
#define CTR_BPU_2	(1 << 4)
#define CTR_MS_2	(1 << 5)
#define CTR_FLAME_2	(1 << 6)
#define CTR_IQ_5	(1 << 7)
91
20211e4d 92static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
1da177e4
LT
93 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
94 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
95 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
96 { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
97 { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
98 { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
99 { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
100 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
101};
102
20211e4d 103#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
1da177e4 104
1da177e4
LT
105/* p4 event codes in libop/op_event.h are indices into this table. */
106
107static struct p4_event_binding p4_events[NUM_EVENTS] = {
20211e4d 108
1da177e4 109 { /* BRANCH_RETIRED */
20211e4d 110 0x05, 0x06,
1da177e4
LT
111 { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
112 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
113 },
20211e4d 114
1da177e4 115 { /* MISPRED_BRANCH_RETIRED */
20211e4d 116 0x04, 0x03,
1da177e4
LT
117 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
118 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
119 },
20211e4d 120
1da177e4
LT
121 { /* TC_DELIVER_MODE */
122 0x01, 0x01,
20211e4d 123 { { CTR_MS_0, MSR_P4_TC_ESCR0},
1da177e4
LT
124 { CTR_MS_2, MSR_P4_TC_ESCR1} }
125 },
20211e4d 126
1da177e4 127 { /* BPU_FETCH_REQUEST */
20211e4d 128 0x00, 0x03,
1da177e4
LT
129 { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
130 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
131 },
132
133 { /* ITLB_REFERENCE */
134 0x03, 0x18,
135 { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
136 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
137 },
138
139 { /* MEMORY_CANCEL */
140 0x05, 0x02,
141 { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
142 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
143 },
144
145 { /* MEMORY_COMPLETE */
146 0x02, 0x08,
147 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
148 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
149 },
150
151 { /* LOAD_PORT_REPLAY */
20211e4d 152 0x02, 0x04,
1da177e4
LT
153 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
154 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
155 },
156
157 { /* STORE_PORT_REPLAY */
158 0x02, 0x05,
159 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
160 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
161 },
162
163 { /* MOB_LOAD_REPLAY */
164 0x02, 0x03,
165 { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
166 { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
167 },
168
169 { /* PAGE_WALK_TYPE */
170 0x04, 0x01,
171 { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
172 { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
173 },
174
175 { /* BSQ_CACHE_REFERENCE */
20211e4d 176 0x07, 0x0c,
1da177e4
LT
177 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
178 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
179 },
180
181 { /* IOQ_ALLOCATION */
20211e4d 182 0x06, 0x03,
1da177e4
LT
183 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
184 { 0, 0 } }
185 },
186
187 { /* IOQ_ACTIVE_ENTRIES */
20211e4d 188 0x06, 0x1a,
1da177e4
LT
189 { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
190 { 0, 0 } }
191 },
192
193 { /* FSB_DATA_ACTIVITY */
20211e4d 194 0x06, 0x17,
1da177e4
LT
195 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
196 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
197 },
198
199 { /* BSQ_ALLOCATION */
20211e4d 200 0x07, 0x05,
1da177e4
LT
201 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
202 { 0, 0 } }
203 },
204
205 { /* BSQ_ACTIVE_ENTRIES */
206 0x07, 0x06,
20211e4d 207 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
1da177e4
LT
208 { 0, 0 } }
209 },
210
211 { /* X87_ASSIST */
20211e4d 212 0x05, 0x03,
1da177e4
LT
213 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
214 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
215 },
216
217 { /* SSE_INPUT_ASSIST */
218 0x01, 0x34,
219 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
220 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
221 },
20211e4d 222
1da177e4 223 { /* PACKED_SP_UOP */
20211e4d 224 0x01, 0x08,
1da177e4
LT
225 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
226 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
227 },
20211e4d 228
1da177e4 229 { /* PACKED_DP_UOP */
20211e4d 230 0x01, 0x0c,
1da177e4
LT
231 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
232 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
233 },
234
235 { /* SCALAR_SP_UOP */
20211e4d 236 0x01, 0x0a,
1da177e4
LT
237 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
238 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
239 },
240
241 { /* SCALAR_DP_UOP */
242 0x01, 0x0e,
243 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
244 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
245 },
246
247 { /* 64BIT_MMX_UOP */
20211e4d 248 0x01, 0x02,
1da177e4
LT
249 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
250 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
251 },
20211e4d 252
1da177e4 253 { /* 128BIT_MMX_UOP */
20211e4d 254 0x01, 0x1a,
1da177e4
LT
255 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
256 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
257 },
258
259 { /* X87_FP_UOP */
20211e4d 260 0x01, 0x04,
1da177e4
LT
261 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
262 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
263 },
20211e4d 264
1da177e4 265 { /* X87_SIMD_MOVES_UOP */
20211e4d 266 0x01, 0x2e,
1da177e4
LT
267 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
268 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
269 },
20211e4d 270
1da177e4 271 { /* MACHINE_CLEAR */
20211e4d 272 0x05, 0x02,
1da177e4
LT
273 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
274 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
275 },
276
277 { /* GLOBAL_POWER_EVENTS */
278 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
279 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
280 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
281 },
20211e4d 282
1da177e4 283 { /* TC_MS_XFER */
20211e4d 284 0x00, 0x05,
1da177e4
LT
285 { { CTR_MS_0, MSR_P4_MS_ESCR0},
286 { CTR_MS_2, MSR_P4_MS_ESCR1} }
287 },
288
289 { /* UOP_QUEUE_WRITES */
290 0x00, 0x09,
291 { { CTR_MS_0, MSR_P4_MS_ESCR0},
292 { CTR_MS_2, MSR_P4_MS_ESCR1} }
293 },
294
295 { /* FRONT_END_EVENT */
296 0x05, 0x08,
297 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
298 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
299 },
300
301 { /* EXECUTION_EVENT */
302 0x05, 0x0c,
303 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
304 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
305 },
306
307 { /* REPLAY_EVENT */
308 0x05, 0x09,
309 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
310 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
311 },
312
313 { /* INSTR_RETIRED */
20211e4d 314 0x04, 0x02,
1da177e4
LT
315 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
316 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
317 },
318
319 { /* UOPS_RETIRED */
320 0x04, 0x01,
321 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
322 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
323 },
324
20211e4d
PC
325 { /* UOP_TYPE */
326 0x02, 0x02,
1da177e4
LT
327 { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
328 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
329 },
330
331 { /* RETIRED_MISPRED_BRANCH_TYPE */
20211e4d 332 0x02, 0x05,
1da177e4
LT
333 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
334 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
335 },
336
337 { /* RETIRED_BRANCH_TYPE */
338 0x02, 0x04,
339 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
340 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
341 }
342};
343
344
/* IA32_MISC_ENABLE bit 7: performance monitoring available. */
#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)

/* ESCR field manipulation; bits outside ESCR_RESERVED_BITS are cleared. */
#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))

/* CCCR field manipulation; bit 31 is the overflow (OVF) flag. */
#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
366
1da177e4
LT
367
/*
 * This assigns a "stagger" to the current CPU, which is used throughout
 * the code in this module as an extra array offset, to select the "even"
 * or "odd" part of all the divided resources: 0 for the first sibling of
 * a core, 1 for the second.
 */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
	return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
#endif
	return 0;
}
379
380
381/* finally, mediate access to a real hardware counter
382 by passing a "virtual" counter numer to this macro,
383 along with your stagger setting. */
384#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
385
386static unsigned long reset_value[NUM_COUNTERS_NON_HT];
387
388
389static void p4_fill_in_addresses(struct op_msrs * const msrs)
390{
20211e4d 391 unsigned int i;
cb9c448c 392 unsigned int addr, cccraddr, stag;
1da177e4
LT
393
394 setup_num_counters();
395 stag = get_stagger();
396
cb9c448c 397 /* initialize some registers */
20211e4d 398 for (i = 0; i < num_counters; ++i)
cb9c448c 399 msrs->counters[i].addr = 0;
20211e4d 400 for (i = 0; i < num_controls; ++i)
cb9c448c 401 msrs->controls[i].addr = 0;
20211e4d 402
cb9c448c
DZ
403 /* the counter & cccr registers we pay attention to */
404 for (i = 0; i < num_counters; ++i) {
405 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
406 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
20211e4d 407 if (reserve_perfctr_nmi(addr)) {
cb9c448c
DZ
408 msrs->counters[i].addr = addr;
409 msrs->controls[i].addr = cccraddr;
410 }
411 }
412
1da177e4
LT
413 /* 43 ESCR registers in three or four discontiguous group */
414 for (addr = MSR_P4_BSU_ESCR0 + stag;
415 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
cb9c448c
DZ
416 if (reserve_evntsel_nmi(addr))
417 msrs->controls[i].addr = addr;
1da177e4
LT
418 }
419
420 /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
421 * to avoid special case in nmi_{save|restore}_registers() */
422 if (boot_cpu_data.x86_model >= 0x3) {
423 for (addr = MSR_P4_BSU_ESCR0 + stag;
424 addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
cb9c448c
DZ
425 if (reserve_evntsel_nmi(addr))
426 msrs->controls[i].addr = addr;
1da177e4
LT
427 }
428 } else {
429 for (addr = MSR_P4_IQ_ESCR0 + stag;
430 addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
cb9c448c
DZ
431 if (reserve_evntsel_nmi(addr))
432 msrs->controls[i].addr = addr;
1da177e4
LT
433 }
434 }
435
436 for (addr = MSR_P4_RAT_ESCR0 + stag;
437 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
cb9c448c
DZ
438 if (reserve_evntsel_nmi(addr))
439 msrs->controls[i].addr = addr;
1da177e4 440 }
20211e4d 441
1da177e4 442 for (addr = MSR_P4_MS_ESCR0 + stag;
20211e4d 443 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
cb9c448c
DZ
444 if (reserve_evntsel_nmi(addr))
445 msrs->controls[i].addr = addr;
1da177e4 446 }
20211e4d 447
1da177e4 448 for (addr = MSR_P4_IX_ESCR0 + stag;
20211e4d 449 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
cb9c448c
DZ
450 if (reserve_evntsel_nmi(addr))
451 msrs->controls[i].addr = addr;
1da177e4
LT
452 }
453
454 /* there are 2 remaining non-contiguously located ESCRs */
455
20211e4d 456 if (num_counters == NUM_COUNTERS_NON_HT) {
1da177e4 457 /* standard non-HT CPUs handle both remaining ESCRs*/
cb9c448c
DZ
458 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
459 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
460 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
461 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
1da177e4
LT
462
463 } else if (stag == 0) {
464 /* HT CPUs give the first remainder to the even thread, as
465 the 32nd control register */
cb9c448c
DZ
466 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
467 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
1da177e4
LT
468
469 } else {
470 /* and two copies of the second to the odd thread,
471 for the 22st and 23nd control registers */
cb9c448c
DZ
472 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
473 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
474 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
475 }
1da177e4
LT
476 }
477}
478
479
480static void pmc_setup_one_p4_counter(unsigned int ctr)
481{
482 int i;
483 int const maxbind = 2;
484 unsigned int cccr = 0;
485 unsigned int escr = 0;
486 unsigned int high = 0;
487 unsigned int counter_bit;
488 struct p4_event_binding *ev = NULL;
489 unsigned int stag;
490
491 stag = get_stagger();
20211e4d 492
1da177e4
LT
493 /* convert from counter *number* to counter *bit* */
494 counter_bit = 1 << VIRT_CTR(stag, ctr);
20211e4d 495
1da177e4
LT
496 /* find our event binding structure. */
497 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
20211e4d
PC
498 printk(KERN_ERR
499 "oprofile: P4 event code 0x%lx out of range\n",
1da177e4
LT
500 counter_config[ctr].event);
501 return;
502 }
20211e4d 503
1da177e4 504 ev = &(p4_events[counter_config[ctr].event - 1]);
20211e4d 505
1da177e4
LT
506 for (i = 0; i < maxbind; i++) {
507 if (ev->bindings[i].virt_counter & counter_bit) {
508
509 /* modify ESCR */
1131a478 510 rdmsr(ev->bindings[i].escr_address, escr, high);
1da177e4
LT
511 ESCR_CLEAR(escr);
512 if (stag == 0) {
513 ESCR_SET_USR_0(escr, counter_config[ctr].user);
514 ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
515 } else {
516 ESCR_SET_USR_1(escr, counter_config[ctr].user);
517 ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
518 }
519 ESCR_SET_EVENT_SELECT(escr, ev->event_select);
20211e4d 520 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
1131a478 521 wrmsr(ev->bindings[i].escr_address, escr, high);
20211e4d 522
1da177e4 523 /* modify CCCR */
1131a478
RR
524 rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
525 cccr, high);
1da177e4
LT
526 CCCR_CLEAR(cccr);
527 CCCR_SET_REQUIRED_BITS(cccr);
528 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
20211e4d 529 if (stag == 0)
1da177e4 530 CCCR_SET_PMI_OVF_0(cccr);
20211e4d 531 else
1da177e4 532 CCCR_SET_PMI_OVF_1(cccr);
1131a478
RR
533 wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
534 cccr, high);
1da177e4
LT
535 return;
536 }
537 }
538
20211e4d 539 printk(KERN_ERR
1da177e4
LT
540 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
541 counter_config[ctr].event, stag, ctr);
542}
543
544
ef8828dd
RR
545static void p4_setup_ctrs(struct op_x86_model_spec const *model,
546 struct op_msrs const * const msrs)
1da177e4
LT
547{
548 unsigned int i;
549 unsigned int low, high;
1da177e4
LT
550 unsigned int stag;
551
552 stag = get_stagger();
553
554 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
20211e4d 555 if (!MISC_PMC_ENABLED_P(low)) {
1da177e4
LT
556 printk(KERN_ERR "oprofile: P4 PMC not available\n");
557 return;
558 }
559
560 /* clear the cccrs we will use */
6e63ea4b 561 for (i = 0; i < num_counters; i++) {
217d3cfb 562 if (unlikely(!msrs->controls[i].addr))
cb9c448c 563 continue;
1da177e4
LT
564 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
565 CCCR_CLEAR(low);
566 CCCR_SET_REQUIRED_BITS(low);
567 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
568 }
569
1da177e4 570 /* clear all escrs (including those outside our concern) */
cb9c448c 571 for (i = num_counters; i < num_controls; i++) {
217d3cfb 572 if (unlikely(!msrs->controls[i].addr))
cb9c448c
DZ
573 continue;
574 wrmsr(msrs->controls[i].addr, 0, 0);
1da177e4
LT
575 }
576
1da177e4 577 /* setup all counters */
6e63ea4b 578 for (i = 0; i < num_counters; ++i) {
217d3cfb 579 if (counter_config[i].enabled && msrs->controls[i].addr) {
1da177e4
LT
580 reset_value[i] = counter_config[i].count;
581 pmc_setup_one_p4_counter(i);
bbc5986d 582 wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
8045a4c2 583 -(u64)counter_config[i].count);
1da177e4
LT
584 } else {
585 reset_value[i] = 0;
586 }
587 }
588}
589
590
591static int p4_check_ctrs(struct pt_regs * const regs,
592 struct op_msrs const * const msrs)
593{
594 unsigned long ctr, low, high, stag, real;
595 int i;
596
597 stag = get_stagger();
598
599 for (i = 0; i < num_counters; ++i) {
20211e4d
PC
600
601 if (!reset_value[i])
1da177e4
LT
602 continue;
603
20211e4d 604 /*
1da177e4
LT
605 * there is some eccentricity in the hardware which
606 * requires that we perform 2 extra corrections:
607 *
608 * - check both the CCCR:OVF flag for overflow and the
609 * counter high bit for un-flagged overflows.
610 *
611 * - write the counter back twice to ensure it gets
612 * updated properly.
20211e4d 613 *
1da177e4
LT
614 * the former seems to be related to extra NMIs happening
615 * during the current NMI; the latter is reported as errata
616 * N15 in intel doc 249199-029, pentium 4 specification
617 * update, though their suggested work-around does not
618 * appear to solve the problem.
619 */
20211e4d 620
1da177e4
LT
621 real = VIRT_CTR(stag, i);
622
1131a478
RR
623 rdmsr(p4_counters[real].cccr_address, low, high);
624 rdmsr(p4_counters[real].counter_address, ctr, high);
42399adb 625 if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
1da177e4 626 oprofile_add_sample(regs, i);
bbc5986d 627 wrmsrl(p4_counters[real].counter_address,
8045a4c2 628 -(u64)reset_value[i]);
1da177e4 629 CCCR_CLEAR_OVF(low);
1131a478 630 wrmsr(p4_counters[real].cccr_address, low, high);
bbc5986d 631 wrmsrl(p4_counters[real].counter_address,
8045a4c2 632 -(u64)reset_value[i]);
1da177e4
LT
633 }
634 }
635
636 /* P4 quirk: you have to re-unmask the apic vector */
637 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
638
639 /* See op_model_ppro.c */
640 return 1;
641}
642
643
644static void p4_start(struct op_msrs const * const msrs)
645{
646 unsigned int low, high, stag;
647 int i;
648
649 stag = get_stagger();
650
651 for (i = 0; i < num_counters; ++i) {
652 if (!reset_value[i])
653 continue;
1131a478 654 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4 655 CCCR_SET_ENABLE(low);
1131a478 656 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4
LT
657 }
658}
659
660
661static void p4_stop(struct op_msrs const * const msrs)
662{
663 unsigned int low, high, stag;
664 int i;
665
666 stag = get_stagger();
667
668 for (i = 0; i < num_counters; ++i) {
cb9c448c
DZ
669 if (!reset_value[i])
670 continue;
1131a478 671 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4 672 CCCR_SET_DISABLE(low);
1131a478 673 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4
LT
674 }
675}
676
cb9c448c
DZ
677static void p4_shutdown(struct op_msrs const * const msrs)
678{
679 int i;
680
6e63ea4b 681 for (i = 0; i < num_counters; ++i) {
217d3cfb 682 if (msrs->counters[i].addr)
cb9c448c
DZ
683 release_perfctr_nmi(msrs->counters[i].addr);
684 }
20211e4d
PC
685 /*
686 * some of the control registers are specially reserved in
cb9c448c
DZ
687 * conjunction with the counter registers (hence the starting offset).
688 * This saves a few bits.
689 */
6e63ea4b 690 for (i = num_counters; i < num_controls; ++i) {
217d3cfb 691 if (msrs->controls[i].addr)
cb9c448c
DZ
692 release_evntsel_nmi(msrs->controls[i].addr);
693 }
694}
695
1da177e4
LT
696
#ifdef CONFIG_SMP
/* Model spec used when two hyper-threads share the PMU. */
struct op_x86_model_spec op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.num_virt_counters	= NUM_COUNTERS_HT2,
	.fill_in_addresses	= p4_fill_in_addresses,
	.setup_ctrs		= p4_setup_ctrs,
	.check_ctrs		= p4_check_ctrs,
	.start			= p4_start,
	.stop			= p4_stop,
	.shutdown		= p4_shutdown
};
#endif
710
259a83a8 711struct op_x86_model_spec op_p4_spec = {
c92960fc
RR
712 .num_counters = NUM_COUNTERS_NON_HT,
713 .num_controls = NUM_CONTROLS_NON_HT,
4d4036e0 714 .num_virt_counters = NUM_COUNTERS_NON_HT,
c92960fc
RR
715 .fill_in_addresses = &p4_fill_in_addresses,
716 .setup_ctrs = &p4_setup_ctrs,
717 .check_ctrs = &p4_check_ctrs,
718 .start = &p4_start,
719 .stop = &p4_stop,
720 .shutdown = &p4_shutdown
1da177e4 721};