]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/x86/kernel/cpu/perf_event_p4.c
perf, x86: Add Nehelem PMU programming errata workaround
[net-next-2.6.git] / arch / x86 / kernel / cpu / perf_event_p4.c
CommitLineData
a072738e
CG
/*
 * Netburst Performance Events (P4, old Xeon)
 *
 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
 *
 * For licencing details see kernel-base/COPYING
 */
9
10#ifdef CONFIG_CPU_SUP_INTEL
11
12#include <asm/perf_event_p4.h>
13
/* maximum number of candidate counters listed per HT thread for one event */
#define P4_CNTR_LIMIT 3

/*
 * Hardware resources a single event may be bound to.
 *
 * array indices: 0,1 - HT threads, used with HT enabled cpu
 */
struct p4_event_bind {
	unsigned int opcode;			/* Event code and ESCR selector */
	unsigned int escr_msr[2];		/* ESCR MSR for this event */
	/*
	 * counter index (offset), -1 on absence.
	 *
	 * The element type has to be signed: with an unsigned element the
	 * -1 marker is stored as 255 and no longer compares equal to -1
	 * once it is promoted to int.
	 */
	signed char cntr[2][P4_CNTR_LIMIT];
};
23
d814f301
CG
/*
 * Extra metric values attached to a cache event; p4_pmu_enable_event()
 * writes them to MSRs when such an event gets enabled.
 */
struct p4_cache_event_bind {
	unsigned int	metric_pebs;	/* written to MSR_IA32_PEBS_ENABLE */
	unsigned int	metric_vert;	/* written to MSR_P4_PEBS_MATRIX_VERT */
};
28
d814f301
CG
29#define P4_GEN_CACHE_EVENT_BIND(name) \
30 [P4_CACHE__##name] = { \
31 .metric_pebs = P4_PEBS__##name, \
32 .metric_vert = P4_VERT__##name, \
33 }
34
35static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
36 P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
37 P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
38 P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
39 P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
40};
41
42/*
43 * Note that we don't use CCCR1 here, there is an
44 * exception for P4_BSQ_ALLOCATION but we just have
45 * no workaround
46 *
47 * consider this binding as resources which particular
48 * event may borrow, it doesn't contain EventMask,
49 * Tags and friends -- they are left to a caller
50 */
51static struct p4_event_bind p4_event_bind_map[] = {
52 [P4_EVENT_TC_DELIVER_MODE] = {
53 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
54 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
55 .cntr = { {4, 5, -1}, {6, 7, -1} },
56 },
57 [P4_EVENT_BPU_FETCH_REQUEST] = {
58 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
59 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
60 .cntr = { {0, -1, -1}, {2, -1, -1} },
61 },
62 [P4_EVENT_ITLB_REFERENCE] = {
63 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
64 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
65 .cntr = { {0, -1, -1}, {2, -1, -1} },
66 },
67 [P4_EVENT_MEMORY_CANCEL] = {
68 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
69 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
70 .cntr = { {8, 9, -1}, {10, 11, -1} },
71 },
72 [P4_EVENT_MEMORY_COMPLETE] = {
73 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
74 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
75 .cntr = { {8, 9, -1}, {10, 11, -1} },
76 },
77 [P4_EVENT_LOAD_PORT_REPLAY] = {
78 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
79 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
80 .cntr = { {8, 9, -1}, {10, 11, -1} },
81 },
82 [P4_EVENT_STORE_PORT_REPLAY] = {
83 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
84 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
85 .cntr = { {8, 9, -1}, {10, 11, -1} },
86 },
87 [P4_EVENT_MOB_LOAD_REPLAY] = {
88 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
89 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
90 .cntr = { {0, -1, -1}, {2, -1, -1} },
91 },
92 [P4_EVENT_PAGE_WALK_TYPE] = {
93 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
94 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
95 .cntr = { {0, -1, -1}, {2, -1, -1} },
96 },
97 [P4_EVENT_BSQ_CACHE_REFERENCE] = {
98 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
99 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
100 .cntr = { {0, -1, -1}, {2, -1, -1} },
101 },
102 [P4_EVENT_IOQ_ALLOCATION] = {
103 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
104 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
105 .cntr = { {0, -1, -1}, {2, -1, -1} },
106 },
107 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
108 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
109 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
110 .cntr = { {2, -1, -1}, {3, -1, -1} },
111 },
112 [P4_EVENT_FSB_DATA_ACTIVITY] = {
113 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
114 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
115 .cntr = { {0, -1, -1}, {2, -1, -1} },
116 },
117 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
118 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
119 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
120 .cntr = { {0, -1, -1}, {1, -1, -1} },
121 },
122 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
123 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
124 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
125 .cntr = { {2, -1, -1}, {3, -1, -1} },
126 },
127 [P4_EVENT_SSE_INPUT_ASSIST] = {
128 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
129 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
130 .cntr = { {8, 9, -1}, {10, 11, -1} },
131 },
132 [P4_EVENT_PACKED_SP_UOP] = {
133 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
134 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
135 .cntr = { {8, 9, -1}, {10, 11, -1} },
136 },
137 [P4_EVENT_PACKED_DP_UOP] = {
138 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
139 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
140 .cntr = { {8, 9, -1}, {10, 11, -1} },
141 },
142 [P4_EVENT_SCALAR_SP_UOP] = {
143 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
144 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
145 .cntr = { {8, 9, -1}, {10, 11, -1} },
146 },
147 [P4_EVENT_SCALAR_DP_UOP] = {
148 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
149 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
150 .cntr = { {8, 9, -1}, {10, 11, -1} },
151 },
152 [P4_EVENT_64BIT_MMX_UOP] = {
153 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
154 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
155 .cntr = { {8, 9, -1}, {10, 11, -1} },
156 },
157 [P4_EVENT_128BIT_MMX_UOP] = {
158 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
159 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
160 .cntr = { {8, 9, -1}, {10, 11, -1} },
161 },
162 [P4_EVENT_X87_FP_UOP] = {
163 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
164 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
165 .cntr = { {8, 9, -1}, {10, 11, -1} },
166 },
167 [P4_EVENT_TC_MISC] = {
168 .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
169 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
170 .cntr = { {4, 5, -1}, {6, 7, -1} },
171 },
172 [P4_EVENT_GLOBAL_POWER_EVENTS] = {
173 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
174 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
175 .cntr = { {0, -1, -1}, {2, -1, -1} },
176 },
177 [P4_EVENT_TC_MS_XFER] = {
178 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
179 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
180 .cntr = { {4, 5, -1}, {6, 7, -1} },
181 },
182 [P4_EVENT_UOP_QUEUE_WRITES] = {
183 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
184 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
185 .cntr = { {4, 5, -1}, {6, 7, -1} },
186 },
187 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
188 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
189 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
190 .cntr = { {4, 5, -1}, {6, 7, -1} },
191 },
192 [P4_EVENT_RETIRED_BRANCH_TYPE] = {
193 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
194 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
195 .cntr = { {4, 5, -1}, {6, 7, -1} },
196 },
197 [P4_EVENT_RESOURCE_STALL] = {
198 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
199 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
200 .cntr = { {12, 13, 16}, {14, 15, 17} },
201 },
202 [P4_EVENT_WC_BUFFER] = {
203 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
204 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
205 .cntr = { {8, 9, -1}, {10, 11, -1} },
206 },
207 [P4_EVENT_B2B_CYCLES] = {
208 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
209 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
210 .cntr = { {0, -1, -1}, {2, -1, -1} },
211 },
212 [P4_EVENT_BNR] = {
213 .opcode = P4_OPCODE(P4_EVENT_BNR),
214 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
215 .cntr = { {0, -1, -1}, {2, -1, -1} },
216 },
217 [P4_EVENT_SNOOP] = {
218 .opcode = P4_OPCODE(P4_EVENT_SNOOP),
219 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
220 .cntr = { {0, -1, -1}, {2, -1, -1} },
221 },
222 [P4_EVENT_RESPONSE] = {
223 .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
224 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
225 .cntr = { {0, -1, -1}, {2, -1, -1} },
226 },
227 [P4_EVENT_FRONT_END_EVENT] = {
228 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
229 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
230 .cntr = { {12, 13, 16}, {14, 15, 17} },
231 },
232 [P4_EVENT_EXECUTION_EVENT] = {
233 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
234 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
235 .cntr = { {12, 13, 16}, {14, 15, 17} },
236 },
237 [P4_EVENT_REPLAY_EVENT] = {
238 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
239 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
240 .cntr = { {12, 13, 16}, {14, 15, 17} },
241 },
242 [P4_EVENT_INSTR_RETIRED] = {
243 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
244 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
245 .cntr = { {12, 13, 16}, {14, 15, 17} },
246 },
247 [P4_EVENT_UOPS_RETIRED] = {
248 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
249 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
250 .cntr = { {12, 13, 16}, {14, 15, 17} },
251 },
252 [P4_EVENT_UOP_TYPE] = {
253 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
254 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
255 .cntr = { {12, 13, 16}, {14, 15, 17} },
256 },
257 [P4_EVENT_BRANCH_RETIRED] = {
258 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
259 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
260 .cntr = { {12, 13, 16}, {14, 15, 17} },
261 },
262 [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
263 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
264 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
265 .cntr = { {12, 13, 16}, {14, 15, 17} },
266 },
267 [P4_EVENT_X87_ASSIST] = {
268 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
269 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
270 .cntr = { {12, 13, 16}, {14, 15, 17} },
271 },
272 [P4_EVENT_MACHINE_CLEAR] = {
273 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
274 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
275 .cntr = { {12, 13, 16}, {14, 15, 17} },
276 },
277 [P4_EVENT_INSTR_COMPLETED] = {
278 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
279 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
280 .cntr = { {12, 13, 16}, {14, 15, 17} },
281 },
282};
a072738e 283
d814f301
CG
/*
 * Compose a full config word for a cache event:
 *  - ESCR half: the event itself plus one event-mask bit (@bit),
 *  - CCCR half: the cache metric index (@cache_event) plus the ESCR
 *    select taken from the event's opcode.
 *
 * The expansion and the @cache_event argument are parenthesized so the
 * macro stays correct inside larger expressions or when it is handed a
 * compound argument.
 */
#define P4_GEN_CACHE_EVENT(event, bit, cache_event)			  \
	(p4_config_pack_escr(P4_ESCR_EVENT(event)			| \
			     P4_ESCR_EMASK_BIT(event, bit))		| \
	 p4_config_pack_cccr((cache_event)				| \
			     P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))))
cb7d6b50
LM
289
290static __initconst u64 p4_hw_cache_event_ids
291 [PERF_COUNT_HW_CACHE_MAX]
292 [PERF_COUNT_HW_CACHE_OP_MAX]
293 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
294{
295 [ C(L1D ) ] = {
296 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0,
d814f301
CG
298 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
299 P4_CACHE__1stl_cache_load_miss_retired),
cb7d6b50
LM
300 },
301 },
302 [ C(LL ) ] = {
303 [ C(OP_READ) ] = {
304 [ C(RESULT_ACCESS) ] = 0x0,
d814f301
CG
305 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
306 P4_CACHE__2ndl_cache_load_miss_retired),
cb7d6b50 307 },
d814f301 308},
cb7d6b50
LM
309 [ C(DTLB) ] = {
310 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x0,
d814f301
CG
312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
313 P4_CACHE__dtlb_load_miss_retired),
cb7d6b50
LM
314 },
315 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0,
d814f301
CG
317 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
318 P4_CACHE__dtlb_store_miss_retired),
cb7d6b50
LM
319 },
320 },
321 [ C(ITLB) ] = {
322 [ C(OP_READ) ] = {
d814f301
CG
323 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
324 P4_CACHE__itlb_reference_hit),
325 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
326 P4_CACHE__itlb_reference_miss),
cb7d6b50
LM
327 },
328 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1,
330 [ C(RESULT_MISS) ] = -1,
331 },
332 [ C(OP_PREFETCH) ] = {
333 [ C(RESULT_ACCESS) ] = -1,
334 [ C(RESULT_MISS) ] = -1,
335 },
336 },
337};
338
d814f301
CG
339static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
340 /* non-halted CPU clocks */
341 [PERF_COUNT_HW_CPU_CYCLES] =
342 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
343 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
344
345 /*
346 * retired instructions
347 * in a sake of simplicity we don't use the FSB tagging
348 */
349 [PERF_COUNT_HW_INSTRUCTIONS] =
350 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) |
351 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
352 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
353
354 /* cache hits */
355 [PERF_COUNT_HW_CACHE_REFERENCES] =
356 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
357 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
358 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
359 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
360 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
361 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
362 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
363
364 /* cache misses */
365 [PERF_COUNT_HW_CACHE_MISSES] =
366 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
367 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
368 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
369 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
370
371 /* branch instructions retired */
372 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
373 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) |
374 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
375 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
376 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
377 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
378
379 /* mispredicted branches retired */
380 [PERF_COUNT_HW_BRANCH_MISSES] =
381 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) |
382 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
383
384 /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
385 [PERF_COUNT_HW_BUS_CYCLES] =
386 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) |
387 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
388 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) |
389 p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
a072738e
CG
390};
391
d814f301
CG
392static struct p4_event_bind *p4_config_get_bind(u64 config)
393{
394 unsigned int evnt = p4_config_unpack_event(config);
395 struct p4_event_bind *bind = NULL;
396
397 if (evnt < ARRAY_SIZE(p4_event_bind_map))
398 bind = &p4_event_bind_map[evnt];
399
400 return bind;
401}
402
a072738e
CG
403static u64 p4_pmu_event_map(int hw_event)
404{
d814f301
CG
405 struct p4_event_bind *bind;
406 unsigned int esel;
a072738e
CG
407 u64 config;
408
d814f301
CG
409 if (hw_event > ARRAY_SIZE(p4_general_events)) {
410 printk_once(KERN_ERR "P4 PMU: Bad index: %i\n", hw_event);
a072738e
CG
411 return 0;
412 }
a072738e 413
d814f301
CG
414 config = p4_general_events[hw_event];
415 bind = p4_config_get_bind(config);
416 esel = P4_OPCODE_ESEL(bind->opcode);
417 config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
a072738e 418
a072738e
CG
419 return config;
420}
421
a072738e
CG
422/*
423 * We don't control raw events so it's up to the caller
424 * to pass sane values (and we don't count the thread number
425 * on HT machine but allow HT-compatible specifics to be
426 * passed on)
427 */
428static u64 p4_pmu_raw_event(u64 hw_event)
429{
430 return hw_event &
d814f301 431 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
a072738e
CG
432 p4_config_pack_cccr(P4_CCCR_MASK_HT));
433}
434
435static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
436{
437 int cpu = raw_smp_processor_id();
d814f301 438 u32 escr, cccr;
a072738e
CG
439
440 /*
441 * the reason we use cpu that early is that: if we get scheduled
442 * first time on the same cpu -- we will not need swap thread
443 * specific flags in config (and will save some cpu cycles)
444 */
445
d814f301
CG
446 cccr = p4_default_cccr_conf(cpu);
447 escr = p4_default_escr_conf(cpu, attr->exclude_kernel, attr->exclude_user);
448 hwc->config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
a072738e 449
cb7d6b50
LM
450 if (p4_ht_active() && p4_ht_thread(cpu))
451 hwc->config = p4_set_ht_bit(hwc->config);
452
a072738e
CG
453 return 0;
454}
455
456static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
457{
458 unsigned long dummy;
459
460 rdmsrl(hwc->config_base + hwc->idx, dummy);
461 if (dummy & P4_CCCR_OVF) {
462 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
463 ((u64)dummy) & ~P4_CCCR_OVF);
464 }
465}
466
467static inline void p4_pmu_disable_event(struct perf_event *event)
468{
469 struct hw_perf_event *hwc = &event->hw;
470
471 /*
472 * If event gets disabled while counter is in overflowed
473 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
474 * asserted again and again
475 */
476 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
477 (u64)(p4_config_unpack_cccr(hwc->config)) &
d814f301 478 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
a072738e
CG
479}
480
481static void p4_pmu_disable_all(void)
482{
483 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
484 int idx;
485
486 for (idx = 0; idx < x86_pmu.num_events; idx++) {
487 struct perf_event *event = cpuc->events[idx];
488 if (!test_bit(idx, cpuc->active_mask))
489 continue;
490 p4_pmu_disable_event(event);
491 }
492}
493
494static void p4_pmu_enable_event(struct perf_event *event)
495{
496 struct hw_perf_event *hwc = &event->hw;
497 int thread = p4_ht_config_thread(hwc->config);
498 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
d814f301
CG
499 unsigned int idx = p4_config_unpack_event(hwc->config);
500 unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
501 struct p4_event_bind *bind;
502 struct p4_cache_event_bind *bind_cache;
503 u64 escr_addr, cccr;
a072738e 504
d814f301
CG
505 bind = &p4_event_bind_map[idx];
506 escr_addr = (u64)bind->escr_msr[thread];
a072738e
CG
507
508 /*
509 * - we dont support cascaded counters yet
510 * - and counter 1 is broken (erratum)
511 */
512 WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
513 WARN_ON_ONCE(hwc->idx == 1);
514
d814f301
CG
515 /* we need a real Event value */
516 escr_conf &= ~P4_ESCR_EVENT_MASK;
517 escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
518
519 cccr = p4_config_unpack_cccr(hwc->config);
520
521 /*
522 * it could be Cache event so that we need to
523 * set metrics into additional MSRs
524 */
525 BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
526 if (idx_cache > P4_CACHE__NONE &&
527 idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
528 bind_cache = &p4_cache_event_bind_map[idx_cache];
529 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
530 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
531 }
532
533 (void)checking_wrmsrl(escr_addr, escr_conf);
a072738e 534 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
d814f301 535 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
a072738e
CG
536}
537
11164cd4 538static void p4_pmu_enable_all(int added)
a072738e
CG
539{
540 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
541 int idx;
542
543 for (idx = 0; idx < x86_pmu.num_events; idx++) {
544 struct perf_event *event = cpuc->events[idx];
545 if (!test_bit(idx, cpuc->active_mask))
546 continue;
547 p4_pmu_enable_event(event);
548 }
549}
550
551static int p4_pmu_handle_irq(struct pt_regs *regs)
552{
553 struct perf_sample_data data;
554 struct cpu_hw_events *cpuc;
555 struct perf_event *event;
556 struct hw_perf_event *hwc;
557 int idx, handled = 0;
558 u64 val;
559
560 data.addr = 0;
561 data.raw = NULL;
562
563 cpuc = &__get_cpu_var(cpu_hw_events);
564
565 for (idx = 0; idx < x86_pmu.num_events; idx++) {
566
567 if (!test_bit(idx, cpuc->active_mask))
568 continue;
569
570 event = cpuc->events[idx];
571 hwc = &event->hw;
572
573 WARN_ON_ONCE(hwc->idx != idx);
574
575 /*
576 * FIXME: Redundant call, actually not needed
577 * but just to check if we're screwed
578 */
579 p4_pmu_clear_cccr_ovf(hwc);
580
581 val = x86_perf_event_update(event);
582 if (val & (1ULL << (x86_pmu.event_bits - 1)))
583 continue;
584
585 /*
586 * event overflow
587 */
588 handled = 1;
589 data.period = event->hw.last_period;
590
591 if (!x86_perf_event_set_period(event))
592 continue;
593 if (perf_event_overflow(event, 1, &data, regs))
594 p4_pmu_disable_event(event);
595 }
596
597 if (handled) {
598 /* p4 quirk: unmask it again */
599 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
600 inc_irq_stat(apic_perf_irqs);
601 }
602
603 return handled;
604}
605
606/*
607 * swap thread specific fields according to a thread
608 * we are going to run on
609 */
610static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
611{
612 u32 escr, cccr;
613
614 /*
615 * we either lucky and continue on same cpu or no HT support
616 */
617 if (!p4_should_swap_ts(hwc->config, cpu))
618 return;
619
620 /*
621 * the event is migrated from an another logical
622 * cpu, so we need to swap thread specific flags
623 */
624
625 escr = p4_config_unpack_escr(hwc->config);
626 cccr = p4_config_unpack_cccr(hwc->config);
627
628 if (p4_ht_thread(cpu)) {
629 cccr &= ~P4_CCCR_OVF_PMI_T0;
630 cccr |= P4_CCCR_OVF_PMI_T1;
d814f301
CG
631 if (escr & P4_ESCR_T0_OS) {
632 escr &= ~P4_ESCR_T0_OS;
633 escr |= P4_ESCR_T1_OS;
a072738e 634 }
d814f301
CG
635 if (escr & P4_ESCR_T0_USR) {
636 escr &= ~P4_ESCR_T0_USR;
637 escr |= P4_ESCR_T1_USR;
a072738e
CG
638 }
639 hwc->config = p4_config_pack_escr(escr);
640 hwc->config |= p4_config_pack_cccr(cccr);
641 hwc->config |= P4_CONFIG_HT;
642 } else {
643 cccr &= ~P4_CCCR_OVF_PMI_T1;
644 cccr |= P4_CCCR_OVF_PMI_T0;
d814f301
CG
645 if (escr & P4_ESCR_T1_OS) {
646 escr &= ~P4_ESCR_T1_OS;
647 escr |= P4_ESCR_T0_OS;
a072738e 648 }
d814f301
CG
649 if (escr & P4_ESCR_T1_USR) {
650 escr &= ~P4_ESCR_T1_USR;
651 escr |= P4_ESCR_T0_USR;
a072738e
CG
652 }
653 hwc->config = p4_config_pack_escr(escr);
654 hwc->config |= p4_config_pack_cccr(cccr);
655 hwc->config &= ~P4_CONFIG_HT;
656 }
657}
658
659/* ESCRs are not sequential in memory so we need a map */
9c8c6bad 660static const unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = {
a072738e
CG
661 MSR_P4_ALF_ESCR0, /* 0 */
662 MSR_P4_ALF_ESCR1, /* 1 */
663 MSR_P4_BPU_ESCR0, /* 2 */
664 MSR_P4_BPU_ESCR1, /* 3 */
665 MSR_P4_BSU_ESCR0, /* 4 */
666 MSR_P4_BSU_ESCR1, /* 5 */
667 MSR_P4_CRU_ESCR0, /* 6 */
668 MSR_P4_CRU_ESCR1, /* 7 */
669 MSR_P4_CRU_ESCR2, /* 8 */
670 MSR_P4_CRU_ESCR3, /* 9 */
671 MSR_P4_CRU_ESCR4, /* 10 */
672 MSR_P4_CRU_ESCR5, /* 11 */
673 MSR_P4_DAC_ESCR0, /* 12 */
674 MSR_P4_DAC_ESCR1, /* 13 */
675 MSR_P4_FIRM_ESCR0, /* 14 */
676 MSR_P4_FIRM_ESCR1, /* 15 */
677 MSR_P4_FLAME_ESCR0, /* 16 */
678 MSR_P4_FLAME_ESCR1, /* 17 */
679 MSR_P4_FSB_ESCR0, /* 18 */
680 MSR_P4_FSB_ESCR1, /* 19 */
681 MSR_P4_IQ_ESCR0, /* 20 */
682 MSR_P4_IQ_ESCR1, /* 21 */
683 MSR_P4_IS_ESCR0, /* 22 */
684 MSR_P4_IS_ESCR1, /* 23 */
685 MSR_P4_ITLB_ESCR0, /* 24 */
686 MSR_P4_ITLB_ESCR1, /* 25 */
687 MSR_P4_IX_ESCR0, /* 26 */
688 MSR_P4_IX_ESCR1, /* 27 */
689 MSR_P4_MOB_ESCR0, /* 28 */
690 MSR_P4_MOB_ESCR1, /* 29 */
691 MSR_P4_MS_ESCR0, /* 30 */
692 MSR_P4_MS_ESCR1, /* 31 */
693 MSR_P4_PMH_ESCR0, /* 32 */
694 MSR_P4_PMH_ESCR1, /* 33 */
695 MSR_P4_RAT_ESCR0, /* 34 */
696 MSR_P4_RAT_ESCR1, /* 35 */
697 MSR_P4_SAAT_ESCR0, /* 36 */
698 MSR_P4_SAAT_ESCR1, /* 37 */
699 MSR_P4_SSU_ESCR0, /* 38 */
700 MSR_P4_SSU_ESCR1, /* 39 */
701 MSR_P4_TBPU_ESCR0, /* 40 */
702 MSR_P4_TBPU_ESCR1, /* 41 */
703 MSR_P4_TC_ESCR0, /* 42 */
704 MSR_P4_TC_ESCR1, /* 43 */
705 MSR_P4_U2L_ESCR0, /* 44 */
706 MSR_P4_U2L_ESCR1, /* 45 */
707};
708
709static int p4_get_escr_idx(unsigned int addr)
710{
711 unsigned int i;
712
713 for (i = 0; i < ARRAY_SIZE(p4_escr_map); i++) {
714 if (addr == p4_escr_map[i])
715 return i;
716 }
717
718 return -1;
719}
720
d814f301
CG
721static int p4_next_cntr(int thread, unsigned long *used_mask,
722 struct p4_event_bind *bind)
723{
724 int i = 0, j;
725
726 for (i = 0; i < P4_CNTR_LIMIT; i++) {
727 j = bind->cntr[thread][i++];
728 if (j == -1 || !test_bit(j, used_mask))
729 return j;
730 }
731
732 return -1;
733}
734
a072738e
CG
735static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
736{
737 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
738 unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)];
a072738e 739 int cpu = raw_smp_processor_id();
d814f301
CG
740 struct hw_perf_event *hwc;
741 struct p4_event_bind *bind;
742 unsigned int i, thread, num;
743 int cntr_idx, escr_idx;
a072738e
CG
744
745 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
746 bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR);
747
a072738e 748 for (i = 0, num = n; i < n; i++, num--) {
d814f301 749
a072738e 750 hwc = &cpuc->event_list[i]->hw;
a072738e 751 thread = p4_ht_thread(cpu);
d814f301
CG
752 bind = p4_config_get_bind(hwc->config);
753 escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
a072738e 754
a072738e 755 if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
d814f301 756 cntr_idx = hwc->idx;
a072738e
CG
757 if (assign)
758 assign[i] = hwc->idx;
a072738e
CG
759 goto reserve;
760 }
761
d814f301
CG
762 cntr_idx = p4_next_cntr(thread, used_mask, bind);
763 if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
a072738e
CG
764 goto done;
765
a072738e 766 p4_pmu_swap_config_ts(hwc, cpu);
d814f301
CG
767 if (assign)
768 assign[i] = cntr_idx;
a072738e 769reserve:
d814f301 770 set_bit(cntr_idx, used_mask);
a072738e
CG
771 set_bit(escr_idx, escr_mask);
772 }
773
774done:
775 return num ? -ENOSPC : 0;
776}
777
778static __initconst struct x86_pmu p4_pmu = {
779 .name = "Netburst P4/Xeon",
780 .handle_irq = p4_pmu_handle_irq,
781 .disable_all = p4_pmu_disable_all,
782 .enable_all = p4_pmu_enable_all,
783 .enable = p4_pmu_enable_event,
784 .disable = p4_pmu_disable_event,
785 .eventsel = MSR_P4_BPU_CCCR0,
786 .perfctr = MSR_P4_BPU_PERFCTR0,
787 .event_map = p4_pmu_event_map,
788 .raw_event = p4_pmu_raw_event,
d814f301 789 .max_events = ARRAY_SIZE(p4_general_events),
a072738e
CG
790 .get_event_constraints = x86_get_event_constraints,
791 /*
792 * IF HT disabled we may need to use all
793 * ARCH_P4_MAX_CCCR counters simulaneously
794 * though leave it restricted at moment assuming
795 * HT is on
796 */
797 .num_events = ARCH_P4_MAX_CCCR,
798 .apic = 1,
799 .event_bits = 40,
800 .event_mask = (1ULL << 40) - 1,
801 .max_period = (1ULL << 39) - 1,
802 .hw_config = p4_hw_config,
803 .schedule_events = p4_pmu_schedule_events,
804};
805
806static __init int p4_pmu_init(void)
807{
808 unsigned int low, high;
809
810 /* If we get stripped -- indexig fails */
811 BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
812
813 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
814 if (!(low & (1 << 7))) {
815 pr_cont("unsupported Netburst CPU model %d ",
816 boot_cpu_data.x86_model);
817 return -ENODEV;
818 }
819
cb7d6b50 820 memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
d814f301 821 sizeof(hw_cache_event_ids));
cb7d6b50 822
a072738e
CG
823 pr_cont("Netburst events, ");
824
825 x86_pmu = p4_pmu;
826
827 return 0;
828}
829
830#endif /* CONFIG_CPU_SUP_INTEL */