]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/x86/kernel/cpu/perf_event_p4.c
x86, perf: P4 PMU -- Get rid of redundant check for array index
[net-next-2.6.git] / arch / x86 / kernel / cpu / perf_event_p4.c
CommitLineData
a072738e
CG
1/*
2 * Netburst Perfomance Events (P4, old Xeon)
3 *
4 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
5 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
6 *
7 * For licencing details see kernel-base/COPYING
8 */
9
10#ifdef CONFIG_CPU_SUP_INTEL
11
12#include <asm/perf_event_p4.h>
13
/* Maximum number of candidate counters listed per HT thread for one event */
#define P4_CNTR_LIMIT	3

/*
 * Resources a single P4 event is bound to.
 *
 * array indices: 0,1 - HT threads, used with HT enabled cpu
 */
struct p4_event_bind {
	unsigned int opcode;			/* Event code and ESCR selector */
	unsigned int escr_msr[2];		/* ESCR MSR for this event */
	/*
	 * counter index (offset), -1 on absence.
	 *
	 * The element type must be signed: with an unsigned type the -1
	 * sentinel is stored as 255 and, after integer promotion, never
	 * compares equal to -1.
	 */
	signed char cntr[2][P4_CNTR_LIMIT];
};
23
d814f301
CG
/*
 * Additional metric values a cache event carries; they are written to
 * MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT when the event is enabled.
 */
struct p4_cache_event_bind {
	unsigned int	metric_pebs;	/* value for MSR_IA32_PEBS_ENABLE */
	unsigned int	metric_vert;	/* value for MSR_P4_PEBS_MATRIX_VERT */
};
28
d814f301
CG
29#define P4_GEN_CACHE_EVENT_BIND(name) \
30 [P4_CACHE__##name] = { \
31 .metric_pebs = P4_PEBS__##name, \
32 .metric_vert = P4_VERT__##name, \
33 }
34
35static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
36 P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
37 P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
38 P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
39 P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
40};
41
42/*
43 * Note that we don't use CCCR1 here, there is an
44 * exception for P4_BSQ_ALLOCATION but we just have
45 * no workaround
46 *
47 * consider this binding as resources which particular
48 * event may borrow, it doesn't contain EventMask,
49 * Tags and friends -- they are left to a caller
50 */
51static struct p4_event_bind p4_event_bind_map[] = {
52 [P4_EVENT_TC_DELIVER_MODE] = {
53 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
54 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
55 .cntr = { {4, 5, -1}, {6, 7, -1} },
56 },
57 [P4_EVENT_BPU_FETCH_REQUEST] = {
58 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
59 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
60 .cntr = { {0, -1, -1}, {2, -1, -1} },
61 },
62 [P4_EVENT_ITLB_REFERENCE] = {
63 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
64 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
65 .cntr = { {0, -1, -1}, {2, -1, -1} },
66 },
67 [P4_EVENT_MEMORY_CANCEL] = {
68 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
69 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
70 .cntr = { {8, 9, -1}, {10, 11, -1} },
71 },
72 [P4_EVENT_MEMORY_COMPLETE] = {
73 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
74 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
75 .cntr = { {8, 9, -1}, {10, 11, -1} },
76 },
77 [P4_EVENT_LOAD_PORT_REPLAY] = {
78 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
79 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
80 .cntr = { {8, 9, -1}, {10, 11, -1} },
81 },
82 [P4_EVENT_STORE_PORT_REPLAY] = {
83 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
84 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
85 .cntr = { {8, 9, -1}, {10, 11, -1} },
86 },
87 [P4_EVENT_MOB_LOAD_REPLAY] = {
88 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
89 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
90 .cntr = { {0, -1, -1}, {2, -1, -1} },
91 },
92 [P4_EVENT_PAGE_WALK_TYPE] = {
93 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
94 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
95 .cntr = { {0, -1, -1}, {2, -1, -1} },
96 },
97 [P4_EVENT_BSQ_CACHE_REFERENCE] = {
98 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
99 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
100 .cntr = { {0, -1, -1}, {2, -1, -1} },
101 },
102 [P4_EVENT_IOQ_ALLOCATION] = {
103 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
104 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
105 .cntr = { {0, -1, -1}, {2, -1, -1} },
106 },
107 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
108 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
109 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
110 .cntr = { {2, -1, -1}, {3, -1, -1} },
111 },
112 [P4_EVENT_FSB_DATA_ACTIVITY] = {
113 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
114 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
115 .cntr = { {0, -1, -1}, {2, -1, -1} },
116 },
117 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
118 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
119 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
120 .cntr = { {0, -1, -1}, {1, -1, -1} },
121 },
122 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
123 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
124 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
125 .cntr = { {2, -1, -1}, {3, -1, -1} },
126 },
127 [P4_EVENT_SSE_INPUT_ASSIST] = {
128 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
129 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
130 .cntr = { {8, 9, -1}, {10, 11, -1} },
131 },
132 [P4_EVENT_PACKED_SP_UOP] = {
133 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
134 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
135 .cntr = { {8, 9, -1}, {10, 11, -1} },
136 },
137 [P4_EVENT_PACKED_DP_UOP] = {
138 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
139 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
140 .cntr = { {8, 9, -1}, {10, 11, -1} },
141 },
142 [P4_EVENT_SCALAR_SP_UOP] = {
143 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
144 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
145 .cntr = { {8, 9, -1}, {10, 11, -1} },
146 },
147 [P4_EVENT_SCALAR_DP_UOP] = {
148 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
149 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
150 .cntr = { {8, 9, -1}, {10, 11, -1} },
151 },
152 [P4_EVENT_64BIT_MMX_UOP] = {
153 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
154 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
155 .cntr = { {8, 9, -1}, {10, 11, -1} },
156 },
157 [P4_EVENT_128BIT_MMX_UOP] = {
158 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
159 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
160 .cntr = { {8, 9, -1}, {10, 11, -1} },
161 },
162 [P4_EVENT_X87_FP_UOP] = {
163 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
164 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
165 .cntr = { {8, 9, -1}, {10, 11, -1} },
166 },
167 [P4_EVENT_TC_MISC] = {
168 .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
169 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
170 .cntr = { {4, 5, -1}, {6, 7, -1} },
171 },
172 [P4_EVENT_GLOBAL_POWER_EVENTS] = {
173 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
174 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
175 .cntr = { {0, -1, -1}, {2, -1, -1} },
176 },
177 [P4_EVENT_TC_MS_XFER] = {
178 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
179 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
180 .cntr = { {4, 5, -1}, {6, 7, -1} },
181 },
182 [P4_EVENT_UOP_QUEUE_WRITES] = {
183 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
184 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
185 .cntr = { {4, 5, -1}, {6, 7, -1} },
186 },
187 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
188 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
189 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
190 .cntr = { {4, 5, -1}, {6, 7, -1} },
191 },
192 [P4_EVENT_RETIRED_BRANCH_TYPE] = {
193 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
194 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
195 .cntr = { {4, 5, -1}, {6, 7, -1} },
196 },
197 [P4_EVENT_RESOURCE_STALL] = {
198 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
199 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
200 .cntr = { {12, 13, 16}, {14, 15, 17} },
201 },
202 [P4_EVENT_WC_BUFFER] = {
203 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
204 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
205 .cntr = { {8, 9, -1}, {10, 11, -1} },
206 },
207 [P4_EVENT_B2B_CYCLES] = {
208 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
209 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
210 .cntr = { {0, -1, -1}, {2, -1, -1} },
211 },
212 [P4_EVENT_BNR] = {
213 .opcode = P4_OPCODE(P4_EVENT_BNR),
214 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
215 .cntr = { {0, -1, -1}, {2, -1, -1} },
216 },
217 [P4_EVENT_SNOOP] = {
218 .opcode = P4_OPCODE(P4_EVENT_SNOOP),
219 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
220 .cntr = { {0, -1, -1}, {2, -1, -1} },
221 },
222 [P4_EVENT_RESPONSE] = {
223 .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
224 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
225 .cntr = { {0, -1, -1}, {2, -1, -1} },
226 },
227 [P4_EVENT_FRONT_END_EVENT] = {
228 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
229 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
230 .cntr = { {12, 13, 16}, {14, 15, 17} },
231 },
232 [P4_EVENT_EXECUTION_EVENT] = {
233 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
234 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
235 .cntr = { {12, 13, 16}, {14, 15, 17} },
236 },
237 [P4_EVENT_REPLAY_EVENT] = {
238 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
239 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
240 .cntr = { {12, 13, 16}, {14, 15, 17} },
241 },
242 [P4_EVENT_INSTR_RETIRED] = {
243 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
244 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
245 .cntr = { {12, 13, 16}, {14, 15, 17} },
246 },
247 [P4_EVENT_UOPS_RETIRED] = {
248 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
249 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
250 .cntr = { {12, 13, 16}, {14, 15, 17} },
251 },
252 [P4_EVENT_UOP_TYPE] = {
253 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
254 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
255 .cntr = { {12, 13, 16}, {14, 15, 17} },
256 },
257 [P4_EVENT_BRANCH_RETIRED] = {
258 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
259 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
260 .cntr = { {12, 13, 16}, {14, 15, 17} },
261 },
262 [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
263 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
264 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
265 .cntr = { {12, 13, 16}, {14, 15, 17} },
266 },
267 [P4_EVENT_X87_ASSIST] = {
268 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
269 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
270 .cntr = { {12, 13, 16}, {14, 15, 17} },
271 },
272 [P4_EVENT_MACHINE_CLEAR] = {
273 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
274 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
275 .cntr = { {12, 13, 16}, {14, 15, 17} },
276 },
277 [P4_EVENT_INSTR_COMPLETED] = {
278 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
279 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
280 .cntr = { {12, 13, 16}, {14, 15, 17} },
281 },
282};
a072738e 283
d814f301
CG
284#define P4_GEN_CACHE_EVENT(event, bit, cache_event) \
285 p4_config_pack_escr(P4_ESCR_EVENT(event) | \
286 P4_ESCR_EMASK_BIT(event, bit)) | \
287 p4_config_pack_cccr(cache_event | \
288 P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
cb7d6b50 289
caaa8be3 290static __initconst const u64 p4_hw_cache_event_ids
cb7d6b50
LM
291 [PERF_COUNT_HW_CACHE_MAX]
292 [PERF_COUNT_HW_CACHE_OP_MAX]
293 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
294{
295 [ C(L1D ) ] = {
296 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0,
d814f301
CG
298 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
299 P4_CACHE__1stl_cache_load_miss_retired),
cb7d6b50
LM
300 },
301 },
302 [ C(LL ) ] = {
303 [ C(OP_READ) ] = {
304 [ C(RESULT_ACCESS) ] = 0x0,
d814f301
CG
305 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
306 P4_CACHE__2ndl_cache_load_miss_retired),
cb7d6b50 307 },
d814f301 308},
cb7d6b50
LM
309 [ C(DTLB) ] = {
310 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x0,
d814f301
CG
312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
313 P4_CACHE__dtlb_load_miss_retired),
cb7d6b50
LM
314 },
315 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0,
d814f301
CG
317 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
318 P4_CACHE__dtlb_store_miss_retired),
cb7d6b50
LM
319 },
320 },
321 [ C(ITLB) ] = {
322 [ C(OP_READ) ] = {
d814f301
CG
323 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
324 P4_CACHE__itlb_reference_hit),
325 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
326 P4_CACHE__itlb_reference_miss),
cb7d6b50
LM
327 },
328 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1,
330 [ C(RESULT_MISS) ] = -1,
331 },
332 [ C(OP_PREFETCH) ] = {
333 [ C(RESULT_ACCESS) ] = -1,
334 [ C(RESULT_MISS) ] = -1,
335 },
336 },
337};
338
d814f301
CG
339static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
340 /* non-halted CPU clocks */
341 [PERF_COUNT_HW_CPU_CYCLES] =
342 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
343 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
344
345 /*
346 * retired instructions
347 * in a sake of simplicity we don't use the FSB tagging
348 */
349 [PERF_COUNT_HW_INSTRUCTIONS] =
350 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) |
351 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
352 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
353
354 /* cache hits */
355 [PERF_COUNT_HW_CACHE_REFERENCES] =
356 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
357 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
358 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
359 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
360 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
361 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
362 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
363
364 /* cache misses */
365 [PERF_COUNT_HW_CACHE_MISSES] =
366 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
367 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
368 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
369 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
370
371 /* branch instructions retired */
372 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
373 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) |
374 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
375 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
376 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
377 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
378
379 /* mispredicted branches retired */
380 [PERF_COUNT_HW_BRANCH_MISSES] =
381 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) |
382 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
383
384 /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
385 [PERF_COUNT_HW_BUS_CYCLES] =
386 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) |
387 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
388 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) |
389 p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
a072738e
CG
390};
391
d814f301
CG
392static struct p4_event_bind *p4_config_get_bind(u64 config)
393{
394 unsigned int evnt = p4_config_unpack_event(config);
395 struct p4_event_bind *bind = NULL;
396
397 if (evnt < ARRAY_SIZE(p4_event_bind_map))
398 bind = &p4_event_bind_map[evnt];
399
400 return bind;
401}
402
a072738e
CG
403static u64 p4_pmu_event_map(int hw_event)
404{
d814f301
CG
405 struct p4_event_bind *bind;
406 unsigned int esel;
a072738e
CG
407 u64 config;
408
d814f301
CG
409 config = p4_general_events[hw_event];
410 bind = p4_config_get_bind(config);
411 esel = P4_OPCODE_ESEL(bind->opcode);
412 config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
a072738e 413
a072738e
CG
414 return config;
415}
416
b4cdc5c2 417static int p4_hw_config(struct perf_event *event)
a072738e 418{
137351e0
CG
419 int cpu = get_cpu();
420 int rc = 0;
d814f301 421 u32 escr, cccr;
a072738e
CG
422
423 /*
424 * the reason we use cpu that early is that: if we get scheduled
425 * first time on the same cpu -- we will not need swap thread
426 * specific flags in config (and will save some cpu cycles)
427 */
428
d814f301 429 cccr = p4_default_cccr_conf(cpu);
b4cdc5c2
PZ
430 escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
431 event->attr.exclude_user);
432 event->hw.config = p4_config_pack_escr(escr) |
433 p4_config_pack_cccr(cccr);
a072738e 434
cb7d6b50 435 if (p4_ht_active() && p4_ht_thread(cpu))
b4cdc5c2
PZ
436 event->hw.config = p4_set_ht_bit(event->hw.config);
437
de902d96
CG
438 if (event->attr.type == PERF_TYPE_RAW) {
439 /*
440 * We don't control raw events so it's up to the caller
441 * to pass sane values (and we don't count the thread number
442 * on HT machine but allow HT-compatible specifics to be
443 * passed on)
444 *
445 * XXX: HT wide things should check perf_paranoid_cpu() &&
446 * CAP_SYS_ADMIN
447 */
448 event->hw.config |= event->attr.config &
449 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
450 p4_config_pack_cccr(P4_CCCR_MASK_HT));
451 }
cb7d6b50 452
137351e0
CG
453 rc = x86_setup_perfctr(event);
454 put_cpu();
455
456 return rc;
a072738e
CG
457}
458
459static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
460{
461 unsigned long dummy;
462
463 rdmsrl(hwc->config_base + hwc->idx, dummy);
464 if (dummy & P4_CCCR_OVF) {
465 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
466 ((u64)dummy) & ~P4_CCCR_OVF);
467 }
468}
469
470static inline void p4_pmu_disable_event(struct perf_event *event)
471{
472 struct hw_perf_event *hwc = &event->hw;
473
474 /*
475 * If event gets disabled while counter is in overflowed
476 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
477 * asserted again and again
478 */
479 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
480 (u64)(p4_config_unpack_cccr(hwc->config)) &
d814f301 481 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
a072738e
CG
482}
483
484static void p4_pmu_disable_all(void)
485{
486 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
487 int idx;
488
948b1bb8 489 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
a072738e
CG
490 struct perf_event *event = cpuc->events[idx];
491 if (!test_bit(idx, cpuc->active_mask))
492 continue;
493 p4_pmu_disable_event(event);
494 }
495}
496
497static void p4_pmu_enable_event(struct perf_event *event)
498{
499 struct hw_perf_event *hwc = &event->hw;
500 int thread = p4_ht_config_thread(hwc->config);
501 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
d814f301
CG
502 unsigned int idx = p4_config_unpack_event(hwc->config);
503 unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
504 struct p4_event_bind *bind;
505 struct p4_cache_event_bind *bind_cache;
506 u64 escr_addr, cccr;
a072738e 507
d814f301
CG
508 bind = &p4_event_bind_map[idx];
509 escr_addr = (u64)bind->escr_msr[thread];
a072738e
CG
510
511 /*
512 * - we dont support cascaded counters yet
513 * - and counter 1 is broken (erratum)
514 */
515 WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
516 WARN_ON_ONCE(hwc->idx == 1);
517
d814f301
CG
518 /* we need a real Event value */
519 escr_conf &= ~P4_ESCR_EVENT_MASK;
520 escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
521
522 cccr = p4_config_unpack_cccr(hwc->config);
523
524 /*
525 * it could be Cache event so that we need to
526 * set metrics into additional MSRs
527 */
528 BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
529 if (idx_cache > P4_CACHE__NONE &&
530 idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
531 bind_cache = &p4_cache_event_bind_map[idx_cache];
532 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
533 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
534 }
535
536 (void)checking_wrmsrl(escr_addr, escr_conf);
a072738e 537 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
d814f301 538 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
a072738e
CG
539}
540
11164cd4 541static void p4_pmu_enable_all(int added)
a072738e
CG
542{
543 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
544 int idx;
545
948b1bb8 546 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
a072738e
CG
547 struct perf_event *event = cpuc->events[idx];
548 if (!test_bit(idx, cpuc->active_mask))
549 continue;
550 p4_pmu_enable_event(event);
551 }
552}
553
554static int p4_pmu_handle_irq(struct pt_regs *regs)
555{
556 struct perf_sample_data data;
557 struct cpu_hw_events *cpuc;
558 struct perf_event *event;
559 struct hw_perf_event *hwc;
560 int idx, handled = 0;
561 u64 val;
562
563 data.addr = 0;
564 data.raw = NULL;
565
566 cpuc = &__get_cpu_var(cpu_hw_events);
567
948b1bb8 568 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
a072738e
CG
569
570 if (!test_bit(idx, cpuc->active_mask))
571 continue;
572
573 event = cpuc->events[idx];
574 hwc = &event->hw;
575
576 WARN_ON_ONCE(hwc->idx != idx);
577
578 /*
579 * FIXME: Redundant call, actually not needed
580 * but just to check if we're screwed
581 */
582 p4_pmu_clear_cccr_ovf(hwc);
583
584 val = x86_perf_event_update(event);
948b1bb8 585 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
a072738e
CG
586 continue;
587
588 /*
589 * event overflow
590 */
591 handled = 1;
592 data.period = event->hw.last_period;
593
594 if (!x86_perf_event_set_period(event))
595 continue;
596 if (perf_event_overflow(event, 1, &data, regs))
597 p4_pmu_disable_event(event);
598 }
599
600 if (handled) {
601 /* p4 quirk: unmask it again */
602 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
603 inc_irq_stat(apic_perf_irqs);
604 }
605
606 return handled;
607}
608
609/*
610 * swap thread specific fields according to a thread
611 * we are going to run on
612 */
613static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
614{
615 u32 escr, cccr;
616
617 /*
618 * we either lucky and continue on same cpu or no HT support
619 */
620 if (!p4_should_swap_ts(hwc->config, cpu))
621 return;
622
623 /*
624 * the event is migrated from an another logical
625 * cpu, so we need to swap thread specific flags
626 */
627
628 escr = p4_config_unpack_escr(hwc->config);
629 cccr = p4_config_unpack_cccr(hwc->config);
630
631 if (p4_ht_thread(cpu)) {
632 cccr &= ~P4_CCCR_OVF_PMI_T0;
633 cccr |= P4_CCCR_OVF_PMI_T1;
d814f301
CG
634 if (escr & P4_ESCR_T0_OS) {
635 escr &= ~P4_ESCR_T0_OS;
636 escr |= P4_ESCR_T1_OS;
a072738e 637 }
d814f301
CG
638 if (escr & P4_ESCR_T0_USR) {
639 escr &= ~P4_ESCR_T0_USR;
640 escr |= P4_ESCR_T1_USR;
a072738e
CG
641 }
642 hwc->config = p4_config_pack_escr(escr);
643 hwc->config |= p4_config_pack_cccr(cccr);
644 hwc->config |= P4_CONFIG_HT;
645 } else {
646 cccr &= ~P4_CCCR_OVF_PMI_T1;
647 cccr |= P4_CCCR_OVF_PMI_T0;
d814f301
CG
648 if (escr & P4_ESCR_T1_OS) {
649 escr &= ~P4_ESCR_T1_OS;
650 escr |= P4_ESCR_T0_OS;
a072738e 651 }
d814f301
CG
652 if (escr & P4_ESCR_T1_USR) {
653 escr &= ~P4_ESCR_T1_USR;
654 escr |= P4_ESCR_T0_USR;
a072738e
CG
655 }
656 hwc->config = p4_config_pack_escr(escr);
657 hwc->config |= p4_config_pack_cccr(cccr);
658 hwc->config &= ~P4_CONFIG_HT;
659 }
660}
661
662/* ESCRs are not sequential in memory so we need a map */
9c8c6bad 663static const unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = {
a072738e
CG
664 MSR_P4_ALF_ESCR0, /* 0 */
665 MSR_P4_ALF_ESCR1, /* 1 */
666 MSR_P4_BPU_ESCR0, /* 2 */
667 MSR_P4_BPU_ESCR1, /* 3 */
668 MSR_P4_BSU_ESCR0, /* 4 */
669 MSR_P4_BSU_ESCR1, /* 5 */
670 MSR_P4_CRU_ESCR0, /* 6 */
671 MSR_P4_CRU_ESCR1, /* 7 */
672 MSR_P4_CRU_ESCR2, /* 8 */
673 MSR_P4_CRU_ESCR3, /* 9 */
674 MSR_P4_CRU_ESCR4, /* 10 */
675 MSR_P4_CRU_ESCR5, /* 11 */
676 MSR_P4_DAC_ESCR0, /* 12 */
677 MSR_P4_DAC_ESCR1, /* 13 */
678 MSR_P4_FIRM_ESCR0, /* 14 */
679 MSR_P4_FIRM_ESCR1, /* 15 */
680 MSR_P4_FLAME_ESCR0, /* 16 */
681 MSR_P4_FLAME_ESCR1, /* 17 */
682 MSR_P4_FSB_ESCR0, /* 18 */
683 MSR_P4_FSB_ESCR1, /* 19 */
684 MSR_P4_IQ_ESCR0, /* 20 */
685 MSR_P4_IQ_ESCR1, /* 21 */
686 MSR_P4_IS_ESCR0, /* 22 */
687 MSR_P4_IS_ESCR1, /* 23 */
688 MSR_P4_ITLB_ESCR0, /* 24 */
689 MSR_P4_ITLB_ESCR1, /* 25 */
690 MSR_P4_IX_ESCR0, /* 26 */
691 MSR_P4_IX_ESCR1, /* 27 */
692 MSR_P4_MOB_ESCR0, /* 28 */
693 MSR_P4_MOB_ESCR1, /* 29 */
694 MSR_P4_MS_ESCR0, /* 30 */
695 MSR_P4_MS_ESCR1, /* 31 */
696 MSR_P4_PMH_ESCR0, /* 32 */
697 MSR_P4_PMH_ESCR1, /* 33 */
698 MSR_P4_RAT_ESCR0, /* 34 */
699 MSR_P4_RAT_ESCR1, /* 35 */
700 MSR_P4_SAAT_ESCR0, /* 36 */
701 MSR_P4_SAAT_ESCR1, /* 37 */
702 MSR_P4_SSU_ESCR0, /* 38 */
703 MSR_P4_SSU_ESCR1, /* 39 */
704 MSR_P4_TBPU_ESCR0, /* 40 */
705 MSR_P4_TBPU_ESCR1, /* 41 */
706 MSR_P4_TC_ESCR0, /* 42 */
707 MSR_P4_TC_ESCR1, /* 43 */
708 MSR_P4_U2L_ESCR0, /* 44 */
709 MSR_P4_U2L_ESCR1, /* 45 */
710};
711
712static int p4_get_escr_idx(unsigned int addr)
713{
714 unsigned int i;
715
716 for (i = 0; i < ARRAY_SIZE(p4_escr_map); i++) {
717 if (addr == p4_escr_map[i])
718 return i;
719 }
720
721 return -1;
722}
723
d814f301
CG
724static int p4_next_cntr(int thread, unsigned long *used_mask,
725 struct p4_event_bind *bind)
726{
727 int i = 0, j;
728
729 for (i = 0; i < P4_CNTR_LIMIT; i++) {
730 j = bind->cntr[thread][i++];
731 if (j == -1 || !test_bit(j, used_mask))
732 return j;
733 }
734
735 return -1;
736}
737
a072738e
CG
738static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
739{
740 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
741 unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)];
a072738e 742 int cpu = raw_smp_processor_id();
d814f301
CG
743 struct hw_perf_event *hwc;
744 struct p4_event_bind *bind;
745 unsigned int i, thread, num;
746 int cntr_idx, escr_idx;
a072738e
CG
747
748 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
749 bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR);
750
a072738e 751 for (i = 0, num = n; i < n; i++, num--) {
d814f301 752
a072738e 753 hwc = &cpuc->event_list[i]->hw;
a072738e 754 thread = p4_ht_thread(cpu);
d814f301
CG
755 bind = p4_config_get_bind(hwc->config);
756 escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
a072738e 757
a072738e 758 if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
d814f301 759 cntr_idx = hwc->idx;
a072738e
CG
760 if (assign)
761 assign[i] = hwc->idx;
a072738e
CG
762 goto reserve;
763 }
764
d814f301
CG
765 cntr_idx = p4_next_cntr(thread, used_mask, bind);
766 if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
a072738e
CG
767 goto done;
768
a072738e 769 p4_pmu_swap_config_ts(hwc, cpu);
d814f301
CG
770 if (assign)
771 assign[i] = cntr_idx;
a072738e 772reserve:
d814f301 773 set_bit(cntr_idx, used_mask);
a072738e
CG
774 set_bit(escr_idx, escr_mask);
775 }
776
777done:
778 return num ? -ENOSPC : 0;
779}
780
caaa8be3 781static __initconst const struct x86_pmu p4_pmu = {
a072738e
CG
782 .name = "Netburst P4/Xeon",
783 .handle_irq = p4_pmu_handle_irq,
784 .disable_all = p4_pmu_disable_all,
785 .enable_all = p4_pmu_enable_all,
786 .enable = p4_pmu_enable_event,
787 .disable = p4_pmu_disable_event,
788 .eventsel = MSR_P4_BPU_CCCR0,
789 .perfctr = MSR_P4_BPU_PERFCTR0,
790 .event_map = p4_pmu_event_map,
d814f301 791 .max_events = ARRAY_SIZE(p4_general_events),
a072738e
CG
792 .get_event_constraints = x86_get_event_constraints,
793 /*
794 * IF HT disabled we may need to use all
795 * ARCH_P4_MAX_CCCR counters simulaneously
796 * though leave it restricted at moment assuming
797 * HT is on
798 */
948b1bb8 799 .num_counters = ARCH_P4_MAX_CCCR,
a072738e 800 .apic = 1,
948b1bb8
RR
801 .cntval_bits = 40,
802 .cntval_mask = (1ULL << 40) - 1,
a072738e
CG
803 .max_period = (1ULL << 39) - 1,
804 .hw_config = p4_hw_config,
805 .schedule_events = p4_pmu_schedule_events,
806};
807
808static __init int p4_pmu_init(void)
809{
810 unsigned int low, high;
811
812 /* If we get stripped -- indexig fails */
813 BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
814
815 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
816 if (!(low & (1 << 7))) {
817 pr_cont("unsupported Netburst CPU model %d ",
818 boot_cpu_data.x86_model);
819 return -ENODEV;
820 }
821
cb7d6b50 822 memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
d814f301 823 sizeof(hw_cache_event_ids));
cb7d6b50 824
a072738e
CG
825 pr_cont("Netburst events, ");
826
827 x86_pmu = p4_pmu;
828
829 return 0;
830}
831
832#endif /* CONFIG_CPU_SUP_INTEL */