/*
 * Netburst Performance Events (P4, old Xeon)
 *
 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
 *
 * For licensing details see kernel-base/COPYING
 */

#ifdef CONFIG_CPU_SUP_INTEL

#include <asm/perf_event_p4.h>

#define P4_CNTR_LIMIT 3
/*
 * array indices: 0,1 - HT threads, used with HT-enabled cpus
 */
struct p4_event_bind {
	unsigned int opcode;		/* Event code and ESCR selector */
	unsigned int escr_msr[2];	/* ESCR MSR for this event */
	char cntr[2][P4_CNTR_LIMIT];	/* counter index (offset), -1 on absence */
};

struct p4_pebs_bind {
	unsigned int metric_pebs;
	unsigned int metric_vert;
};

/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
#define P4_GEN_PEBS_BIND(name, pebs, vert)			\
	[P4_PEBS_METRIC__##name] = {				\
		.metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,	\
		.metric_vert = vert,				\
	}

/*
 * note that P4_PEBS_ENABLE_UOP_TAG is always set here
 *
 * it is needed for mapping the P4_PEBS_CONFIG_METRIC_MASK bits of
 * the event configuration to find out which values are to be
 * written into the MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
 * registers
 */
static struct p4_pebs_bind p4_pebs_bind_map[] = {
	P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
	P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001),
	P4_GEN_PEBS_BIND(dtlb_load_miss_retired,       0x0000004, 0x0000001),
	P4_GEN_PEBS_BIND(dtlb_store_miss_retired,      0x0000004, 0x0000002),
	P4_GEN_PEBS_BIND(dtlb_all_miss_retired,        0x0000004, 0x0000003),
	P4_GEN_PEBS_BIND(tagged_mispred_branch,        0x0018000, 0x0000010),
	P4_GEN_PEBS_BIND(mob_load_replay_retired,      0x0000200, 0x0000001),
	P4_GEN_PEBS_BIND(split_load_retired,           0x0000400, 0x0000001),
	P4_GEN_PEBS_BIND(split_store_retired,          0x0000400, 0x0000002),
};
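
/*
 * For example, with the table above a config carrying
 * P4_PEBS_METRIC__dtlb_store_miss_retired makes p4_pmu_enable_pebs()
 * write 0x0000004 | P4_PEBS_ENABLE_UOP_TAG into MSR_IA32_PEBS_ENABLE
 * and 0x0000002 into MSR_P4_PEBS_MATRIX_VERT.
 */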

/*
 * Note that we don't use CCCR1 here; there is an
 * exception for P4_BSQ_ALLOCATION but we just have
 * no workaround
 *
 * consider this binding as the resources which a particular
 * event may borrow; it doesn't contain the EventMask,
 * Tags and friends -- they are left to the caller
 */
static struct p4_event_bind p4_event_bind_map[] = {
	[P4_EVENT_TC_DELIVER_MODE] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
		.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_BPU_FETCH_REQUEST] = {
		.opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
		.escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_ITLB_REFERENCE] = {
		.opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
		.escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_MEMORY_CANCEL] = {
		.opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
		.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MEMORY_COMPLETE] = {
		.opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_LOAD_PORT_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_STORE_PORT_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MOB_LOAD_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
		.escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_PAGE_WALK_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
		.escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_CACHE_REFERENCE] = {
		.opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_IOQ_ALLOCATION] = {
		.opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_IOQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
		.escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
		.cntr = { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_FSB_DATA_ACTIVITY] = {
		.opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_ALLOCATION] = {		/* shared ESCR, broken CCCR1 */
		.opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
		.cntr = { {0, -1, -1}, {1, -1, -1} },
	},
	[P4_EVENT_BSQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
		.escr_msr = { MSR_P4_BSU_ESCR1, MSR_P4_BSU_ESCR1 },
		.cntr = { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_SSE_INPUT_ASSIST] = {
		.opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_SP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_DP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_SP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_DP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_64BIT_MMX_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_128BIT_MMX_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_X87_FP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_TC_MISC] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_MISC),
		.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_GLOBAL_POWER_EVENTS] = {
		.opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_TC_MS_XFER] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
		.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_UOP_QUEUE_WRITES] = {
		.opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
		.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
		.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR0 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_BRANCH_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
		.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RESOURCE_STALL] = {
		.opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
		.escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_WC_BUFFER] = {
		.opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
		.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_B2B_CYCLES] = {
		.opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BNR] = {
		.opcode = P4_OPCODE(P4_EVENT_BNR),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_SNOOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SNOOP),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_RESPONSE] = {
		.opcode = P4_OPCODE(P4_EVENT_RESPONSE),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_FRONT_END_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_EXECUTION_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_REPLAY_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOPS_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOP_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
		.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_BRANCH_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MISPRED_BRANCH_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_X87_ASSIST] = {
		.opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MACHINE_CLEAR] = {
		.opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_COMPLETED] = {
		.opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
};

#define P4_GEN_CACHE_EVENT(event, bit, metric)				\
	p4_config_pack_escr(P4_ESCR_EVENT(event) |			\
			    P4_ESCR_EMASK_BIT(event, bit)) |		\
	p4_config_pack_cccr(metric |					\
			    P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
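
/*
 * In other words, each cache-event cell below packs three things into one
 * u64 config: the ESCR event code plus event-mask bit, the matching CCCR
 * ESCR-select field, and the PEBS metric index which p4_pmu_enable_pebs()
 * later translates through p4_pebs_bind_map[].
 */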

static __initconst const u64 p4_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__1stl_cache_load_miss_retired),
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__dtlb_load_miss_retired),
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__dtlb_store_miss_retired),
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
						P4_PEBS_METRIC__none),
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
						P4_PEBS_METRIC__none),
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
	/* non-halted CPU clocks */
	[PERF_COUNT_HW_CPU_CYCLES] =
		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),

	/*
	 * retired instructions
	 * for the sake of simplicity we don't use the FSB tagging
	 */
	[PERF_COUNT_HW_INSTRUCTIONS] =
		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) |
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),

	/* cache hits */
	[PERF_COUNT_HW_CACHE_REFERENCES] =
		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),

	/* cache misses */
	[PERF_COUNT_HW_CACHE_MISSES] =
		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),

	/* branch instructions retired */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),

	/* mispredicted branches retired */
	[PERF_COUNT_HW_BRANCH_MISSES] =
		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),

	/* bus ready clocks (cpu is driving #DRDY_DRV or #DRDY_OWN): */
	[PERF_COUNT_HW_BUS_CYCLES] =
		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) |
		p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
};

static struct p4_event_bind *p4_config_get_bind(u64 config)
{
	unsigned int evnt = p4_config_unpack_event(config);
	struct p4_event_bind *bind = NULL;

	if (evnt < ARRAY_SIZE(p4_event_bind_map))
		bind = &p4_event_bind_map[evnt];

	return bind;
}

static u64 p4_pmu_event_map(int hw_event)
{
	struct p4_event_bind *bind;
	unsigned int esel;
	u64 config;

	config = p4_general_events[hw_event];
	bind = p4_config_get_bind(config);
	esel = P4_OPCODE_ESEL(bind->opcode);
	config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));

	return config;
}
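
/*
 * For example, a generic PERF_COUNT_HW_CPU_CYCLES request resolves here to
 * the GLOBAL_POWER_EVENTS/RUNNING encoding from p4_general_events[], with
 * the matching ESCR-select value packed into the CCCR half of the config.
 */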

static int p4_validate_raw_event(struct perf_event *event)
{
	unsigned int v;

	/* user data may carry an out-of-bounds event index */
	v = p4_config_unpack_event(event->attr.config);
	if (v >= ARRAY_SIZE(p4_event_bind_map)) {
		pr_warning("P4 PMU: Unknown event code: %d\n", v);
		return -EINVAL;
	}

	/*
	 * it may have some screwed-up PEBS bits
	 */
	if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
		pr_warning("P4 PMU: PEBS are not supported yet\n");
		return -EINVAL;
	}
	v = p4_config_unpack_metric(event->attr.config);
	if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
		pr_warning("P4 PMU: Unknown metric code: %d\n", v);
		return -EINVAL;
	}

	return 0;
}

static int p4_hw_config(struct perf_event *event)
{
	int cpu = get_cpu();
	int rc = 0;
	u32 escr, cccr;

	/*
	 * the reason we take the cpu this early is that if we get scheduled
	 * for the first time on the same cpu, we will not need to swap the
	 * thread-specific flags in the config (and will save some cpu cycles)
	 */

	cccr = p4_default_cccr_conf(cpu);
	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
					event->attr.exclude_user);
	event->hw.config = p4_config_pack_escr(escr) |
			   p4_config_pack_cccr(cccr);

	if (p4_ht_active() && p4_ht_thread(cpu))
		event->hw.config = p4_set_ht_bit(event->hw.config);

	if (event->attr.type == PERF_TYPE_RAW) {

		rc = p4_validate_raw_event(event);
		if (rc)
			goto out;

		/*
		 * We don't control raw events, so it's up to the caller
		 * to pass sane values (and we don't count the thread number
		 * on HT machines but allow HT-compatible specifics to be
		 * passed on)
		 *
		 * Note that for RAW events we allow the user to use the
		 * P4_CCCR_RESERVED bits since we keep additional info there
		 * (for cache events etc.)
		 *
		 * XXX: HT wide things should check perf_paranoid_cpu() &&
		 *	CAP_SYS_ADMIN
		 */
		event->hw.config |= event->attr.config &
			(p4_config_pack_escr(P4_ESCR_MASK_HT) |
			 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));

		event->hw.config &= ~P4_CCCR_FORCE_OVF;
	}

	rc = x86_setup_perfctr(event);
out:
	put_cpu();
	return rc;
}
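
/*
 * Roughly, a raw config for this PMU uses the same packed ESCR+CCCR layout
 * the driver builds for itself; an illustrative sketch only (built with the
 * same helpers this file uses, not a recommended encoding):
 *
 *	attr.type   = PERF_TYPE_RAW;
 *	attr.config = p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
 *			P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING));
 *
 * the thread-specific ESCR/CCCR bits are still filled in by p4_hw_config()
 * above rather than taken from user space.
 */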

static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
{
	int overflow = 0;
	u32 low, high;

	rdmsr(hwc->config_base + hwc->idx, low, high);

	/* we need to check the high bit for unflagged overflows */
	if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) {
		overflow = 1;
		(void)checking_wrmsrl(hwc->config_base + hwc->idx,
			((u64)low) & ~P4_CCCR_OVF);
	}

	return overflow;
}

static void p4_pmu_disable_pebs(void)
{
	/*
	 * FIXME
	 *
	 * It's still allowed that two threads set up the same cache
	 * events, so we can't simply clear the metrics until we know
	 * that no one depends on us anymore; we would need some kind
	 * of reference counter for "ReplayEvent" users.
	 *
	 * What is more complex -- RAW events: if the user (for some
	 * reason) passes a cache event metric with an improper event
	 * opcode, it's fine from the hardware point of view but
	 * complete nonsense as far as the "meaning" of such an action goes.
	 *
	 * So for the moment just leave the metrics turned on forever --
	 * it's ok for now but needs to be revisited!
	 *
	 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
	 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
	 */
}

static inline void p4_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * If the event gets disabled while the counter is in an overflowed
	 * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets
	 * asserted again and again
	 */
	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
		(u64)(p4_config_unpack_cccr(hwc->config)) &
			~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}

static void p4_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_disable_event(event);
	}

	p4_pmu_disable_pebs();
}

/* configuration must be valid */
static void p4_pmu_enable_pebs(u64 config)
{
	struct p4_pebs_bind *bind;
	unsigned int idx;

	BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);

	idx = p4_config_unpack_metric(config);
	if (idx == P4_PEBS_METRIC__none)
		return;

	bind = &p4_pebs_bind_map[idx];

	(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
	(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
}

static void p4_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int thread = p4_ht_config_thread(hwc->config);
	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
	unsigned int idx = p4_config_unpack_event(hwc->config);
	struct p4_event_bind *bind;
	u64 escr_addr, cccr;

	bind = &p4_event_bind_map[idx];
	escr_addr = (u64)bind->escr_msr[thread];

	/*
	 * - we don't support cascaded counters yet
	 * - and counter 1 is broken (erratum)
	 */
	WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
	WARN_ON_ONCE(hwc->idx == 1);

	/* we need a real Event value */
	escr_conf &= ~P4_ESCR_EVENT_MASK;
	escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));

	cccr = p4_config_unpack_cccr(hwc->config);

	/*
	 * it could be a cache event, so we need to write the metrics
	 * into the additional MSRs
	 */
	p4_pmu_enable_pebs(hwc->config);

	(void)checking_wrmsrl(escr_addr, escr_conf);
	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
		(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}

static void p4_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_enable_event(event);
	}
}

static int p4_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int idx, handled = 0;
	u64 val;

	data.addr = 0;
	data.raw = NULL;

	cpuc = &__get_cpu_var(cpu_hw_events);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		int overflow;

		if (!test_bit(idx, cpuc->active_mask)) {
			/* catch in-flight IRQs */
			if (__test_and_clear_bit(idx, cpuc->running))
				handled++;
			continue;
		}

		event = cpuc->events[idx];
		hwc = &event->hw;

		WARN_ON_ONCE(hwc->idx != idx);

		/* it might be an unflagged overflow */
		overflow = p4_pmu_clear_cccr_ovf(hwc);

		val = x86_perf_event_update(event);
		if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
			continue;

		handled += overflow;

		/* event overflow for sure */
		data.period = event->hw.last_period;

		if (!x86_perf_event_set_period(event))
			continue;
		if (perf_event_overflow(event, 1, &data, regs))
			p4_pmu_disable_event(event);
	}

	if (handled) {
		/* p4 quirk: unmask it again */
		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
		inc_irq_stat(apic_perf_irqs);
	}

	return handled;
}

/*
 * swap the thread-specific fields according to the thread
 * we are going to run on
 */
static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
{
	u32 escr, cccr;

	/*
	 * either we are lucky and continue on the same cpu,
	 * or there is no HT support at all
	 */
	if (!p4_should_swap_ts(hwc->config, cpu))
		return;

	/*
	 * the event is migrated from another logical
	 * cpu, so we need to swap the thread-specific flags
	 */

	escr = p4_config_unpack_escr(hwc->config);
	cccr = p4_config_unpack_cccr(hwc->config);

	if (p4_ht_thread(cpu)) {
		cccr &= ~P4_CCCR_OVF_PMI_T0;
		cccr |= P4_CCCR_OVF_PMI_T1;
		if (escr & P4_ESCR_T0_OS) {
			escr &= ~P4_ESCR_T0_OS;
			escr |= P4_ESCR_T1_OS;
		}
		if (escr & P4_ESCR_T0_USR) {
			escr &= ~P4_ESCR_T0_USR;
			escr |= P4_ESCR_T1_USR;
		}
		hwc->config  = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config |= P4_CONFIG_HT;
	} else {
		cccr &= ~P4_CCCR_OVF_PMI_T1;
		cccr |= P4_CCCR_OVF_PMI_T0;
		if (escr & P4_ESCR_T1_OS) {
			escr &= ~P4_ESCR_T1_OS;
			escr |= P4_ESCR_T0_OS;
		}
		if (escr & P4_ESCR_T1_USR) {
			escr &= ~P4_ESCR_T1_USR;
			escr |= P4_ESCR_T0_USR;
		}
		hwc->config  = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config &= ~P4_CONFIG_HT;
	}
}
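
/*
 * For instance, an event last programmed on thread 0 carries
 * P4_CCCR_OVF_PMI_T0 and the T0_OS/T0_USR bits; if it is next scheduled on
 * the sibling thread, the swap above turns those into P4_CCCR_OVF_PMI_T1
 * and the T1_OS/T1_USR equivalents and marks the config with P4_CONFIG_HT.
 */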

/*
 * ESCR address hashing is tricky: the ESCRs are not sequential
 * in memory, but they all start from MSR_P4_BSU_ESCR0 (0x03a0) and
 * every ESCR address lies within the range [0x3a0, 0x3e1],
 * so we end up with a hash table that is ~70% filled
 */

#define P4_ESCR_MSR_BASE		0x000003a0
#define P4_ESCR_MSR_MAX			0x000003e1
#define P4_ESCR_MSR_TABLE_SIZE		(P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
#define P4_ESCR_MSR_IDX(msr)		(msr - P4_ESCR_MSR_BASE)
#define P4_ESCR_MSR_TABLE_ENTRY(msr)	[P4_ESCR_MSR_IDX(msr)] = msr
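
/*
 * For example, MSR_P4_BSU_ESCR0 (0x3a0) hashes to index 0 and the highest
 * ESCR address (0x3e1) to index 0x41, so the table below has 0x42 slots,
 * most of them occupied by a valid ESCR MSR.
 */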

static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
};

static int p4_get_escr_idx(unsigned int addr)
{
	unsigned int idx = P4_ESCR_MSR_IDX(addr);

	if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE ||
		     !p4_escr_table[idx] ||
		     p4_escr_table[idx] != addr)) {
		WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
		return -1;
	}

	return idx;
}

static int p4_next_cntr(int thread, unsigned long *used_mask,
			struct p4_event_bind *bind)
{
	int i, j;

	for (i = 0; i < P4_CNTR_LIMIT; i++) {
		j = bind->cntr[thread][i];
		if (j != -1 && !test_bit(j, used_mask))
			return j;
	}

	return -1;
}
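
/*
 * For example, with the bind map above, P4_EVENT_TC_DELIVER_MODE on thread 0
 * may only land on counter 4 or 5 (6 or 7 on thread 1), and the first of
 * those not already present in used_mask is taken.
 */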

static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
	int cpu = smp_processor_id();
	struct hw_perf_event *hwc;
	struct p4_event_bind *bind;
	unsigned int i, thread, num;
	int cntr_idx, escr_idx;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
	bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);

	for (i = 0, num = n; i < n; i++, num--) {

		hwc = &cpuc->event_list[i]->hw;
		thread = p4_ht_thread(cpu);
		bind = p4_config_get_bind(hwc->config);
		escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
		if (unlikely(escr_idx == -1))
			goto done;

		if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
			cntr_idx = hwc->idx;
			if (assign)
				assign[i] = hwc->idx;
			goto reserve;
		}

		cntr_idx = p4_next_cntr(thread, used_mask, bind);
		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
			goto done;

		p4_pmu_swap_config_ts(hwc, cpu);
		if (assign)
			assign[i] = cntr_idx;
reserve:
		set_bit(cntr_idx, used_mask);
		set_bit(escr_idx, escr_mask);
	}

done:
	return num ? -ENOSPC : 0;
}

static __initconst const struct x86_pmu p4_pmu = {
	.name = "Netburst P4/Xeon",
	.handle_irq = p4_pmu_handle_irq,
	.disable_all = p4_pmu_disable_all,
	.enable_all = p4_pmu_enable_all,
	.enable = p4_pmu_enable_event,
	.disable = p4_pmu_disable_event,
	.eventsel = MSR_P4_BPU_CCCR0,
	.perfctr = MSR_P4_BPU_PERFCTR0,
	.event_map = p4_pmu_event_map,
	.max_events = ARRAY_SIZE(p4_general_events),
	.get_event_constraints = x86_get_event_constraints,
	/*
	 * If HT is disabled we may need to use all the
	 * ARCH_P4_MAX_CCCR counters simultaneously, though
	 * for the moment leave it restricted, assuming
	 * HT is on
	 */
	.num_counters = ARCH_P4_MAX_CCCR,
	.apic = 1,
	.cntval_bits = 40,
	.cntval_mask = (1ULL << 40) - 1,
	.max_period = (1ULL << 39) - 1,
	.hw_config = p4_hw_config,
	.schedule_events = p4_pmu_schedule_events,
	/*
	 * This handles the erratum N15 in Intel doc 249199-029:
	 * the counter may not be updated correctly on a write,
	 * so we need a second write operation to do the trick
	 * (the official workaround didn't work)
	 *
	 * the idea is taken from the OProfile code
	 */
	.perfctr_second_write = 1,
};

static __init int p4_pmu_init(void)
{
	unsigned int low, high;

	/* If this gets stripped -- the indexing fails */
	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!(low & (1 << 7))) {
		pr_cont("unsupported Netburst CPU model %d ",
			boot_cpu_data.x86_model);
		return -ENODEV;
	}

	memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
		sizeof(hw_cache_event_ids));

	pr_cont("Netburst events, ");

	x86_pmu = p4_pmu;

	return 0;
}

#endif /* CONFIG_CPU_SUP_INTEL */