/*
 * Netburst Performance Events (P4, old Xeon)
 *
 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
 *
 * For licensing details see kernel-base/COPYING
 */

#ifdef CONFIG_CPU_SUP_INTEL

#include <asm/perf_event_p4.h>

/*
 * array indices: 0,1 - HT threads, used with HT enabled cpu
 */
struct p4_event_template {
	u32 opcode;			/* ESCR event + CCCR selector */
	u64 config;			/* packed predefined bits */
	int dep;			/* upstream dependency event index */
	unsigned int emask;		/* ESCR EventMask */
	unsigned int escr_msr[2];	/* ESCR MSR for this event */
	unsigned int cntr[2];		/* counter index (offset) */
};
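
/*
 * Note: ->opcode carries both the ESCR event code and the CCCR "ESCR
 * select" value packed into one u32; p4_pmu_event_map() splits them
 * back out via P4_EVENT_UNPACK_EVENT()/P4_EVENT_UNPACK_SELECTOR().
 */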

struct p4_pmu_res {
	/* maps hw_conf::idx into template for ESCR sake */
	struct p4_event_template *tpl[ARCH_P4_MAX_CCCR];
};

static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config);

/*
 * WARN: CCCR1 doesn't have a working enable bit, so try not to
 * use it if possible
 *
 * Also, once we start to support raw events, we will need to
 * append _all_ P4_EVENT_PACK'ed events here
 */
struct p4_event_template p4_templates[] = {
	[0] = {
		.opcode	= P4_UOP_TYPE,
		.config	= 0,
		.dep	= -1,
		.emask	=
			P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) |
			P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
		.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
		.cntr	= { 16, 17 },
	},
	[1] = {
		.opcode	= P4_GLOBAL_POWER_EVENTS,
		.config	= 0,
		.dep	= -1,
		.emask	=
			P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr	= { 0, 2 },
	},
	[2] = {
		.opcode	= P4_INSTR_RETIRED,
		.config	= 0,
		.dep	= -1, /* needs front-end tagging */
		.emask	=
			P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) |
			P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr	= { 12, 14 },
	},
	[3] = {
		.opcode	= P4_BSQ_CACHE_REFERENCE,
		.config	= 0,
		.dep	= -1,
		.emask	=
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM),
		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
		.cntr	= { 0, 2 },
	},
	[4] = {
		.opcode	= P4_BSQ_CACHE_REFERENCE,
		.config	= 0,
		.dep	= -1,
		.emask	=
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
		.cntr	= { 0, 3 },
	},
	[5] = {
		.opcode	= P4_RETIRED_BRANCH_TYPE,
		.config	= 0,
		.dep	= -1,
		.emask	=
			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) |
			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) |
			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN) |
			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT),
		.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
		.cntr	= { 4, 6 },
	},
	[6] = {
		.opcode	= P4_MISPRED_BRANCH_RETIRED,
		.config	= 0,
		.dep	= -1,
		.emask	=
			P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr	= { 12, 14 },
	},
	[7] = {
		.opcode	= P4_FSB_DATA_ACTIVITY,
		.config	= p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
		.dep	= -1,
		.emask	=
			P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) |
			P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr	= { 0, 2 },
	},
};
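
/*
 * Templates [3] and [4] intentionally share the P4_BSQ_CACHE_REFERENCE
 * opcode and differ only in their emask bits; this is why
 * p4_pmu_template_lookup() below has to match on the event mask too.
 */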

static struct p4_event_template *p4_event_map[PERF_COUNT_HW_MAX] = {
	/* non-halted CPU clocks */
	[PERF_COUNT_HW_CPU_CYCLES] = &p4_templates[1],

	/* retired instructions: dep on tagging the FSB */
	[PERF_COUNT_HW_INSTRUCTIONS] = &p4_templates[2],

	/* cache hits */
	[PERF_COUNT_HW_CACHE_REFERENCES] = &p4_templates[3],

	/* cache misses */
	[PERF_COUNT_HW_CACHE_MISSES] = &p4_templates[4],

	/* branch instructions retired */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = &p4_templates[5],

	/* mispredicted branches retired */
	[PERF_COUNT_HW_BRANCH_MISSES] = &p4_templates[6],

	/* bus ready clocks (cpu is driving #DRDY_DRV/#DRDY_OWN): */
	[PERF_COUNT_HW_BUS_CYCLES] = &p4_templates[7],
};

static u64 p4_pmu_event_map(int hw_event)
{
	struct p4_event_template *tpl;
	u64 config;

	if (hw_event >= ARRAY_SIZE(p4_event_map)) {
		printk_once(KERN_ERR "PMU: Incorrect event index\n");
		return 0;
	}
	tpl = p4_event_map[hw_event];

	/*
	 * fill config up according to
	 * a predefined event template
	 */
	config = tpl->config;
	config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT);
	config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT);
	config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT);

	/* on HT machine we need a special bit */
	if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id()))
		config = p4_set_ht_bit(config);

	return config;
}

/*
 * Note that we still have 5 events (from the global events SDM list)
 * intersected in opcode+emask bits, so we will need another
 * scheme there to distinguish templates.
 */
static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src)
{
	return dst & src;
}

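/*
 * Map a packed config back to the template it was built from. Note
 * that the emask comparison is a plain AND: any overlapping mask bit
 * counts as a match.
 */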
static struct p4_event_template *p4_pmu_template_lookup(u64 config)
{
	u32 opcode = p4_config_unpack_opcode(config);
	unsigned int emask = p4_config_unpack_emask(config);
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(p4_templates); i++) {
		if (opcode == p4_templates[i].opcode &&
		    p4_pmu_emask_match(emask, p4_templates[i].emask))
			return &p4_templates[i];
	}

	return NULL;
}

/*
 * We don't control raw events, so it's up to the caller to pass
 * sane values (and we don't count the thread number on an HT
 * machine, but allow HT-compatible specifics to be passed on)
 */
static u64 p4_pmu_raw_event(u64 hw_event)
{
	return hw_event &
		(p4_config_pack_escr(P4_EVNTSEL_MASK_HT) |
		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
}

static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
{
	int cpu = raw_smp_processor_id();

	/*
	 * The reason we use the cpu this early is that if we get scheduled
	 * the first time on the same cpu, we will not need to swap
	 * thread-specific flags in the config (and will save some cpu cycles)
	 */

	/* CCCR by default */
	hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu));

	/* Count user and OS events unless not requested to */
	hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel,
								attr->exclude_user));
	return 0;
}

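/*
 * Clear the sticky P4_CCCR_OVF flag in the counter's CCCR if the
 * hardware left it set; while it is set the PMI keeps being asserted
 * (see the comment in p4_pmu_disable_event() below).
 */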
static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
{
	unsigned long dummy;

	rdmsrl(hwc->config_base + hwc->idx, dummy);
	if (dummy & P4_CCCR_OVF) {
		(void)checking_wrmsrl(hwc->config_base + hwc->idx,
			((u64)dummy) & ~P4_CCCR_OVF);
	}
}

static inline void p4_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * If the event gets disabled while the counter is in an overflowed
	 * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets
	 * asserted again and again
	 */
	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
		(u64)(p4_config_unpack_cccr(hwc->config)) &
			~P4_CCCR_ENABLE & ~P4_CCCR_OVF);
}

static void p4_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_disable_event(event);
	}
}

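/*
 * Enabling is a two-step write: first program the ESCR picked for the
 * current HT thread (looked up through the per-cpu template cache that
 * p4_pmu_schedule_events() fills in), then set P4_CCCR_ENABLE in the
 * counter's CCCR.
 */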
static void p4_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int thread = p4_ht_config_thread(hwc->config);
	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
	u64 escr_base;
	struct p4_event_template *tpl;
	struct p4_pmu_res *c;

	/*
	 * some preparation work from per-cpu private fields,
	 * since we need to find out which ESCR to use
	 */
	c = &__get_cpu_var(p4_pmu_config);
	tpl = c->tpl[hwc->idx];
	if (!tpl) {
		pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx);
		return;
	}
	escr_base = (u64)tpl->escr_msr[thread];

	/*
	 * - we don't support cascaded counters yet
	 * - and counter 1 is broken (erratum)
	 */
	WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
	WARN_ON_ONCE(hwc->idx == 1);

	(void)checking_wrmsrl(escr_base, escr_conf);
	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
		(u64)(p4_config_unpack_cccr(hwc->config)) | P4_CCCR_ENABLE);
}

static void p4_pmu_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_enable_event(event);
	}
}

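/*
 * PMI handler: fold in the count of every active counter and, for the
 * ones that actually wrapped, re-arm the period and push a sample out.
 * A counter whose top bit is still set has not wrapped yet and is
 * skipped.
 */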
static int p4_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int idx, handled = 0;
	u64 val;

	data.addr = 0;
	data.raw = NULL;

	cpuc = &__get_cpu_var(cpu_hw_events);

	for (idx = 0; idx < x86_pmu.num_events; idx++) {

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		event = cpuc->events[idx];
		hwc = &event->hw;

		WARN_ON_ONCE(hwc->idx != idx);

		/*
		 * FIXME: Redundant call, actually not needed
		 * but just to check if we're screwed
		 */
		p4_pmu_clear_cccr_ovf(hwc);

		val = x86_perf_event_update(event);
		if (val & (1ULL << (x86_pmu.event_bits - 1)))
			continue;

		/*
		 * event overflow
		 */
		handled = 1;
		data.period = event->hw.last_period;

		if (!x86_perf_event_set_period(event))
			continue;
		if (perf_event_overflow(event, 1, &data, regs))
			p4_pmu_disable_event(event);
	}

	if (handled) {
#ifdef CONFIG_X86_LOCAL_APIC
		/* p4 quirk: unmask it again */
		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
#endif
		inc_irq_stat(apic_perf_irqs);
	}

	return handled;
}

/*
 * swap thread-specific fields according to the thread
 * we are going to run on
 */
static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
{
	u32 escr, cccr;

	/*
	 * either we are lucky and continue on the same cpu,
	 * or there is no HT support
	 */
	if (!p4_should_swap_ts(hwc->config, cpu))
		return;

	/*
	 * the event was migrated from another logical
	 * cpu, so we need to swap thread-specific flags
	 */

	escr = p4_config_unpack_escr(hwc->config);
	cccr = p4_config_unpack_cccr(hwc->config);

	if (p4_ht_thread(cpu)) {
		cccr &= ~P4_CCCR_OVF_PMI_T0;
		cccr |= P4_CCCR_OVF_PMI_T1;
		if (escr & P4_EVNTSEL_T0_OS) {
			escr &= ~P4_EVNTSEL_T0_OS;
			escr |= P4_EVNTSEL_T1_OS;
		}
		if (escr & P4_EVNTSEL_T0_USR) {
			escr &= ~P4_EVNTSEL_T0_USR;
			escr |= P4_EVNTSEL_T1_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config |= P4_CONFIG_HT;
	} else {
		cccr &= ~P4_CCCR_OVF_PMI_T1;
		cccr |= P4_CCCR_OVF_PMI_T0;
		if (escr & P4_EVNTSEL_T1_OS) {
			escr &= ~P4_EVNTSEL_T1_OS;
			escr |= P4_EVNTSEL_T0_OS;
		}
		if (escr & P4_EVNTSEL_T1_USR) {
			escr &= ~P4_EVNTSEL_T1_USR;
			escr |= P4_EVNTSEL_T0_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config &= ~P4_CONFIG_HT;
	}
}
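
/*
 * Example: an event counting both rings on thread 0 carries
 * P4_CCCR_OVF_PMI_T0 | P4_EVNTSEL_T0_OS | P4_EVNTSEL_T0_USR; after a
 * migration to thread 1 the swap above rewrites these into the _T1
 * variants and sets P4_CONFIG_HT in the packed config.
 */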

/* ESCRs are not sequential in memory, so we need a map */
static unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = {
	MSR_P4_ALF_ESCR0,	/*  0 */
	MSR_P4_ALF_ESCR1,	/*  1 */
	MSR_P4_BPU_ESCR0,	/*  2 */
	MSR_P4_BPU_ESCR1,	/*  3 */
	MSR_P4_BSU_ESCR0,	/*  4 */
	MSR_P4_BSU_ESCR1,	/*  5 */
	MSR_P4_CRU_ESCR0,	/*  6 */
	MSR_P4_CRU_ESCR1,	/*  7 */
	MSR_P4_CRU_ESCR2,	/*  8 */
	MSR_P4_CRU_ESCR3,	/*  9 */
	MSR_P4_CRU_ESCR4,	/* 10 */
	MSR_P4_CRU_ESCR5,	/* 11 */
	MSR_P4_DAC_ESCR0,	/* 12 */
	MSR_P4_DAC_ESCR1,	/* 13 */
	MSR_P4_FIRM_ESCR0,	/* 14 */
	MSR_P4_FIRM_ESCR1,	/* 15 */
	MSR_P4_FLAME_ESCR0,	/* 16 */
	MSR_P4_FLAME_ESCR1,	/* 17 */
	MSR_P4_FSB_ESCR0,	/* 18 */
	MSR_P4_FSB_ESCR1,	/* 19 */
	MSR_P4_IQ_ESCR0,	/* 20 */
	MSR_P4_IQ_ESCR1,	/* 21 */
	MSR_P4_IS_ESCR0,	/* 22 */
	MSR_P4_IS_ESCR1,	/* 23 */
	MSR_P4_ITLB_ESCR0,	/* 24 */
	MSR_P4_ITLB_ESCR1,	/* 25 */
	MSR_P4_IX_ESCR0,	/* 26 */
	MSR_P4_IX_ESCR1,	/* 27 */
	MSR_P4_MOB_ESCR0,	/* 28 */
	MSR_P4_MOB_ESCR1,	/* 29 */
	MSR_P4_MS_ESCR0,	/* 30 */
	MSR_P4_MS_ESCR1,	/* 31 */
	MSR_P4_PMH_ESCR0,	/* 32 */
	MSR_P4_PMH_ESCR1,	/* 33 */
	MSR_P4_RAT_ESCR0,	/* 34 */
	MSR_P4_RAT_ESCR1,	/* 35 */
	MSR_P4_SAAT_ESCR0,	/* 36 */
	MSR_P4_SAAT_ESCR1,	/* 37 */
	MSR_P4_SSU_ESCR0,	/* 38 */
	MSR_P4_SSU_ESCR1,	/* 39 */
	MSR_P4_TBPU_ESCR0,	/* 40 */
	MSR_P4_TBPU_ESCR1,	/* 41 */
	MSR_P4_TC_ESCR0,	/* 42 */
	MSR_P4_TC_ESCR1,	/* 43 */
	MSR_P4_U2L_ESCR0,	/* 44 */
	MSR_P4_U2L_ESCR1,	/* 45 */
};

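/*
 * A linear search is good enough here: the map has only
 * ARCH_P4_TOTAL_ESCR (46) entries and is walked at event-scheduling
 * time, not in the hot counting path.
 */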
static int p4_get_escr_idx(unsigned int addr)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(p4_escr_map); i++) {
		if (addr == p4_escr_map[i])
			return i;
	}

	return -1;
}

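/*
 * Scheduling sketch: every event resolves to a template, and the
 * template dictates one counter and one ESCR MSR per HT thread. Both
 * are claimed in local bitmaps as we walk the event list; any clash
 * means the group cannot be scheduled and we bail out with -ENOSPC.
 */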
static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)];

	struct hw_perf_event *hwc;
	struct p4_event_template *tpl;
	struct p4_pmu_res *c;
	int cpu = raw_smp_processor_id();
	int escr_idx, thread, i, num;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
	bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR);

	c = &__get_cpu_var(p4_pmu_config);
	/*
	 * First find out which resources the events are going
	 * to use; if an ESCR+CCCR tuple is already borrowed,
	 * then get out of here
	 */
	for (i = 0, num = n; i < n; i++, num--) {
		hwc = &cpuc->event_list[i]->hw;
		tpl = p4_pmu_template_lookup(hwc->config);
		if (!tpl)
			goto done;
		thread = p4_ht_thread(cpu);
		escr_idx = p4_get_escr_idx(tpl->escr_msr[thread]);
		if (escr_idx == -1)
			goto done;

		/* already allocated and remains on the same cpu */
		if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
			if (assign)
				assign[i] = hwc->idx;
			/* upstream dependent event */
			if (unlikely(tpl->dep != -1))
				printk_once(KERN_WARNING "PMU: Dep events are "
					"not implemented yet\n");
			goto reserve;
		}

		/* it may be already borrowed */
		if (test_bit(tpl->cntr[thread], used_mask) ||
		    test_bit(escr_idx, escr_mask))
			goto done;

		/*
		 * ESCR+CCCR+COUNTER are available to use, so let's swap
		 * the thread-specific bits, push the assigned bits
		 * back, and save the template into the per-cpu
		 * area (which will allow us to find out which ESCR
		 * to use at the moment of "enable event via real MSR")
		 */
		p4_pmu_swap_config_ts(hwc, cpu);
		if (assign) {
			assign[i] = tpl->cntr[thread];
			c->tpl[assign[i]] = tpl;
		}
reserve:
		set_bit(tpl->cntr[thread], used_mask);
		set_bit(escr_idx, escr_mask);
	}

done:
	return num ? -ENOSPC : 0;
}

static __initconst struct x86_pmu p4_pmu = {
	.name			= "Netburst P4/Xeon",
	.handle_irq		= p4_pmu_handle_irq,
	.disable_all		= p4_pmu_disable_all,
	.enable_all		= p4_pmu_enable_all,
	.enable			= p4_pmu_enable_event,
	.disable		= p4_pmu_disable_event,
	.eventsel		= MSR_P4_BPU_CCCR0,
	.perfctr		= MSR_P4_BPU_PERFCTR0,
	.event_map		= p4_pmu_event_map,
	.raw_event		= p4_pmu_raw_event,
	.max_events		= ARRAY_SIZE(p4_event_map),
	.get_event_constraints	= x86_get_event_constraints,
	/*
	 * If HT is disabled we may need to use all
	 * ARCH_P4_MAX_CCCR counters simultaneously,
	 * though for the moment leave it restricted,
	 * assuming HT is on
	 */
	.num_events		= ARCH_P4_MAX_CCCR,
	.apic			= 1,
	.event_bits		= 40,
	.event_mask		= (1ULL << 40) - 1,
	.max_period		= (1ULL << 39) - 1,
	.hw_config		= p4_hw_config,
	.schedule_events	= p4_pmu_schedule_events,
};

static __init int p4_pmu_init(void)
{
	unsigned int low, high;

	/* If we get stripped -- indexing fails */
	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);

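	/* MSR_IA32_MISC_ENABLE bit 7 -- "performance monitoring available" */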
	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!(low & (1 << 7))) {
		pr_cont("unsupported Netburst CPU model %d ",
			boot_cpu_data.x86_model);
		return -ENODEV;
	}

	pr_cont("Netburst events, ");

	x86_pmu = p4_pmu;

	return 0;
}

#endif /* CONFIG_CPU_SUP_INTEL */