arch/x86/kernel/cpu/perf_event_amd.c
#ifdef CONFIG_CPU_SUP_AMD

static DEFINE_RAW_SPINLOCK(amd_nb_lock);

static __initconst u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
		[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
		[ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses :IC+DC */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
		[ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
		[ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};
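
/*
 * Illustrative lookup (not part of the original file): the generic
 * L1D-read-miss cache event resolves through this table to the AMD
 * hardware event code 0x0041 (Data Cache Misses):
 *
 *	amd_hw_cache_event_ids[C(L1D)][C(OP_READ)][C(RESULT_MISS)]; // 0x0041
 *
 * By the usual convention for these tables, 0 means the combination
 * is not counted and -1 means the operation is not supported (e.g.
 * writes to the instruction cache).
 */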

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
};

static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}

static u64 amd_pmu_raw_event(u64 hw_event)
{
#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
#define K7_EVNTSEL_INV_MASK	0x000800000ULL
#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL

#define K7_EVNTSEL_MASK			\
	(K7_EVNTSEL_EVENT_MASK |	\
	 K7_EVNTSEL_UNIT_MASK  |	\
	 K7_EVNTSEL_EDGE_MASK  |	\
	 K7_EVNTSEL_INV_MASK   |	\
	 K7_EVNTSEL_REG_MASK)

	return hw_event & K7_EVNTSEL_MASK;
}
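
/*
 * Worked example (illustrative, not in the original file): the masks
 * above OR together to 0xFFF84FFFFULL, which keeps the event select
 * (bits 0-7 and 32-35), unit mask (8-15), edge (18), invert (23) and
 * counter mask (24-31) fields, while stripping the privilege-level
 * and enable bits that the core perf code manages itself. So for a
 * raw config of 0x4300C0 (USR | OS | ENABLE | event 0xC0):
 *
 *	amd_pmu_raw_event(0x4300C0); // returns 0x00C0
 */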

/*
 * AMD64 events are detected based on their event codes.
 */
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
	return (hwc->config & 0xe0) == 0xe0;
}
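
/*
 * Illustrative check (not in the original file): NB event codes live
 * in the 0xE0-0xFF range, so the mask test above matches exactly the
 * events whose top three event-code bits are all set:
 *
 *	config = 0xE0 (e.g. DRAM Accesses)	-> NB event, constrained
 *	config = 0x76 (CPU Clocks not Halted)	-> core event, unconstrained
 */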

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	int i;

	/*
	 * only care about NB events
	 */
	if (!(nb && amd_is_nb_event(hwc)))
		return;

	/*
	 * need to scan the whole list because the event may not have
	 * been assigned during scheduling
	 *
	 * no race condition possible because the event can only be
	 * removed on one CPU at a time AND the PMU is disabled when
	 * we come here
	 */
	for (i = 0; i < x86_pmu.num_events; i++) {
		if (nb->owners[i] == event) {
			cmpxchg(nb->owners + i, event, NULL);
			break;
		}
	}
}

/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12
 *
 * NB events are events measuring L3 cache and HyperTransport
 * traffic. They are identified by an event code >= 0xe0.
 * They measure events on the NorthBridge, which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When an NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the
 * same counters to host NB events; this is why we use atomic
 * ops. Some multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling amd_put_event_constraints().
 *
 * Non-NB events are not impacted by this restriction.
 */
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old = NULL;
	int max = x86_pmu.num_events;
	int i, j, k = -1;

	/*
	 * if not NB event or no NB, then no constraints
	 */
	if (!(nb && amd_is_nb_event(hwc)))
		return &unconstrained;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for (i = 0; i < max; i++) {
		/*
		 * keep track of first free slot
		 */
		if (k == -1 && !nb->owners[i])
			k = i;

		/* already present, reuse */
		if (nb->owners[i] == event)
			goto done;
	}
	/*
	 * not present, so grab a new slot
	 * starting either at:
	 */
	if (hwc->idx != -1) {
		/* previous assignment */
		i = hwc->idx;
	} else if (k != -1) {
		/* start from free slot found */
		i = k;
	} else {
		/*
		 * event not found, no slot found in
		 * first pass, try again from the
		 * beginning
		 */
		i = 0;
	}
	j = i;
	do {
		old = cmpxchg(nb->owners + i, NULL, event);
		if (!old)
			break;
		if (++i == max)
			i = 0;
	} while (i != j);
done:
	if (!old)
		return &nb->event_constraints[i];

	return &emptyconstraint;
}
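
/*
 * Illustrative walk-through (not in the original file), assuming four
 * counters and two cores sharing one NB: core 0 schedules an NB event
 * and cmpxchg()s itself into nb->owners[0]; core 1 then races for the
 * same slot, loses, and rotates on to claim nb->owners[1]. If every
 * slot is already owned, the do/while loop wraps around to its start
 * index j and emptyconstraint is returned, so scheduling fails rather
 * than silently sharing a counter between cores.
 */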

static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
{
	struct amd_nb *nb;
	int i;

	nb = kzalloc(sizeof(struct amd_nb), GFP_KERNEL);
	if (!nb)
		return NULL;

	nb->nb_id = nb_id;

	/*
	 * initialize all possible NB constraints
	 */
	for (i = 0; i < x86_pmu.num_events; i++) {
		set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
	return nb;
}

static void amd_pmu_cpu_online(int cpu)
{
	struct cpu_hw_events *cpu1, *cpu2;
	struct amd_nb *nb = NULL;
	int i, nb_id;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	/*
	 * function may be called too early in the
	 * boot process, in which case nb_id is bogus
	 */
	nb_id = amd_get_nb_id(cpu);
	if (nb_id == BAD_APICID)
		return;

	cpu1 = &per_cpu(cpu_hw_events, cpu);
	cpu1->amd_nb = NULL;

	raw_spin_lock(&amd_nb_lock);

	for_each_online_cpu(i) {
		cpu2 = &per_cpu(cpu_hw_events, i);
		nb = cpu2->amd_nb;
		if (!nb)
			continue;
		if (nb->nb_id == nb_id)
			goto found;
	}

	nb = amd_alloc_nb(cpu, nb_id);
	if (!nb) {
		pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
		raw_spin_unlock(&amd_nb_lock);
		return;
	}
found:
	nb->refcnt++;
	cpu1->amd_nb = nb;

	raw_spin_unlock(&amd_nb_lock);
}
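
/*
 * Example of the resulting sharing (illustrative, not in the original
 * file): on a dual-core package, CPU0 comes online first, finds no
 * amd_nb with its nb_id and allocates one (refcnt = 1). CPU1 then
 * finds CPU0's structure in the for_each_online_cpu() scan and simply
 * attaches to it (refcnt = 2), so both cores allocate NB events from
 * the same owners[] table.
 */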

static void amd_pmu_cpu_offline(int cpu)
{
	struct cpu_hw_events *cpuhw;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	cpuhw = &per_cpu(cpu_hw_events, cpu);

	raw_spin_lock(&amd_nb_lock);

	/*
	 * cpuhw->amd_nb can still be NULL here if the online hook ran
	 * too early (bogus nb_id) or its allocation failed, so check
	 * before dropping the reference.
	 */
	if (cpuhw->amd_nb) {
		if (--cpuhw->amd_nb->refcnt == 0)
			kfree(cpuhw->amd_nb);

		cpuhw->amd_nb = NULL;
	}

	raw_spin_unlock(&amd_nb_lock);
}

static __initconst struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= amd_pmu_event_map,
	.raw_event		= amd_pmu_raw_event,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_events		= 4,
	.event_bits		= 48,
	.event_mask		= (1ULL << 48) - 1,
	.apic			= 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= amd_get_event_constraints,
	.put_event_constraints	= amd_put_event_constraints,

	.cpu_prepare		= amd_pmu_cpu_online,
	.cpu_dead		= amd_pmu_cpu_offline,
};
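
/*
 * Illustrative note (not in the original file): the counters are 48
 * bits wide, so capping max_period at (1ULL << 47) - 1 guarantees a
 * freshly programmed counter starts with bit 47 clear; the generic
 * x86 code writes the counter as -period and can then detect an
 * overflow via that top bit, roughly:
 *
 *	wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.event_mask);
 */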

static __init int amd_pmu_init(void)
{
	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	x86_pmu = amd_pmu;

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}
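
/*
 * Illustrative note (not in the original file): AMD family numbers
 * here are K7 = 6, K8 = 0xf and Fam10h = 0x10, so the x86 < 6 check
 * above rejects only pre-K7 parts (e.g. the K6, family 5), which
 * predate this performance-monitoring MSR interface.
 */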

#else /* CONFIG_CPU_SUP_AMD */

static int amd_pmu_init(void)
{
	return 0;
}

#endif /* CONFIG_CPU_SUP_AMD */