perf, amd: Use kmalloc_node(,__GFP_ZERO) for northbridge structure allocation
arch/x86/kernel/cpu/perf_event_amd.c

#ifdef CONFIG_CPU_SUP_AMD

static DEFINE_RAW_SPINLOCK(amd_nb_lock);

static __initconst const u64 amd_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
                [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
                [ C(RESULT_MISS) ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
                [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
        },
 },
 [ C(L1I ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
                [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS) ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
                [ C(RESULT_MISS) ] = 0,
        },
 },
 [ C(LL ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
                [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
                [ C(RESULT_MISS) ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS) ] = 0,
        },
 },
 [ C(DTLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
                [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS) ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS) ] = 0,
        },
 },
 [ C(ITLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
                [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS) ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS) ] = -1,
        },
 },
 [ C(BPU ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
                [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS) ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS) ] = -1,
        },
 },
};

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]            = 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES]          = 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c2,
  [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c3,
};

static u64 amd_pmu_event_map(int hw_event)
{
        return amd_perfmon_event_map[hw_event];
}

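/*
 * Set up the hardware configuration for an event: run the generic
 * x86 setup first, then, for raw events, allow the full AMD64 raw
 * event mask (extended event select and unit mask bits).
 */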
static int amd_pmu_hw_config(struct perf_event *event)
{
        int ret = x86_pmu_hw_config(event);

        if (ret)
                return ret;

        if (event->attr.type != PERF_TYPE_RAW)
                return 0;

        event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;

        return 0;
}

/*
 * AMD64 events are detected based on their event codes.
 */
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
        return (hwc->config & 0xe0) == 0xe0;
}

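/*
 * True once this CPU has been attached to a northbridge structure
 * with a valid node id (see amd_pmu_cpu_starting()).
 */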
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
        struct amd_nb *nb = cpuc->amd_nb;

        return nb && nb->nb_id != -1;
}

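/*
 * Release the shared northbridge counter owned by this event, if any,
 * so that other cores on the same node can schedule NB events onto it.
 */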
static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
                                      struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct amd_nb *nb = cpuc->amd_nb;
        int i;

        /*
         * only care about NB events
         */
        if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
                return;

        /*
         * need to scan whole list because event may not have
         * been assigned during scheduling
         *
         * no race condition possible because event can only
         * be removed on one CPU at a time AND PMU is disabled
         * when we come here
         */
        for (i = 0; i < x86_pmu.num_counters; i++) {
                if (nb->owners[i] == event) {
                        cmpxchg(nb->owners+i, event, NULL);
                        break;
                }
        }
}

 /*
  * AMD64 NorthBridge events need special treatment because
  * counter access needs to be synchronized across all cores
  * of a package. Refer to BKDG section 3.12.
  *
  * NB events are events measuring L3 cache and HyperTransport
  * traffic. They are identified by an event code >= 0xe00.
  * They measure events on the NorthBridge, which is shared
  * by all cores on a package. NB events are counted on a
  * shared set of counters. When an NB event is programmed
  * in a counter, the data actually comes from a shared
  * counter. Thus, access to those counters needs to be
  * synchronized.
  *
  * We implement the synchronization such that no two cores
  * can be measuring NB events using the same counters. Thus,
  * we maintain a per-NB allocation table. The available slot
  * is propagated using the event_constraint structure.
  *
  * We provide only one choice for each NB event based on
  * the fact that only NB events have restrictions. Consequently,
  * if a counter is available, there is a guarantee the NB event
  * will be assigned to it. If no slot is available, an empty
  * constraint is returned and scheduling will eventually fail
  * for this event.
  *
  * Note that all cores attached to the same NB compete for the same
  * counters to host NB events; this is why we use atomic ops. Some
  * multi-chip CPUs may have more than one NB.
  *
  * Given that resources are allocated (cmpxchg), they must be
  * eventually freed for others to use. This is accomplished by
  * calling amd_put_event_constraints().
  *
  * Non-NB events are not impacted by this restriction.
  */
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct amd_nb *nb = cpuc->amd_nb;
        struct perf_event *old = NULL;
        int max = x86_pmu.num_counters;
        int i, j, k = -1;

        /*
         * if not NB event or no NB, then no constraints
         */
        if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
                return &unconstrained;

        /*
         * detect if already present, if so reuse
         *
         * cannot merge with actual allocation
         * because of possible holes
         *
         * event can already be present yet not assigned (in hwc->idx)
         * because of successive calls to x86_schedule_events() from
         * hw_perf_group_sched_in() without hw_perf_enable()
         */
        for (i = 0; i < max; i++) {
                /*
                 * keep track of first free slot
                 */
                if (k == -1 && !nb->owners[i])
                        k = i;

                /* already present, reuse */
                if (nb->owners[i] == event)
                        goto done;
        }
        /*
         * not present, so grab a new slot
         * starting either at:
         */
        if (hwc->idx != -1) {
                /* previous assignment */
                i = hwc->idx;
        } else if (k != -1) {
                /* start from free slot found */
                i = k;
        } else {
                /*
                 * event not found, no slot found in
                 * first pass, try again from the
                 * beginning
                 */
                i = 0;
        }
        j = i;
        do {
                old = cmpxchg(nb->owners+i, NULL, event);
                if (!old)
                        break;
                if (++i == max)
                        i = 0;
        } while (i != j);
done:
        if (!old)
                return &nb->event_constraints[i];

        return &emptyconstraint;
}

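/*
 * Allocate a zeroed amd_nb structure on the memory node of @cpu and
 * set up one single-counter constraint per hardware counter.
 */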
static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
{
        struct amd_nb *nb;
        int i;

        nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
                          cpu_to_node(cpu));
        if (!nb)
                return NULL;

        nb->nb_id = nb_id;

        /*
         * initialize all possible NB constraints
         */
        for (i = 0; i < x86_pmu.num_counters; i++) {
                __set_bit(i, nb->event_constraints[i].idxmsk);
                nb->event_constraints[i].weight = 1;
        }
        return nb;
}

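/*
 * CPU hotplug prepare: allocate a provisional northbridge structure
 * (nb_id == -1); amd_pmu_cpu_starting() either shares an existing one
 * for the node or keeps this allocation.
 */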
static int amd_pmu_cpu_prepare(int cpu)
{
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

        WARN_ON_ONCE(cpuc->amd_nb);

        if (boot_cpu_data.x86_max_cores < 2)
                return NOTIFY_OK;

        cpuc->amd_nb = amd_alloc_nb(cpu, -1);
        if (!cpuc->amd_nb)
                return NOTIFY_BAD;

        return NOTIFY_OK;
}

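/*
 * CPU hotplug starting: look up this CPU's northbridge id and reuse
 * the amd_nb already registered by another online CPU on the same
 * node, freeing the provisional one; then take a reference.
 */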
static void amd_pmu_cpu_starting(int cpu)
{
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
        struct amd_nb *nb;
        int i, nb_id;

        if (boot_cpu_data.x86_max_cores < 2)
                return;

        nb_id = amd_get_nb_id(cpu);
        WARN_ON_ONCE(nb_id == BAD_APICID);

        raw_spin_lock(&amd_nb_lock);

        for_each_online_cpu(i) {
                nb = per_cpu(cpu_hw_events, i).amd_nb;
                if (WARN_ON_ONCE(!nb))
                        continue;

                if (nb->nb_id == nb_id) {
                        kfree(cpuc->amd_nb);
                        cpuc->amd_nb = nb;
                        break;
                }
        }

        cpuc->amd_nb->nb_id = nb_id;
        cpuc->amd_nb->refcnt++;

        raw_spin_unlock(&amd_nb_lock);
}

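/*
 * CPU hotplug teardown: drop this CPU's reference on the shared
 * northbridge structure and free it once the last user is gone
 * (or if it was never attached to a node).
 */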
static void amd_pmu_cpu_dead(int cpu)
{
        struct cpu_hw_events *cpuhw;

        if (boot_cpu_data.x86_max_cores < 2)
                return;

        cpuhw = &per_cpu(cpu_hw_events, cpu);

        raw_spin_lock(&amd_nb_lock);

        if (cpuhw->amd_nb) {
                struct amd_nb *nb = cpuhw->amd_nb;

                if (nb->nb_id == -1 || --nb->refcnt == 0)
                        kfree(nb);

                cpuhw->amd_nb = NULL;
        }

        raw_spin_unlock(&amd_nb_lock);
}

static __initconst const struct x86_pmu amd_pmu = {
        .name                   = "AMD",
        .handle_irq             = x86_pmu_handle_irq,
        .disable_all            = x86_pmu_disable_all,
        .enable_all             = x86_pmu_enable_all,
        .enable                 = x86_pmu_enable_event,
        .disable                = x86_pmu_disable_event,
        .hw_config              = amd_pmu_hw_config,
        .schedule_events        = x86_schedule_events,
        .eventsel               = MSR_K7_EVNTSEL0,
        .perfctr                = MSR_K7_PERFCTR0,
        .event_map              = amd_pmu_event_map,
        .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
        .num_counters           = 4,
        .cntval_bits            = 48,
        .cntval_mask            = (1ULL << 48) - 1,
        .apic                   = 1,
        /* use highest bit to detect overflow */
        .max_period             = (1ULL << 47) - 1,
        .get_event_constraints  = amd_get_event_constraints,
        .put_event_constraints  = amd_put_event_constraints,

        .cpu_prepare            = amd_pmu_cpu_prepare,
        .cpu_starting           = amd_pmu_cpu_starting,
        .cpu_dead               = amd_pmu_cpu_dead,
};

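/*
 * Install the AMD PMU description and the AMD cache event table into
 * the core x86 perf code during early PMU initialization.
 */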
static __init int amd_pmu_init(void)
{
        /* Performance-monitoring supported from K7 and later: */
        if (boot_cpu_data.x86 < 6)
                return -ENODEV;

        x86_pmu = amd_pmu;

        /* Events are common for all AMDs */
        memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
               sizeof(hw_cache_event_ids));

        return 0;
}

#else /* CONFIG_CPU_SUP_AMD */

static int amd_pmu_init(void)
{
        return 0;
}

#endif