]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Intel specific MCE features. | |
3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> | |
88ccbedd AK |
4 | * Copyright (C) 2008, 2009 Intel Corporation |
5 | * Author: Andi Kleen | |
1da177e4 LT |
6 | */ |
7 | ||
5a0e3ad6 | 8 | #include <linux/gfp.h> |
1da177e4 LT |
9 | #include <linux/init.h> |
10 | #include <linux/interrupt.h> | |
11 | #include <linux/percpu.h> | |
d43c36dc | 12 | #include <linux/sched.h> |
1bf7b31e | 13 | #include <asm/apic.h> |
1da177e4 LT |
14 | #include <asm/processor.h> |
15 | #include <asm/msr.h> | |
16 | #include <asm/mce.h> | |
1da177e4 | 17 | |
88ccbedd AK |
18 | /* |
19 | * Support for Intel Corrected Machine Check Interrupts. This allows | |
20 | * the CPU to raise an interrupt when a corrected machine check happened. | |
21 | * Normally we pick those up using a regular polling timer. | |
22 | * Also supports reliable discovery of shared banks. | |
23 | */ | |
24 | ||
/*
 * Per-CPU bitmap of the MCE banks for which this CPU has enabled CMCI.
 * A bit is set by cmci_discover() when the enable bit sticks and is
 * cleared again by cmci_clear().
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * cmci_discover_lock serializes bank discovery and bank release, so
 * that parallel attempts cannot race against each other.
 */
static DEFINE_SPINLOCK(cmci_discover_lock);

/* Threshold value written to MCx_CTL2 together with CMCI_EN. */
#define CMCI_THRESHOLD 1
34 | ||
df20e2eb | 35 | static int cmci_supported(int *banks) |
88ccbedd AK |
36 | { |
37 | u64 cap; | |
38 | ||
62fdac59 HS |
39 | if (mce_cmci_disabled || mce_ignore_ce) |
40 | return 0; | |
41 | ||
88ccbedd AK |
42 | /* |
43 | * Vendor check is not strictly needed, but the initial | |
44 | * initialization is vendor keyed and this | |
45 | * makes sure none of the backdoors are entered otherwise. | |
46 | */ | |
47 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | |
48 | return 0; | |
49 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) | |
50 | return 0; | |
51 | rdmsrl(MSR_IA32_MCG_CAP, cap); | |
52 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); | |
53 | return !!(cap & MCG_CMCI_P); | |
54 | } | |
55 | ||
56 | /* | |
57 | * The interrupt handler. This is called on every event. | |
58 | * Just call the poller directly to log any events. | |
59 | * This could in theory increase the threshold under high load, | |
60 | * but doesn't for now. | |
61 | */ | |
62 | static void intel_threshold_interrupt(void) | |
63 | { | |
64 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | |
9ff36ee9 | 65 | mce_notify_irq(); |
88ccbedd AK |
66 | } |
67 | ||
68 | static void print_update(char *type, int *hdr, int num) | |
69 | { | |
70 | if (*hdr == 0) | |
71 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); | |
72 | *hdr = 1; | |
73 | printk(KERN_CONT " %s:%d", type, num); | |
74 | } | |
75 | ||
76 | /* | |
77 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks | |
78 | * on this CPU. Use the algorithm recommended in the SDM to discover shared | |
79 | * banks. | |
80 | */ | |
df20e2eb | 81 | static void cmci_discover(int banks, int boot) |
88ccbedd AK |
82 | { |
83 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | |
e5299926 | 84 | unsigned long flags; |
88ccbedd AK |
85 | int hdr = 0; |
86 | int i; | |
87 | ||
e5299926 | 88 | spin_lock_irqsave(&cmci_discover_lock, flags); |
88ccbedd AK |
89 | for (i = 0; i < banks; i++) { |
90 | u64 val; | |
91 | ||
92 | if (test_bit(i, owned)) | |
93 | continue; | |
94 | ||
a2d32bcb | 95 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
88ccbedd AK |
96 | |
97 | /* Already owned by someone else? */ | |
98 | if (val & CMCI_EN) { | |
10fb7f1f | 99 | if (test_and_clear_bit(i, owned) && !boot) |
88ccbedd AK |
100 | print_update("SHD", &hdr, i); |
101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | |
102 | continue; | |
103 | } | |
104 | ||
105 | val |= CMCI_EN | CMCI_THRESHOLD; | |
a2d32bcb AK |
106 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
107 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | |
88ccbedd AK |
108 | |
109 | /* Did the enable bit stick? -- the bank supports CMCI */ | |
110 | if (val & CMCI_EN) { | |
10fb7f1f | 111 | if (!test_and_set_bit(i, owned) && !boot) |
88ccbedd AK |
112 | print_update("CMCI", &hdr, i); |
113 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | |
114 | } else { | |
115 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | |
116 | } | |
117 | } | |
e5299926 | 118 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
88ccbedd AK |
119 | if (hdr) |
120 | printk(KERN_CONT "\n"); | |
121 | } | |
122 | ||
123 | /* | |
124 | * Just in case we missed an event during initialization check | |
125 | * all the CMCI owned banks. | |
126 | */ | |
df20e2eb | 127 | void cmci_recheck(void) |
88ccbedd AK |
128 | { |
129 | unsigned long flags; | |
130 | int banks; | |
131 | ||
132 | if (!mce_available(¤t_cpu_data) || !cmci_supported(&banks)) | |
133 | return; | |
134 | local_irq_save(flags); | |
135 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | |
136 | local_irq_restore(flags); | |
137 | } | |
138 | ||
139 | /* | |
140 | * Disable CMCI on this CPU for all banks it owns when it goes down. | |
141 | * This allows other CPUs to claim the banks on rediscovery. | |
142 | */ | |
df20e2eb | 143 | void cmci_clear(void) |
88ccbedd | 144 | { |
e5299926 | 145 | unsigned long flags; |
88ccbedd AK |
146 | int i; |
147 | int banks; | |
148 | u64 val; | |
149 | ||
150 | if (!cmci_supported(&banks)) | |
151 | return; | |
e5299926 | 152 | spin_lock_irqsave(&cmci_discover_lock, flags); |
88ccbedd AK |
153 | for (i = 0; i < banks; i++) { |
154 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | |
155 | continue; | |
156 | /* Disable CMCI */ | |
a2d32bcb | 157 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
88ccbedd | 158 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); |
a2d32bcb | 159 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
88ccbedd AK |
160 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
161 | } | |
e5299926 | 162 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
88ccbedd AK |
163 | } |
164 | ||
165 | /* | |
166 | * After a CPU went down cycle through all the others and rediscover | |
167 | * Must run in process context. | |
168 | */ | |
df20e2eb | 169 | void cmci_rediscover(int dying) |
88ccbedd AK |
170 | { |
171 | int banks; | |
172 | int cpu; | |
173 | cpumask_var_t old; | |
174 | ||
175 | if (!cmci_supported(&banks)) | |
176 | return; | |
177 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) | |
178 | return; | |
179 | cpumask_copy(old, ¤t->cpus_allowed); | |
180 | ||
61a021a0 | 181 | for_each_online_cpu(cpu) { |
88ccbedd AK |
182 | if (cpu == dying) |
183 | continue; | |
4f062896 | 184 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) |
88ccbedd AK |
185 | continue; |
186 | /* Recheck banks in case CPUs don't all have the same */ | |
187 | if (cmci_supported(&banks)) | |
188 | cmci_discover(banks, 0); | |
189 | } | |
190 | ||
191 | set_cpus_allowed_ptr(current, old); | |
192 | free_cpumask_var(old); | |
193 | } | |
194 | ||
/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 *
 * Reruns bank discovery in non-boot mode; a no-op when CMCI is not
 * supported.
 */
void cmci_reenable(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks, 0);
}
204 | ||
514ec49a | 205 | static void intel_init_cmci(void) |
88ccbedd AK |
206 | { |
207 | int banks; | |
208 | ||
209 | if (!cmci_supported(&banks)) | |
210 | return; | |
211 | ||
212 | mce_threshold_vector = intel_threshold_interrupt; | |
213 | cmci_discover(banks, 1); | |
214 | /* | |
215 | * For CPU #0 this runs with still disabled APIC, but that's | |
216 | * ok because only the vector is set up. We still do another | |
217 | * check for the banks later for CPU #0 just to make sure | |
218 | * to not miss any events. | |
219 | */ | |
220 | apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); | |
221 | cmci_recheck(); | |
222 | } | |
223 | ||
/*
 * Entry point for the Intel specific MCE features of a CPU: runs
 * intel_init_thermal() on @c and then intel_init_cmci().
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
}