]>
Commit | Line | Data |
---|---|---|
bbab4f3b ZA |
1 | /* |
2 | * VMI paravirtual timer support routines. | |
3 | * | |
4 | * Copyright (C) 2005, VMware, Inc. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, but | |
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | |
14 | * NON INFRINGEMENT. See the GNU General Public License for more | |
15 | * details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
20 | * | |
21 | * Send feedback to dhecht@vmware.com | |
22 | * | |
23 | */ | |
24 | ||
25 | /* | |
26 | * Portions of this code from arch/i386/kernel/timers/timer_tsc.c. | |
27 | * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c. | |
28 | * See comments there for proper credits. | |
29 | */ | |
30 | ||
31 | #include <linux/spinlock.h> | |
32 | #include <linux/init.h> | |
33 | #include <linux/errno.h> | |
34 | #include <linux/jiffies.h> | |
35 | #include <linux/interrupt.h> | |
36 | #include <linux/kernel_stat.h> | |
37 | #include <linux/rcupdate.h> | |
38 | #include <linux/clocksource.h> | |
39 | ||
40 | #include <asm/timer.h> | |
41 | #include <asm/io.h> | |
42 | #include <asm/apic.h> | |
43 | #include <asm/div64.h> | |
44 | #include <asm/timer.h> | |
45 | #include <asm/desc.h> | |
46 | ||
47 | #include <asm/vmi.h> | |
48 | #include <asm/vmi_time.h> | |
49 | ||
50 | #include <mach_timer.h> | |
51 | #include <io_ports.h> | |
52 | ||
53 | #ifdef CONFIG_X86_LOCAL_APIC | |
54 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT | |
55 | #else | |
56 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0 | |
57 | #endif | |
58 | ||
59 | /* Cached VMI operations */ | |
60 | struct vmi_timer_ops vmi_timer_ops; | |
61 | ||
62 | #ifdef CONFIG_NO_IDLE_HZ | |
63 | ||
64 | /* /proc/sys/kernel/hz_timer state. */ | |
65 | int sysctl_hz_timer; | |
66 | ||
67 | /* Some stats */ | |
68 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs); | |
69 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies); | |
70 | static DEFINE_PER_CPU(unsigned long, idle_start_jiffies); | |
71 | ||
72 | #endif /* CONFIG_NO_IDLE_HZ */ | |
73 | ||
74 | /* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */ | |
75 | static int alarm_hz = CONFIG_VMI_ALARM_HZ; | |
76 | ||
77 | /* Cache of the value get_cycle_frequency / HZ. */ | |
78 | static signed long long cycles_per_jiffy; | |
79 | ||
80 | /* Cache of the value get_cycle_frequency / alarm_hz. */ | |
81 | static signed long long cycles_per_alarm; | |
82 | ||
83 | /* The number of cycles accounted for by the 'jiffies'/'xtime' count. | |
84 | * Protected by xtime_lock. */ | |
85 | static unsigned long long real_cycles_accounted_system; | |
86 | ||
87 | /* The number of cycles accounted for by update_process_times(), per cpu. */ | |
88 | static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu); | |
89 | ||
90 | /* The number of stolen cycles accounted, per cpu. */ | |
91 | static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu); | |
92 | ||
93 | /* Clock source. */ | |
94 | static cycle_t read_real_cycles(void) | |
95 | { | |
96 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | |
97 | } | |
98 | ||
99 | static cycle_t read_available_cycles(void) | |
100 | { | |
101 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); | |
102 | } | |
103 | ||
104 | #if 0 | |
105 | static cycle_t read_stolen_cycles(void) | |
106 | { | |
107 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN); | |
108 | } | |
109 | #endif /* 0 */ | |
110 | ||
111 | static struct clocksource clocksource_vmi = { | |
112 | .name = "vmi-timer", | |
113 | .rating = 450, | |
114 | .read = read_real_cycles, | |
115 | .mask = CLOCKSOURCE_MASK(64), | |
116 | .mult = 0, /* to be set */ | |
117 | .shift = 22, | |
73b08d2a | 118 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
bbab4f3b ZA |
119 | }; |
120 | ||
121 | ||
122 | /* Timer interrupt handler. */ | |
123 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id); | |
124 | ||
125 | static struct irqaction vmi_timer_irq = { | |
e585047e TG |
126 | .handler = vmi_timer_interrupt, |
127 | .flags = IRQF_DISABLED, | |
128 | .mask = CPU_MASK_NONE, | |
129 | .name = "VMI-alarm", | |
bbab4f3b ZA |
130 | }; |
131 | ||
132 | /* Alarm rate */ | |
133 | static int __init vmi_timer_alarm_rate_setup(char* str) | |
134 | { | |
135 | int alarm_rate; | |
136 | if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) { | |
137 | alarm_hz = alarm_rate; | |
138 | printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz); | |
139 | } | |
140 | return 1; | |
141 | } | |
142 | __setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup); | |
143 | ||
144 | ||
145 | /* Initialization */ | |
146 | static void vmi_get_wallclock_ts(struct timespec *ts) | |
147 | { | |
148 | unsigned long long wallclock; | |
149 | wallclock = vmi_timer_ops.get_wallclock(); // nsec units | |
150 | ts->tv_nsec = do_div(wallclock, 1000000000); | |
151 | ts->tv_sec = wallclock; | |
152 | } | |
153 | ||
bbab4f3b ZA |
154 | unsigned long vmi_get_wallclock(void) |
155 | { | |
156 | struct timespec ts; | |
157 | vmi_get_wallclock_ts(&ts); | |
158 | return ts.tv_sec; | |
159 | } | |
160 | ||
/*
 * Setting the wallclock is not implemented: the requested time is ignored
 * and every call reports failure by returning -1.
 */
int vmi_set_wallclock(unsigned long now)
{
	(void)now;

	return -1;
}
165 | ||
/* Return the current available-cycles counter. */
unsigned long long vmi_get_sched_cycles(void)
{
	unsigned long long avail_now = read_available_cycles();

	return avail_now;
}
170 | ||
1182d852 ZA |
171 | unsigned long vmi_cpu_khz(void) |
172 | { | |
173 | unsigned long long khz; | |
174 | ||
175 | khz = vmi_timer_ops.get_cycle_frequency(); | |
176 | (void)do_div(khz, 1000); | |
177 | return khz; | |
178 | } | |
179 | ||
bbab4f3b ZA |
180 | void __init vmi_time_init(void) |
181 | { | |
182 | unsigned long long cycles_per_sec, cycles_per_msec; | |
90736e20 | 183 | unsigned long flags; |
bbab4f3b | 184 | |
90736e20 | 185 | local_irq_save(flags); |
bbab4f3b ZA |
186 | setup_irq(0, &vmi_timer_irq); |
187 | #ifdef CONFIG_X86_LOCAL_APIC | |
188 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt); | |
189 | #endif | |
190 | ||
bbab4f3b | 191 | real_cycles_accounted_system = read_real_cycles(); |
bbab4f3b ZA |
192 | per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles(); |
193 | ||
194 | cycles_per_sec = vmi_timer_ops.get_cycle_frequency(); | |
bbab4f3b ZA |
195 | cycles_per_jiffy = cycles_per_sec; |
196 | (void)do_div(cycles_per_jiffy, HZ); | |
197 | cycles_per_alarm = cycles_per_sec; | |
198 | (void)do_div(cycles_per_alarm, alarm_hz); | |
199 | cycles_per_msec = cycles_per_sec; | |
200 | (void)do_div(cycles_per_msec, 1000); | |
bbab4f3b ZA |
201 | |
202 | printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;" | |
203 | "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy, | |
204 | cycles_per_alarm); | |
205 | ||
206 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | |
207 | clocksource_vmi.shift); | |
208 | if (clocksource_register(&clocksource_vmi)) | |
209 | printk(KERN_WARNING "Error registering VMITIME clocksource."); | |
210 | ||
211 | /* Disable PIT. */ | |
212 | outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | |
213 | ||
214 | /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu | |
215 | * reduce the latency calling update_process_times. */ | |
216 | vmi_timer_ops.set_alarm( | |
217 | VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | |
218 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | |
219 | cycles_per_alarm); | |
90736e20 ZA |
220 | |
221 | local_irq_restore(flags); | |
bbab4f3b ZA |
222 | } |
223 | ||
224 | #ifdef CONFIG_X86_LOCAL_APIC | |
225 | ||
226 | void __init vmi_timer_setup_boot_alarm(void) | |
227 | { | |
228 | local_irq_disable(); | |
229 | ||
230 | /* Route the interrupt to the correct vector. */ | |
231 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | |
232 | ||
233 | /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */ | |
234 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); | |
235 | vmi_timer_ops.set_alarm( | |
236 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | |
237 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | |
238 | cycles_per_alarm); | |
239 | local_irq_enable(); | |
240 | } | |
241 | ||
242 | /* Initialize the time accounting variables for an AP on an SMP system. | |
243 | * Also, set the local alarm for the AP. */ | |
c6b36e9a | 244 | void __devinit vmi_timer_setup_secondary_alarm(void) |
bbab4f3b ZA |
245 | { |
246 | int cpu = smp_processor_id(); | |
247 | ||
248 | /* Route the interrupt to the correct vector. */ | |
249 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | |
250 | ||
251 | per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles(); | |
252 | ||
253 | vmi_timer_ops.set_alarm( | |
254 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | |
255 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | |
256 | cycles_per_alarm); | |
257 | } | |
258 | ||
259 | #endif | |
260 | ||
261 | /* Update system wide (real) time accounting (e.g. jiffies, xtime). */ | |
262 | static void vmi_account_real_cycles(unsigned long long cur_real_cycles) | |
263 | { | |
264 | long long cycles_not_accounted; | |
265 | ||
266 | write_seqlock(&xtime_lock); | |
267 | ||
268 | cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system; | |
269 | while (cycles_not_accounted >= cycles_per_jiffy) { | |
7507ba34 | 270 | /* systems wide jiffies. */ |
bbab4f3b ZA |
271 | do_timer(1); |
272 | ||
273 | cycles_not_accounted -= cycles_per_jiffy; | |
274 | real_cycles_accounted_system += cycles_per_jiffy; | |
275 | } | |
276 | ||
bbab4f3b ZA |
277 | write_sequnlock(&xtime_lock); |
278 | } | |
279 | ||
280 | /* Update per-cpu process times. */ | |
281 | static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu, | |
282 | unsigned long long cur_process_times_cycles) | |
283 | { | |
284 | long long cycles_not_accounted; | |
285 | cycles_not_accounted = cur_process_times_cycles - | |
286 | per_cpu(process_times_cycles_accounted_cpu, cpu); | |
287 | ||
288 | while (cycles_not_accounted >= cycles_per_jiffy) { | |
289 | /* Account time to the current process. This includes | |
290 | * calling into the scheduler to decrement the timeslice | |
291 | * and possibly reschedule.*/ | |
292 | update_process_times(user_mode(regs)); | |
293 | /* XXX handle /proc/profile multiplier. */ | |
294 | profile_tick(CPU_PROFILING); | |
295 | ||
296 | cycles_not_accounted -= cycles_per_jiffy; | |
297 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | |
298 | } | |
299 | } | |
300 | ||
301 | #ifdef CONFIG_NO_IDLE_HZ | |
302 | /* Update per-cpu idle times. Used when a no-hz halt is ended. */ | |
303 | static void vmi_account_no_hz_idle_cycles(int cpu, | |
304 | unsigned long long cur_process_times_cycles) | |
305 | { | |
306 | long long cycles_not_accounted; | |
307 | unsigned long no_idle_hz_jiffies = 0; | |
308 | ||
309 | cycles_not_accounted = cur_process_times_cycles - | |
310 | per_cpu(process_times_cycles_accounted_cpu, cpu); | |
311 | ||
312 | while (cycles_not_accounted >= cycles_per_jiffy) { | |
313 | no_idle_hz_jiffies++; | |
314 | cycles_not_accounted -= cycles_per_jiffy; | |
315 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | |
316 | } | |
317 | /* Account time to the idle process. */ | |
318 | account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies)); | |
319 | } | |
320 | #endif | |
321 | ||
322 | /* Update per-cpu stolen time. */ | |
323 | static void vmi_account_stolen_cycles(int cpu, | |
324 | unsigned long long cur_real_cycles, | |
325 | unsigned long long cur_avail_cycles) | |
326 | { | |
327 | long long stolen_cycles_not_accounted; | |
328 | unsigned long stolen_jiffies = 0; | |
329 | ||
330 | if (cur_real_cycles < cur_avail_cycles) | |
331 | return; | |
332 | ||
333 | stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles - | |
334 | per_cpu(stolen_cycles_accounted_cpu, cpu); | |
335 | ||
336 | while (stolen_cycles_not_accounted >= cycles_per_jiffy) { | |
337 | stolen_jiffies++; | |
338 | stolen_cycles_not_accounted -= cycles_per_jiffy; | |
339 | per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | |
340 | } | |
341 | /* HACK: pass NULL to force time onto cpustat->steal. */ | |
342 | account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies)); | |
343 | } | |
344 | ||
/*
 * Common body of the timer interrupt: used by the IRQ0 handler (UP without
 * local APIC) and by the local-APIC LVTT handler (UP with local APIC, or
 * SMP).
 *
 * Samples the real and available cycle counters once, then runs the three
 * accounting passes against those samples.
 */
static void vmi_local_timer_interrupt(int cpu)
{
	unsigned long long real_now = read_real_cycles();
	unsigned long long avail_now = read_available_cycles();

	/* System-wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);

	/* Per-cpu process times. */
	vmi_account_process_times_cycles(get_irq_regs(), cpu, avail_now);

	/* Time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);
}
360 | ||
361 | #ifdef CONFIG_NO_IDLE_HZ | |
362 | ||
363 | /* Must be called only from idle loop, with interrupts disabled. */ | |
364 | int vmi_stop_hz_timer(void) | |
365 | { | |
366 | /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */ | |
367 | ||
368 | unsigned long seq, next; | |
369 | unsigned long long real_cycles_expiry; | |
370 | int cpu = smp_processor_id(); | |
bbab4f3b ZA |
371 | |
372 | BUG_ON(!irqs_disabled()); | |
373 | if (sysctl_hz_timer != 0) | |
374 | return 0; | |
375 | ||
376 | cpu_set(cpu, nohz_cpu_mask); | |
377 | smp_mb(); | |
7507ba34 | 378 | |
bbab4f3b | 379 | if (rcu_needs_cpu(cpu) || local_softirq_pending() || |
7507ba34 ZA |
380 | (next = next_timer_interrupt(), |
381 | time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) { | |
bbab4f3b | 382 | cpu_clear(cpu, nohz_cpu_mask); |
7507ba34 ZA |
383 | return 0; |
384 | } | |
bbab4f3b ZA |
385 | |
386 | /* Convert jiffies to the real cycle counter. */ | |
387 | do { | |
388 | seq = read_seqbegin(&xtime_lock); | |
389 | real_cycles_expiry = real_cycles_accounted_system + | |
390 | (long)(next - jiffies) * cycles_per_jiffy; | |
391 | } while (read_seqretry(&xtime_lock, seq)); | |
392 | ||
393 | /* This cpu is going idle. Disable the periodic alarm. */ | |
7507ba34 ZA |
394 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); |
395 | per_cpu(idle_start_jiffies, cpu) = jiffies; | |
bbab4f3b ZA |
396 | /* Set the real time alarm to expire at the next event. */ |
397 | vmi_timer_ops.set_alarm( | |
7507ba34 ZA |
398 | VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL, |
399 | real_cycles_expiry, 0); | |
400 | return 1; | |
bbab4f3b ZA |
401 | } |
402 | ||
403 | static void vmi_reenable_hz_timer(int cpu) | |
404 | { | |
405 | /* For /proc/vmi/info idle_hz stat. */ | |
406 | per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu); | |
407 | per_cpu(vmi_idle_no_hz_irqs, cpu)++; | |
408 | ||
409 | /* Don't bother explicitly cancelling the one-shot alarm -- at | |
410 | * worse we will receive a spurious timer interrupt. */ | |
411 | vmi_timer_ops.set_alarm( | |
412 | VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | |
413 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | |
414 | cycles_per_alarm); | |
415 | /* Indicate this cpu is no longer nohz idle. */ | |
416 | cpu_clear(cpu, nohz_cpu_mask); | |
417 | } | |
418 | ||
/*
 * Called from interrupt handlers while the (local) HZ timer is disabled.
 *
 * Accounts the time that passed while the HZ timer was off and then turns
 * the periodic alarm back on.  Interrupts must be disabled.
 */
void vmi_account_time_restart_hz_timer(void)
{
	unsigned long long real_now, avail_now;
	int this_cpu = smp_processor_id();

	BUG_ON(!irqs_disabled());

	/* Sample both counters once; every pass below uses these values. */
	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* System-wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);

	/* Per-cpu idle times. */
	vmi_account_no_hz_idle_cycles(this_cpu, avail_now);

	/* Time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(this_cpu, real_now, avail_now);

	/* Reenable the hz timer. */
	vmi_reenable_hz_timer(this_cpu);
}
438 | ||
439 | #endif /* CONFIG_NO_IDLE_HZ */ | |
440 | ||
441 | /* UP (and no local-APIC) VMI-timer alarm interrupt handler. | |
442 | * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after | |
443 | * APIC setup and setup_boot_vmi_alarm() is called. */ | |
444 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | |
445 | { | |
446 | vmi_local_timer_interrupt(smp_processor_id()); | |
447 | return IRQ_HANDLED; | |
448 | } | |
449 | ||
450 | #ifdef CONFIG_X86_LOCAL_APIC | |
451 | ||
452 | /* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector. | |
453 | * Also used in UP when CONFIG_X86_LOCAL_APIC. | |
454 | * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */ | |
455 | void smp_apic_vmi_timer_interrupt(struct pt_regs *regs) | |
456 | { | |
457 | struct pt_regs *old_regs = set_irq_regs(regs); | |
458 | int cpu = smp_processor_id(); | |
459 | ||
460 | /* | |
461 | * the NMI deadlock-detector uses this. | |
462 | */ | |
463 | per_cpu(irq_stat,cpu).apic_timer_irqs++; | |
464 | ||
465 | /* | |
466 | * NOTE! We'd better ACK the irq immediately, | |
467 | * because timer handling can be slow. | |
468 | */ | |
469 | ack_APIC_irq(); | |
470 | ||
471 | /* | |
472 | * update_process_times() expects us to have done irq_enter(). | |
473 | * Besides, if we don't timer interrupts ignore the global | |
474 | * interrupt lock, which is the WrongThing (tm) to do. | |
475 | */ | |
476 | irq_enter(); | |
477 | vmi_local_timer_interrupt(cpu); | |
478 | irq_exit(); | |
479 | set_irq_regs(old_regs); | |
480 | } | |
481 | ||
482 | #endif /* CONFIG_X86_LOCAL_APIC */ |