[net-next-2.6.git] / arch / x86 / kernel / tlb_32.c

#include <linux/spinlock.h>
#include <linux/cpu.h>
#include <linux/interrupt.h>

#include <asm/tlbflush.h>

/*
 * Per-CPU TLB bookkeeping.  The code in this file reads the .active_mm and
 * .state members of each CPU's entry.  The initializer is positional
 * (&init_mm, then 0) -- presumably active_mm and state in that order;
 * TODO confirm against the definition of struct tlb_state.
 */
DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
			____cacheline_aligned = { &init_mm, 0, };

/* must come after the send_IPI functions above for inlining */
#include <mach_ipi.h>

/*
 *	Smarter SMP flushing macros.
 *		c/o Linus Torvalds.
 *
 *	These mean you can really definitely utterly forget about
 *	writing to user space from interrupts. (It's not allowed anyway).
 *
 *	Optimizations Manfred Spraul <manfred@colorfullife.com>
 */

/*
 * Parameters of the flush request currently in flight.  They are written by
 * native_flush_tlb_others() while it holds tlbstate_lock and are read by
 * smp_invalidate_interrupt() on the target CPUs.
 */
/* CPUs that still have to handle the request; each one clears its own bit. */
static cpumask_t flush_cpumask;
/* mm whose entries are being flushed; reset to NULL once a request is done. */
static struct mm_struct *flush_mm;
/* Address to flush, or TLB_FLUSH_ALL to flush the whole TLB. */
static unsigned long flush_va;
/* Held by the sender for the whole request, so requests never overlap. */
static DEFINE_SPINLOCK(tlbstate_lock);

/*
 * We cannot call mmdrop() because we are in interrupt context,
 * instead update mm->cpu_vm_mask.
 *
 * We need to reload %cr3 since the page tables may be going
 * away from under us..
 */
void leave_mm(int cpu)
{
	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
		BUG();
	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
	load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);

/*
 *
 * The flush IPI assumes that a thread switch happens in this order:
 * [cpu0: the cpu that switches]
 * 1) switch_mm() either 1a) or 1b)
 * 1a) thread switch to a different mm
 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
 * 	Stop ipi delivery for the old mm. This is not synchronized with
 * 	the other cpus, but smp_invalidate_interrupt ignores flush ipis
 * 	for the wrong mm, and in the worst case we perform a superfluous
 * 	tlb flush.
 * 1a2) set cpu_tlbstate to TLBSTATE_OK
 * 	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
 *	was in lazy tlb mode.
 * 1a3) update cpu_tlbstate[].active_mm
 * 	Now cpu0 accepts tlb flushes for the new mm.
 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
 * 	Now the other cpus will send tlb flush ipis.
 * 1a5) change cr3.
 * 1b) thread switch without mm change
 *	cpu_tlbstate[].active_mm is correct, cpu0 already handles
 *	flush ipis.
 * 1b1) set cpu_tlbstate to TLBSTATE_OK
 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
 * 	Atomically set the bit [other cpus will start sending flush ipis],
 * 	and test the bit.
 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %esp, i.e. current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %esp, i.e. it cannot use current->{active_,}mm.
 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
 *   runs in kernel space, the cpu could load tlb entries for user space
 *   pages.
 *
 * The good news is that cpu_tlbstate is local to each cpu, no
 * write/read ordering problems.
 */

/*
 * TLB flush IPI:
 *
 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 * 2) Leave the mm if we are in the lazy tlb mode.
 *
 * The request (flush_mm, flush_va, flush_cpumask) is published by
 * native_flush_tlb_others(), which spins until every target CPU has
 * cleared its own bit in flush_cpumask.
 *
 * @regs is not used.
 */

void smp_invalidate_interrupt(struct pt_regs *regs)
{
	unsigned long cpu;

	cpu = get_cpu();

	if (!cpu_isset(cpu, flush_cpumask)) {
		/*
		 * This was a BUG() but until someone can quote me the
		 * line from the intel manual that guarantees an IPI to
		 * multiple CPUs is retried _only_ on the erroring CPUs
		 * it's staying as a return.
		 *
		 * The interrupt was nevertheless delivered to this CPU,
		 * so it must be acknowledged even though there is
		 * nothing to flush; otherwise it is never EOI'd.
		 */
		ack_APIC_irq();
		goto out;
	}

	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
			if (flush_va == TLB_FLUSH_ALL)
				local_flush_tlb();
			else
				__flush_tlb_one(flush_va);
		} else
			leave_mm(cpu);
	}
	ack_APIC_irq();
	/*
	 * Clearing our bit tells the sender we are done; the barriers keep
	 * the flush above from being reordered past that store.
	 */
	smp_mb__before_clear_bit();
	cpu_clear(cpu, flush_cpumask);
	smp_mb__after_clear_bit();
out:
	put_cpu_no_resched();
	/* Counted for every invalidate IPI taken, spurious ones included. */
	__get_cpu_var(irq_stat).irq_tlb_count++;
}

/*
 * Ask the CPUs in *cpumaskp to flush @va (or everything, when @va is
 * TLB_FLUSH_ALL) for @mm, and busy-wait until all of them have done so.
 *
 * @cpumaskp: target CPUs; must be non-empty and must not contain the
 *            calling CPU (both enforced with BUG_ON).
 * @mm:       address space being flushed; must not be NULL.
 * @va:       value published in flush_va for the IPI handlers to act on.
 *
 * The request is handed to smp_invalidate_interrupt() through the
 * file-scope flush_mm / flush_va / flush_cpumask variables, so the whole
 * publish / send / wait sequence runs under tlbstate_lock.
 */
void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
			     unsigned long va)
{
	/* Work on a private copy; it may be trimmed below. */
	cpumask_t cpumask = *cpumaskp;

	/*
	 * A couple of (to be removed) sanity checks:
	 *
	 * - current CPU must not be in mask
	 * - mask must exist :)
	 */
	BUG_ON(cpus_empty(cpumask));
	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
	BUG_ON(!mm);

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * If a CPU which we ran on has gone down, OK: drop offline CPUs
	 * from the target set and return early if none are left.
	 */
	cpus_and(cpumask, cpumask, cpu_online_map);
	if (unlikely(cpus_empty(cpumask)))
		return;
#endif

	/*
	 * I'm not happy about this global shared spinlock in the
	 * MM hot path, but we'll see how contended it is.
	 * AK: x86-64 has a faster method that could be ported.
	 */
	spin_lock(&tlbstate_lock);

	/* Publish the request before marking the targets as pending. */
	flush_mm = mm;
	flush_va = va;
	cpus_or(flush_cpumask, cpumask, flush_cpumask);
	/*
	 * We have to send the IPI only to
	 * CPUs affected.
	 */
	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);

	/* Each target clears its own bit once it has handled the IPI. */
	while (!cpus_empty(flush_cpumask))
		/* nothing. lockup detection does not belong here */
		cpu_relax();

	/* Reset the request slot before letting the next sender in. */
	flush_mm = NULL;
	flush_va = 0;
	spin_unlock(&tlbstate_lock);
}

/*
 * Flush the whole TLB for the current task's mm: locally first, then on
 * every other CPU that has its bit set in the mm's cpu_vm_mask.
 */
void flush_tlb_current_task(void)
{
	struct mm_struct *mm = current->mm;
	cpumask_t others;

	/* Stay on this CPU while "self" is separated from "others". */
	preempt_disable();

	others = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), others);

	local_flush_tlb();

	/* Remote CPUs are only bothered when at least one uses this mm. */
	if (!cpus_empty(others))
		flush_tlb_others(others, mm, TLB_FLUSH_ALL);

	preempt_enable();
}

/*
 * Flush all TLB entries of @mm: on this CPU if the current task is running
 * on @mm, and on every other CPU that has its bit set in @mm's cpu_vm_mask.
 */
void flush_tlb_mm(struct mm_struct *mm)
{
	cpumask_t others;
	int this_cpu;

	preempt_disable();
	this_cpu = smp_processor_id();

	/* Snapshot the users of @mm, minus ourselves. */
	others = mm->cpu_vm_mask;
	cpu_clear(this_cpu, others);

	if (current->active_mm == mm) {
		/*
		 * No mm of its own means the task only has @mm as its
		 * active_mm (presumably lazy tlb mode -- leave_mm() BUGs
		 * otherwise), so drop the mm instead of flushing it.
		 */
		if (!current->mm)
			leave_mm(this_cpu);
		else
			local_flush_tlb();
	}

	if (!cpus_empty(others))
		flush_tlb_others(others, mm, TLB_FLUSH_ALL);

	preempt_enable();
}

/*
 * Flush the TLB entry for address @va in the mm that owns @vma: on this CPU
 * if the current task is running on that mm, and on every other CPU that has
 * its bit set in the mm's cpu_vm_mask.
 */
void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
	struct mm_struct *mm = vma->vm_mm;
	cpumask_t others;
	int this_cpu;

	preempt_disable();
	this_cpu = smp_processor_id();

	/* Snapshot the users of the mm, minus ourselves. */
	others = mm->cpu_vm_mask;
	cpu_clear(this_cpu, others);

	if (current->active_mm == mm) {
		/*
		 * No mm of its own means the task only has this mm as its
		 * active_mm (presumably lazy tlb mode -- leave_mm() BUGs
		 * otherwise), so drop the mm instead of flushing one page.
		 */
		if (!current->mm)
			leave_mm(this_cpu);
		else
			__flush_tlb_one(va);
	}

	if (!cpus_empty(others))
		flush_tlb_others(others, mm, va);

	preempt_enable();
}
EXPORT_SYMBOL(flush_tlb_page);

/*
 * Per-CPU worker for flush_tlb_all(): flush this CPU's TLB with
 * __flush_tlb_all() and, if the CPU's state is TLBSTATE_LAZY, leave the mm
 * as well.  @info is unused.
 */
static void do_flush_tlb_all(void *info)
{
	unsigned long this_cpu = smp_processor_id();

	__flush_tlb_all();

	if (per_cpu(cpu_tlbstate, this_cpu).state != TLBSTATE_LAZY)
		return;

	leave_mm(this_cpu);
}

/*
 * Flush the TLB on every CPU by running do_flush_tlb_all() on each of them
 * via on_each_cpu().
 */
void flush_tlb_all(void)
{
	/*
	 * No per-call data is needed, hence the NULL.  The trailing 1 is
	 * presumably the "wait for completion" flag -- TODO confirm against
	 * the on_each_cpu() prototype in use.
	 */
	on_each_cpu(do_flush_tlb_all, NULL, 1);
}
Commit	Line	Data
c048fdfe GC	1	#include <linux/spinlock.h>
	2	#include <linux/cpu.h>
	3	#include <linux/interrupt.h>
	4
	5	#include <asm/tlbflush.h>
	6
	7	DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
	8	____cacheline_aligned = { &init_mm, 0, };
	9
	10	/* must come after the send_IPI functions above for inlining */
	11	#include <mach_ipi.h>
	12
	13	/*
	14	* Smarter SMP flushing macros.
	15	* c/o Linus Torvalds.
	16	*
	17	* These mean you can really definitely utterly forget about
	18	* writing to user space from interrupts. (Its not allowed anyway).
	19	*
	20	* Optimizations Manfred Spraul <manfred@colorfullife.com>
	21	*/
	22
	23	static cpumask_t flush_cpumask;
	24	static struct mm_struct *flush_mm;
	25	static unsigned long flush_va;
	26	static DEFINE_SPINLOCK(tlbstate_lock);
	27
	28	/*
	29	* We cannot call mmdrop() because we are in interrupt context,
	30	* instead update mm->cpu_vm_mask.
	31	*
	32	* We need to reload %cr3 since the page tables may be going
	33	* away from under us..
	34	*/
	35	void leave_mm(int cpu)
	36	{
	37	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
	38	BUG();
	39	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
	40	load_cr3(swapper_pg_dir);
	41	}
	42	EXPORT_SYMBOL_GPL(leave_mm);
	43
	44	/*
	45	*
	46	* The flush IPI assumes that a thread switch happens in this order:
	47	* [cpu0: the cpu that switches]
	48	* 1) switch_mm() either 1a) or 1b)
	49	* 1a) thread switch to a different mm
	50	* 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
	51	* Stop ipi delivery for the old mm. This is not synchronized with
	52	* the other cpus, but smp_invalidate_interrupt ignore flush ipis
	53	* for the wrong mm, and in the worst case we perform a superfluous
	54	* tlb flush.
	55	* 1a2) set cpu_tlbstate to TLBSTATE_OK
	56	* Now the smp_invalidate_interrupt won't call leave_mm if cpu0
	57	* was in lazy tlb mode.
	58	* 1a3) update cpu_tlbstate[].active_mm
	59	* Now cpu0 accepts tlb flushes for the new mm.
	60	* 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
	61	* Now the other cpus will send tlb flush ipis.
	62	* 1a4) change cr3.
	63	* 1b) thread switch without mm change
	64	* cpu_tlbstate[].active_mm is correct, cpu0 already handles
65	* flush ipis.
66	* 1b1) set cpu_tlbstate to TLBSTATE_OK
67	* 1b2) test_and_set the cpu bit in cpu_vm_mask.
68	* Atomically set the bit [other cpus will start sending flush ipis],
69	* and test the bit.
70	* 1b3) if the bit was 0: leave_mm was called, flush the tlb.
71	* 2) switch %%esp, ie current
72	*
73	* The interrupt must handle 2 special cases:
74	* - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
75	* - the cpu performs speculative tlb reads, i.e. even if the cpu only
76	* runs in kernel space, the cpu could load tlb entries for user space
77	* pages.
78	*
79	* The good news is that cpu_tlbstate is local to each cpu, no
80	* write/read ordering problems.
81	*/
82
83	/*
84	* TLB flush IPI:
85	*
86	* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
87	* 2) Leave the mm if we are in the lazy tlb mode.
88	*/
89
90	void smp_invalidate_interrupt(struct pt_regs *regs)
91	{
92	unsigned long cpu;
93
94	cpu = get_cpu();
95
96	if (!cpu_isset(cpu, flush_cpumask))
97	goto out;
98	/*
99	* This was a BUG() but until someone can quote me the
100	* line from the intel manual that guarantees an IPI to
101	* multiple CPUs is retried _only_ on the erroring CPUs
102	* its staying as a return
103	*
104	* BUG();
105	*/
106
107	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
108	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
109	if (flush_va == TLB_FLUSH_ALL)
110	local_flush_tlb();
111	else
112	__flush_tlb_one(flush_va);
113	} else
114	leave_mm(cpu);
115	}
116	ack_APIC_irq();
117	smp_mb__before_clear_bit();
118	cpu_clear(cpu, flush_cpumask);
119	smp_mb__after_clear_bit();
120	out:
121	put_cpu_no_resched();
122	__get_cpu_var(irq_stat).irq_tlb_count++;
123	}
124
	125	void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
126	unsigned long va)
127	{
128	cpumask_t cpumask = *cpumaskp;
129
130	/*
131	* A couple of (to be removed) sanity checks:
132	*
133	* - current CPU must not be in mask
134	* - mask must exist :)
135	*/
136	BUG_ON(cpus_empty(cpumask));
137	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
138	BUG_ON(!mm);
139
140	#ifdef CONFIG_HOTPLUG_CPU
141	/* If a CPU which we ran on has gone down, OK. */
142	cpus_and(cpumask, cpumask, cpu_online_map);
143	if (unlikely(cpus_empty(cpumask)))
144	return;
145	#endif
146
147	/*
148	* i'm not happy about this global shared spinlock in the
149	* MM hot path, but we'll see how contended it is.
150	* AK: x86-64 has a faster method that could be ported.
151	*/
152	spin_lock(&tlbstate_lock);
153
154	flush_mm = mm;
155	flush_va = va;
156	cpus_or(flush_cpumask, cpumask, flush_cpumask);
157	/*
158	* We have to send the IPI only to
159	* CPUs affected.
160	*/
161	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
162
163	while (!cpus_empty(flush_cpumask))
164	/* nothing. lockup detection does not belong here */
165	cpu_relax();
166
167	flush_mm = NULL;
168	flush_va = 0;
169	spin_unlock(&tlbstate_lock);
170	}
171
172	void flush_tlb_current_task(void)
173	{
174	struct mm_struct *mm = current->mm;
175	cpumask_t cpu_mask;
176
177	preempt_disable();
178	cpu_mask = mm->cpu_vm_mask;
179	cpu_clear(smp_processor_id(), cpu_mask);
180
181	local_flush_tlb();
182	if (!cpus_empty(cpu_mask))
183	flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
184	preempt_enable();
185	}
186
187	void flush_tlb_mm(struct mm_struct *mm)
188	{
189	cpumask_t cpu_mask;
190
191	preempt_disable();
192	cpu_mask = mm->cpu_vm_mask;
193	cpu_clear(smp_processor_id(), cpu_mask);
194
195	if (current->active_mm == mm) {
196	if (current->mm)
197	local_flush_tlb();
198	else
199	leave_mm(smp_processor_id());
200	}
201	if (!cpus_empty(cpu_mask))
202	flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
203
204	preempt_enable();
205	}
206
207	void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
208	{
209	struct mm_struct *mm = vma->vm_mm;
210	cpumask_t cpu_mask;
211
212	preempt_disable();
213	cpu_mask = mm->cpu_vm_mask;
214	cpu_clear(smp_processor_id(), cpu_mask);
215
216	if (current->active_mm == mm) {
217	if (current->mm)
218	__flush_tlb_one(va);
219	else
220	leave_mm(smp_processor_id());
221	}
222
223	if (!cpus_empty(cpu_mask))
224	flush_tlb_others(cpu_mask, mm, va);
225
226	preempt_enable();
227	}
228	EXPORT_SYMBOL(flush_tlb_page);
229
230	static void do_flush_tlb_all(void *info)
231	{
232	unsigned long cpu = smp_processor_id();
233
234	__flush_tlb_all();
235	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
236	leave_mm(cpu);
237	}
238
239	void flush_tlb_all(void)
240	{
15c8b6c1	241	on_each_cpu(do_flush_tlb_all, NULL, 1);
c048fdfe GC	242	}
c048fdfe GC	243