]> bbs.cooldavid.org Git - net-next-2.6.git/blame - fs/btrfs/async-thread.c
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs...
[net-next-2.6.git] / fs / btrfs / async-thread.c
CommitLineData
8b712842
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/kthread.h>
20#include <linux/list.h>
21#include <linux/spinlock.h>
b51912c9 22#include <linux/freezer.h>
8b712842
CM
23#include "async-thread.h"
24
4a69a410
CM
/*
 * Bit positions used with set_bit()/test_bit() on btrfs_work->flags.
 */
/* set when the item is queued; cleared just before its func is called */
#define WORK_QUEUED_BIT		0
/* set by run_ordered_completions() after the item's func has returned */
#define WORK_DONE_BIT		1
/* claimed (test_and_set) by whoever calls the item's ordered_func */
#define WORK_ORDER_DONE_BIT	2
/* set by btrfs_set_work_high_prio(); selects the prio lists on queueing */
#define WORK_HIGH_PRIO_BIT	3
4a69a410 29
8b712842
CM
30/*
31 * container for the kthread task pointer and the list of pending work
32 * One of these is allocated per thread.
33 */
34struct btrfs_worker_thread {
35d8ba66
CM
35 /* pool we belong to */
36 struct btrfs_workers *workers;
37
8b712842
CM
38 /* list of struct btrfs_work that are waiting for service */
39 struct list_head pending;
d313d7a3 40 struct list_head prio_pending;
8b712842
CM
41
42 /* list of worker threads from struct btrfs_workers */
43 struct list_head worker_list;
44
45 /* kthread */
46 struct task_struct *task;
47
48 /* number of things on the pending list */
49 atomic_t num_pending;
53863232 50
9042846b
CM
51 /* reference counter for this struct */
52 atomic_t refs;
53
4854ddd0 54 unsigned long sequence;
8b712842
CM
55
56 /* protects the pending list. */
57 spinlock_t lock;
58
59 /* set to non-zero when this thread is already awake and kicking */
60 int working;
35d8ba66
CM
61
62 /* are we currently idle */
63 int idle;
8b712842
CM
64};
65
35d8ba66
CM
66/*
67 * helper function to move a thread onto the idle list after it
68 * has finished some requests.
69 */
70static void check_idle_worker(struct btrfs_worker_thread *worker)
71{
72 if (!worker->idle && atomic_read(&worker->num_pending) <
73 worker->workers->idle_thresh / 2) {
74 unsigned long flags;
75 spin_lock_irqsave(&worker->workers->lock, flags);
76 worker->idle = 1;
77 list_move(&worker->worker_list, &worker->workers->idle_list);
78 spin_unlock_irqrestore(&worker->workers->lock, flags);
79 }
80}
81
82/*
83 * helper function to move a thread off the idle list after new
84 * pending work is added.
85 */
86static void check_busy_worker(struct btrfs_worker_thread *worker)
87{
88 if (worker->idle && atomic_read(&worker->num_pending) >=
89 worker->workers->idle_thresh) {
90 unsigned long flags;
91 spin_lock_irqsave(&worker->workers->lock, flags);
92 worker->idle = 0;
93 list_move_tail(&worker->worker_list,
94 &worker->workers->worker_list);
95 spin_unlock_irqrestore(&worker->workers->lock, flags);
96 }
97}
98
9042846b
CM
99static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
100{
101 struct btrfs_workers *workers = worker->workers;
102 unsigned long flags;
103
104 rmb();
105 if (!workers->atomic_start_pending)
106 return;
107
108 spin_lock_irqsave(&workers->lock, flags);
109 if (!workers->atomic_start_pending)
110 goto out;
111
112 workers->atomic_start_pending = 0;
113 if (workers->num_workers >= workers->max_workers)
114 goto out;
115
116 spin_unlock_irqrestore(&workers->lock, flags);
117 btrfs_start_workers(workers, 1);
118 return;
119
120out:
121 spin_unlock_irqrestore(&workers->lock, flags);
122}
123
4a69a410
CM
124static noinline int run_ordered_completions(struct btrfs_workers *workers,
125 struct btrfs_work *work)
126{
4a69a410
CM
127 if (!workers->ordered)
128 return 0;
129
130 set_bit(WORK_DONE_BIT, &work->flags);
131
4e3f9c50 132 spin_lock(&workers->order_lock);
4a69a410 133
d313d7a3
CM
134 while (1) {
135 if (!list_empty(&workers->prio_order_list)) {
136 work = list_entry(workers->prio_order_list.next,
137 struct btrfs_work, order_list);
138 } else if (!list_empty(&workers->order_list)) {
139 work = list_entry(workers->order_list.next,
140 struct btrfs_work, order_list);
141 } else {
142 break;
143 }
4a69a410
CM
144 if (!test_bit(WORK_DONE_BIT, &work->flags))
145 break;
146
147 /* we are going to call the ordered done function, but
148 * we leave the work item on the list as a barrier so
149 * that later work items that are done don't have their
150 * functions called before this one returns
151 */
152 if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
153 break;
154
4e3f9c50 155 spin_unlock(&workers->order_lock);
4a69a410
CM
156
157 work->ordered_func(work);
158
159 /* now take the lock again and call the freeing code */
4e3f9c50 160 spin_lock(&workers->order_lock);
4a69a410
CM
161 list_del(&work->order_list);
162 work->ordered_free(work);
163 }
164
4e3f9c50 165 spin_unlock(&workers->order_lock);
4a69a410
CM
166 return 0;
167}
168
9042846b
CM
169static void put_worker(struct btrfs_worker_thread *worker)
170{
171 if (atomic_dec_and_test(&worker->refs))
172 kfree(worker);
173}
174
175static int try_worker_shutdown(struct btrfs_worker_thread *worker)
176{
177 int freeit = 0;
178
179 spin_lock_irq(&worker->lock);
180 spin_lock_irq(&worker->workers->lock);
181 if (worker->workers->num_workers > 1 &&
182 worker->idle &&
183 !worker->working &&
184 !list_empty(&worker->worker_list) &&
185 list_empty(&worker->prio_pending) &&
186 list_empty(&worker->pending)) {
187 freeit = 1;
188 list_del_init(&worker->worker_list);
189 worker->workers->num_workers--;
190 }
191 spin_unlock_irq(&worker->workers->lock);
192 spin_unlock_irq(&worker->lock);
193
194 if (freeit)
195 put_worker(worker);
196 return freeit;
197}
198
4f878e84
CM
199static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
200 struct list_head *prio_head,
201 struct list_head *head)
202{
203 struct btrfs_work *work = NULL;
204 struct list_head *cur = NULL;
205
206 if(!list_empty(prio_head))
207 cur = prio_head->next;
208
209 smp_mb();
210 if (!list_empty(&worker->prio_pending))
211 goto refill;
212
213 if (!list_empty(head))
214 cur = head->next;
215
216 if (cur)
217 goto out;
218
219refill:
220 spin_lock_irq(&worker->lock);
221 list_splice_tail_init(&worker->prio_pending, prio_head);
222 list_splice_tail_init(&worker->pending, head);
223
224 if (!list_empty(prio_head))
225 cur = prio_head->next;
226 else if (!list_empty(head))
227 cur = head->next;
228 spin_unlock_irq(&worker->lock);
229
230 if (!cur)
231 goto out_fail;
232
233out:
234 work = list_entry(cur, struct btrfs_work, list);
235
236out_fail:
237 return work;
238}
239
8b712842
CM
240/*
241 * main loop for servicing work items
242 */
243static int worker_loop(void *arg)
244{
245 struct btrfs_worker_thread *worker = arg;
4f878e84
CM
246 struct list_head head;
247 struct list_head prio_head;
8b712842 248 struct btrfs_work *work;
4f878e84
CM
249
250 INIT_LIST_HEAD(&head);
251 INIT_LIST_HEAD(&prio_head);
252
8b712842 253 do {
4f878e84 254again:
d313d7a3 255 while (1) {
4f878e84
CM
256
257
258 work = get_next_work(worker, &prio_head, &head);
259 if (!work)
d313d7a3
CM
260 break;
261
8b712842 262 list_del(&work->list);
4a69a410 263 clear_bit(WORK_QUEUED_BIT, &work->flags);
8b712842
CM
264
265 work->worker = worker;
8b712842
CM
266
267 work->func(work);
268
269 atomic_dec(&worker->num_pending);
4a69a410
CM
270 /*
271 * unless this is an ordered work queue,
272 * 'work' was probably freed by func above.
273 */
274 run_ordered_completions(worker->workers, work);
275
9042846b
CM
276 check_pending_worker_creates(worker);
277
8b712842 278 }
4f878e84
CM
279
280 spin_lock_irq(&worker->lock);
281 check_idle_worker(worker);
282
8b712842 283 if (freezing(current)) {
b51912c9
CM
284 worker->working = 0;
285 spin_unlock_irq(&worker->lock);
8b712842
CM
286 refrigerator();
287 } else {
8b712842 288 spin_unlock_irq(&worker->lock);
b51912c9
CM
289 if (!kthread_should_stop()) {
290 cpu_relax();
291 /*
292 * we've dropped the lock, did someone else
293 * jump_in?
294 */
295 smp_mb();
d313d7a3
CM
296 if (!list_empty(&worker->pending) ||
297 !list_empty(&worker->prio_pending))
b51912c9
CM
298 continue;
299
300 /*
301 * this short schedule allows more work to
302 * come in without the queue functions
303 * needing to go through wake_up_process()
304 *
305 * worker->working is still 1, so nobody
306 * is going to try and wake us up
307 */
308 schedule_timeout(1);
309 smp_mb();
d313d7a3
CM
310 if (!list_empty(&worker->pending) ||
311 !list_empty(&worker->prio_pending))
b51912c9
CM
312 continue;
313
b5555f77
AG
314 if (kthread_should_stop())
315 break;
316
b51912c9
CM
317 /* still no more work?, sleep for real */
318 spin_lock_irq(&worker->lock);
319 set_current_state(TASK_INTERRUPTIBLE);
d313d7a3 320 if (!list_empty(&worker->pending) ||
4f878e84
CM
321 !list_empty(&worker->prio_pending)) {
322 spin_unlock_irq(&worker->lock);
323 goto again;
324 }
b51912c9
CM
325
326 /*
327 * this makes sure we get a wakeup when someone
328 * adds something new to the queue
329 */
330 worker->working = 0;
331 spin_unlock_irq(&worker->lock);
332
9042846b
CM
333 if (!kthread_should_stop()) {
334 schedule_timeout(HZ * 120);
335 if (!worker->working &&
336 try_worker_shutdown(worker)) {
337 return 0;
338 }
339 }
b51912c9 340 }
8b712842
CM
341 __set_current_state(TASK_RUNNING);
342 }
343 } while (!kthread_should_stop());
344 return 0;
345}
346
347/*
348 * this will wait for all the worker threads to shutdown
349 */
350int btrfs_stop_workers(struct btrfs_workers *workers)
351{
352 struct list_head *cur;
353 struct btrfs_worker_thread *worker;
9042846b 354 int can_stop;
8b712842 355
9042846b 356 spin_lock_irq(&workers->lock);
35d8ba66 357 list_splice_init(&workers->idle_list, &workers->worker_list);
d397712b 358 while (!list_empty(&workers->worker_list)) {
8b712842
CM
359 cur = workers->worker_list.next;
360 worker = list_entry(cur, struct btrfs_worker_thread,
361 worker_list);
9042846b
CM
362
363 atomic_inc(&worker->refs);
364 workers->num_workers -= 1;
365 if (!list_empty(&worker->worker_list)) {
366 list_del_init(&worker->worker_list);
367 put_worker(worker);
368 can_stop = 1;
369 } else
370 can_stop = 0;
371 spin_unlock_irq(&workers->lock);
372 if (can_stop)
373 kthread_stop(worker->task);
374 spin_lock_irq(&workers->lock);
375 put_worker(worker);
8b712842 376 }
9042846b 377 spin_unlock_irq(&workers->lock);
8b712842
CM
378 return 0;
379}
380
381/*
382 * simple init on struct btrfs_workers
383 */
5443be45 384void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
8b712842
CM
385{
386 workers->num_workers = 0;
387 INIT_LIST_HEAD(&workers->worker_list);
35d8ba66 388 INIT_LIST_HEAD(&workers->idle_list);
4a69a410 389 INIT_LIST_HEAD(&workers->order_list);
d313d7a3 390 INIT_LIST_HEAD(&workers->prio_order_list);
8b712842 391 spin_lock_init(&workers->lock);
4e3f9c50 392 spin_lock_init(&workers->order_lock);
8b712842 393 workers->max_workers = max;
61b49440 394 workers->idle_thresh = 32;
5443be45 395 workers->name = name;
4a69a410 396 workers->ordered = 0;
9042846b
CM
397 workers->atomic_start_pending = 0;
398 workers->atomic_worker_start = 0;
8b712842
CM
399}
400
401/*
402 * starts new worker threads. This does not enforce the max worker
403 * count in case you need to temporarily go past it.
404 */
405int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
406{
407 struct btrfs_worker_thread *worker;
408 int ret = 0;
409 int i;
410
411 for (i = 0; i < num_workers; i++) {
412 worker = kzalloc(sizeof(*worker), GFP_NOFS);
413 if (!worker) {
414 ret = -ENOMEM;
415 goto fail;
416 }
417
418 INIT_LIST_HEAD(&worker->pending);
d313d7a3 419 INIT_LIST_HEAD(&worker->prio_pending);
8b712842
CM
420 INIT_LIST_HEAD(&worker->worker_list);
421 spin_lock_init(&worker->lock);
4e3f9c50 422
8b712842 423 atomic_set(&worker->num_pending, 0);
9042846b 424 atomic_set(&worker->refs, 1);
fd0fb038 425 worker->workers = workers;
5443be45
CM
426 worker->task = kthread_run(worker_loop, worker,
427 "btrfs-%s-%d", workers->name,
428 workers->num_workers + i);
8b712842
CM
429 if (IS_ERR(worker->task)) {
430 ret = PTR_ERR(worker->task);
9b627e9b 431 kfree(worker);
8b712842
CM
432 goto fail;
433 }
8b712842 434 spin_lock_irq(&workers->lock);
35d8ba66 435 list_add_tail(&worker->worker_list, &workers->idle_list);
4854ddd0 436 worker->idle = 1;
8b712842
CM
437 workers->num_workers++;
438 spin_unlock_irq(&workers->lock);
439 }
440 return 0;
441fail:
442 btrfs_stop_workers(workers);
443 return ret;
444}
445
446/*
447 * run through the list and find a worker thread that doesn't have a lot
448 * to do right now. This can return null if we aren't yet at the thread
449 * count limit and all of the threads are busy.
450 */
451static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
452{
453 struct btrfs_worker_thread *worker;
454 struct list_head *next;
8b712842
CM
455 int enforce_min = workers->num_workers < workers->max_workers;
456
8b712842 457 /*
35d8ba66
CM
458 * if we find an idle thread, don't move it to the end of the
459 * idle list. This improves the chance that the next submission
460 * will reuse the same thread, and maybe catch it while it is still
461 * working
8b712842 462 */
35d8ba66
CM
463 if (!list_empty(&workers->idle_list)) {
464 next = workers->idle_list.next;
8b712842
CM
465 worker = list_entry(next, struct btrfs_worker_thread,
466 worker_list);
35d8ba66 467 return worker;
8b712842 468 }
35d8ba66
CM
469 if (enforce_min || list_empty(&workers->worker_list))
470 return NULL;
471
8b712842 472 /*
35d8ba66 473 * if we pick a busy task, move the task to the end of the list.
d352ac68
CM
474 * hopefully this will keep things somewhat evenly balanced.
475 * Do the move in batches based on the sequence number. This groups
476 * requests submitted at roughly the same time onto the same worker.
8b712842 477 */
35d8ba66
CM
478 next = workers->worker_list.next;
479 worker = list_entry(next, struct btrfs_worker_thread, worker_list);
4854ddd0
CM
480 atomic_inc(&worker->num_pending);
481 worker->sequence++;
d352ac68 482
53863232 483 if (worker->sequence % workers->idle_thresh == 0)
4854ddd0 484 list_move_tail(next, &workers->worker_list);
8b712842
CM
485 return worker;
486}
487
d352ac68
CM
488/*
489 * selects a worker thread to take the next job. This will either find
490 * an idle worker, start a new worker up to the max count, or just return
491 * one of the existing busy workers.
492 */
8b712842
CM
493static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
494{
495 struct btrfs_worker_thread *worker;
496 unsigned long flags;
9042846b 497 struct list_head *fallback;
8b712842
CM
498
499again:
500 spin_lock_irqsave(&workers->lock, flags);
501 worker = next_worker(workers);
8b712842
CM
502
503 if (!worker) {
8b712842 504 if (workers->num_workers >= workers->max_workers) {
9042846b
CM
505 goto fallback;
506 } else if (workers->atomic_worker_start) {
507 workers->atomic_start_pending = 1;
508 goto fallback;
8b712842
CM
509 } else {
510 spin_unlock_irqrestore(&workers->lock, flags);
511 /* we're below the limit, start another worker */
512 btrfs_start_workers(workers, 1);
513 goto again;
514 }
515 }
4e3f9c50 516 spin_unlock_irqrestore(&workers->lock, flags);
8b712842 517 return worker;
9042846b
CM
518
519fallback:
520 fallback = NULL;
521 /*
522 * we have failed to find any workers, just
523 * return the first one we can find.
524 */
525 if (!list_empty(&workers->worker_list))
526 fallback = workers->worker_list.next;
527 if (!list_empty(&workers->idle_list))
528 fallback = workers->idle_list.next;
529 BUG_ON(!fallback);
530 worker = list_entry(fallback,
531 struct btrfs_worker_thread, worker_list);
532 spin_unlock_irqrestore(&workers->lock, flags);
533 return worker;
8b712842
CM
534}
535
536/*
537 * btrfs_requeue_work just puts the work item back on the tail of the list
538 * it was taken from. It is intended for use with long running work functions
539 * that make some progress and want to give the cpu up for others.
540 */
541int btrfs_requeue_work(struct btrfs_work *work)
542{
543 struct btrfs_worker_thread *worker = work->worker;
544 unsigned long flags;
a6837051 545 int wake = 0;
8b712842 546
4a69a410 547 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
8b712842
CM
548 goto out;
549
550 spin_lock_irqsave(&worker->lock, flags);
d313d7a3
CM
551 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
552 list_add_tail(&work->list, &worker->prio_pending);
553 else
554 list_add_tail(&work->list, &worker->pending);
b51912c9 555 atomic_inc(&worker->num_pending);
75ccf47d
CM
556
557 /* by definition we're busy, take ourselves off the idle
558 * list
559 */
560 if (worker->idle) {
29c5e8ce 561 spin_lock(&worker->workers->lock);
75ccf47d
CM
562 worker->idle = 0;
563 list_move_tail(&worker->worker_list,
564 &worker->workers->worker_list);
29c5e8ce 565 spin_unlock(&worker->workers->lock);
75ccf47d 566 }
a6837051
CM
567 if (!worker->working) {
568 wake = 1;
569 worker->working = 1;
570 }
75ccf47d 571
a6837051
CM
572 if (wake)
573 wake_up_process(worker->task);
9042846b 574 spin_unlock_irqrestore(&worker->lock, flags);
8b712842 575out:
a6837051 576
8b712842
CM
577 return 0;
578}
579
d313d7a3
CM
580void btrfs_set_work_high_prio(struct btrfs_work *work)
581{
582 set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
583}
584
8b712842
CM
585/*
586 * places a struct btrfs_work into the pending queue of one of the kthreads
587 */
588int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
589{
590 struct btrfs_worker_thread *worker;
591 unsigned long flags;
592 int wake = 0;
593
594 /* don't requeue something already on a list */
4a69a410 595 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
8b712842
CM
596 goto out;
597
598 worker = find_worker(workers);
4a69a410 599 if (workers->ordered) {
4e3f9c50
CM
600 /*
601 * you're not allowed to do ordered queues from an
602 * interrupt handler
603 */
604 spin_lock(&workers->order_lock);
d313d7a3
CM
605 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
606 list_add_tail(&work->order_list,
607 &workers->prio_order_list);
608 } else {
609 list_add_tail(&work->order_list, &workers->order_list);
610 }
4e3f9c50 611 spin_unlock(&workers->order_lock);
4a69a410
CM
612 } else {
613 INIT_LIST_HEAD(&work->order_list);
614 }
8b712842
CM
615
616 spin_lock_irqsave(&worker->lock, flags);
a6837051 617
d313d7a3
CM
618 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
619 list_add_tail(&work->list, &worker->prio_pending);
620 else
621 list_add_tail(&work->list, &worker->pending);
8b712842 622 atomic_inc(&worker->num_pending);
35d8ba66 623 check_busy_worker(worker);
8b712842
CM
624
625 /*
626 * avoid calling into wake_up_process if this thread has already
627 * been kicked
628 */
629 if (!worker->working)
630 wake = 1;
631 worker->working = 1;
632
8b712842
CM
633 if (wake)
634 wake_up_process(worker->task);
9042846b
CM
635 spin_unlock_irqrestore(&worker->lock, flags);
636
8b712842
CM
637out:
638 return 0;
639}