]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright (C) 2002 Sistina Software (UK) Limited. | |
3 | * | |
4 | * This file is released under the GPL. | |
5 | * | |
6 | * Kcopyd provides a simple interface for copying an area of one | |
7 | * block-device to one or more other block-devices, with an asynchronous | |
8 | * completion notification. | |
9 | */ | |
10 | ||
715b49ef | 11 | #include <asm/types.h> |
1da177e4 LT |
12 | #include <asm/atomic.h> |
13 | ||
14 | #include <linux/blkdev.h> | |
15 | #include <linux/config.h> | |
16 | #include <linux/fs.h> | |
17 | #include <linux/init.h> | |
18 | #include <linux/list.h> | |
19 | #include <linux/mempool.h> | |
20 | #include <linux/module.h> | |
21 | #include <linux/pagemap.h> | |
22 | #include <linux/slab.h> | |
23 | #include <linux/vmalloc.h> | |
24 | #include <linux/workqueue.h> | |
25 | ||
26 | #include "kcopyd.h" | |
27 | ||
28 | static struct workqueue_struct *_kcopyd_wq; | |
29 | static struct work_struct _kcopyd_work; | |
30 | ||
31 | static inline void wake(void) | |
32 | { | |
33 | queue_work(_kcopyd_wq, &_kcopyd_work); | |
34 | } | |
35 | ||
36 | /*----------------------------------------------------------------- | |
37 | * Each kcopyd client has its own little pool of preallocated | |
38 | * pages for kcopyd io. | |
39 | *---------------------------------------------------------------*/ | |
40 | struct kcopyd_client { | |
41 | struct list_head list; | |
42 | ||
43 | spinlock_t lock; | |
44 | struct page_list *pages; | |
45 | unsigned int nr_pages; | |
46 | unsigned int nr_free_pages; | |
47 | }; | |
48 | ||
49 | static struct page_list *alloc_pl(void) | |
50 | { | |
51 | struct page_list *pl; | |
52 | ||
53 | pl = kmalloc(sizeof(*pl), GFP_KERNEL); | |
54 | if (!pl) | |
55 | return NULL; | |
56 | ||
57 | pl->page = alloc_page(GFP_KERNEL); | |
58 | if (!pl->page) { | |
59 | kfree(pl); | |
60 | return NULL; | |
61 | } | |
62 | ||
63 | return pl; | |
64 | } | |
65 | ||
66 | static void free_pl(struct page_list *pl) | |
67 | { | |
68 | __free_page(pl->page); | |
69 | kfree(pl); | |
70 | } | |
71 | ||
72 | static int kcopyd_get_pages(struct kcopyd_client *kc, | |
73 | unsigned int nr, struct page_list **pages) | |
74 | { | |
75 | struct page_list *pl; | |
76 | ||
77 | spin_lock(&kc->lock); | |
78 | if (kc->nr_free_pages < nr) { | |
79 | spin_unlock(&kc->lock); | |
80 | return -ENOMEM; | |
81 | } | |
82 | ||
83 | kc->nr_free_pages -= nr; | |
84 | for (*pages = pl = kc->pages; --nr; pl = pl->next) | |
85 | ; | |
86 | ||
87 | kc->pages = pl->next; | |
88 | pl->next = NULL; | |
89 | ||
90 | spin_unlock(&kc->lock); | |
91 | ||
92 | return 0; | |
93 | } | |
94 | ||
95 | static void kcopyd_put_pages(struct kcopyd_client *kc, struct page_list *pl) | |
96 | { | |
97 | struct page_list *cursor; | |
98 | ||
99 | spin_lock(&kc->lock); | |
100 | for (cursor = pl; cursor->next; cursor = cursor->next) | |
101 | kc->nr_free_pages++; | |
102 | ||
103 | kc->nr_free_pages++; | |
104 | cursor->next = kc->pages; | |
105 | kc->pages = pl; | |
106 | spin_unlock(&kc->lock); | |
107 | } | |
108 | ||
109 | /* | |
110 | * These three functions resize the page pool. | |
111 | */ | |
112 | static void drop_pages(struct page_list *pl) | |
113 | { | |
114 | struct page_list *next; | |
115 | ||
116 | while (pl) { | |
117 | next = pl->next; | |
118 | free_pl(pl); | |
119 | pl = next; | |
120 | } | |
121 | } | |
122 | ||
123 | static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr) | |
124 | { | |
125 | unsigned int i; | |
126 | struct page_list *pl = NULL, *next; | |
127 | ||
128 | for (i = 0; i < nr; i++) { | |
129 | next = alloc_pl(); | |
130 | if (!next) { | |
131 | if (pl) | |
132 | drop_pages(pl); | |
133 | return -ENOMEM; | |
134 | } | |
135 | next->next = pl; | |
136 | pl = next; | |
137 | } | |
138 | ||
139 | kcopyd_put_pages(kc, pl); | |
140 | kc->nr_pages += nr; | |
141 | return 0; | |
142 | } | |
143 | ||
144 | static void client_free_pages(struct kcopyd_client *kc) | |
145 | { | |
146 | BUG_ON(kc->nr_free_pages != kc->nr_pages); | |
147 | drop_pages(kc->pages); | |
148 | kc->pages = NULL; | |
149 | kc->nr_free_pages = kc->nr_pages = 0; | |
150 | } | |
151 | ||
152 | /*----------------------------------------------------------------- | |
153 | * kcopyd_jobs need to be allocated by the *clients* of kcopyd, | |
154 | * for this reason we use a mempool to prevent the client from | |
155 | * ever having to do io (which could cause a deadlock). | |
156 | *---------------------------------------------------------------*/ | |
157 | struct kcopyd_job { | |
158 | struct kcopyd_client *kc; | |
159 | struct list_head list; | |
160 | unsigned long flags; | |
161 | ||
162 | /* | |
163 | * Error state of the job. | |
164 | */ | |
165 | int read_err; | |
166 | unsigned int write_err; | |
167 | ||
168 | /* | |
169 | * Either READ or WRITE | |
170 | */ | |
171 | int rw; | |
172 | struct io_region source; | |
173 | ||
174 | /* | |
175 | * The destinations for the transfer. | |
176 | */ | |
177 | unsigned int num_dests; | |
178 | struct io_region dests[KCOPYD_MAX_REGIONS]; | |
179 | ||
180 | sector_t offset; | |
181 | unsigned int nr_pages; | |
182 | struct page_list *pages; | |
183 | ||
184 | /* | |
185 | * Set this to ensure you are notified when the job has | |
186 | * completed. 'context' is for callback to use. | |
187 | */ | |
188 | kcopyd_notify_fn fn; | |
189 | void *context; | |
190 | ||
191 | /* | |
192 | * These fields are only used if the job has been split | |
193 | * into more manageable parts. | |
194 | */ | |
195 | struct semaphore lock; | |
196 | atomic_t sub_jobs; | |
197 | sector_t progress; | |
198 | }; | |
199 | ||
200 | /* FIXME: this should scale with the number of pages */ | |
201 | #define MIN_JOBS 512 | |
202 | ||
203 | static kmem_cache_t *_job_cache; | |
204 | static mempool_t *_job_pool; | |
205 | ||
206 | /* | |
207 | * We maintain three lists of jobs: | |
208 | * | |
209 | * i) jobs waiting for pages | |
210 | * ii) jobs that have pages, and are waiting for the io to be issued. | |
211 | * iii) jobs that have completed. | |
212 | * | |
213 | * All three of these are protected by job_lock. | |
214 | */ | |
215 | static DEFINE_SPINLOCK(_job_lock); | |
216 | ||
217 | static LIST_HEAD(_complete_jobs); | |
218 | static LIST_HEAD(_io_jobs); | |
219 | static LIST_HEAD(_pages_jobs); | |
220 | ||
221 | static int jobs_init(void) | |
222 | { | |
223 | _job_cache = kmem_cache_create("kcopyd-jobs", | |
224 | sizeof(struct kcopyd_job), | |
225 | __alignof__(struct kcopyd_job), | |
226 | 0, NULL, NULL); | |
227 | if (!_job_cache) | |
228 | return -ENOMEM; | |
229 | ||
230 | _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab, | |
231 | mempool_free_slab, _job_cache); | |
232 | if (!_job_pool) { | |
233 | kmem_cache_destroy(_job_cache); | |
234 | return -ENOMEM; | |
235 | } | |
236 | ||
237 | return 0; | |
238 | } | |
239 | ||
240 | static void jobs_exit(void) | |
241 | { | |
242 | BUG_ON(!list_empty(&_complete_jobs)); | |
243 | BUG_ON(!list_empty(&_io_jobs)); | |
244 | BUG_ON(!list_empty(&_pages_jobs)); | |
245 | ||
246 | mempool_destroy(_job_pool); | |
247 | kmem_cache_destroy(_job_cache); | |
248 | _job_pool = NULL; | |
249 | _job_cache = NULL; | |
250 | } | |
251 | ||
252 | /* | |
253 | * Functions to push and pop a job onto the head of a given job | |
254 | * list. | |
255 | */ | |
256 | static inline struct kcopyd_job *pop(struct list_head *jobs) | |
257 | { | |
258 | struct kcopyd_job *job = NULL; | |
259 | unsigned long flags; | |
260 | ||
261 | spin_lock_irqsave(&_job_lock, flags); | |
262 | ||
263 | if (!list_empty(jobs)) { | |
264 | job = list_entry(jobs->next, struct kcopyd_job, list); | |
265 | list_del(&job->list); | |
266 | } | |
267 | spin_unlock_irqrestore(&_job_lock, flags); | |
268 | ||
269 | return job; | |
270 | } | |
271 | ||
272 | static inline void push(struct list_head *jobs, struct kcopyd_job *job) | |
273 | { | |
274 | unsigned long flags; | |
275 | ||
276 | spin_lock_irqsave(&_job_lock, flags); | |
277 | list_add_tail(&job->list, jobs); | |
278 | spin_unlock_irqrestore(&_job_lock, flags); | |
279 | } | |
280 | ||
281 | /* | |
282 | * These three functions process 1 item from the corresponding | |
283 | * job list. | |
284 | * | |
285 | * They return: | |
286 | * < 0: error | |
287 | * 0: success | |
288 | * > 0: can't process yet. | |
289 | */ | |
290 | static int run_complete_job(struct kcopyd_job *job) | |
291 | { | |
292 | void *context = job->context; | |
293 | int read_err = job->read_err; | |
294 | unsigned int write_err = job->write_err; | |
295 | kcopyd_notify_fn fn = job->fn; | |
296 | ||
297 | kcopyd_put_pages(job->kc, job->pages); | |
298 | mempool_free(job, _job_pool); | |
299 | fn(read_err, write_err, context); | |
300 | return 0; | |
301 | } | |
302 | ||
303 | static void complete_io(unsigned long error, void *context) | |
304 | { | |
305 | struct kcopyd_job *job = (struct kcopyd_job *) context; | |
306 | ||
307 | if (error) { | |
308 | if (job->rw == WRITE) | |
309 | job->write_err &= error; | |
310 | else | |
311 | job->read_err = 1; | |
312 | ||
313 | if (!test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) { | |
314 | push(&_complete_jobs, job); | |
315 | wake(); | |
316 | return; | |
317 | } | |
318 | } | |
319 | ||
320 | if (job->rw == WRITE) | |
321 | push(&_complete_jobs, job); | |
322 | ||
323 | else { | |
324 | job->rw = WRITE; | |
325 | push(&_io_jobs, job); | |
326 | } | |
327 | ||
328 | wake(); | |
329 | } | |
330 | ||
331 | /* | |
332 | * Request io on as many buffer heads as we can currently get for | |
333 | * a particular job. | |
334 | */ | |
335 | static int run_io_job(struct kcopyd_job *job) | |
336 | { | |
337 | int r; | |
338 | ||
339 | if (job->rw == READ) | |
340 | r = dm_io_async(1, &job->source, job->rw, | |
341 | job->pages, | |
342 | job->offset, complete_io, job); | |
343 | ||
344 | else | |
345 | r = dm_io_async(job->num_dests, job->dests, job->rw, | |
346 | job->pages, | |
347 | job->offset, complete_io, job); | |
348 | ||
349 | return r; | |
350 | } | |
351 | ||
352 | static int run_pages_job(struct kcopyd_job *job) | |
353 | { | |
354 | int r; | |
355 | ||
356 | job->nr_pages = dm_div_up(job->dests[0].count + job->offset, | |
357 | PAGE_SIZE >> 9); | |
358 | r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); | |
359 | if (!r) { | |
360 | /* this job is ready for io */ | |
361 | push(&_io_jobs, job); | |
362 | return 0; | |
363 | } | |
364 | ||
365 | if (r == -ENOMEM) | |
366 | /* can't complete now */ | |
367 | return 1; | |
368 | ||
369 | return r; | |
370 | } | |
371 | ||
372 | /* | |
373 | * Run through a list for as long as possible. Returns the count | |
374 | * of successful jobs. | |
375 | */ | |
376 | static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) | |
377 | { | |
378 | struct kcopyd_job *job; | |
379 | int r, count = 0; | |
380 | ||
381 | while ((job = pop(jobs))) { | |
382 | ||
383 | r = fn(job); | |
384 | ||
385 | if (r < 0) { | |
386 | /* error this rogue job */ | |
387 | if (job->rw == WRITE) | |
388 | job->write_err = (unsigned int) -1; | |
389 | else | |
390 | job->read_err = 1; | |
391 | push(&_complete_jobs, job); | |
392 | break; | |
393 | } | |
394 | ||
395 | if (r > 0) { | |
396 | /* | |
397 | * We couldn't service this job ATM, so | |
398 | * push this job back onto the list. | |
399 | */ | |
400 | push(jobs, job); | |
401 | break; | |
402 | } | |
403 | ||
404 | count++; | |
405 | } | |
406 | ||
407 | return count; | |
408 | } | |
409 | ||
410 | /* | |
411 | * kcopyd does this every time it's woken up. | |
412 | */ | |
413 | static void do_work(void *ignored) | |
414 | { | |
415 | /* | |
416 | * The order that these are called is *very* important. | |
417 | * complete jobs can free some pages for pages jobs. | |
418 | * Pages jobs when successful will jump onto the io jobs | |
419 | * list. io jobs call wake when they complete and it all | |
420 | * starts again. | |
421 | */ | |
422 | process_jobs(&_complete_jobs, run_complete_job); | |
423 | process_jobs(&_pages_jobs, run_pages_job); | |
424 | process_jobs(&_io_jobs, run_io_job); | |
425 | } | |
426 | ||
427 | /* | |
428 | * If we are copying a small region we just dispatch a single job | |
429 | * to do the copy, otherwise the io has to be split up into many | |
430 | * jobs. | |
431 | */ | |
432 | static void dispatch_job(struct kcopyd_job *job) | |
433 | { | |
434 | push(&_pages_jobs, job); | |
435 | wake(); | |
436 | } | |
437 | ||
438 | #define SUB_JOB_SIZE 128 | |
439 | static void segment_complete(int read_err, | |
440 | unsigned int write_err, void *context) | |
441 | { | |
442 | /* FIXME: tidy this function */ | |
443 | sector_t progress = 0; | |
444 | sector_t count = 0; | |
445 | struct kcopyd_job *job = (struct kcopyd_job *) context; | |
446 | ||
447 | down(&job->lock); | |
448 | ||
449 | /* update the error */ | |
450 | if (read_err) | |
451 | job->read_err = 1; | |
452 | ||
453 | if (write_err) | |
454 | job->write_err &= write_err; | |
455 | ||
456 | /* | |
457 | * Only dispatch more work if there hasn't been an error. | |
458 | */ | |
459 | if ((!job->read_err && !job->write_err) || | |
460 | test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) { | |
461 | /* get the next chunk of work */ | |
462 | progress = job->progress; | |
463 | count = job->source.count - progress; | |
464 | if (count) { | |
465 | if (count > SUB_JOB_SIZE) | |
466 | count = SUB_JOB_SIZE; | |
467 | ||
468 | job->progress += count; | |
469 | } | |
470 | } | |
471 | up(&job->lock); | |
472 | ||
473 | if (count) { | |
474 | int i; | |
475 | struct kcopyd_job *sub_job = mempool_alloc(_job_pool, GFP_NOIO); | |
476 | ||
477 | *sub_job = *job; | |
478 | sub_job->source.sector += progress; | |
479 | sub_job->source.count = count; | |
480 | ||
481 | for (i = 0; i < job->num_dests; i++) { | |
482 | sub_job->dests[i].sector += progress; | |
483 | sub_job->dests[i].count = count; | |
484 | } | |
485 | ||
486 | sub_job->fn = segment_complete; | |
487 | sub_job->context = job; | |
488 | dispatch_job(sub_job); | |
489 | ||
490 | } else if (atomic_dec_and_test(&job->sub_jobs)) { | |
491 | ||
492 | /* | |
493 | * To avoid a race we must keep the job around | |
494 | * until after the notify function has completed. | |
495 | * Otherwise the client may try and stop the job | |
496 | * after we've completed. | |
497 | */ | |
498 | job->fn(read_err, write_err, job->context); | |
499 | mempool_free(job, _job_pool); | |
500 | } | |
501 | } | |
502 | ||
503 | /* | |
504 | * Create some little jobs that will do the move between | |
505 | * them. | |
506 | */ | |
507 | #define SPLIT_COUNT 8 | |
508 | static void split_job(struct kcopyd_job *job) | |
509 | { | |
510 | int i; | |
511 | ||
512 | atomic_set(&job->sub_jobs, SPLIT_COUNT); | |
513 | for (i = 0; i < SPLIT_COUNT; i++) | |
514 | segment_complete(0, 0u, job); | |
515 | } | |
516 | ||
517 | int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from, | |
518 | unsigned int num_dests, struct io_region *dests, | |
519 | unsigned int flags, kcopyd_notify_fn fn, void *context) | |
520 | { | |
521 | struct kcopyd_job *job; | |
522 | ||
523 | /* | |
524 | * Allocate a new job. | |
525 | */ | |
526 | job = mempool_alloc(_job_pool, GFP_NOIO); | |
527 | ||
528 | /* | |
529 | * set up for the read. | |
530 | */ | |
531 | job->kc = kc; | |
532 | job->flags = flags; | |
533 | job->read_err = 0; | |
534 | job->write_err = 0; | |
535 | job->rw = READ; | |
536 | ||
537 | job->source = *from; | |
538 | ||
539 | job->num_dests = num_dests; | |
540 | memcpy(&job->dests, dests, sizeof(*dests) * num_dests); | |
541 | ||
542 | job->offset = 0; | |
543 | job->nr_pages = 0; | |
544 | job->pages = NULL; | |
545 | ||
546 | job->fn = fn; | |
547 | job->context = context; | |
548 | ||
549 | if (job->source.count < SUB_JOB_SIZE) | |
550 | dispatch_job(job); | |
551 | ||
552 | else { | |
553 | init_MUTEX(&job->lock); | |
554 | job->progress = 0; | |
555 | split_job(job); | |
556 | } | |
557 | ||
558 | return 0; | |
559 | } | |
560 | ||
/*
 * Cancel a kcopyd job, e.g. because someone is deactivating a
 * mirror.  Not implemented yet, so the stub is compiled out.
 */
#if 0
int kcopyd_cancel(struct kcopyd_job *job, int block)
{
	/* FIXME: finish */
	return -1;
}
#endif /* 0 */
1da177e4 LT |
572 | |
573 | /*----------------------------------------------------------------- | |
574 | * Unit setup | |
575 | *---------------------------------------------------------------*/ | |
576 | static DECLARE_MUTEX(_client_lock); | |
577 | static LIST_HEAD(_clients); | |
578 | ||
579 | static void client_add(struct kcopyd_client *kc) | |
580 | { | |
581 | down(&_client_lock); | |
582 | list_add(&kc->list, &_clients); | |
583 | up(&_client_lock); | |
584 | } | |
585 | ||
586 | static void client_del(struct kcopyd_client *kc) | |
587 | { | |
588 | down(&_client_lock); | |
589 | list_del(&kc->list); | |
590 | up(&_client_lock); | |
591 | } | |
592 | ||
593 | static DECLARE_MUTEX(kcopyd_init_lock); | |
594 | static int kcopyd_clients = 0; | |
595 | ||
596 | static int kcopyd_init(void) | |
597 | { | |
598 | int r; | |
599 | ||
600 | down(&kcopyd_init_lock); | |
601 | ||
602 | if (kcopyd_clients) { | |
603 | /* Already initialized. */ | |
604 | kcopyd_clients++; | |
605 | up(&kcopyd_init_lock); | |
606 | return 0; | |
607 | } | |
608 | ||
609 | r = jobs_init(); | |
610 | if (r) { | |
611 | up(&kcopyd_init_lock); | |
612 | return r; | |
613 | } | |
614 | ||
615 | _kcopyd_wq = create_singlethread_workqueue("kcopyd"); | |
616 | if (!_kcopyd_wq) { | |
617 | jobs_exit(); | |
618 | up(&kcopyd_init_lock); | |
619 | return -ENOMEM; | |
620 | } | |
621 | ||
622 | kcopyd_clients++; | |
623 | INIT_WORK(&_kcopyd_work, do_work, NULL); | |
624 | up(&kcopyd_init_lock); | |
625 | return 0; | |
626 | } | |
627 | ||
628 | static void kcopyd_exit(void) | |
629 | { | |
630 | down(&kcopyd_init_lock); | |
631 | kcopyd_clients--; | |
632 | if (!kcopyd_clients) { | |
633 | jobs_exit(); | |
634 | destroy_workqueue(_kcopyd_wq); | |
635 | _kcopyd_wq = NULL; | |
636 | } | |
637 | up(&kcopyd_init_lock); | |
638 | } | |
639 | ||
640 | int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result) | |
641 | { | |
642 | int r = 0; | |
643 | struct kcopyd_client *kc; | |
644 | ||
645 | r = kcopyd_init(); | |
646 | if (r) | |
647 | return r; | |
648 | ||
649 | kc = kmalloc(sizeof(*kc), GFP_KERNEL); | |
650 | if (!kc) { | |
651 | kcopyd_exit(); | |
652 | return -ENOMEM; | |
653 | } | |
654 | ||
655 | spin_lock_init(&kc->lock); | |
656 | kc->pages = NULL; | |
657 | kc->nr_pages = kc->nr_free_pages = 0; | |
658 | r = client_alloc_pages(kc, nr_pages); | |
659 | if (r) { | |
660 | kfree(kc); | |
661 | kcopyd_exit(); | |
662 | return r; | |
663 | } | |
664 | ||
665 | r = dm_io_get(nr_pages); | |
666 | if (r) { | |
667 | client_free_pages(kc); | |
668 | kfree(kc); | |
669 | kcopyd_exit(); | |
670 | return r; | |
671 | } | |
672 | ||
673 | client_add(kc); | |
674 | *result = kc; | |
675 | return 0; | |
676 | } | |
677 | ||
678 | void kcopyd_client_destroy(struct kcopyd_client *kc) | |
679 | { | |
680 | dm_io_put(kc->nr_pages); | |
681 | client_free_pages(kc); | |
682 | client_del(kc); | |
683 | kfree(kc); | |
684 | kcopyd_exit(); | |
685 | } | |
686 | ||
/* Interface exported to other modules. */
EXPORT_SYMBOL(kcopyd_client_create);
EXPORT_SYMBOL(kcopyd_client_destroy);
EXPORT_SYMBOL(kcopyd_copy);