]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright (C) 2003 Sistina Software | |
891ce207 | 3 | * Copyright (C) 2006 Red Hat GmbH |
1da177e4 LT |
4 | * |
5 | * This file is released under the GPL. | |
6 | */ | |
7 | ||
8 | #include "dm-io.h" | |
9 | ||
10 | #include <linux/bio.h> | |
11 | #include <linux/mempool.h> | |
12 | #include <linux/module.h> | |
13 | #include <linux/sched.h> | |
14 | #include <linux/slab.h> | |
15 | ||
16 | static struct bio_set *_bios; | |
17 | ||
891ce207 HM |
18 | struct dm_io_client { |
19 | mempool_t *pool; | |
20 | struct bio_set *bios; | |
21 | }; | |
22 | ||
1da177e4 LT |
23 | /* FIXME: can we shrink this ? */ |
24 | struct io { | |
25 | unsigned long error; | |
26 | atomic_t count; | |
27 | struct task_struct *sleeper; | |
891ce207 | 28 | struct dm_io_client *client; |
1da177e4 LT |
29 | io_notify_fn callback; |
30 | void *context; | |
31 | }; | |
32 | ||
33 | /* | |
34 | * io contexts are only dynamically allocated for asynchronous | |
35 | * io. Since async io is likely to be the majority of io we'll | |
891ce207 | 36 | * have the same number of io contexts as bios! (FIXME: must reduce this). |
1da177e4 LT |
37 | */ |
38 | static unsigned _num_ios; | |
39 | static mempool_t *_io_pool; | |
40 | ||
891ce207 HM |
41 | /* |
42 | * Temporary functions to allow old and new interfaces to co-exist. | |
43 | */ | |
44 | static struct bio_set *bios(struct dm_io_client *client) | |
45 | { | |
46 | return client ? client->bios : _bios; | |
47 | } | |
48 | ||
49 | static mempool_t *io_pool(struct dm_io_client *client) | |
50 | { | |
51 | return client ? client->pool : _io_pool; | |
52 | } | |
53 | ||
1da177e4 LT |
/* Convert a page count into the number of io contexts to reserve. */
static unsigned int pages_to_ios(unsigned int pages)
{
	const unsigned int ios_per_page = 4;	/* too many ? */

	return ios_per_page * pages;
}
58 | ||
59 | static int resize_pool(unsigned int new_ios) | |
60 | { | |
61 | int r = 0; | |
62 | ||
63 | if (_io_pool) { | |
64 | if (new_ios == 0) { | |
65 | /* free off the pool */ | |
66 | mempool_destroy(_io_pool); | |
67 | _io_pool = NULL; | |
68 | bioset_free(_bios); | |
69 | ||
70 | } else { | |
71 | /* resize the pool */ | |
72 | r = mempool_resize(_io_pool, new_ios, GFP_KERNEL); | |
73 | } | |
74 | ||
75 | } else { | |
76 | /* create new pool */ | |
0eaae62a MD |
77 | _io_pool = mempool_create_kmalloc_pool(new_ios, |
78 | sizeof(struct io)); | |
1da177e4 LT |
79 | if (!_io_pool) |
80 | return -ENOMEM; | |
81 | ||
5972511b | 82 | _bios = bioset_create(16, 16); |
1da177e4 LT |
83 | if (!_bios) { |
84 | mempool_destroy(_io_pool); | |
85 | _io_pool = NULL; | |
86 | return -ENOMEM; | |
87 | } | |
88 | } | |
89 | ||
90 | if (!r) | |
91 | _num_ios = new_ios; | |
92 | ||
93 | return r; | |
94 | } | |
95 | ||
96 | int dm_io_get(unsigned int num_pages) | |
97 | { | |
98 | return resize_pool(_num_ios + pages_to_ios(num_pages)); | |
99 | } | |
100 | ||
101 | void dm_io_put(unsigned int num_pages) | |
102 | { | |
103 | resize_pool(_num_ios - pages_to_ios(num_pages)); | |
104 | } | |
105 | ||
c8b03afe HM |
106 | /* |
107 | * Create a client with mempool and bioset. | |
108 | */ | |
109 | struct dm_io_client *dm_io_client_create(unsigned num_pages) | |
110 | { | |
111 | unsigned ios = pages_to_ios(num_pages); | |
112 | struct dm_io_client *client; | |
113 | ||
114 | client = kmalloc(sizeof(*client), GFP_KERNEL); | |
115 | if (!client) | |
116 | return ERR_PTR(-ENOMEM); | |
117 | ||
118 | client->pool = mempool_create_kmalloc_pool(ios, sizeof(struct io)); | |
119 | if (!client->pool) | |
120 | goto bad; | |
121 | ||
122 | client->bios = bioset_create(16, 16); | |
123 | if (!client->bios) | |
124 | goto bad; | |
125 | ||
126 | return client; | |
127 | ||
128 | bad: | |
129 | if (client->pool) | |
130 | mempool_destroy(client->pool); | |
131 | kfree(client); | |
132 | return ERR_PTR(-ENOMEM); | |
133 | } | |
134 | EXPORT_SYMBOL(dm_io_client_create); | |
135 | ||
136 | int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client) | |
137 | { | |
138 | return mempool_resize(client->pool, pages_to_ios(num_pages), | |
139 | GFP_KERNEL); | |
140 | } | |
141 | EXPORT_SYMBOL(dm_io_client_resize); | |
142 | ||
143 | void dm_io_client_destroy(struct dm_io_client *client) | |
144 | { | |
145 | mempool_destroy(client->pool); | |
146 | bioset_free(client->bios); | |
147 | kfree(client); | |
148 | } | |
149 | EXPORT_SYMBOL(dm_io_client_destroy); | |
150 | ||
1da177e4 LT |
151 | /*----------------------------------------------------------------- |
152 | * We need to keep track of which region a bio is doing io for. | |
153 | * In order to save a memory allocation we store this in the last | |
154 | * bvec which we know is unused (blech). | |
155 | * XXX This is ugly and can OOPS with some configs... find another way. | |
156 | *---------------------------------------------------------------*/ | |
157 | static inline void bio_set_region(struct bio *bio, unsigned region) | |
158 | { | |
f00b16ad | 159 | bio->bi_io_vec[bio->bi_max_vecs].bv_len = region; |
1da177e4 LT |
160 | } |
161 | ||
162 | static inline unsigned bio_get_region(struct bio *bio) | |
163 | { | |
f00b16ad | 164 | return bio->bi_io_vec[bio->bi_max_vecs].bv_len; |
1da177e4 LT |
165 | } |
166 | ||
167 | /*----------------------------------------------------------------- | |
168 | * We need an io object to keep track of the number of bios that | |
169 | * have been dispatched for a particular io. | |
170 | *---------------------------------------------------------------*/ | |
171 | static void dec_count(struct io *io, unsigned int region, int error) | |
172 | { | |
173 | if (error) | |
174 | set_bit(region, &io->error); | |
175 | ||
176 | if (atomic_dec_and_test(&io->count)) { | |
177 | if (io->sleeper) | |
178 | wake_up_process(io->sleeper); | |
179 | ||
180 | else { | |
181 | int r = io->error; | |
182 | io_notify_fn fn = io->callback; | |
183 | void *context = io->context; | |
184 | ||
891ce207 | 185 | mempool_free(io, io_pool(io->client)); |
1da177e4 LT |
186 | fn(r, context); |
187 | } | |
188 | } | |
189 | } | |
190 | ||
191 | static int endio(struct bio *bio, unsigned int done, int error) | |
192 | { | |
c897feb3 HM |
193 | struct io *io; |
194 | unsigned region; | |
1da177e4 LT |
195 | |
196 | /* keep going until we've finished */ | |
197 | if (bio->bi_size) | |
198 | return 1; | |
199 | ||
200 | if (error && bio_data_dir(bio) == READ) | |
201 | zero_fill_bio(bio); | |
202 | ||
c897feb3 HM |
203 | /* |
204 | * The bio destructor in bio_put() may use the io object. | |
205 | */ | |
206 | io = bio->bi_private; | |
207 | region = bio_get_region(bio); | |
208 | ||
f00b16ad | 209 | bio->bi_max_vecs++; |
1da177e4 LT |
210 | bio_put(bio); |
211 | ||
c897feb3 HM |
212 | dec_count(io, region, error); |
213 | ||
1da177e4 LT |
214 | return 0; |
215 | } | |
216 | ||
217 | /*----------------------------------------------------------------- | |
218 | * These little objects provide an abstraction for getting a new | |
219 | * destination page for io. | |
220 | *---------------------------------------------------------------*/ | |
struct dpages {
	/* report the current page, usable length and offset within it */
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	/* step to the next page */
	void (*next_page)(struct dpages *dp);

	/* private cursor state, interpreted by the two callbacks above */
	unsigned context_u;
	void *context_ptr;
};
229 | ||
230 | /* | |
231 | * Functions for getting the pages from a list. | |
232 | */ | |
233 | static void list_get_page(struct dpages *dp, | |
234 | struct page **p, unsigned long *len, unsigned *offset) | |
235 | { | |
236 | unsigned o = dp->context_u; | |
237 | struct page_list *pl = (struct page_list *) dp->context_ptr; | |
238 | ||
239 | *p = pl->page; | |
240 | *len = PAGE_SIZE - o; | |
241 | *offset = o; | |
242 | } | |
243 | ||
244 | static void list_next_page(struct dpages *dp) | |
245 | { | |
246 | struct page_list *pl = (struct page_list *) dp->context_ptr; | |
247 | dp->context_ptr = pl->next; | |
248 | dp->context_u = 0; | |
249 | } | |
250 | ||
251 | static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset) | |
252 | { | |
253 | dp->get_page = list_get_page; | |
254 | dp->next_page = list_next_page; | |
255 | dp->context_u = offset; | |
256 | dp->context_ptr = pl; | |
257 | } | |
258 | ||
259 | /* | |
260 | * Functions for getting the pages from a bvec. | |
261 | */ | |
262 | static void bvec_get_page(struct dpages *dp, | |
263 | struct page **p, unsigned long *len, unsigned *offset) | |
264 | { | |
265 | struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; | |
266 | *p = bvec->bv_page; | |
267 | *len = bvec->bv_len; | |
268 | *offset = bvec->bv_offset; | |
269 | } | |
270 | ||
271 | static void bvec_next_page(struct dpages *dp) | |
272 | { | |
273 | struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; | |
274 | dp->context_ptr = bvec + 1; | |
275 | } | |
276 | ||
277 | static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec) | |
278 | { | |
279 | dp->get_page = bvec_get_page; | |
280 | dp->next_page = bvec_next_page; | |
281 | dp->context_ptr = bvec; | |
282 | } | |
283 | ||
c8b03afe HM |
284 | /* |
285 | * Functions for getting the pages from a VMA. | |
286 | */ | |
1da177e4 LT |
287 | static void vm_get_page(struct dpages *dp, |
288 | struct page **p, unsigned long *len, unsigned *offset) | |
289 | { | |
290 | *p = vmalloc_to_page(dp->context_ptr); | |
291 | *offset = dp->context_u; | |
292 | *len = PAGE_SIZE - dp->context_u; | |
293 | } | |
294 | ||
295 | static void vm_next_page(struct dpages *dp) | |
296 | { | |
297 | dp->context_ptr += PAGE_SIZE - dp->context_u; | |
298 | dp->context_u = 0; | |
299 | } | |
300 | ||
301 | static void vm_dp_init(struct dpages *dp, void *data) | |
302 | { | |
303 | dp->get_page = vm_get_page; | |
304 | dp->next_page = vm_next_page; | |
305 | dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); | |
306 | dp->context_ptr = data; | |
307 | } | |
308 | ||
3676347a PO |
309 | static void dm_bio_destructor(struct bio *bio) |
310 | { | |
891ce207 HM |
311 | struct io *io = bio->bi_private; |
312 | ||
313 | bio_free(bio, bios(io->client)); | |
3676347a PO |
314 | } |
315 | ||
c8b03afe HM |
316 | /* |
317 | * Functions for getting the pages from kernel memory. | |
318 | */ | |
319 | static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len, | |
320 | unsigned *offset) | |
321 | { | |
322 | *p = virt_to_page(dp->context_ptr); | |
323 | *offset = dp->context_u; | |
324 | *len = PAGE_SIZE - dp->context_u; | |
325 | } | |
326 | ||
327 | static void km_next_page(struct dpages *dp) | |
328 | { | |
329 | dp->context_ptr += PAGE_SIZE - dp->context_u; | |
330 | dp->context_u = 0; | |
331 | } | |
332 | ||
333 | static void km_dp_init(struct dpages *dp, void *data) | |
334 | { | |
335 | dp->get_page = km_get_page; | |
336 | dp->next_page = km_next_page; | |
337 | dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); | |
338 | dp->context_ptr = data; | |
339 | } | |
340 | ||
1da177e4 LT |
341 | /*----------------------------------------------------------------- |
342 | * IO routines that accept a list of pages. | |
343 | *---------------------------------------------------------------*/ | |
344 | static void do_region(int rw, unsigned int region, struct io_region *where, | |
345 | struct dpages *dp, struct io *io) | |
346 | { | |
347 | struct bio *bio; | |
348 | struct page *page; | |
349 | unsigned long len; | |
350 | unsigned offset; | |
351 | unsigned num_bvecs; | |
352 | sector_t remaining = where->count; | |
353 | ||
354 | while (remaining) { | |
355 | /* | |
f00b16ad HM |
356 | * Allocate a suitably sized-bio: we add an extra |
357 | * bvec for bio_get/set_region() and decrement bi_max_vecs | |
358 | * to hide it from bio_add_page(). | |
1da177e4 | 359 | */ |
f00b16ad | 360 | num_bvecs = (remaining / (PAGE_SIZE >> SECTOR_SHIFT)) + 2; |
891ce207 | 361 | bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, bios(io->client)); |
1da177e4 LT |
362 | bio->bi_sector = where->sector + (where->count - remaining); |
363 | bio->bi_bdev = where->bdev; | |
364 | bio->bi_end_io = endio; | |
365 | bio->bi_private = io; | |
3676347a | 366 | bio->bi_destructor = dm_bio_destructor; |
f00b16ad | 367 | bio->bi_max_vecs--; |
1da177e4 LT |
368 | bio_set_region(bio, region); |
369 | ||
370 | /* | |
371 | * Try and add as many pages as possible. | |
372 | */ | |
373 | while (remaining) { | |
374 | dp->get_page(dp, &page, &len, &offset); | |
375 | len = min(len, to_bytes(remaining)); | |
376 | if (!bio_add_page(bio, page, len, offset)) | |
377 | break; | |
378 | ||
379 | offset = 0; | |
380 | remaining -= to_sector(len); | |
381 | dp->next_page(dp); | |
382 | } | |
383 | ||
384 | atomic_inc(&io->count); | |
385 | submit_bio(rw, bio); | |
386 | } | |
387 | } | |
388 | ||
389 | static void dispatch_io(int rw, unsigned int num_regions, | |
390 | struct io_region *where, struct dpages *dp, | |
391 | struct io *io, int sync) | |
392 | { | |
393 | int i; | |
394 | struct dpages old_pages = *dp; | |
395 | ||
396 | if (sync) | |
397 | rw |= (1 << BIO_RW_SYNC); | |
398 | ||
399 | /* | |
400 | * For multiple regions we need to be careful to rewind | |
401 | * the dp object for each call to do_region. | |
402 | */ | |
403 | for (i = 0; i < num_regions; i++) { | |
404 | *dp = old_pages; | |
405 | if (where[i].count) | |
406 | do_region(rw, i, where + i, dp, io); | |
407 | } | |
408 | ||
409 | /* | |
f00b16ad | 410 | * Drop the extra reference that we were holding to avoid |
1da177e4 LT |
411 | * the io being completed too early. |
412 | */ | |
413 | dec_count(io, 0, 0); | |
414 | } | |
415 | ||
891ce207 HM |
416 | static int sync_io(struct dm_io_client *client, unsigned int num_regions, |
417 | struct io_region *where, int rw, struct dpages *dp, | |
418 | unsigned long *error_bits) | |
1da177e4 LT |
419 | { |
420 | struct io io; | |
421 | ||
422 | if (num_regions > 1 && rw != WRITE) { | |
423 | WARN_ON(1); | |
424 | return -EIO; | |
425 | } | |
426 | ||
427 | io.error = 0; | |
428 | atomic_set(&io.count, 1); /* see dispatch_io() */ | |
429 | io.sleeper = current; | |
891ce207 | 430 | io.client = client; |
1da177e4 LT |
431 | |
432 | dispatch_io(rw, num_regions, where, dp, &io, 1); | |
433 | ||
434 | while (1) { | |
435 | set_current_state(TASK_UNINTERRUPTIBLE); | |
436 | ||
437 | if (!atomic_read(&io.count) || signal_pending(current)) | |
438 | break; | |
439 | ||
440 | io_schedule(); | |
441 | } | |
442 | set_current_state(TASK_RUNNING); | |
443 | ||
444 | if (atomic_read(&io.count)) | |
445 | return -EINTR; | |
446 | ||
891ce207 HM |
447 | if (error_bits) |
448 | *error_bits = io.error; | |
449 | ||
1da177e4 LT |
450 | return io.error ? -EIO : 0; |
451 | } | |
452 | ||
891ce207 HM |
453 | static int async_io(struct dm_io_client *client, unsigned int num_regions, |
454 | struct io_region *where, int rw, struct dpages *dp, | |
455 | io_notify_fn fn, void *context) | |
1da177e4 LT |
456 | { |
457 | struct io *io; | |
458 | ||
459 | if (num_regions > 1 && rw != WRITE) { | |
460 | WARN_ON(1); | |
461 | fn(1, context); | |
462 | return -EIO; | |
463 | } | |
464 | ||
891ce207 | 465 | io = mempool_alloc(io_pool(client), GFP_NOIO); |
1da177e4 LT |
466 | io->error = 0; |
467 | atomic_set(&io->count, 1); /* see dispatch_io() */ | |
468 | io->sleeper = NULL; | |
891ce207 | 469 | io->client = client; |
1da177e4 LT |
470 | io->callback = fn; |
471 | io->context = context; | |
472 | ||
473 | dispatch_io(rw, num_regions, where, dp, io, 0); | |
474 | return 0; | |
475 | } | |
476 | ||
477 | int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw, | |
478 | struct page_list *pl, unsigned int offset, | |
479 | unsigned long *error_bits) | |
480 | { | |
481 | struct dpages dp; | |
482 | list_dp_init(&dp, pl, offset); | |
891ce207 | 483 | return sync_io(NULL, num_regions, where, rw, &dp, error_bits); |
1da177e4 LT |
484 | } |
485 | ||
486 | int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw, | |
487 | struct bio_vec *bvec, unsigned long *error_bits) | |
488 | { | |
489 | struct dpages dp; | |
490 | bvec_dp_init(&dp, bvec); | |
891ce207 | 491 | return sync_io(NULL, num_regions, where, rw, &dp, error_bits); |
1da177e4 LT |
492 | } |
493 | ||
494 | int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw, | |
495 | void *data, unsigned long *error_bits) | |
496 | { | |
497 | struct dpages dp; | |
498 | vm_dp_init(&dp, data); | |
891ce207 | 499 | return sync_io(NULL, num_regions, where, rw, &dp, error_bits); |
1da177e4 LT |
500 | } |
501 | ||
502 | int dm_io_async(unsigned int num_regions, struct io_region *where, int rw, | |
503 | struct page_list *pl, unsigned int offset, | |
504 | io_notify_fn fn, void *context) | |
505 | { | |
506 | struct dpages dp; | |
507 | list_dp_init(&dp, pl, offset); | |
891ce207 | 508 | return async_io(NULL, num_regions, where, rw, &dp, fn, context); |
1da177e4 LT |
509 | } |
510 | ||
511 | int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw, | |
512 | struct bio_vec *bvec, io_notify_fn fn, void *context) | |
513 | { | |
514 | struct dpages dp; | |
515 | bvec_dp_init(&dp, bvec); | |
891ce207 | 516 | return async_io(NULL, num_regions, where, rw, &dp, fn, context); |
1da177e4 LT |
517 | } |
518 | ||
519 | int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw, | |
520 | void *data, io_notify_fn fn, void *context) | |
521 | { | |
522 | struct dpages dp; | |
523 | vm_dp_init(&dp, data); | |
891ce207 | 524 | return async_io(NULL, num_regions, where, rw, &dp, fn, context); |
1da177e4 LT |
525 | } |
526 | ||
c8b03afe HM |
527 | static int dp_init(struct dm_io_request *io_req, struct dpages *dp) |
528 | { | |
529 | /* Set up dpages based on memory type */ | |
530 | switch (io_req->mem.type) { | |
531 | case DM_IO_PAGE_LIST: | |
532 | list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); | |
533 | break; | |
534 | ||
535 | case DM_IO_BVEC: | |
536 | bvec_dp_init(dp, io_req->mem.ptr.bvec); | |
537 | break; | |
538 | ||
539 | case DM_IO_VMA: | |
540 | vm_dp_init(dp, io_req->mem.ptr.vma); | |
541 | break; | |
542 | ||
543 | case DM_IO_KMEM: | |
544 | km_dp_init(dp, io_req->mem.ptr.addr); | |
545 | break; | |
546 | ||
547 | default: | |
548 | return -EINVAL; | |
549 | } | |
550 | ||
551 | return 0; | |
552 | } | |
553 | ||
554 | /* | |
555 | * New collapsed (a)synchronous interface | |
556 | */ | |
557 | int dm_io(struct dm_io_request *io_req, unsigned num_regions, | |
558 | struct io_region *where, unsigned long *sync_error_bits) | |
559 | { | |
560 | int r; | |
561 | struct dpages dp; | |
562 | ||
563 | r = dp_init(io_req, &dp); | |
564 | if (r) | |
565 | return r; | |
566 | ||
567 | if (!io_req->notify.fn) | |
568 | return sync_io(io_req->client, num_regions, where, | |
569 | io_req->bi_rw, &dp, sync_error_bits); | |
570 | ||
571 | return async_io(io_req->client, num_regions, where, io_req->bi_rw, | |
572 | &dp, io_req->notify.fn, io_req->notify.context); | |
573 | } | |
574 | EXPORT_SYMBOL(dm_io); | |
575 | ||
1da177e4 LT |
576 | EXPORT_SYMBOL(dm_io_get); |
577 | EXPORT_SYMBOL(dm_io_put); | |
578 | EXPORT_SYMBOL(dm_io_sync); | |
579 | EXPORT_SYMBOL(dm_io_async); | |
580 | EXPORT_SYMBOL(dm_io_sync_bvec); | |
581 | EXPORT_SYMBOL(dm_io_async_bvec); | |
582 | EXPORT_SYMBOL(dm_io_sync_vm); | |
583 | EXPORT_SYMBOL(dm_io_async_vm); |