[net-next-2.6.git] block/blk-flush.c
block: drop barrier ordering by queue draining
/*
 * Functions related to barrier IO handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>

#include "blk.h"

static struct request *queue_next_ordseq(struct request_queue *q);

/*
 * Cache flushing for ordered write handling.
 *
 * blk_ordered_cur_seq() returns the lowest QUEUE_ORDSEQ_* stage that has
 * not completed yet, or 0 if no ordered sequence is in progress.
 */
unsigned blk_ordered_cur_seq(struct request_queue *q)
{
        if (!q->ordseq)
                return 0;
        return 1 << ffz(q->ordseq);
}
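
/*
 * Illustrative note (not part of the original file): assuming the
 * QUEUE_ORDSEQ_* flags are consecutive single bits in sequence order,
 * e.g. STARTED = 1 << 0, PREFLUSH = 1 << 1, BAR = 1 << 2,
 * POSTFLUSH = 1 << 3 and DONE = 1 << 4, ffz() (find first zero bit)
 * picks out the lowest stage that has not completed yet:
 *
 *   ordseq = STARTED                          -> ffz = 1 -> cur = PREFLUSH
 *   ordseq = STARTED|PREFLUSH                 -> ffz = 2 -> cur = BAR
 *   ordseq = STARTED|PREFLUSH|BAR             -> ffz = 3 -> cur = POSTFLUSH
 *   ordseq = STARTED|PREFLUSH|BAR|POSTFLUSH   -> ffz = 4 -> cur = DONE
 *
 * Stages that were skipped up front (see blk_do_ordered() below) are
 * simply pre-marked in ->ordseq, so ffz() naturally steps over them.
 */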

static struct request *blk_ordered_complete_seq(struct request_queue *q,
                                                unsigned seq, int error)
{
        struct request *next_rq = NULL;

        if (error && !q->orderr)
                q->orderr = error;

        BUG_ON(q->ordseq & seq);
        q->ordseq |= seq;

        if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) {
                /* not complete yet, queue the next ordered sequence */
                next_rq = queue_next_ordseq(q);
        } else {
                /* complete this barrier request */
                __blk_end_request_all(q->orig_bar_rq, q->orderr);
                q->orig_bar_rq = NULL;
                q->ordseq = 0;

                /* dispatch the next barrier if there's one */
                if (!list_empty(&q->pending_barriers)) {
                        next_rq = list_entry_rq(q->pending_barriers.next);
                        list_move(&next_rq->queuelist, &q->queue_head);
                }
        }
        return next_rq;
}

/* per-stage completion callbacks: mark the stage done and advance the sequence */
static void pre_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

static void bar_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

/* turn @rq into a cache flush request and insert it at the front of the queue */
static void queue_flush(struct request_queue *q, struct request *rq,
                        rq_end_io_fn *end_io)
{
        blk_rq_init(q, rq);
        rq->cmd_type = REQ_TYPE_FS;
        rq->cmd_flags = REQ_FLUSH;
        rq->rq_disk = q->orig_bar_rq->rq_disk;
        rq->end_io = end_io;

        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

/* issue the request that drives the current stage of the ordered sequence */
static struct request *queue_next_ordseq(struct request_queue *q)
{
        struct request *rq = &q->bar_rq;

        switch (blk_ordered_cur_seq(q)) {
        case QUEUE_ORDSEQ_PREFLUSH:
                queue_flush(q, rq, pre_flush_end_io);
                break;

        case QUEUE_ORDSEQ_BAR:
                /* initialize proxy request and queue it */
                blk_rq_init(q, rq);
                init_request_from_bio(rq, q->orig_bar_rq->bio);
                rq->cmd_flags &= ~REQ_HARDBARRIER;
                if (q->ordered & QUEUE_ORDERED_DO_FUA)
                        rq->cmd_flags |= REQ_FUA;
                rq->end_io = bar_end_io;

                elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
                break;

        case QUEUE_ORDSEQ_POSTFLUSH:
                queue_flush(q, rq, post_flush_end_io);
                break;

        default:
                BUG();
        }
        return rq;
}

/*
 * Handle a barrier request taken off the dispatch queue: start a new
 * ordered sequence for it, queue it behind a sequence that is already in
 * progress, or fail it if the queue does not support ordering.  Returns
 * the request to dispatch next, or NULL if there is none yet.
 */
struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
{
        unsigned skip = 0;

        if (!(rq->cmd_flags & REQ_HARDBARRIER))
                return rq;

        if (q->ordseq) {
                /*
                 * A barrier sequence is already in progress and barriers
                 * can't be processed in parallel.  Queue for later
                 * processing.
                 */
                list_move_tail(&rq->queuelist, &q->pending_barriers);
                return NULL;
        }

        if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) {
                /*
                 * Queue ordering not supported.  Terminate
                 * with prejudice.
                 */
                blk_dequeue_request(rq);
                __blk_end_request_all(rq, -EOPNOTSUPP);
                return NULL;
        }

        /*
         * Start a new ordered sequence
         */
        q->orderr = 0;
        q->ordered = q->next_ordered;
        q->ordseq |= QUEUE_ORDSEQ_STARTED;

        /*
         * For an empty barrier, there's no actual BAR request, which
         * in turn makes POSTFLUSH unnecessary.  Mask them off.
         */
        if (!blk_rq_sectors(rq))
                q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
                                QUEUE_ORDERED_DO_POSTFLUSH);

        /* stash away the original request */
        blk_dequeue_request(rq);
        q->orig_bar_rq = rq;

        if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
                skip |= QUEUE_ORDSEQ_PREFLUSH;

        if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
                skip |= QUEUE_ORDSEQ_BAR;

        if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
                skip |= QUEUE_ORDSEQ_POSTFLUSH;

        /* complete skipped sequences and return the first sequence */
        return blk_ordered_complete_seq(q, skip, 0);
}
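
/*
 * Illustrative walk-through (not part of the original file): for an
 * empty barrier (blk_rq_sectors() == 0) on a queue whose ordered mode
 * requests a pre-flush, DO_BAR and DO_POSTFLUSH are masked off above,
 * so skip ends up as QUEUE_ORDSEQ_BAR | QUEUE_ORDSEQ_POSTFLUSH.  The
 * final blk_ordered_complete_seq(q, skip, 0) call marks those stages
 * done immediately, blk_ordered_cur_seq() then selects PREFLUSH, and
 * queue_next_ordseq() issues the flush request.  When its completion
 * marks PREFLUSH done, the sequence reaches QUEUE_ORDSEQ_DONE and the
 * original (empty) barrier request is ended.
 */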

/* completion handler for the empty barrier bio issued by blkdev_issue_flush() */
static void bio_end_empty_barrier(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }
        if (bio->bi_private)
                complete(bio->bi_private);
        bio_put(bio);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:       blockdev to issue flush for
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @error_sector:       error sector (output, may be NULL)
 * @flags:      BLKDEV_IFL_* flags to control behaviour
 *
 * Description:
 *    Issue a flush for the block device in question.  The caller may
 *    optionally supply storage for the error offset in case the flush
 *    fails.  If the WAIT flag is not passed, the caller only learns that
 *    the request was queued internally for later handling, not whether
 *    it completed.
 */
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
                sector_t *error_sector, unsigned long flags)
{
        DECLARE_COMPLETION_ONSTACK(wait);
        struct request_queue *q;
        struct bio *bio;
        int ret = 0;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        /*
         * Some block devices may not have their queue correctly set up
         * at this point (e.g. a loop device without a backing file), and
         * issuing a flush would panic.  Ensure there is a make_request
         * function before issuing the barrier.
         */
        if (!q->make_request_fn)
                return -ENXIO;

        bio = bio_alloc(gfp_mask, 0);
        bio->bi_end_io = bio_end_empty_barrier;
        bio->bi_bdev = bdev;
        if (test_bit(BLKDEV_WAIT, &flags))
                bio->bi_private = &wait;

        bio_get(bio);
        submit_bio(WRITE_BARRIER, bio);
        if (test_bit(BLKDEV_WAIT, &flags)) {
                wait_for_completion(&wait);
                /*
                 * The driver must store the error location in ->bi_sector, if
                 * it supports it. For non-stacked drivers, this should be
                 * copied from blk_rq_pos(rq).
                 */
                if (error_sector)
                        *error_sector = bio->bi_sector;
        }

        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
        else if (!bio_flagged(bio, BIO_UPTODATE))
                ret = -EIO;

        bio_put(bio);
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
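
/*
 * Illustrative caller sketch (not part of the original file): how a
 * filesystem or driver might flush a device's volatile write cache and
 * wait for the result.  This assumes the BLKDEV_IFL_WAIT flag from this
 * kernel's <linux/blkdev.h>; the helper name is made up for the example.
 */
static int example_flush_device(struct block_device *bdev)
{
        sector_t error_sector = 0;
        int err;

        /* issue the empty-barrier flush and block until it completes */
        err = blkdev_issue_flush(bdev, GFP_KERNEL, &error_sector,
                                 BLKDEV_IFL_WAIT);
        if (err == -EOPNOTSUPP)
                err = 0;        /* device has no cache flush support; not fatal */
        return err;
}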