#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
+#include <linux/smp_lock.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
+#include <linux/delay.h>
#include <trace/events/block.h>
unsigned long flags;
struct request_queue *queue;
+ unsigned type;
+ /* Protect type against concurrent access. */
+ struct mutex type_lock;
+
struct gendisk *disk;
char name[16];
{
struct mapped_device *md;
+ lock_kernel();
spin_lock(&_minor_lock);
md = bdev->bd_disk->private_data;
out:
spin_unlock(&_minor_lock);
+ unlock_kernel();
return md ? 0 : -ENXIO;
}
static int dm_blk_close(struct gendisk *disk, fmode_t mode)
{
struct mapped_device *md = disk->private_data;
+
+ lock_kernel();
atomic_dec(&md->open_count);
dm_put(md);
+ unlock_kernel();
+
return 0;
}
*/
spin_lock_irqsave(&md->deferred_lock, flags);
if (__noflush_suspending(md)) {
- if (!bio_rw_flagged(io->bio, BIO_RW_BARRIER))
+ if (!(io->bio->bi_rw & REQ_HARDBARRIER))
bio_list_add_head(&md->deferred,
io->bio);
} else
io_error = io->error;
bio = io->bio;
- if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
+ if (bio->bi_rw & REQ_HARDBARRIER) {
/*
* There can be just one barrier request so we use
* a per-device variable for error reporting.
* Note that you can't touch the bio after end_io_acct
+ *
+ * We ignore -EOPNOTSUPP for empty flush reported by
+ * underlying devices. We assume that if the device
+ * doesn't support empty barriers, it doesn't need
+ * cache flushing commands.
*/
- if (!md->barrier_error && io_error != -EOPNOTSUPP)
+ if (!md->barrier_error &&
+ !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
md->barrier_error = io_error;
end_io_acct(io);
free_io(md, io);
{
int rw = rq_data_dir(clone);
int run_queue = 1;
- bool is_barrier = blk_barrier_rq(clone);
+ bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER;
struct dm_rq_target_io *tio = clone->end_io_data;
struct mapped_device *md = tio->md;
struct request *rq = tio->orig;
- if (blk_pc_request(rq) && !is_barrier) {
+ if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) {
rq->errors = clone->errors;
rq->resid_len = clone->resid_len;
struct request_queue *q = rq->q;
unsigned long flags;
- if (unlikely(blk_barrier_rq(clone))) {
+ if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
/*
* Barrier clones share an original request.
* Leave it to dm_end_request(), which handles this special
struct dm_rq_target_io *tio = clone->end_io_data;
struct request *rq = tio->orig;
- if (unlikely(blk_barrier_rq(clone))) {
+ if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
/*
* Barrier clones share an original request. So can't use
* softirq_done with the original.
struct dm_rq_target_io *tio = clone->end_io_data;
struct request *rq = tio->orig;
- if (unlikely(blk_barrier_rq(clone))) {
+ if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
/*
* Barrier clones share an original request.
* Leave it to dm_end_request(), which handles this special
clone->bi_sector = sector;
clone->bi_bdev = bio->bi_bdev;
- clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
+ clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
clone->bi_vcnt = 1;
clone->bi_size = to_bytes(len);
clone->bi_io_vec->bv_offset = offset;
clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
__bio_clone(clone, bio);
- clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
+ clone->bi_rw &= ~REQ_HARDBARRIER;
clone->bi_destructor = dm_bio_destructor;
clone->bi_sector = sector;
clone->bi_idx = idx;
ci.map = dm_get_live_table(md);
if (unlikely(!ci.map)) {
- if (!bio_rw_flagged(bio, BIO_RW_BARRIER))
+ if (!(bio->bi_rw & REQ_HARDBARRIER))
bio_io_error(bio);
else
if (!md->barrier_error)
* we have to queue this io for later.
*/
if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
- unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+ unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
up_read(&md->io_lock);
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
return _dm_request(q, bio);
}
-/*
- * Mark this request as flush request, so that dm_request_fn() can
- * recognize.
- */
-static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq)
-{
- rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
- rq->cmd[0] = REQ_LB_OP_FLUSH;
-}
-
static bool dm_rq_is_flush_request(struct request *rq)
{
- if (rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
- rq->cmd[0] == REQ_LB_OP_FLUSH)
+ if (rq->cmd_flags & REQ_FLUSH)
return true;
else
return false;
if (r < 0)
goto bad_minor;
+ md->type = DM_TYPE_NONE;
init_rwsem(&md->io_lock);
mutex_init(&md->suspend_lock);
+ mutex_init(&md->type_lock);
spin_lock_init(&md->deferred_lock);
spin_lock_init(&md->barrier_error_lock);
rwlock_init(&md->map_lock);
blk_queue_softirq_done(md->queue, dm_softirq_done);
blk_queue_prep_rq(md->queue, dm_prep_fn);
blk_queue_lld_busy(md->queue, dm_lld_busy);
- blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
- dm_rq_prepare_flush);
+ blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
md->disk = alloc_disk(1);
if (!md->disk)
return 0;
}
+/*
+ * Functions to manage md->type.
+ * All are required to hold md->type_lock.
+ */
+void dm_lock_md_type(struct mapped_device *md)
+{
+ mutex_lock(&md->type_lock);
+}
+
+void dm_unlock_md_type(struct mapped_device *md)
+{
+ mutex_unlock(&md->type_lock);
+}
+
+void dm_set_md_type(struct mapped_device *md, unsigned type)
+{
+ md->type = type;
+}
+
+unsigned dm_get_md_type(struct mapped_device *md)
+{
+ return md->type;
+}
+
static struct mapped_device *dm_find_md(dev_t dev)
{
struct mapped_device *md;
md = idr_find(&_minor_idr, minor);
if (md && (md == MINOR_ALLOCED ||
(MINOR(disk_devt(dm_disk(md))) != minor) ||
+ dm_deleting_md(md) ||
test_bit(DMF_FREEING, &md->flags))) {
md = NULL;
goto out;
void dm_get(struct mapped_device *md)
{
atomic_inc(&md->holders);
+ BUG_ON(test_bit(DMF_FREEING, &md->flags));
}
const char *dm_device_name(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_device_name);
-void dm_put(struct mapped_device *md)
+static void __dm_destroy(struct mapped_device *md, bool wait)
{
struct dm_table *map;
- BUG_ON(test_bit(DMF_FREEING, &md->flags));
+ might_sleep();
- if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
- map = dm_get_live_table(md);
- idr_replace(&_minor_idr, MINOR_ALLOCED,
- MINOR(disk_devt(dm_disk(md))));
- set_bit(DMF_FREEING, &md->flags);
- spin_unlock(&_minor_lock);
- if (!dm_suspended_md(md)) {
- dm_table_presuspend_targets(map);
- dm_table_postsuspend_targets(map);
- }
- dm_sysfs_exit(md);
- dm_table_put(map);
- dm_table_destroy(__unbind(md));
- free_dev(md);
+ spin_lock(&_minor_lock);
+ map = dm_get_live_table(md);
+ idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
+ set_bit(DMF_FREEING, &md->flags);
+ spin_unlock(&_minor_lock);
+
+ if (!dm_suspended_md(md)) {
+ dm_table_presuspend_targets(map);
+ dm_table_postsuspend_targets(map);
}
+
+ /*
+ * Rare, but there may be I/O requests still going to complete,
+ * for example. Wait for all references to disappear.
+ * No one should increment the reference count of the mapped_device,
+ * after the mapped_device state becomes DMF_FREEING.
+ */
+ if (wait)
+ while (atomic_read(&md->holders))
+ msleep(1);
+ else if (atomic_read(&md->holders))
+ DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
+ dm_device_name(md), atomic_read(&md->holders));
+
+ dm_sysfs_exit(md);
+ dm_table_put(map);
+ dm_table_destroy(__unbind(md));
+ free_dev(md);
+}
+
+void dm_destroy(struct mapped_device *md)
+{
+ __dm_destroy(md, true);
+}
+
+void dm_destroy_immediate(struct mapped_device *md)
+{
+ __dm_destroy(md, false);
+}
+
+void dm_put(struct mapped_device *md)
+{
+ atomic_dec(&md->holders);
}
EXPORT_SYMBOL_GPL(dm_put);
if (!bio_empty_barrier(bio)) {
__split_and_process_bio(md, bio);
- dm_flush(md);
+ /*
+ * If the request isn't supported, don't waste time with
+ * the second flush.
+ */
+ if (md->barrier_error != -EOPNOTSUPP)
+ dm_flush(md);
}
if (md->barrier_error != DM_ENDIO_REQUEUE)
if (dm_request_based(md))
generic_make_request(c);
else {
- if (bio_rw_flagged(c, BIO_RW_BARRIER))
+ if (c->bi_rw & REQ_HARDBARRIER)
process_barrier(md, c);
else
__split_and_process_bio(md, c);
goto out;
}
- /* cannot change the device type, once a table is bound */
- if (md->map &&
- (dm_table_get_type(md->map) != dm_table_get_type(table))) {
- DMWARN("can't change the device type after a table is bound");
- goto out;
- }
-
map = __bind(md, table, &limits);
out: