dm ioctl: make bio or request based device type immutable

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d21e1284604f6a2e391ce37c8a6c85a1b06c91ca..345e94c10c659f5bd8edbf92b5188f8121d1b4fd 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
 #include <linux/blkpg.h>
 #include <linux/bio.h>
 #include <linux/buffer_head.h>
+#include <linux/smp_lock.h>
 #include <linux/mempool.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/hdreg.h>
+#include <linux/delay.h>
 
 #include <trace/events/block.h>
 
@@ -123,6 +125,10 @@ struct mapped_device {
        unsigned long flags;
 
        struct request_queue *queue;
+       unsigned type;
+       /* Protect type against concurrent access. */
+       struct mutex type_lock;
+
        struct gendisk *disk;
        char name[16];
 
@@ -338,6 +344,7 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
 {
        struct mapped_device *md;
 
+       lock_kernel();
        spin_lock(&_minor_lock);
 
        md = bdev->bd_disk->private_data;
@@ -355,6 +362,7 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
 
 out:
        spin_unlock(&_minor_lock);
+       unlock_kernel();
 
        return md ? 0 : -ENXIO;
 }
@@ -362,8 +370,12 @@ out:
 static int dm_blk_close(struct gendisk *disk, fmode_t mode)
 {
        struct mapped_device *md = disk->private_data;
+
+       lock_kernel();
        atomic_dec(&md->open_count);
        dm_put(md);
+       unlock_kernel();
+
        return 0;
 }
 
@@ -614,7 +626,7 @@ static void dec_pending(struct dm_io *io, int error)
                         */
                        spin_lock_irqsave(&md->deferred_lock, flags);
                        if (__noflush_suspending(md)) {
-                               if (!bio_rw_flagged(io->bio, BIO_RW_BARRIER))
+                               if (!(io->bio->bi_rw & REQ_HARDBARRIER))
                                        bio_list_add_head(&md->deferred,
                                                          io->bio);
                        } else
@@ -626,13 +638,19 @@ static void dec_pending(struct dm_io *io, int error)
                io_error = io->error;
                bio = io->bio;
 
-               if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
+               if (bio->bi_rw & REQ_HARDBARRIER) {
                        /*
                         * There can be just one barrier request so we use
                         * a per-device variable for error reporting.
                         * Note that you can't touch the bio after end_io_acct
+                        *
+                        * We ignore -EOPNOTSUPP for empty flush reported by
+                        * underlying devices. We assume that if the device
+                        * doesn't support empty barriers, it doesn't need
+                        * cache flushing commands.
                         */
-                       if (!md->barrier_error && io_error != -EOPNOTSUPP)
+                       if (!md->barrier_error &&
+                           !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
                                md->barrier_error = io_error;
                        end_io_acct(io);
                        free_io(md, io);
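
The added check is a double negative, so for review it helps to name the exception. The helper below is only an illustrative restatement, not part of this patch; it uses the same bio_empty_barrier() test that the hunk itself relies on.

/*
 * Illustrative only (not in the patch): an empty barrier that fails
 * with -EOPNOTSUPP is ignored, on the assumption that a device which
 * rejects empty barriers has no write cache to flush.  Anything else
 * is recorded once per device in md->barrier_error.
 */
static bool ignore_barrier_error(struct bio *bio, int io_error)
{
	return bio_empty_barrier(bio) && io_error == -EOPNOTSUPP;
}

With that name, the new condition reads: record io_error only if no barrier error has been recorded yet and ignore_barrier_error() is false.
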
@@ -792,12 +810,12 @@ static void dm_end_request(struct request *clone, int error)
 {
        int rw = rq_data_dir(clone);
        int run_queue = 1;
-       bool is_barrier = blk_barrier_rq(clone);
+       bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER;
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;
 
-       if (blk_pc_request(rq) && !is_barrier) {
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) {
                rq->errors = clone->errors;
                rq->resid_len = clone->resid_len;
 
@@ -844,7 +862,7 @@ void dm_requeue_unmapped_request(struct request *clone)
        struct request_queue *q = rq->q;
        unsigned long flags;
 
-       if (unlikely(blk_barrier_rq(clone))) {
+       if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
                /*
                 * Barrier clones share an original request.
                 * Leave it to dm_end_request(), which handles this special
@@ -943,7 +961,7 @@ static void dm_complete_request(struct request *clone, int error)
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct request *rq = tio->orig;
 
-       if (unlikely(blk_barrier_rq(clone))) {
+       if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
                /*
                 * Barrier clones share an original request.  So can't use
                 * softirq_done with the original.
@@ -972,7 +990,7 @@ void dm_kill_unmapped_request(struct request *clone, int error)
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct request *rq = tio->orig;
 
-       if (unlikely(blk_barrier_rq(clone))) {
+       if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
                /*
                 * Barrier clones share an original request.
                 * Leave it to dm_end_request(), which handles this special
@@ -1106,7 +1124,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
 
        clone->bi_sector = sector;
        clone->bi_bdev = bio->bi_bdev;
-       clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
+       clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
        clone->bi_vcnt = 1;
        clone->bi_size = to_bytes(len);
        clone->bi_io_vec->bv_offset = offset;
@@ -1133,7 +1151,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
 
        clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
        __bio_clone(clone, bio);
-       clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
+       clone->bi_rw &= ~REQ_HARDBARRIER;
        clone->bi_destructor = dm_bio_destructor;
        clone->bi_sector = sector;
        clone->bi_idx = idx;
@@ -1301,7 +1319,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 
        ci.map = dm_get_live_table(md);
        if (unlikely(!ci.map)) {
-               if (!bio_rw_flagged(bio, BIO_RW_BARRIER))
+               if (!(bio->bi_rw & REQ_HARDBARRIER))
                        bio_io_error(bio);
                else
                        if (!md->barrier_error)
@@ -1414,7 +1432,7 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
         * we have to queue this io for later.
         */
        if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
-           unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+           unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
                up_read(&md->io_lock);
 
                if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1455,20 +1473,9 @@ static int dm_request(struct request_queue *q, struct bio *bio)
        return _dm_request(q, bio);
 }
 
-/*
- * Mark this request as flush request, so that dm_request_fn() can
- * recognize.
- */
-static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq)
-{
-       rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
-       rq->cmd[0] = REQ_LB_OP_FLUSH;
-}
-
 static bool dm_rq_is_flush_request(struct request *rq)
 {
-       if (rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-           rq->cmd[0] == REQ_LB_OP_FLUSH)
+       if (rq->cmd_flags & REQ_FLUSH)
                return true;
        else
                return false;
@@ -1874,8 +1881,10 @@ static struct mapped_device *alloc_dev(int minor)
        if (r < 0)
                goto bad_minor;
 
+       md->type = DM_TYPE_NONE;
        init_rwsem(&md->io_lock);
        mutex_init(&md->suspend_lock);
+       mutex_init(&md->type_lock);
        spin_lock_init(&md->deferred_lock);
        spin_lock_init(&md->barrier_error_lock);
        rwlock_init(&md->map_lock);
@@ -1912,8 +1921,7 @@ static struct mapped_device *alloc_dev(int minor)
        blk_queue_softirq_done(md->queue, dm_softirq_done);
        blk_queue_prep_rq(md->queue, dm_prep_fn);
        blk_queue_lld_busy(md->queue, dm_lld_busy);
-       blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
-                         dm_rq_prepare_flush);
+       blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
 
        md->disk = alloc_disk(1);
        if (!md->disk)
@@ -2128,6 +2136,30 @@ int dm_create(int minor, struct mapped_device **result)
        return 0;
 }
 
+/*
+ * Functions to manage md->type.  Callers of dm_set_md_type() and
+ * dm_get_md_type() must hold md->type_lock (dm_lock_md_type()).
+ */
+void dm_lock_md_type(struct mapped_device *md)
+{
+       mutex_lock(&md->type_lock);
+}
+
+void dm_unlock_md_type(struct mapped_device *md)
+{
+       mutex_unlock(&md->type_lock);
+}
+
+void dm_set_md_type(struct mapped_device *md, unsigned type)
+{
+       md->type = type;
+}
+
+unsigned dm_get_md_type(struct mapped_device *md)
+{
+       return md->type;
+}
+
 static struct mapped_device *dm_find_md(dev_t dev)
 {
        struct mapped_device *md;
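
These accessors exist so that the device type (bio-based vs request-based) can be latched at the first table load rather than checked at bind time; the corresponding check removed from dm_swap_table() at the end of this diff is expected to move to the dm-ioctl table-load path. A minimal sketch of that calling convention follows; the function name and error handling are illustrative and not taken from this patch.

/*
 * Illustrative table-load fragment (not part of this diff): latch the
 * type on the first load, reject any later table whose type differs.
 * md->type_lock is held across the get/set pair so two concurrent
 * loads cannot race and bind conflicting types.
 */
static int example_bind_type(struct mapped_device *md, struct dm_table *t)
{
	int r = 0;

	dm_lock_md_type(md);

	if (dm_get_md_type(md) == DM_TYPE_NONE)
		dm_set_md_type(md, dm_table_get_type(t));	/* first load */
	else if (dm_get_md_type(md) != dm_table_get_type(t)) {
		DMWARN("can't change device type after initial table load");
		r = -EINVAL;
	}

	dm_unlock_md_type(md);

	return r;
}

Holding md->type_lock across the comparison and the assignment is what makes the type effectively immutable in the presence of concurrent table loads.
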
@@ -2141,6 +2173,7 @@ static struct mapped_device *dm_find_md(dev_t dev)
        md = idr_find(&_minor_idr, minor);
        if (md && (md == MINOR_ALLOCED ||
                   (MINOR(disk_devt(dm_disk(md))) != minor) ||
+                  dm_deleting_md(md) ||
                   test_bit(DMF_FREEING, &md->flags))) {
                md = NULL;
                goto out;
@@ -2175,6 +2208,7 @@ void dm_set_mdptr(struct mapped_device *md, void *ptr)
 void dm_get(struct mapped_device *md)
 {
        atomic_inc(&md->holders);
+       BUG_ON(test_bit(DMF_FREEING, &md->flags));
 }
 
 const char *dm_device_name(struct mapped_device *md)
@@ -2183,27 +2217,55 @@ const char *dm_device_name(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_device_name);
 
-void dm_put(struct mapped_device *md)
+static void __dm_destroy(struct mapped_device *md, bool wait)
 {
        struct dm_table *map;
 
-       BUG_ON(test_bit(DMF_FREEING, &md->flags));
+       might_sleep();
 
-       if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
-               map = dm_get_live_table(md);
-               idr_replace(&_minor_idr, MINOR_ALLOCED,
-                           MINOR(disk_devt(dm_disk(md))));
-               set_bit(DMF_FREEING, &md->flags);
-               spin_unlock(&_minor_lock);
-               if (!dm_suspended_md(md)) {
-                       dm_table_presuspend_targets(map);
-                       dm_table_postsuspend_targets(map);
-               }
-               dm_sysfs_exit(md);
-               dm_table_put(map);
-               dm_table_destroy(__unbind(md));
-               free_dev(md);
+       spin_lock(&_minor_lock);
+       map = dm_get_live_table(md);
+       idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
+       set_bit(DMF_FREEING, &md->flags);
+       spin_unlock(&_minor_lock);
+
+       if (!dm_suspended_md(md)) {
+               dm_table_presuspend_targets(map);
+               dm_table_postsuspend_targets(map);
        }
+
+       /*
+        * Rare, but there may still be references around, for example
+        * from I/O requests that have not completed yet.  Wait for all
+        * of them to disappear.  No one may take a new reference on the
+        * mapped_device once it has been marked DMF_FREEING.
+        */
+       if (wait)
+               while (atomic_read(&md->holders))
+                       msleep(1);
+       else if (atomic_read(&md->holders))
+               DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
+                      dm_device_name(md), atomic_read(&md->holders));
+
+       dm_sysfs_exit(md);
+       dm_table_put(map);
+       dm_table_destroy(__unbind(md));
+       free_dev(md);
+}
+
+void dm_destroy(struct mapped_device *md)
+{
+       __dm_destroy(md, true);
+}
+
+void dm_destroy_immediate(struct mapped_device *md)
+{
+       __dm_destroy(md, false);
+}
+
+void dm_put(struct mapped_device *md)
+{
+       atomic_dec(&md->holders);
 }
 EXPORT_SYMBOL_GPL(dm_put);
 
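
The refactoring above splits reference counting from teardown: dm_get()/dm_put() now only adjust md->holders, while __dm_destroy() does the actual shutdown, either waiting for the holder count to drain (dm_destroy()) or merely warning if holders remain (dm_destroy_immediate()). A hypothetical caller sketch, not taken from this diff, of how the pieces fit together:

/*
 * Hypothetical usage (not part of this diff).  A transient user pins
 * the device with dm_get()/dm_put(); the removal path calls
 * dm_destroy(), which marks the device DMF_FREEING and waits for
 * md->holders to reach zero before freeing it.  The caller of
 * dm_destroy() must not hold a reference of its own, or the wait
 * loop in __dm_destroy() would never terminate.
 */
static void example_reader(struct mapped_device *md)
{
	dm_get(md);	/* BUG()s if the device is already DMF_FREEING */
	/* ... inspect md or submit work against it ... */
	dm_put(md);	/* now only decrements md->holders */
}

static void example_remove(struct mapped_device *md)
{
	dm_destroy(md);	/* full teardown; blocks until all holders are gone */
}
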
@@ -2258,7 +2320,12 @@ static void process_barrier(struct mapped_device *md, struct bio *bio)
 
        if (!bio_empty_barrier(bio)) {
                __split_and_process_bio(md, bio);
-               dm_flush(md);
+               /*
+                * If the request isn't supported, don't waste time with
+                * the second flush.
+                */
+               if (md->barrier_error != -EOPNOTSUPP)
+                       dm_flush(md);
        }
 
        if (md->barrier_error != DM_ENDIO_REQUEUE)
@@ -2296,7 +2363,7 @@ static void dm_wq_work(struct work_struct *work)
                if (dm_request_based(md))
                        generic_make_request(c);
                else {
-                       if (bio_rw_flagged(c, BIO_RW_BARRIER))
+                       if (c->bi_rw & REQ_HARDBARRIER)
                                process_barrier(md, c);
                        else
                                __split_and_process_bio(md, c);
@@ -2403,13 +2470,6 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
                goto out;
        }
 
-       /* cannot change the device type, once a table is bound */
-       if (md->map &&
-           (dm_table_get_type(md->map) != dm_table_get_type(table))) {
-               DMWARN("can't change the device type after a table is bound");
-               goto out;
-       }
-
        map = __bind(md, table, &limits);
 
 out: