bbs.cooldavid.org Git - net-next-2.6.git/blobdiff - drivers/md/md.c
Merge branches 'irq-core-for-linus' and 'core-locking-for-linus' of git://git.kernel...
[net-next-2.6.git] / drivers / md / md.c
index 225815197a3d69fba134433ab269d0c9a255681b..4e957f3140a81c64a75982d8764436962d929a15 100644 (file)
@@ -57,8 +57,6 @@
 #define DEBUG 0
 #define dprintk(x...) ((void)(DEBUG && printk(x)))
 
-static DEFINE_MUTEX(md_mutex);
-
 #ifndef MODULE
 static void autostart_arrays(int part);
 #endif
@@ -69,6 +67,8 @@ static DEFINE_SPINLOCK(pers_lock);
 static void md_print_devices(void);
 
 static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+static struct workqueue_struct *md_wq;
+static struct workqueue_struct *md_misc_wq;
 
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
@@ -149,6 +149,72 @@ static const struct block_device_operations md_fops;
 
 static int start_readonly;
 
+/* bio_clone_mddev
+ * like bio_clone, but with a local bio set
+ */
+
+/* Destructor for bios allocated via bio_alloc_mddev()/bio_clone_mddev():
+ * recover the owning mddev stashed immediately in front of the bio and
+ * return the bio to that mddev's private bio_set.
+ */
+static void mddev_bio_destructor(struct bio *bio)
+{
+       mddev_t *mddev, **mddevp;
+
+       /* The bio_set was created with sizeof(mddev) of front padding
+        * (see md_run), so mddevp[-1] is the mddev pointer stored there
+        * by bio_alloc_mddev()/bio_clone_mddev() at allocation time.
+        */
+       mddevp = (void*)bio;
+       mddev = mddevp[-1];
+
+       bio_free(bio, mddev->bio_set);
+}
+
+/* bio_alloc_mddev - allocate a bio for @mddev.
+ *
+ * Falls back to plain bio_alloc() when there is no mddev or the mddev
+ * has no private bio_set.  Otherwise the bio comes from mddev->bio_set,
+ * the owning mddev is recorded in the padding word just in front of the
+ * bio, and the destructor is pointed at mddev_bio_destructor so the bio
+ * is freed back to the same set.  Returns NULL if the bio_set
+ * allocation fails.
+ */
+struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
+                           mddev_t *mddev)
+{
+       struct bio *b;
+       mddev_t **mddevp;
+
+       if (!mddev || !mddev->bio_set)
+               return bio_alloc(gfp_mask, nr_iovecs);
+
+       b = bio_alloc_bioset(gfp_mask, nr_iovecs,
+                            mddev->bio_set);
+       if (!b)
+               return NULL;
+       /* Stash the mddev in the front padding for the destructor. */
+       mddevp = (void*)b;
+       mddevp[-1] = mddev;
+       b->bi_destructor = mddev_bio_destructor;
+       return b;
+}
+EXPORT_SYMBOL_GPL(bio_alloc_mddev);
+
+/* bio_clone_mddev - clone @bio using @mddev's private bio_set.
+ *
+ * Like bio_clone(), but the clone is allocated from mddev->bio_set
+ * (when available) so md's I/O paths do not dip into the global bio
+ * pool.  Falls back to bio_clone() when there is no mddev or no
+ * private set.  The owning mddev is stored in the front padding and
+ * the destructor set, exactly as in bio_alloc_mddev().  Any integrity
+ * payload on @bio is cloned as well; on failure the partially built
+ * clone is released and NULL is returned.
+ */
+struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
+                           mddev_t *mddev)
+{
+       struct bio *b;
+       mddev_t **mddevp;
+
+       if (!mddev || !mddev->bio_set)
+               return bio_clone(bio, gfp_mask);
+
+       /* Allocate with the source bio's full iovec capacity so
+        * __bio_clone() can copy every segment.
+        */
+       b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs,
+                            mddev->bio_set);
+       if (!b)
+               return NULL;
+       mddevp = (void*)b;
+       mddevp[-1] = mddev;
+       b->bi_destructor = mddev_bio_destructor;
+       __bio_clone(b, bio);
+       if (bio_integrity(bio)) {
+               int ret;
+
+               ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);
+
+               if (ret < 0) {
+                       /* Integrity clone failed: drop our reference,
+                        * which frees the bio back to the set.
+                        */
+                       bio_put(b);
+                       return NULL;
+               }
+       }
+
+       return b;
+}
+EXPORT_SYMBOL_GPL(bio_clone_mddev);
+
 /*
  * We have a system wide 'event count' that is incremented
  * on any 'interesting' event, and readers of /proc/mdstat
@@ -300,7 +366,7 @@ static void md_end_flush(struct bio *bio, int err)
 
        if (atomic_dec_and_test(&mddev->flush_pending)) {
                /* The pre-request flush has finished */
-               schedule_work(&mddev->flush_work);
+               queue_work(md_wq, &mddev->flush_work);
        }
        bio_put(bio);
 }
@@ -321,7 +387,7 @@ static void submit_flushes(mddev_t *mddev)
                        atomic_inc(&rdev->nr_pending);
                        atomic_inc(&rdev->nr_pending);
                        rcu_read_unlock();
-                       bi = bio_alloc(GFP_KERNEL, 0);
+                       bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
                        bi->bi_end_io = md_end_flush;
                        bi->bi_private = rdev;
                        bi->bi_bdev = rdev->bdev;
@@ -369,7 +435,7 @@ void md_flush_request(mddev_t *mddev, struct bio *bio)
        submit_flushes(mddev);
 
        if (atomic_dec_and_test(&mddev->flush_pending))
-               schedule_work(&mddev->flush_work);
+               queue_work(md_wq, &mddev->flush_work);
 }
 EXPORT_SYMBOL(md_flush_request);
 
@@ -428,6 +494,8 @@ static void mddev_delayed_delete(struct work_struct *ws);
 
 static void mddev_put(mddev_t *mddev)
 {
+       struct bio_set *bs = NULL;
+
        if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
                return;
        if (!mddev->raid_disks && list_empty(&mddev->disks) &&
@@ -435,19 +503,22 @@ static void mddev_put(mddev_t *mddev)
                /* Array is not configured at all, and not held active,
                 * so destroy it */
                list_del(&mddev->all_mddevs);
+               bs = mddev->bio_set;
+               mddev->bio_set = NULL;
                if (mddev->gendisk) {
-                       /* we did a probe so need to clean up.
-                        * Call schedule_work inside the spinlock
-                        * so that flush_scheduled_work() after
-                        * mddev_find will succeed in waiting for the
-                        * work to be done.
+                       /* We did a probe so need to clean up.  Call
+                        * queue_work inside the spinlock so that
+                        * flush_workqueue() after mddev_find will
+                        * succeed in waiting for the work to be done.
                         */
                        INIT_WORK(&mddev->del_work, mddev_delayed_delete);
-                       schedule_work(&mddev->del_work);
+                       queue_work(md_misc_wq, &mddev->del_work);
                } else
                        kfree(mddev);
        }
        spin_unlock(&all_mddevs_lock);
+       if (bs)
+               bioset_free(bs);
 }
 
 void mddev_init(mddev_t *mddev)
@@ -691,7 +762,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
         * if zero is reached.
         * If an error occurred, call md_error
         */
-       struct bio *bio = bio_alloc(GFP_NOIO, 1);
+       struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
 
        bio->bi_bdev = rdev->bdev;
        bio->bi_sector = sector;
@@ -722,16 +793,16 @@ static void bi_complete(struct bio *bio, int error)
        complete((struct completion*)bio->bi_private);
 }
 
-int sync_page_io(struct block_device *bdev, sector_t sector, int size,
-                  struct page *page, int rw)
+int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
+                struct page *page, int rw)
 {
-       struct bio *bio = bio_alloc(GFP_NOIO, 1);
+       struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
        struct completion event;
        int ret;
 
        rw |= REQ_SYNC | REQ_UNPLUG;
 
-       bio->bi_bdev = bdev;
+       bio->bi_bdev = rdev->bdev;
        bio->bi_sector = sector;
        bio_add_page(bio, page, size, 0);
        init_completion(&event);
@@ -757,7 +828,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size)
                return 0;
 
 
-       if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ))
+       if (!sync_page_io(rdev, rdev->sb_start, size, rdev->sb_page, READ))
                goto fail;
        rdev->sb_loaded = 1;
        return 0;
@@ -1850,7 +1921,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
        synchronize_rcu();
        INIT_WORK(&rdev->del_work, md_delayed_delete);
        kobject_get(&rdev->kobj);
-       schedule_work(&rdev->del_work);
+       queue_work(md_misc_wq, &rdev->del_work);
 }
 
 /*
@@ -2108,6 +2179,8 @@ repeat:
        if (!mddev->persistent) {
                clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
                clear_bit(MD_CHANGE_DEVS, &mddev->flags);
+               if (!mddev->external)
+                       clear_bit(MD_CHANGE_PENDING, &mddev->flags);
                wake_up(&mddev->sb_wait);
                return;
        }
@@ -4192,10 +4265,10 @@ static int md_alloc(dev_t dev, char *name)
        shift = partitioned ? MdpMinorShift : 0;
        unit = MINOR(mddev->unit) >> shift;
 
-       /* wait for any previous instance if this device
-        * to be completed removed (mddev_delayed_delete).
+       /* wait for any previous instance of this device to be
+        * completely removed (mddev_delayed_delete).
         */
-       flush_scheduled_work();
+       flush_workqueue(md_misc_wq);
 
        mutex_lock(&disks_mutex);
        error = -EEXIST;
@@ -4378,6 +4451,9 @@ int md_run(mddev_t *mddev)
                sysfs_notify_dirent_safe(rdev->sysfs_state);
        }
 
+       if (mddev->bio_set == NULL)
+               mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
+
        spin_lock(&pers_lock);
        pers = find_pers(mddev->level, mddev->clevel);
        if (!pers || !try_module_get(pers->owner)) {
@@ -5885,16 +5961,14 @@ static int md_open(struct block_device *bdev, fmode_t mode)
        mddev_t *mddev = mddev_find(bdev->bd_dev);
        int err;
 
-       mutex_lock(&md_mutex);
        if (mddev->gendisk != bdev->bd_disk) {
                /* we are racing with mddev_put which is discarding this
                 * bd_disk.
                 */
                mddev_put(mddev);
                /* Wait until bdev->bd_disk is definitely gone */
-               flush_scheduled_work();
+               flush_workqueue(md_misc_wq);
                /* Then retry the open from the top */
-               mutex_unlock(&md_mutex);
                return -ERESTARTSYS;
        }
        BUG_ON(mddev != bdev->bd_disk->private_data);
@@ -5908,7 +5982,6 @@ static int md_open(struct block_device *bdev, fmode_t mode)
 
        check_disk_size_change(mddev->gendisk, bdev);
  out:
-       mutex_unlock(&md_mutex);
        return err;
 }
 
@@ -5917,10 +5990,8 @@ static int md_release(struct gendisk *disk, fmode_t mode)
        mddev_t *mddev = disk->private_data;
 
        BUG_ON(!mddev);
-       mutex_lock(&md_mutex);
        atomic_dec(&mddev->openers);
        mddev_put(mddev);
-       mutex_unlock(&md_mutex);
 
        return 0;
 }
@@ -6052,7 +6123,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
        if (mddev->event_work.func)
-               schedule_work(&mddev->event_work);
+               queue_work(md_misc_wq, &mddev->event_work);
        md_new_event_inintr(mddev);
 }
 
@@ -7212,12 +7283,23 @@ static void md_geninit(void)
 
 static int __init md_init(void)
 {
-       if (register_blkdev(MD_MAJOR, "md"))
-               return -1;
-       if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
-               unregister_blkdev(MD_MAJOR, "md");
-               return -1;
-       }
+       int ret = -ENOMEM;
+
+       md_wq = alloc_workqueue("md", WQ_RESCUER, 0);
+       if (!md_wq)
+               goto err_wq;
+
+       md_misc_wq = alloc_workqueue("md_misc", 0, 0);
+       if (!md_misc_wq)
+               goto err_misc_wq;
+
+       if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
+               goto err_md;
+
+       if ((ret = register_blkdev(0, "mdp")) < 0)
+               goto err_mdp;
+       mdp_major = ret;
+
        blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
                            md_probe, NULL, NULL);
        blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
@@ -7228,8 +7310,16 @@ static int __init md_init(void)
 
        md_geninit();
        return 0;
-}
 
+err_mdp:
+       unregister_blkdev(MD_MAJOR, "md");
+err_md:
+       destroy_workqueue(md_misc_wq);
+err_misc_wq:
+       destroy_workqueue(md_wq);
+err_wq:
+       return ret;
+}
 
 #ifndef MODULE
 
@@ -7316,6 +7406,8 @@ static __exit void md_exit(void)
                export_array(mddev);
                mddev->hold_active = 0;
        }
+       destroy_workqueue(md_misc_wq);
+       destroy_workqueue(md_wq);
 }
 
 subsys_initcall(md_init);