/*
   raid0.c : Multiple Devices driver for Linux
             Copyright (C) 1994-96 Marc ZYNGIER
             <zyngier@ufr-info-p7.ibp.fr> or
             <maz@gloups.fdn.fr>
             Copyright (C) 1999, 2000 Ingo Molnar, Red Hat

   RAID-0 management functions.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include "md.h"
#include "raid0.h"
#include "raid5.h"

static void raid0_unplug(struct request_queue *q)
{
        mddev_t *mddev = q->queuedata;
        raid0_conf_t *conf = mddev->private;
        mdk_rdev_t **devlist = conf->devlist;
        int raid_disks = conf->strip_zone[0].nb_dev;
        int i;

        for (i = 0; i < raid_disks; i++) {
                struct request_queue *r_queue = bdev_get_queue(devlist[i]->bdev);

                blk_unplug(r_queue);
        }
}

static int raid0_congested(void *data, int bits)
{
        mddev_t *mddev = data;
        raid0_conf_t *conf = mddev->private;
        mdk_rdev_t **devlist = conf->devlist;
        int raid_disks = conf->strip_zone[0].nb_dev;
        int i, ret = 0;

        if (mddev_congested(mddev, bits))
                return 1;

        for (i = 0; i < raid_disks && !ret; i++) {
                struct request_queue *q = bdev_get_queue(devlist[i]->bdev);

                ret |= bdi_congested(&q->backing_dev_info, bits);
        }
        return ret;
}

/*
 * inform the user of the raid configuration
 */
static void dump_zones(mddev_t *mddev)
{
        int j, k, h;
        sector_t zone_size = 0;
        sector_t zone_start = 0;
        char b[BDEVNAME_SIZE];
        raid0_conf_t *conf = mddev->private;
        int raid_disks = conf->strip_zone[0].nb_dev;
        printk(KERN_INFO "******* %s configuration *********\n",
                mdname(mddev));
        h = 0;
        for (j = 0; j < conf->nr_strip_zones; j++) {
                printk(KERN_INFO "zone%d=[", j);
                for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
                        printk(KERN_CONT "%s/",
                                bdevname(conf->devlist[j*raid_disks
                                                       + k]->bdev, b));
                printk(KERN_CONT "]\n");

                zone_size = conf->strip_zone[j].zone_end - zone_start;
                printk(KERN_INFO "  zone offset=%llukb "
                                 "device offset=%llukb size=%llukb\n",
                        (unsigned long long)zone_start>>1,
                        (unsigned long long)conf->strip_zone[j].dev_start>>1,
                        (unsigned long long)zone_size>>1);
                zone_start = conf->strip_zone[j].zone_end;
        }
        printk(KERN_INFO "**********************************\n\n");
}

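/*
 * Illustrative sample of what dump_zones() above prints, assembled from
 * its format strings with assumed values (a two-disk array with 256k
 * chunks, sda1 = 25 MiB and sdb1 = 37.5 MiB usable; whitespace
 * approximate, not captured from a real run):
 *
 *   ******* md0 configuration *********
 *   zone0=[sda1/sdb1/]
 *     zone offset=0kb device offset=0kb size=51200kb
 *   zone1=[sdb1/]
 *     zone offset=51200kb device offset=25600kb size=12800kb
 *   **********************************
 */
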
static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
{
        int i, c, err;
        sector_t curr_zone_end, sectors;
        mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
        struct strip_zone *zone;
        int cnt;
        char b[BDEVNAME_SIZE];
        raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL);

        if (!conf)
                return -ENOMEM;
        list_for_each_entry(rdev1, &mddev->disks, same_set) {
                printk(KERN_INFO "md/raid0:%s: looking at %s\n",
                       mdname(mddev),
                       bdevname(rdev1->bdev, b));
                c = 0;

                /* round size to chunk_size */
                sectors = rdev1->sectors;
                sector_div(sectors, mddev->chunk_sectors);
                rdev1->sectors = sectors * mddev->chunk_sectors;

                list_for_each_entry(rdev2, &mddev->disks, same_set) {
                        printk(KERN_INFO "md/raid0:%s: comparing %s(%llu)",
                               mdname(mddev),
                               bdevname(rdev1->bdev, b),
                               (unsigned long long)rdev1->sectors);
                        printk(KERN_CONT " with %s(%llu)\n",
                               bdevname(rdev2->bdev, b),
                               (unsigned long long)rdev2->sectors);
                        if (rdev2 == rdev1) {
                                printk(KERN_INFO "md/raid0:%s: END\n",
                                       mdname(mddev));
                                break;
                        }
                        if (rdev2->sectors == rdev1->sectors) {
                                /*
                                 * Not unique, don't count it as a new
                                 * group
                                 */
                                printk(KERN_INFO "md/raid0:%s: EQUAL\n",
                                       mdname(mddev));
                                c = 1;
                                break;
                        }
                        printk(KERN_INFO "md/raid0:%s: NOT EQUAL\n",
                               mdname(mddev));
                }
                if (!c) {
                        printk(KERN_INFO "md/raid0:%s: ==> UNIQUE\n",
                               mdname(mddev));
                        conf->nr_strip_zones++;
                        printk(KERN_INFO "md/raid0:%s: %d zones\n",
                               mdname(mddev), conf->nr_strip_zones);
                }
        }
        printk(KERN_INFO "md/raid0:%s: FINAL %d zones\n",
               mdname(mddev), conf->nr_strip_zones);
        err = -ENOMEM;
        conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
                                   conf->nr_strip_zones, GFP_KERNEL);
        if (!conf->strip_zone)
                goto abort;
        conf->devlist = kzalloc(sizeof(mdk_rdev_t*)*
                                conf->nr_strip_zones*mddev->raid_disks,
                                GFP_KERNEL);
        if (!conf->devlist)
                goto abort;

        /* The first zone must contain all devices, so here we check that
         * there is a proper alignment of slots to devices and find them all
         */
        zone = &conf->strip_zone[0];
        cnt = 0;
        smallest = NULL;
        dev = conf->devlist;
        err = -EINVAL;
        list_for_each_entry(rdev1, &mddev->disks, same_set) {
                int j = rdev1->raid_disk;

                if (mddev->level == 10) {
                        /* taking over a raid10-n2 array */
                        j /= 2;
                        rdev1->new_raid_disk = j;
                }

                if (j < 0 || j >= mddev->raid_disks) {
                        printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
                               "aborting!\n", mdname(mddev), j);
                        goto abort;
                }
                if (dev[j]) {
                        printk(KERN_ERR "md/raid0:%s: multiple devices for %d - "
                               "aborting!\n", mdname(mddev), j);
                        goto abort;
                }
                dev[j] = rdev1;

                disk_stack_limits(mddev->gendisk, rdev1->bdev,
                                  rdev1->data_offset << 9);
                /* as we don't honour merge_bvec_fn, we must never risk
                 * violating it, so limit ->max_segments to 1, lying within
                 * a single page.
                 */

                if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) {
                        blk_queue_max_segments(mddev->queue, 1);
                        blk_queue_segment_boundary(mddev->queue,
                                                   PAGE_CACHE_SIZE - 1);
                }
                if (!smallest || (rdev1->sectors < smallest->sectors))
                        smallest = rdev1;
                cnt++;
        }
        if (cnt != mddev->raid_disks) {
                printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - "
                       "aborting!\n", mdname(mddev), cnt, mddev->raid_disks);
                goto abort;
        }
        zone->nb_dev = cnt;
        zone->zone_end = smallest->sectors * cnt;

        curr_zone_end = zone->zone_end;

        /* now do the other zones */
        for (i = 1; i < conf->nr_strip_zones; i++) {
                int j;

                zone = conf->strip_zone + i;
                dev = conf->devlist + i * mddev->raid_disks;

                printk(KERN_INFO "md/raid0:%s: zone %d\n",
                       mdname(mddev), i);
                zone->dev_start = smallest->sectors;
                smallest = NULL;
                c = 0;

                for (j = 0; j < cnt; j++) {
                        rdev = conf->devlist[j];
                        printk(KERN_INFO "md/raid0:%s: checking %s ...",
                               mdname(mddev),
                               bdevname(rdev->bdev, b));
                        if (rdev->sectors <= zone->dev_start) {
                                printk(KERN_CONT " nope.\n");
                                continue;
                        }
                        printk(KERN_CONT " contained as device %d\n", c);
                        dev[c] = rdev;
                        c++;
                        if (!smallest || rdev->sectors < smallest->sectors) {
                                smallest = rdev;
                                printk(KERN_INFO "md/raid0:%s: (%llu) is smallest!\n",
                                       mdname(mddev),
                                       (unsigned long long)rdev->sectors);
                        }
                }

                zone->nb_dev = c;
                sectors = (smallest->sectors - zone->dev_start) * c;
                printk(KERN_INFO "md/raid0:%s: zone->nb_dev: %d, sectors: %llu\n",
                       mdname(mddev),
                       zone->nb_dev, (unsigned long long)sectors);

                curr_zone_end += sectors;
                zone->zone_end = curr_zone_end;

                printk(KERN_INFO "md/raid0:%s: current zone start: %llu\n",
                       mdname(mddev),
                       (unsigned long long)smallest->sectors);
        }
        mddev->queue->unplug_fn = raid0_unplug;
        mddev->queue->backing_dev_info.congested_fn = raid0_congested;
        mddev->queue->backing_dev_info.congested_data = mddev;

        /*
         * now since we have the hard sector sizes, we can make sure
         * chunk size is a multiple of that sector size
         */
        if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) {
                printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n",
                       mdname(mddev),
                       mddev->chunk_sectors << 9);
                goto abort;
        }

        blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
        blk_queue_io_opt(mddev->queue,
                         (mddev->chunk_sectors << 9) * mddev->raid_disks);

        printk(KERN_INFO "md/raid0:%s: done.\n", mdname(mddev));
        *private_conf = conf;

        return 0;
abort:
        kfree(conf->strip_zone);
        kfree(conf->devlist);
        kfree(conf);
        *private_conf = NULL;
        return err;
}

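/*
 * Worked example of the zone layout built by create_strip_zones()
 * (illustrative values, not from the original source; sizes given in
 * chunks for readability, the code stores sectors): two rdevs,
 * sda1 = 100 chunks and sdb1 = 150 chunks.  Two distinct sizes means
 * nr_strip_zones == 2:
 *
 *        sda1           sdb1
 *   +------------+ +------------+
 *   |   zone 0   | |   zone 0   |   zone 0: nb_dev = 2, dev_start = 0,
 *   | 100 chunks | | 100 chunks |           zone_end = 200 chunks
 *   +------------+ +------------+
 *                  |   zone 1   |   zone 1: nb_dev = 1, dev_start = 100,
 *                  |  50 chunks |           zone_end = 250 chunks
 *                  +------------+
 *
 * zone_end is cumulative over the whole array, while dev_start is an
 * offset within each member device.
 */
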
/**
 * raid0_mergeable_bvec -- tell the bio layer whether two requests can be merged
 * @q: request queue
 * @bvm: properties of the new bio
 * @biovec: the request that could be merged to it.
 *
 * Return the number of bytes we can accept at this offset.
 */
static int raid0_mergeable_bvec(struct request_queue *q,
                                struct bvec_merge_data *bvm,
                                struct bio_vec *biovec)
{
        mddev_t *mddev = q->queuedata;
        sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
        int max;
        unsigned int chunk_sectors = mddev->chunk_sectors;
        unsigned int bio_sectors = bvm->bi_size >> 9;

        if (is_power_of_2(chunk_sectors))
                max = (chunk_sectors - ((sector & (chunk_sectors-1))
                                        + bio_sectors)) << 9;
        else
                max = (chunk_sectors - (sector_div(sector, chunk_sectors)
                                        + bio_sectors)) << 9;
        if (max < 0)
                max = 0; /* bio_add cannot handle a negative return */
        if (max <= biovec->bv_len && bio_sectors == 0)
                return biovec->bv_len;
        else
                return max;
}

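/*
 * Worked example for the merge check above (assumed values): with
 * chunk_sectors = 128 (64k chunks), a bio currently ending at offset 120
 * within its chunk (sector & (chunk_sectors-1) == 120) and bio_sectors = 4
 * gives max = (128 - (120 + 4)) << 9 = 2048, so at most 2048 more bytes
 * may be merged before the request would cross a chunk boundary.  A
 * negative result is clamped to 0, and an empty bio (bio_sectors == 0)
 * is always granted its first page so a single-page bio is never refused.
 */
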
static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
        sector_t array_sectors = 0;
        mdk_rdev_t *rdev;

        WARN_ONCE(sectors || raid_disks,
                  "%s does not support generic reshape\n", __func__);

        list_for_each_entry(rdev, &mddev->disks, same_set)
                array_sectors += rdev->sectors;

        return array_sectors;
}

static int raid0_run(mddev_t *mddev)
{
        raid0_conf_t *conf;
        int ret;

        if (mddev->chunk_sectors == 0) {
                printk(KERN_ERR "md/raid0:%s: chunk size must be set.\n",
                       mdname(mddev));
                return -EINVAL;
        }
        if (md_check_no_bitmap(mddev))
                return -EINVAL;
        blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
        mddev->queue->queue_lock = &mddev->queue->__queue_lock;

        /* if private is not null, we are here after takeover */
        if (mddev->private == NULL) {
                ret = create_strip_zones(mddev, &conf);
                if (ret < 0)
                        return ret;
                mddev->private = conf;
        }
        conf = mddev->private;

        /* calculate array device size */
        md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));

        printk(KERN_INFO "md/raid0:%s: md_size is %llu sectors.\n",
               mdname(mddev),
               (unsigned long long)mddev->array_sectors);
        /* calculate the max read-ahead size.
         * For read-ahead of large files to be effective, we need to
         * read ahead at least twice a whole stripe, i.e. the number of
         * devices multiplied by the chunk size, times 2.
         * If an individual device has an ra_pages greater than the
         * chunk size, then we will not drive that device as hard as it
         * wants.  We consider this a configuration error: a larger
         * chunksize should be used in that case.
         */
        {
                int stripe = mddev->raid_disks *
                        (mddev->chunk_sectors << 9) / PAGE_SIZE;
                if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
                        mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
        }

        blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
        dump_zones(mddev);
        md_integrity_register(mddev);
        return 0;
}

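/*
 * Worked example for the read-ahead sizing above (assumed values):
 * 4 disks with 512-sector (256k) chunks and PAGE_SIZE = 4096 give
 * stripe = 4 * (512 << 9) / 4096 = 256 pages, so ra_pages is raised
 * to at least 2 * 256 = 512 pages (2 MiB), i.e. two full stripes.
 */
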
static int raid0_stop(mddev_t *mddev)
{
        raid0_conf_t *conf = mddev->private;

        blk_sync_queue(mddev->queue); /* the unplug fn references 'conf' */
        kfree(conf->strip_zone);
        kfree(conf->devlist);
        kfree(conf);
        mddev->private = NULL;
        return 0;
}

/* Find the zone which holds a particular offset.
 * Update *sectorp to be an offset in that zone.
 */
static struct strip_zone *find_zone(struct raid0_private_data *conf,
                                    sector_t *sectorp)
{
        int i;
        struct strip_zone *z = conf->strip_zone;
        sector_t sector = *sectorp;

        for (i = 0; i < conf->nr_strip_zones; i++)
                if (sector < z[i].zone_end) {
                        if (i)
                                *sectorp = sector - z[i-1].zone_end;
                        return z + i;
                }
        BUG();
}

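/*
 * Worked example for find_zone() (assumed values): with the two-zone
 * layout sketched after create_strip_zones(), zone_end = {102400, 128000}
 * in sectors.  For *sectorp == 110000: 110000 >= 102400, so zone 0 is
 * skipped; 110000 < 128000 matches zone 1, and *sectorp becomes
 * 110000 - 102400 = 7600, the offset inside zone 1.
 */
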
/*
 * Remap the bio to the target device.  We separate two flows, a
 * power-of-2 flow and a general flow, for the sake of performance.
 */
static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone,
                              sector_t sector, sector_t *sector_offset)
{
        unsigned int sect_in_chunk;
        sector_t chunk;
        raid0_conf_t *conf = mddev->private;
        int raid_disks = conf->strip_zone[0].nb_dev;
        unsigned int chunk_sects = mddev->chunk_sectors;

        if (is_power_of_2(chunk_sects)) {
                int chunksect_bits = ffz(~chunk_sects);
                /* find the sector offset inside the chunk */
                sect_in_chunk = sector & (chunk_sects - 1);
                sector >>= chunksect_bits;
                /* chunk in zone */
                chunk = *sector_offset;
                /* quotient is the chunk in real device */
                sector_div(chunk, zone->nb_dev << chunksect_bits);
        } else {
                sect_in_chunk = sector_div(sector, chunk_sects);
                chunk = *sector_offset;
                sector_div(chunk, chunk_sects * zone->nb_dev);
        }
        /*
         * position the bio over the real device:
         * real sector = chunk in device + start of zone
         *             + the position in the chunk
         */
        *sector_offset = (chunk * chunk_sects) + sect_in_chunk;
        return conf->devlist[(zone - conf->strip_zone)*raid_disks
                             + sector_div(sector, zone->nb_dev)];
}

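/*
 * Worked example for map_sector() (assumed values, power-of-2 path):
 * chunk_sects = 512, zone 0 with nb_dev = 2, array sector = 1536 and
 * *sector_offset = 1536 (the zone starts at 0).  Then:
 *   sect_in_chunk  = 1536 & 511       = 0
 *   sector         = 1536 >> 9        = 3    (chunk number in the array)
 *   chunk          = 1536 / (2 * 512) = 1    (chunk number on the disk)
 *   disk index     = 3 % 2            = 1    (second device)
 *   *sector_offset = 1 * 512 + 0      = 512
 * so the bio lands on devlist[1] at sector 512 + dev_start + data_offset.
 */
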
/*
 * Does the I/O fit entirely inside one chunk, or is it spread
 * over more than one?
 */
static inline int is_io_in_chunk_boundary(mddev_t *mddev,
                        unsigned int chunk_sects, struct bio *bio)
{
        if (likely(is_power_of_2(chunk_sects))) {
                return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
                                        + (bio->bi_size >> 9));
        } else {
                sector_t sector = bio->bi_sector;
                return chunk_sects >= (sector_div(sector, chunk_sects)
                                        + (bio->bi_size >> 9));
        }
}

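/*
 * Example for the boundary check above (assumed values): with
 * chunk_sects = 512, a 16-sector bio at sector 500 has
 * (500 & 511) + 16 = 516 > 512, so it straddles two chunks and
 * raid0_make_request() below will have to split it; the same bio at
 * sector 480 gives 496 <= 512 and passes through unsplit.
 */
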
static int raid0_make_request(mddev_t *mddev, struct bio *bio)
{
        unsigned int chunk_sects;
        sector_t sector_offset;
        struct strip_zone *zone;
        mdk_rdev_t *tmp_dev;

        if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
                md_barrier_request(mddev, bio);
                return 0;
        }

        chunk_sects = mddev->chunk_sectors;
        if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) {
                sector_t sector = bio->bi_sector;
                struct bio_pair *bp;
                /* Sanity check -- queue functions should prevent this from happening */
                if (bio->bi_vcnt != 1 ||
                    bio->bi_idx != 0)
                        goto bad_map;
                /* This is a one page bio that upper layers
                 * refuse to split for us, so we need to split it.
                 */
                if (likely(is_power_of_2(chunk_sects)))
                        bp = bio_split(bio, chunk_sects - (sector &
                                                           (chunk_sects-1)));
                else
                        bp = bio_split(bio, chunk_sects -
                                       sector_div(sector, chunk_sects));
                if (raid0_make_request(mddev, &bp->bio1))
                        generic_make_request(&bp->bio1);
                if (raid0_make_request(mddev, &bp->bio2))
                        generic_make_request(&bp->bio2);

                bio_pair_release(bp);
                return 0;
        }

        sector_offset = bio->bi_sector;
        zone = find_zone(mddev->private, &sector_offset);
        tmp_dev = map_sector(mddev, zone, bio->bi_sector,
                             &sector_offset);
        bio->bi_bdev = tmp_dev->bdev;
        bio->bi_sector = sector_offset + zone->dev_start +
                tmp_dev->data_offset;
        /*
         * Let the main block layer submit the IO and resolve recursion:
         */
        return 1;

bad_map:
        printk("md/raid0:%s: make_request bug: can't convert block across chunks"
               " or bigger than %dk %llu %d\n",
               mdname(mddev), chunk_sects / 2,
               (unsigned long long)bio->bi_sector, bio->bi_size >> 10);

        bio_io_error(bio);
        return 0;
}

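/*
 * Continuing the boundary example above (assumed values): the 16-sector
 * bio at sector 500 is split with bio_split() at
 * chunk_sects - (500 & 511) = 12 sectors, giving bp->bio1 covering
 * sectors 500..511 (the tail of one chunk) and bp->bio2 covering
 * sectors 512..515 (the head of the next); each half is then remapped
 * independently through the non-splitting path above.
 */
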
static void raid0_status(struct seq_file *seq, mddev_t *mddev)
{
#undef MD_DEBUG
#ifdef MD_DEBUG
        int j, k, h;
        char b[BDEVNAME_SIZE];
        raid0_conf_t *conf = mddev->private;
        int raid_disks = conf->strip_zone[0].nb_dev;

        sector_t zone_size;
        sector_t zone_start = 0;
        h = 0;

        for (j = 0; j < conf->nr_strip_zones; j++) {
                seq_printf(seq, " z%d", j);
                seq_printf(seq, "=[");
                for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
                        seq_printf(seq, "%s/", bdevname(
                                conf->devlist[j*raid_disks + k]
                                                ->bdev, b));

                zone_size = conf->strip_zone[j].zone_end - zone_start;
                seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n",
                        (unsigned long long)zone_start>>1,
                        (unsigned long long)conf->strip_zone[j].dev_start>>1,
                        (unsigned long long)zone_size>>1);
                zone_start = conf->strip_zone[j].zone_end;
        }
#endif
        seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2);
        return;
}

static void *raid0_takeover_raid45(mddev_t *mddev)
{
        mdk_rdev_t *rdev;
        raid0_conf_t *priv_conf;

        if (mddev->degraded != 1) {
                printk(KERN_ERR "md/raid0:%s: raid5 must be degraded! Degraded disks: %d\n",
                       mdname(mddev),
                       mddev->degraded);
                return ERR_PTR(-EINVAL);
        }

        list_for_each_entry(rdev, &mddev->disks, same_set) {
                /* check slot number for a disk */
                if (rdev->raid_disk == mddev->raid_disks-1) {
                        printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n",
                               mdname(mddev));
                        return ERR_PTR(-EINVAL);
                }
        }

        /* Set new parameters */
        mddev->new_level = 0;
        mddev->new_layout = 0;
        mddev->new_chunk_sectors = mddev->chunk_sectors;
        mddev->raid_disks--;
        mddev->delta_disks = -1;
        /* make sure it will not be marked as dirty */
        mddev->recovery_cp = MaxSector;

        create_strip_zones(mddev, &priv_conf);
        return priv_conf;
}

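/*
 * Why the missing disk must be the parity disk (sketch, not from the
 * original source): with ALGORITHM_PARITY_N all parity lives on the
 * last slot (raid_disks-1), so a degraded array whose failed member is
 * that slot leaves the survivors holding plain striped data:
 *
 *   D0 D1 D2 [P]   --- drop P --->   D0 D1 D2   == raid0 over 3 disks
 *
 * Dropping the parity slot and decrementing raid_disks therefore yields
 * a layout-identical raid0, which create_strip_zones() then describes.
 */
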
static void *raid0_takeover_raid10(mddev_t *mddev)
{
        raid0_conf_t *priv_conf;

        /* Check layout:
         *  - far_copies must be 1
         *  - near_copies must be 2
         *  - the number of disks must be even
         *  - all mirrors must be already degraded
         */
        if (mddev->layout != ((1 << 8) + 2)) {
                printk(KERN_ERR "md/raid0:%s: Raid0 cannot take over layout: 0x%x\n",
                       mdname(mddev),
                       mddev->layout);
                return ERR_PTR(-EINVAL);
        }
        if (mddev->raid_disks & 1) {
                printk(KERN_ERR "md/raid0:%s: Raid0 cannot take over Raid10 with an odd number of disks.\n",
                       mdname(mddev));
                return ERR_PTR(-EINVAL);
        }
        if (mddev->degraded != (mddev->raid_disks>>1)) {
                printk(KERN_ERR "md/raid0:%s: All mirrors must be already degraded!\n",
                       mdname(mddev));
                return ERR_PTR(-EINVAL);
        }

        /* Set new parameters */
        mddev->new_level = 0;
        mddev->new_layout = 0;
        mddev->new_chunk_sectors = mddev->chunk_sectors;
        mddev->delta_disks = - mddev->raid_disks / 2;
        mddev->raid_disks += mddev->delta_disks;
        mddev->degraded = 0;
        /* make sure it will not be marked as dirty */
        mddev->recovery_cp = MaxSector;

        create_strip_zones(mddev, &priv_conf);
        return priv_conf;
}

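/*
 * Decoding the layout test above: the raid10 layout word packs
 * near_copies in bits 0-7 and far_copies in bits 8-15, so
 * (1 << 8) + 2 == 0x102 means far_copies = 1, near_copies = 2,
 * a plain "n2" mirror.  In n2, disks 2k and 2k+1 hold identical data,
 * which is why rdev->raid_disk is halved (j /= 2) during takeover in
 * create_strip_zones() and half the disks can be dropped.
 */
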
static void *raid0_takeover(mddev_t *mddev)
{
        /* raid0 can take over:
         *  raid4 - if all data disks are active.
         *  raid5 - provided it has the raid4 layout and one disk is faulty
         *  raid10 - assuming we have all necessary active disks
         */
        if (mddev->level == 4)
                return raid0_takeover_raid45(mddev);

        if (mddev->level == 5) {
                if (mddev->layout == ALGORITHM_PARITY_N)
                        return raid0_takeover_raid45(mddev);

                printk(KERN_ERR "md/raid0:%s: Raid0 can only take over Raid5 with layout: %d\n",
                       mdname(mddev), ALGORITHM_PARITY_N);
        }

        if (mddev->level == 10)
                return raid0_takeover_raid10(mddev);

        return ERR_PTR(-EINVAL);
}

static void raid0_quiesce(mddev_t *mddev, int state)
{
}

static struct mdk_personality raid0_personality =
{
        .name           = "raid0",
        .level          = 0,
        .owner          = THIS_MODULE,
        .make_request   = raid0_make_request,
        .run            = raid0_run,
        .stop           = raid0_stop,
        .status         = raid0_status,
        .size           = raid0_size,
        .takeover       = raid0_takeover,
        .quiesce        = raid0_quiesce,
};

static int __init raid0_init(void)
{
        return register_md_personality(&raid0_personality);
}

static void raid0_exit(void)
{
        unregister_md_personality(&raid0_personality);
}

module_init(raid0_init);
module_exit(raid0_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID0 (striping) personality for MD");
MODULE_ALIAS("md-personality-2"); /* RAID0 */
MODULE_ALIAS("md-raid0");
MODULE_ALIAS("md-level-0");