]> bbs.cooldavid.org Git - net-next-2.6.git/blame - fs/btrfs/volumes.c
Btrfs: Use current_fsuid/gid
[net-next-2.6.git] / fs / btrfs / volumes.c
CommitLineData
0b86a832
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18#include <linux/sched.h>
19#include <linux/bio.h>
8a4b83cc 20#include <linux/buffer_head.h>
f2d8d74d 21#include <linux/blkdev.h>
788f20eb 22#include <linux/random.h>
593060d7 23#include <asm/div64.h>
0b86a832
CM
24#include "ctree.h"
25#include "extent_map.h"
26#include "disk-io.h"
27#include "transaction.h"
28#include "print-tree.h"
29#include "volumes.h"
8b712842 30#include "async-thread.h"
0b86a832 31
593060d7
CM
/*
 * Variable-length record: the fixed fields below are followed by a
 * flexible array of struct btrfs_bio_stripe.  The map_lookup_size(n)
 * macro in this file computes the allocation size for n array entries.
 *
 * NOTE(review): the fields look like per-chunk striping parameters
 * (alignment, width, stripe length, sector size); the users of this
 * struct are outside this excerpt -- confirm before relying on that.
 */
32struct map_lookup {
33 u64 type;
34 int io_align;
35 int io_width;
36 int stripe_len;
37 int sector_size;
/* presumably the number of valid entries in stripes[] -- TODO confirm */
38 int num_stripes;
321aecc6 39 int sub_stripes;
/* flexible array member; must stay last */
cea9e445 40 struct btrfs_bio_stripe stripes[];
593060d7
CM
41};
42
2b82032c
YZ
43static int init_first_rw_device(struct btrfs_trans_handle *trans,
44 struct btrfs_root *root,
45 struct btrfs_device *device);
46static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
47
48
/*
 * Number of bytes occupied by a struct map_lookup whose trailing
 * stripes[] array holds n struct btrfs_bio_stripe entries.
 */
593060d7 49#define map_lookup_size(n) (sizeof(struct map_lookup) + \
cea9e445 50 (sizeof(struct btrfs_bio_stripe) * (n)))
593060d7 51
8a4b83cc
CM
/*
 * File-global mutex.  It is taken by btrfs_lock_volumes() and by the
 * scan/open/close/remove entry points in this file.
 */
52static DEFINE_MUTEX(uuid_mutex);
/*
 * List of every struct btrfs_fs_devices created by device_list_add(),
 * linked through btrfs_fs_devices::list and searched by find_fsid().
 */
53static LIST_HEAD(fs_uuids);
54
a061fc8d
CM
55void btrfs_lock_volumes(void)
56{
57 mutex_lock(&uuid_mutex);
58}
59
60void btrfs_unlock_volumes(void)
61{
62 mutex_unlock(&uuid_mutex);
63}
64
7d9eb12c
CM
65static void lock_chunks(struct btrfs_root *root)
66{
7d9eb12c
CM
67 mutex_lock(&root->fs_info->chunk_mutex);
68}
69
70static void unlock_chunks(struct btrfs_root *root)
71{
7d9eb12c
CM
72 mutex_unlock(&root->fs_info->chunk_mutex);
73}
74
8a4b83cc
CM
75int btrfs_cleanup_fs_uuids(void)
76{
77 struct btrfs_fs_devices *fs_devices;
8a4b83cc
CM
78 struct btrfs_device *dev;
79
2b82032c
YZ
80 while (!list_empty(&fs_uuids)) {
81 fs_devices = list_entry(fs_uuids.next,
82 struct btrfs_fs_devices, list);
83 list_del(&fs_devices->list);
8a4b83cc 84 while(!list_empty(&fs_devices->devices)) {
2b82032c
YZ
85 dev = list_entry(fs_devices->devices.next,
86 struct btrfs_device, dev_list);
8a4b83cc 87 if (dev->bdev) {
15916de8 88 close_bdev_exclusive(dev->bdev, dev->mode);
a0af469b 89 fs_devices->open_devices--;
8a4b83cc 90 }
2b82032c
YZ
91 fs_devices->num_devices--;
92 if (dev->writeable)
93 fs_devices->rw_devices--;
8a4b83cc 94 list_del(&dev->dev_list);
2b82032c 95 list_del(&dev->dev_alloc_list);
dfe25020 96 kfree(dev->name);
8a4b83cc
CM
97 kfree(dev);
98 }
2b82032c
YZ
99 WARN_ON(fs_devices->num_devices);
100 WARN_ON(fs_devices->open_devices);
101 WARN_ON(fs_devices->rw_devices);
102 kfree(fs_devices);
8a4b83cc
CM
103 }
104 return 0;
105}
106
a1b32a59
CM
107static noinline struct btrfs_device *__find_device(struct list_head *head,
108 u64 devid, u8 *uuid)
8a4b83cc
CM
109{
110 struct btrfs_device *dev;
111 struct list_head *cur;
112
113 list_for_each(cur, head) {
114 dev = list_entry(cur, struct btrfs_device, dev_list);
a443755f 115 if (dev->devid == devid &&
8f18cf13 116 (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
8a4b83cc 117 return dev;
a443755f 118 }
8a4b83cc
CM
119 }
120 return NULL;
121}
122
a1b32a59 123static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
8a4b83cc
CM
124{
125 struct list_head *cur;
126 struct btrfs_fs_devices *fs_devices;
127
128 list_for_each(cur, &fs_uuids) {
129 fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
130 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
131 return fs_devices;
132 }
133 return NULL;
134}
135
8b712842
CM
136/*
137 * we try to collect pending bios for a device so we don't get a large
138 * number of procs sending bios down to the same device. This greatly
139 * improves the schedulers ability to collect and merge the bios.
140 *
141 * But, it also turns into a long list of bios to process and that is sure
142 * to eventually make the worker thread block. The solution here is to
143 * make some progress and then put this work struct back at the end of
144 * the list if the block device is congested. This way, multiple devices
145 * can make progress from a single worker thread.
146 */
a1b32a59 147static int noinline run_scheduled_bios(struct btrfs_device *device)
8b712842
CM
148{
149 struct bio *pending;
150 struct backing_dev_info *bdi;
b64a2851 151 struct btrfs_fs_info *fs_info;
8b712842
CM
152 struct bio *tail;
153 struct bio *cur;
154 int again = 0;
155 unsigned long num_run = 0;
b64a2851 156 unsigned long limit;
8b712842
CM
157
158 bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
b64a2851
CM
159 fs_info = device->dev_root->fs_info;
160 limit = btrfs_async_submit_limit(fs_info);
161 limit = limit * 2 / 3;
162
8b712842
CM
163loop:
164 spin_lock(&device->io_lock);
165
166 /* take all the bios off the list at once and process them
167 * later on (without the lock held). But, remember the
168 * tail and other pointers so the bios can be properly reinserted
169 * into the list if we hit congestion
170 */
171 pending = device->pending_bios;
172 tail = device->pending_bio_tail;
173 WARN_ON(pending && !tail);
174 device->pending_bios = NULL;
175 device->pending_bio_tail = NULL;
176
177 /*
178 * if pending was null this time around, no bios need processing
179 * at all and we can stop. Otherwise it'll loop back up again
180 * and do an additional check so no bios are missed.
181 *
182 * device->running_pending is used to synchronize with the
183 * schedule_bio code.
184 */
185 if (pending) {
186 again = 1;
187 device->running_pending = 1;
188 } else {
189 again = 0;
190 device->running_pending = 0;
191 }
192 spin_unlock(&device->io_lock);
193
194 while(pending) {
195 cur = pending;
196 pending = pending->bi_next;
197 cur->bi_next = NULL;
b64a2851
CM
198 atomic_dec(&fs_info->nr_async_bios);
199
200 if (atomic_read(&fs_info->nr_async_bios) < limit &&
201 waitqueue_active(&fs_info->async_submit_wait))
202 wake_up(&fs_info->async_submit_wait);
492bb6de
CM
203
204 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
205 bio_get(cur);
8b712842 206 submit_bio(cur->bi_rw, cur);
492bb6de 207 bio_put(cur);
8b712842
CM
208 num_run++;
209
210 /*
211 * we made progress, there is more work to do and the bdi
212 * is now congested. Back off and let other work structs
213 * run instead
214 */
5f2cc086
CM
215 if (pending && bdi_write_congested(bdi) &&
216 fs_info->fs_devices->open_devices > 1) {
8b712842
CM
217 struct bio *old_head;
218
219 spin_lock(&device->io_lock);
492bb6de 220
8b712842
CM
221 old_head = device->pending_bios;
222 device->pending_bios = pending;
223 if (device->pending_bio_tail)
224 tail->bi_next = old_head;
225 else
226 device->pending_bio_tail = tail;
227
228 spin_unlock(&device->io_lock);
229 btrfs_requeue_work(&device->work);
230 goto done;
231 }
232 }
233 if (again)
234 goto loop;
235done:
236 return 0;
237}
238
239void pending_bios_fn(struct btrfs_work *work)
240{
241 struct btrfs_device *device;
242
243 device = container_of(work, struct btrfs_device, work);
244 run_scheduled_bios(device);
245}
246
a1b32a59 247static noinline int device_list_add(const char *path,
8a4b83cc
CM
248 struct btrfs_super_block *disk_super,
249 u64 devid, struct btrfs_fs_devices **fs_devices_ret)
250{
251 struct btrfs_device *device;
252 struct btrfs_fs_devices *fs_devices;
253 u64 found_transid = btrfs_super_generation(disk_super);
254
255 fs_devices = find_fsid(disk_super->fsid);
256 if (!fs_devices) {
515dc322 257 fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
8a4b83cc
CM
258 if (!fs_devices)
259 return -ENOMEM;
260 INIT_LIST_HEAD(&fs_devices->devices);
b3075717 261 INIT_LIST_HEAD(&fs_devices->alloc_list);
8a4b83cc
CM
262 list_add(&fs_devices->list, &fs_uuids);
263 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
264 fs_devices->latest_devid = devid;
265 fs_devices->latest_trans = found_transid;
8a4b83cc
CM
266 device = NULL;
267 } else {
a443755f
CM
268 device = __find_device(&fs_devices->devices, devid,
269 disk_super->dev_item.uuid);
8a4b83cc
CM
270 }
271 if (!device) {
2b82032c
YZ
272 if (fs_devices->opened)
273 return -EBUSY;
274
8a4b83cc
CM
275 device = kzalloc(sizeof(*device), GFP_NOFS);
276 if (!device) {
277 /* we can safely leave the fs_devices entry around */
278 return -ENOMEM;
279 }
280 device->devid = devid;
8b712842 281 device->work.func = pending_bios_fn;
a443755f
CM
282 memcpy(device->uuid, disk_super->dev_item.uuid,
283 BTRFS_UUID_SIZE);
f2984462 284 device->barriers = 1;
b248a415 285 spin_lock_init(&device->io_lock);
8a4b83cc
CM
286 device->name = kstrdup(path, GFP_NOFS);
287 if (!device->name) {
288 kfree(device);
289 return -ENOMEM;
290 }
2b82032c 291 INIT_LIST_HEAD(&device->dev_alloc_list);
8a4b83cc 292 list_add(&device->dev_list, &fs_devices->devices);
2b82032c 293 device->fs_devices = fs_devices;
8a4b83cc
CM
294 fs_devices->num_devices++;
295 }
296
297 if (found_transid > fs_devices->latest_trans) {
298 fs_devices->latest_devid = devid;
299 fs_devices->latest_trans = found_transid;
300 }
8a4b83cc
CM
301 *fs_devices_ret = fs_devices;
302 return 0;
303}
304
dfe25020
CM
305int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
306{
2b82032c 307 struct list_head *tmp;
dfe25020
CM
308 struct list_head *cur;
309 struct btrfs_device *device;
2b82032c 310 int seed_devices = 0;
dfe25020
CM
311
312 mutex_lock(&uuid_mutex);
313again:
2b82032c 314 list_for_each_safe(cur, tmp, &fs_devices->devices) {
dfe25020 315 device = list_entry(cur, struct btrfs_device, dev_list);
2b82032c
YZ
316 if (device->in_fs_metadata)
317 continue;
318
319 if (device->bdev) {
15916de8 320 close_bdev_exclusive(device->bdev, device->mode);
2b82032c
YZ
321 device->bdev = NULL;
322 fs_devices->open_devices--;
323 }
324 if (device->writeable) {
325 list_del_init(&device->dev_alloc_list);
326 device->writeable = 0;
327 fs_devices->rw_devices--;
328 }
329 if (!seed_devices) {
330 list_del_init(&device->dev_list);
dfe25020
CM
331 fs_devices->num_devices--;
332 kfree(device->name);
333 kfree(device);
dfe25020
CM
334 }
335 }
2b82032c
YZ
336
337 if (fs_devices->seed) {
338 fs_devices = fs_devices->seed;
339 seed_devices = 1;
340 goto again;
341 }
342
dfe25020
CM
343 mutex_unlock(&uuid_mutex);
344 return 0;
345}
a0af469b 346
2b82032c 347static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
8a4b83cc 348{
2b82032c 349 struct btrfs_fs_devices *seed_devices;
8a4b83cc
CM
350 struct list_head *cur;
351 struct btrfs_device *device;
2b82032c
YZ
352again:
353 if (--fs_devices->opened > 0)
354 return 0;
8a4b83cc 355
2b82032c 356 list_for_each(cur, &fs_devices->devices) {
8a4b83cc
CM
357 device = list_entry(cur, struct btrfs_device, dev_list);
358 if (device->bdev) {
15916de8 359 close_bdev_exclusive(device->bdev, device->mode);
a0af469b 360 fs_devices->open_devices--;
8a4b83cc 361 }
2b82032c
YZ
362 if (device->writeable) {
363 list_del_init(&device->dev_alloc_list);
364 fs_devices->rw_devices--;
365 }
366
8a4b83cc 367 device->bdev = NULL;
2b82032c 368 device->writeable = 0;
dfe25020 369 device->in_fs_metadata = 0;
8a4b83cc 370 }
2b82032c
YZ
371 fs_devices->opened = 0;
372 fs_devices->seeding = 0;
373 fs_devices->sprouted = 0;
374
375 seed_devices = fs_devices->seed;
376 fs_devices->seed = NULL;
377 if (seed_devices) {
378 fs_devices = seed_devices;
379 goto again;
380 }
8a4b83cc
CM
381 return 0;
382}
383
2b82032c
YZ
384int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
385{
386 int ret;
387
388 mutex_lock(&uuid_mutex);
389 ret = __btrfs_close_devices(fs_devices);
390 mutex_unlock(&uuid_mutex);
391 return ret;
392}
393
15916de8
CM
394int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
395 int flags, void *holder)
8a4b83cc
CM
396{
397 struct block_device *bdev;
398 struct list_head *head = &fs_devices->devices;
399 struct list_head *cur;
400 struct btrfs_device *device;
a0af469b
CM
401 struct block_device *latest_bdev = NULL;
402 struct buffer_head *bh;
403 struct btrfs_super_block *disk_super;
404 u64 latest_devid = 0;
405 u64 latest_transid = 0;
a0af469b 406 u64 devid;
2b82032c 407 int seeding = 1;
a0af469b 408 int ret = 0;
8a4b83cc 409
8a4b83cc
CM
410 list_for_each(cur, head) {
411 device = list_entry(cur, struct btrfs_device, dev_list);
c1c4d91c
CM
412 if (device->bdev)
413 continue;
dfe25020
CM
414 if (!device->name)
415 continue;
416
15916de8 417 bdev = open_bdev_exclusive(device->name, flags, holder);
8a4b83cc
CM
418 if (IS_ERR(bdev)) {
419 printk("open %s failed\n", device->name);
a0af469b 420 goto error;
8a4b83cc 421 }
a061fc8d 422 set_blocksize(bdev, 4096);
a0af469b
CM
423
424 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
425 if (!bh)
426 goto error_close;
427
428 disk_super = (struct btrfs_super_block *)bh->b_data;
429 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
430 sizeof(disk_super->magic)))
431 goto error_brelse;
432
433 devid = le64_to_cpu(disk_super->dev_item.devid);
434 if (devid != device->devid)
435 goto error_brelse;
436
2b82032c
YZ
437 if (memcmp(device->uuid, disk_super->dev_item.uuid,
438 BTRFS_UUID_SIZE))
439 goto error_brelse;
440
441 device->generation = btrfs_super_generation(disk_super);
442 if (!latest_transid || device->generation > latest_transid) {
a0af469b 443 latest_devid = devid;
2b82032c 444 latest_transid = device->generation;
a0af469b
CM
445 latest_bdev = bdev;
446 }
447
2b82032c
YZ
448 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
449 device->writeable = 0;
450 } else {
451 device->writeable = !bdev_read_only(bdev);
452 seeding = 0;
453 }
454
8a4b83cc 455 device->bdev = bdev;
dfe25020 456 device->in_fs_metadata = 0;
15916de8
CM
457 device->mode = flags;
458
a0af469b 459 fs_devices->open_devices++;
2b82032c
YZ
460 if (device->writeable) {
461 fs_devices->rw_devices++;
462 list_add(&device->dev_alloc_list,
463 &fs_devices->alloc_list);
464 }
a0af469b 465 continue;
a061fc8d 466
a0af469b
CM
467error_brelse:
468 brelse(bh);
469error_close:
15916de8 470 close_bdev_exclusive(bdev, MS_RDONLY);
a0af469b
CM
471error:
472 continue;
8a4b83cc 473 }
a0af469b
CM
474 if (fs_devices->open_devices == 0) {
475 ret = -EIO;
476 goto out;
477 }
2b82032c
YZ
478 fs_devices->seeding = seeding;
479 fs_devices->opened = 1;
a0af469b
CM
480 fs_devices->latest_bdev = latest_bdev;
481 fs_devices->latest_devid = latest_devid;
482 fs_devices->latest_trans = latest_transid;
2b82032c 483 fs_devices->total_rw_bytes = 0;
a0af469b 484out:
2b82032c
YZ
485 return ret;
486}
487
488int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
489 int flags, void *holder)
490{
491 int ret;
492
493 mutex_lock(&uuid_mutex);
494 if (fs_devices->opened) {
495 if (fs_devices->sprouted) {
496 ret = -EBUSY;
497 } else {
498 fs_devices->opened++;
499 ret = 0;
500 }
501 } else {
15916de8 502 ret = __btrfs_open_devices(fs_devices, flags, holder);
2b82032c 503 }
8a4b83cc 504 mutex_unlock(&uuid_mutex);
8a4b83cc
CM
505 return ret;
506}
507
508int btrfs_scan_one_device(const char *path, int flags, void *holder,
509 struct btrfs_fs_devices **fs_devices_ret)
510{
511 struct btrfs_super_block *disk_super;
512 struct block_device *bdev;
513 struct buffer_head *bh;
514 int ret;
515 u64 devid;
f2984462 516 u64 transid;
8a4b83cc
CM
517
518 mutex_lock(&uuid_mutex);
519
15916de8 520 bdev = open_bdev_exclusive(path, flags, holder);
8a4b83cc
CM
521
522 if (IS_ERR(bdev)) {
8a4b83cc
CM
523 ret = PTR_ERR(bdev);
524 goto error;
525 }
526
527 ret = set_blocksize(bdev, 4096);
528 if (ret)
529 goto error_close;
530 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
531 if (!bh) {
532 ret = -EIO;
533 goto error_close;
534 }
535 disk_super = (struct btrfs_super_block *)bh->b_data;
536 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
537 sizeof(disk_super->magic))) {
e58ca020 538 ret = -EINVAL;
8a4b83cc
CM
539 goto error_brelse;
540 }
541 devid = le64_to_cpu(disk_super->dev_item.devid);
f2984462 542 transid = btrfs_super_generation(disk_super);
7ae9c09d
CM
543 if (disk_super->label[0])
544 printk("device label %s ", disk_super->label);
545 else {
546 /* FIXME, make a readl uuid parser */
547 printk("device fsid %llx-%llx ",
548 *(unsigned long long *)disk_super->fsid,
549 *(unsigned long long *)(disk_super->fsid + 8));
550 }
551 printk("devid %Lu transid %Lu %s\n", devid, transid, path);
8a4b83cc
CM
552 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
553
554error_brelse:
555 brelse(bh);
556error_close:
15916de8 557 close_bdev_exclusive(bdev, flags);
8a4b83cc
CM
558error:
559 mutex_unlock(&uuid_mutex);
560 return ret;
561}
0b86a832
CM
562
563/*
564 * this uses a pretty simple search, the expectation is that it is
565 * called very infrequently and that a given device has a small number
566 * of extents
567 */
a1b32a59
CM
568static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
569 struct btrfs_device *device,
a1b32a59 570 u64 num_bytes, u64 *start)
0b86a832
CM
571{
572 struct btrfs_key key;
573 struct btrfs_root *root = device->dev_root;
574 struct btrfs_dev_extent *dev_extent = NULL;
2b82032c 575 struct btrfs_path *path;
0b86a832
CM
576 u64 hole_size = 0;
577 u64 last_byte = 0;
578 u64 search_start = 0;
579 u64 search_end = device->total_bytes;
580 int ret;
581 int slot = 0;
582 int start_found;
583 struct extent_buffer *l;
584
2b82032c
YZ
585 path = btrfs_alloc_path();
586 if (!path)
587 return -ENOMEM;
0b86a832 588 path->reada = 2;
2b82032c 589 start_found = 0;
0b86a832
CM
590
591 /* FIXME use last free of some kind */
592
8a4b83cc
CM
593 /* we don't want to overwrite the superblock on the drive,
594 * so we make sure to start at an offset of at least 1MB
595 */
596 search_start = max((u64)1024 * 1024, search_start);
8f18cf13
CM
597
598 if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
599 search_start = max(root->fs_info->alloc_start, search_start);
600
0b86a832
CM
601 key.objectid = device->devid;
602 key.offset = search_start;
603 key.type = BTRFS_DEV_EXTENT_KEY;
604 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
605 if (ret < 0)
606 goto error;
607 ret = btrfs_previous_item(root, path, 0, key.type);
608 if (ret < 0)
609 goto error;
610 l = path->nodes[0];
611 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
612 while (1) {
613 l = path->nodes[0];
614 slot = path->slots[0];
615 if (slot >= btrfs_header_nritems(l)) {
616 ret = btrfs_next_leaf(root, path);
617 if (ret == 0)
618 continue;
619 if (ret < 0)
620 goto error;
621no_more_items:
622 if (!start_found) {
623 if (search_start >= search_end) {
624 ret = -ENOSPC;
625 goto error;
626 }
627 *start = search_start;
628 start_found = 1;
629 goto check_pending;
630 }
631 *start = last_byte > search_start ?
632 last_byte : search_start;
633 if (search_end <= *start) {
634 ret = -ENOSPC;
635 goto error;
636 }
637 goto check_pending;
638 }
639 btrfs_item_key_to_cpu(l, &key, slot);
640
641 if (key.objectid < device->devid)
642 goto next;
643
644 if (key.objectid > device->devid)
645 goto no_more_items;
646
647 if (key.offset >= search_start && key.offset > last_byte &&
648 start_found) {
649 if (last_byte < search_start)
650 last_byte = search_start;
651 hole_size = key.offset - last_byte;
652 if (key.offset > last_byte &&
653 hole_size >= num_bytes) {
654 *start = last_byte;
655 goto check_pending;
656 }
657 }
658 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
659 goto next;
660 }
661
662 start_found = 1;
663 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
664 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
665next:
666 path->slots[0]++;
667 cond_resched();
668 }
669check_pending:
670 /* we have to make sure we didn't find an extent that has already
671 * been allocated by the map tree or the original allocation
672 */
0b86a832
CM
673 BUG_ON(*start < search_start);
674
6324fbf3 675 if (*start + num_bytes > search_end) {
0b86a832
CM
676 ret = -ENOSPC;
677 goto error;
678 }
679 /* check for pending inserts here */
2b82032c 680 ret = 0;
0b86a832
CM
681
682error:
2b82032c 683 btrfs_free_path(path);
0b86a832
CM
684 return ret;
685}
686
8f18cf13
CM
687int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
688 struct btrfs_device *device,
689 u64 start)
690{
691 int ret;
692 struct btrfs_path *path;
693 struct btrfs_root *root = device->dev_root;
694 struct btrfs_key key;
a061fc8d
CM
695 struct btrfs_key found_key;
696 struct extent_buffer *leaf = NULL;
697 struct btrfs_dev_extent *extent = NULL;
8f18cf13
CM
698
699 path = btrfs_alloc_path();
700 if (!path)
701 return -ENOMEM;
702
703 key.objectid = device->devid;
704 key.offset = start;
705 key.type = BTRFS_DEV_EXTENT_KEY;
706
707 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
a061fc8d
CM
708 if (ret > 0) {
709 ret = btrfs_previous_item(root, path, key.objectid,
710 BTRFS_DEV_EXTENT_KEY);
711 BUG_ON(ret);
712 leaf = path->nodes[0];
713 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
714 extent = btrfs_item_ptr(leaf, path->slots[0],
715 struct btrfs_dev_extent);
716 BUG_ON(found_key.offset > start || found_key.offset +
717 btrfs_dev_extent_length(leaf, extent) < start);
718 ret = 0;
719 } else if (ret == 0) {
720 leaf = path->nodes[0];
721 extent = btrfs_item_ptr(leaf, path->slots[0],
722 struct btrfs_dev_extent);
723 }
8f18cf13
CM
724 BUG_ON(ret);
725
dfe25020
CM
726 if (device->bytes_used > 0)
727 device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
8f18cf13
CM
728 ret = btrfs_del_item(trans, root, path);
729 BUG_ON(ret);
730
731 btrfs_free_path(path);
732 return ret;
733}
734
2b82032c 735int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
0b86a832 736 struct btrfs_device *device,
e17cade2 737 u64 chunk_tree, u64 chunk_objectid,
2b82032c 738 u64 chunk_offset, u64 start, u64 num_bytes)
0b86a832
CM
739{
740 int ret;
741 struct btrfs_path *path;
742 struct btrfs_root *root = device->dev_root;
743 struct btrfs_dev_extent *extent;
744 struct extent_buffer *leaf;
745 struct btrfs_key key;
746
dfe25020 747 WARN_ON(!device->in_fs_metadata);
0b86a832
CM
748 path = btrfs_alloc_path();
749 if (!path)
750 return -ENOMEM;
751
0b86a832 752 key.objectid = device->devid;
2b82032c 753 key.offset = start;
0b86a832
CM
754 key.type = BTRFS_DEV_EXTENT_KEY;
755 ret = btrfs_insert_empty_item(trans, root, path, &key,
756 sizeof(*extent));
757 BUG_ON(ret);
758
759 leaf = path->nodes[0];
760 extent = btrfs_item_ptr(leaf, path->slots[0],
761 struct btrfs_dev_extent);
e17cade2
CM
762 btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
763 btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
764 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
765
766 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
767 (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
768 BTRFS_UUID_SIZE);
769
0b86a832
CM
770 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
771 btrfs_mark_buffer_dirty(leaf);
0b86a832
CM
772 btrfs_free_path(path);
773 return ret;
774}
775
a1b32a59
CM
776static noinline int find_next_chunk(struct btrfs_root *root,
777 u64 objectid, u64 *offset)
0b86a832
CM
778{
779 struct btrfs_path *path;
780 int ret;
781 struct btrfs_key key;
e17cade2 782 struct btrfs_chunk *chunk;
0b86a832
CM
783 struct btrfs_key found_key;
784
785 path = btrfs_alloc_path();
786 BUG_ON(!path);
787
e17cade2 788 key.objectid = objectid;
0b86a832
CM
789 key.offset = (u64)-1;
790 key.type = BTRFS_CHUNK_ITEM_KEY;
791
792 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
793 if (ret < 0)
794 goto error;
795
796 BUG_ON(ret == 0);
797
798 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
799 if (ret) {
e17cade2 800 *offset = 0;
0b86a832
CM
801 } else {
802 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
803 path->slots[0]);
e17cade2
CM
804 if (found_key.objectid != objectid)
805 *offset = 0;
806 else {
807 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
808 struct btrfs_chunk);
809 *offset = found_key.offset +
810 btrfs_chunk_length(path->nodes[0], chunk);
811 }
0b86a832
CM
812 }
813 ret = 0;
814error:
815 btrfs_free_path(path);
816 return ret;
817}
818
2b82032c 819static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
0b86a832
CM
820{
821 int ret;
822 struct btrfs_key key;
823 struct btrfs_key found_key;
2b82032c
YZ
824 struct btrfs_path *path;
825
826 root = root->fs_info->chunk_root;
827
828 path = btrfs_alloc_path();
829 if (!path)
830 return -ENOMEM;
0b86a832
CM
831
832 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
833 key.type = BTRFS_DEV_ITEM_KEY;
834 key.offset = (u64)-1;
835
836 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
837 if (ret < 0)
838 goto error;
839
840 BUG_ON(ret == 0);
841
842 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
843 BTRFS_DEV_ITEM_KEY);
844 if (ret) {
845 *objectid = 1;
846 } else {
847 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
848 path->slots[0]);
849 *objectid = found_key.offset + 1;
850 }
851 ret = 0;
852error:
2b82032c 853 btrfs_free_path(path);
0b86a832
CM
854 return ret;
855}
856
857/*
858 * the device information is stored in the chunk root
859 * the btrfs_device struct should be fully filled in
860 */
861int btrfs_add_device(struct btrfs_trans_handle *trans,
862 struct btrfs_root *root,
863 struct btrfs_device *device)
864{
865 int ret;
866 struct btrfs_path *path;
867 struct btrfs_dev_item *dev_item;
868 struct extent_buffer *leaf;
869 struct btrfs_key key;
870 unsigned long ptr;
0b86a832
CM
871
872 root = root->fs_info->chunk_root;
873
874 path = btrfs_alloc_path();
875 if (!path)
876 return -ENOMEM;
877
0b86a832
CM
878 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
879 key.type = BTRFS_DEV_ITEM_KEY;
2b82032c 880 key.offset = device->devid;
0b86a832
CM
881
882 ret = btrfs_insert_empty_item(trans, root, path, &key,
0d81ba5d 883 sizeof(*dev_item));
0b86a832
CM
884 if (ret)
885 goto out;
886
887 leaf = path->nodes[0];
888 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
889
890 btrfs_set_device_id(leaf, dev_item, device->devid);
2b82032c 891 btrfs_set_device_generation(leaf, dev_item, 0);
0b86a832
CM
892 btrfs_set_device_type(leaf, dev_item, device->type);
893 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
894 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
895 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
0b86a832
CM
896 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
897 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
e17cade2
CM
898 btrfs_set_device_group(leaf, dev_item, 0);
899 btrfs_set_device_seek_speed(leaf, dev_item, 0);
900 btrfs_set_device_bandwidth(leaf, dev_item, 0);
0b86a832 901
0b86a832 902 ptr = (unsigned long)btrfs_device_uuid(dev_item);
e17cade2 903 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
2b82032c
YZ
904 ptr = (unsigned long)btrfs_device_fsid(dev_item);
905 write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
0b86a832 906 btrfs_mark_buffer_dirty(leaf);
0b86a832 907
2b82032c 908 ret = 0;
0b86a832
CM
909out:
910 btrfs_free_path(path);
911 return ret;
912}
8f18cf13 913
a061fc8d
CM
914static int btrfs_rm_dev_item(struct btrfs_root *root,
915 struct btrfs_device *device)
916{
917 int ret;
918 struct btrfs_path *path;
a061fc8d 919 struct btrfs_key key;
a061fc8d
CM
920 struct btrfs_trans_handle *trans;
921
922 root = root->fs_info->chunk_root;
923
924 path = btrfs_alloc_path();
925 if (!path)
926 return -ENOMEM;
927
928 trans = btrfs_start_transaction(root, 1);
929 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
930 key.type = BTRFS_DEV_ITEM_KEY;
931 key.offset = device->devid;
7d9eb12c 932 lock_chunks(root);
a061fc8d
CM
933
934 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
935 if (ret < 0)
936 goto out;
937
938 if (ret > 0) {
939 ret = -ENOENT;
940 goto out;
941 }
942
943 ret = btrfs_del_item(trans, root, path);
944 if (ret)
945 goto out;
a061fc8d
CM
946out:
947 btrfs_free_path(path);
7d9eb12c 948 unlock_chunks(root);
a061fc8d
CM
949 btrfs_commit_transaction(trans, root);
950 return ret;
951}
952
953int btrfs_rm_device(struct btrfs_root *root, char *device_path)
954{
955 struct btrfs_device *device;
2b82032c 956 struct btrfs_device *next_device;
a061fc8d 957 struct block_device *bdev;
dfe25020 958 struct buffer_head *bh = NULL;
a061fc8d
CM
959 struct btrfs_super_block *disk_super;
960 u64 all_avail;
961 u64 devid;
2b82032c
YZ
962 u64 num_devices;
963 u8 *dev_uuid;
a061fc8d
CM
964 int ret = 0;
965
a061fc8d 966 mutex_lock(&uuid_mutex);
7d9eb12c 967 mutex_lock(&root->fs_info->volume_mutex);
a061fc8d
CM
968
969 all_avail = root->fs_info->avail_data_alloc_bits |
970 root->fs_info->avail_system_alloc_bits |
971 root->fs_info->avail_metadata_alloc_bits;
972
973 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
2b82032c 974 root->fs_info->fs_devices->rw_devices <= 4) {
a061fc8d
CM
975 printk("btrfs: unable to go below four devices on raid10\n");
976 ret = -EINVAL;
977 goto out;
978 }
979
980 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
2b82032c 981 root->fs_info->fs_devices->rw_devices <= 2) {
a061fc8d
CM
982 printk("btrfs: unable to go below two devices on raid1\n");
983 ret = -EINVAL;
984 goto out;
985 }
986
dfe25020
CM
987 if (strcmp(device_path, "missing") == 0) {
988 struct list_head *cur;
989 struct list_head *devices;
990 struct btrfs_device *tmp;
a061fc8d 991
dfe25020
CM
992 device = NULL;
993 devices = &root->fs_info->fs_devices->devices;
994 list_for_each(cur, devices) {
995 tmp = list_entry(cur, struct btrfs_device, dev_list);
996 if (tmp->in_fs_metadata && !tmp->bdev) {
997 device = tmp;
998 break;
999 }
1000 }
1001 bdev = NULL;
1002 bh = NULL;
1003 disk_super = NULL;
1004 if (!device) {
1005 printk("btrfs: no missing devices found to remove\n");
1006 goto out;
1007 }
dfe25020 1008 } else {
15916de8 1009 bdev = open_bdev_exclusive(device_path, MS_RDONLY,
dfe25020
CM
1010 root->fs_info->bdev_holder);
1011 if (IS_ERR(bdev)) {
1012 ret = PTR_ERR(bdev);
1013 goto out;
1014 }
a061fc8d 1015
2b82032c 1016 set_blocksize(bdev, 4096);
dfe25020
CM
1017 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
1018 if (!bh) {
1019 ret = -EIO;
1020 goto error_close;
1021 }
1022 disk_super = (struct btrfs_super_block *)bh->b_data;
1023 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
2b82032c 1024 sizeof(disk_super->magic))) {
dfe25020
CM
1025 ret = -ENOENT;
1026 goto error_brelse;
1027 }
1028 devid = le64_to_cpu(disk_super->dev_item.devid);
2b82032c
YZ
1029 dev_uuid = disk_super->dev_item.uuid;
1030 device = btrfs_find_device(root, devid, dev_uuid,
1031 disk_super->fsid);
dfe25020
CM
1032 if (!device) {
1033 ret = -ENOENT;
1034 goto error_brelse;
1035 }
2b82032c 1036 }
dfe25020 1037
2b82032c
YZ
1038 if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
1039 printk("btrfs: unable to remove the only writeable device\n");
1040 ret = -EINVAL;
1041 goto error_brelse;
1042 }
1043
1044 if (device->writeable) {
1045 list_del_init(&device->dev_alloc_list);
1046 root->fs_info->fs_devices->rw_devices--;
dfe25020 1047 }
a061fc8d
CM
1048
1049 ret = btrfs_shrink_device(device, 0);
1050 if (ret)
1051 goto error_brelse;
1052
a061fc8d
CM
1053 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1054 if (ret)
1055 goto error_brelse;
1056
2b82032c
YZ
1057 device->in_fs_metadata = 0;
1058 if (device->fs_devices == root->fs_info->fs_devices) {
1059 list_del_init(&device->dev_list);
1060 root->fs_info->fs_devices->num_devices--;
1061 if (device->bdev)
1062 device->fs_devices->open_devices--;
1063 }
1064
1065 next_device = list_entry(root->fs_info->fs_devices->devices.next,
1066 struct btrfs_device, dev_list);
1067 if (device->bdev == root->fs_info->sb->s_bdev)
1068 root->fs_info->sb->s_bdev = next_device->bdev;
1069 if (device->bdev == root->fs_info->fs_devices->latest_bdev)
1070 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1071
1072 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
1073 btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices);
1074
1075 if (device->fs_devices != root->fs_info->fs_devices) {
1076 BUG_ON(device->writeable);
1077 brelse(bh);
1078 if (bdev)
15916de8 1079 close_bdev_exclusive(bdev, MS_RDONLY);
2b82032c
YZ
1080
1081 if (device->bdev) {
15916de8 1082 close_bdev_exclusive(device->bdev, device->mode);
2b82032c
YZ
1083 device->bdev = NULL;
1084 device->fs_devices->open_devices--;
1085 }
1086 if (device->fs_devices->open_devices == 0) {
1087 struct btrfs_fs_devices *fs_devices;
1088 fs_devices = root->fs_info->fs_devices;
1089 while (fs_devices) {
1090 if (fs_devices->seed == device->fs_devices)
1091 break;
1092 fs_devices = fs_devices->seed;
1093 }
1094 fs_devices->seed = device->fs_devices->seed;
1095 device->fs_devices->seed = NULL;
1096 __btrfs_close_devices(device->fs_devices);
1097 }
1098 ret = 0;
1099 goto out;
1100 }
1101
1102 /*
1103 * at this point, the device is zero sized. We want to
1104 * remove it from the devices list and zero out the old super
1105 */
1106 if (device->writeable) {
dfe25020
CM
1107 /* make sure this device isn't detected as part of
1108 * the FS anymore
1109 */
1110 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
1111 set_buffer_dirty(bh);
1112 sync_dirty_buffer(bh);
dfe25020 1113 }
2b82032c 1114 brelse(bh);
a061fc8d 1115
dfe25020
CM
1116 if (device->bdev) {
1117 /* one close for the device struct or super_block */
15916de8 1118 close_bdev_exclusive(device->bdev, device->mode);
dfe25020
CM
1119 }
1120 if (bdev) {
1121 /* one close for us */
15916de8 1122 close_bdev_exclusive(bdev, MS_RDONLY);
dfe25020 1123 }
a061fc8d
CM
1124 kfree(device->name);
1125 kfree(device);
1126 ret = 0;
1127 goto out;
1128
1129error_brelse:
1130 brelse(bh);
1131error_close:
dfe25020 1132 if (bdev)
15916de8 1133 close_bdev_exclusive(bdev, MS_RDONLY);
a061fc8d 1134out:
7d9eb12c 1135 mutex_unlock(&root->fs_info->volume_mutex);
a061fc8d 1136 mutex_unlock(&uuid_mutex);
a061fc8d
CM
1137 return ret;
1138}
1139
2b82032c
YZ
1140/*
1141 * does all the dirty work required for changing file system's UUID.
1142 */
1143static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
1144 struct btrfs_root *root)
1145{
1146 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
1147 struct btrfs_fs_devices *old_devices;
1148 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
1149 struct btrfs_device *device;
1150 u64 super_flags;
1151
1152 BUG_ON(!mutex_is_locked(&uuid_mutex));
1153 if (!fs_devices->seeding || fs_devices->opened != 1)
1154 return -EINVAL;
1155
1156 old_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
1157 if (!old_devices)
1158 return -ENOMEM;
1159
1160 memcpy(old_devices, fs_devices, sizeof(*old_devices));
1161 old_devices->opened = 1;
1162 old_devices->sprouted = 1;
1163 INIT_LIST_HEAD(&old_devices->devices);
1164 INIT_LIST_HEAD(&old_devices->alloc_list);
1165 list_splice_init(&fs_devices->devices, &old_devices->devices);
1166 list_splice_init(&fs_devices->alloc_list, &old_devices->alloc_list);
1167 list_for_each_entry(device, &old_devices->devices, dev_list) {
1168 device->fs_devices = old_devices;
1169 }
1170 list_add(&old_devices->list, &fs_uuids);
1171
1172 fs_devices->seeding = 0;
1173 fs_devices->num_devices = 0;
1174 fs_devices->open_devices = 0;
1175 fs_devices->seed = old_devices;
1176
1177 generate_random_uuid(fs_devices->fsid);
1178 memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1179 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1180 super_flags = btrfs_super_flags(disk_super) &
1181 ~BTRFS_SUPER_FLAG_SEEDING;
1182 btrfs_set_super_flags(disk_super, super_flags);
1183
1184 return 0;
1185}
1186
1187/*
1188 * strore the expected generation for seed devices in device items.
1189 */
1190static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
1191 struct btrfs_root *root)
1192{
1193 struct btrfs_path *path;
1194 struct extent_buffer *leaf;
1195 struct btrfs_dev_item *dev_item;
1196 struct btrfs_device *device;
1197 struct btrfs_key key;
1198 u8 fs_uuid[BTRFS_UUID_SIZE];
1199 u8 dev_uuid[BTRFS_UUID_SIZE];
1200 u64 devid;
1201 int ret;
1202
1203 path = btrfs_alloc_path();
1204 if (!path)
1205 return -ENOMEM;
1206
1207 root = root->fs_info->chunk_root;
1208 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1209 key.offset = 0;
1210 key.type = BTRFS_DEV_ITEM_KEY;
1211
1212 while (1) {
1213 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1214 if (ret < 0)
1215 goto error;
1216
1217 leaf = path->nodes[0];
1218next_slot:
1219 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1220 ret = btrfs_next_leaf(root, path);
1221 if (ret > 0)
1222 break;
1223 if (ret < 0)
1224 goto error;
1225 leaf = path->nodes[0];
1226 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1227 btrfs_release_path(root, path);
1228 continue;
1229 }
1230
1231 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1232 if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
1233 key.type != BTRFS_DEV_ITEM_KEY)
1234 break;
1235
1236 dev_item = btrfs_item_ptr(leaf, path->slots[0],
1237 struct btrfs_dev_item);
1238 devid = btrfs_device_id(leaf, dev_item);
1239 read_extent_buffer(leaf, dev_uuid,
1240 (unsigned long)btrfs_device_uuid(dev_item),
1241 BTRFS_UUID_SIZE);
1242 read_extent_buffer(leaf, fs_uuid,
1243 (unsigned long)btrfs_device_fsid(dev_item),
1244 BTRFS_UUID_SIZE);
1245 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
1246 BUG_ON(!device);
1247
1248 if (device->fs_devices->seeding) {
1249 btrfs_set_device_generation(leaf, dev_item,
1250 device->generation);
1251 btrfs_mark_buffer_dirty(leaf);
1252 }
1253
1254 path->slots[0]++;
1255 goto next_slot;
1256 }
1257 ret = 0;
1258error:
1259 btrfs_free_path(path);
1260 return ret;
1261}
1262
788f20eb
CM
1263int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1264{
1265 struct btrfs_trans_handle *trans;
1266 struct btrfs_device *device;
1267 struct block_device *bdev;
1268 struct list_head *cur;
1269 struct list_head *devices;
2b82032c 1270 struct super_block *sb = root->fs_info->sb;
788f20eb 1271 u64 total_bytes;
2b82032c 1272 int seeding_dev = 0;
788f20eb
CM
1273 int ret = 0;
1274
2b82032c
YZ
1275 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
1276 return -EINVAL;
788f20eb 1277
15916de8 1278 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
788f20eb
CM
1279 if (!bdev) {
1280 return -EIO;
1281 }
a2135011 1282
2b82032c
YZ
1283 if (root->fs_info->fs_devices->seeding) {
1284 seeding_dev = 1;
1285 down_write(&sb->s_umount);
1286 mutex_lock(&uuid_mutex);
1287 }
1288
8c8bee1d 1289 filemap_write_and_wait(bdev->bd_inode->i_mapping);
7d9eb12c 1290 mutex_lock(&root->fs_info->volume_mutex);
a2135011 1291
788f20eb
CM
1292 devices = &root->fs_info->fs_devices->devices;
1293 list_for_each(cur, devices) {
1294 device = list_entry(cur, struct btrfs_device, dev_list);
1295 if (device->bdev == bdev) {
1296 ret = -EEXIST;
2b82032c 1297 goto error;
788f20eb
CM
1298 }
1299 }
1300
1301 device = kzalloc(sizeof(*device), GFP_NOFS);
1302 if (!device) {
1303 /* we can safely leave the fs_devices entry around */
1304 ret = -ENOMEM;
2b82032c 1305 goto error;
788f20eb
CM
1306 }
1307
788f20eb
CM
1308 device->name = kstrdup(device_path, GFP_NOFS);
1309 if (!device->name) {
1310 kfree(device);
2b82032c
YZ
1311 ret = -ENOMEM;
1312 goto error;
788f20eb 1313 }
2b82032c
YZ
1314
1315 ret = find_next_devid(root, &device->devid);
1316 if (ret) {
1317 kfree(device);
1318 goto error;
1319 }
1320
1321 trans = btrfs_start_transaction(root, 1);
1322 lock_chunks(root);
1323
1324 device->barriers = 1;
1325 device->writeable = 1;
1326 device->work.func = pending_bios_fn;
1327 generate_random_uuid(device->uuid);
1328 spin_lock_init(&device->io_lock);
1329 device->generation = trans->transid;
788f20eb
CM
1330 device->io_width = root->sectorsize;
1331 device->io_align = root->sectorsize;
1332 device->sector_size = root->sectorsize;
1333 device->total_bytes = i_size_read(bdev->bd_inode);
1334 device->dev_root = root->fs_info->dev_root;
1335 device->bdev = bdev;
dfe25020 1336 device->in_fs_metadata = 1;
15916de8 1337 device->mode = 0;
2b82032c 1338 set_blocksize(device->bdev, 4096);
788f20eb 1339
2b82032c
YZ
1340 if (seeding_dev) {
1341 sb->s_flags &= ~MS_RDONLY;
1342 ret = btrfs_prepare_sprout(trans, root);
1343 BUG_ON(ret);
1344 }
788f20eb 1345
2b82032c
YZ
1346 device->fs_devices = root->fs_info->fs_devices;
1347 list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
1348 list_add(&device->dev_alloc_list,
1349 &root->fs_info->fs_devices->alloc_list);
1350 root->fs_info->fs_devices->num_devices++;
1351 root->fs_info->fs_devices->open_devices++;
1352 root->fs_info->fs_devices->rw_devices++;
1353 root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
325cd4ba 1354
788f20eb
CM
1355 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
1356 btrfs_set_super_total_bytes(&root->fs_info->super_copy,
1357 total_bytes + device->total_bytes);
1358
1359 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
1360 btrfs_set_super_num_devices(&root->fs_info->super_copy,
1361 total_bytes + 1);
1362
2b82032c
YZ
1363 if (seeding_dev) {
1364 ret = init_first_rw_device(trans, root, device);
1365 BUG_ON(ret);
1366 ret = btrfs_finish_sprout(trans, root);
1367 BUG_ON(ret);
1368 } else {
1369 ret = btrfs_add_device(trans, root, device);
1370 }
1371
7d9eb12c 1372 unlock_chunks(root);
2b82032c 1373 btrfs_commit_transaction(trans, root);
a2135011 1374
2b82032c
YZ
1375 if (seeding_dev) {
1376 mutex_unlock(&uuid_mutex);
1377 up_write(&sb->s_umount);
788f20eb 1378
2b82032c
YZ
1379 ret = btrfs_relocate_sys_chunks(root);
1380 BUG_ON(ret);
1381 }
1382out:
1383 mutex_unlock(&root->fs_info->volume_mutex);
1384 return ret;
1385error:
15916de8 1386 close_bdev_exclusive(bdev, 0);
2b82032c
YZ
1387 if (seeding_dev) {
1388 mutex_unlock(&uuid_mutex);
1389 up_write(&sb->s_umount);
1390 }
788f20eb
CM
1391 goto out;
1392}
1393
a1b32a59
CM
1394int noinline btrfs_update_device(struct btrfs_trans_handle *trans,
1395 struct btrfs_device *device)
0b86a832
CM
1396{
1397 int ret;
1398 struct btrfs_path *path;
1399 struct btrfs_root *root;
1400 struct btrfs_dev_item *dev_item;
1401 struct extent_buffer *leaf;
1402 struct btrfs_key key;
1403
1404 root = device->dev_root->fs_info->chunk_root;
1405
1406 path = btrfs_alloc_path();
1407 if (!path)
1408 return -ENOMEM;
1409
1410 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1411 key.type = BTRFS_DEV_ITEM_KEY;
1412 key.offset = device->devid;
1413
1414 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1415 if (ret < 0)
1416 goto out;
1417
1418 if (ret > 0) {
1419 ret = -ENOENT;
1420 goto out;
1421 }
1422
1423 leaf = path->nodes[0];
1424 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1425
1426 btrfs_set_device_id(leaf, dev_item, device->devid);
1427 btrfs_set_device_type(leaf, dev_item, device->type);
1428 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1429 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1430 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
0b86a832
CM
1431 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
1432 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1433 btrfs_mark_buffer_dirty(leaf);
1434
1435out:
1436 btrfs_free_path(path);
1437 return ret;
1438}
1439
7d9eb12c 1440static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
8f18cf13
CM
1441 struct btrfs_device *device, u64 new_size)
1442{
1443 struct btrfs_super_block *super_copy =
1444 &device->dev_root->fs_info->super_copy;
1445 u64 old_total = btrfs_super_total_bytes(super_copy);
1446 u64 diff = new_size - device->total_bytes;
1447
2b82032c
YZ
1448 if (!device->writeable)
1449 return -EACCES;
1450 if (new_size <= device->total_bytes)
1451 return -EINVAL;
1452
8f18cf13 1453 btrfs_set_super_total_bytes(super_copy, old_total + diff);
2b82032c
YZ
1454 device->fs_devices->total_rw_bytes += diff;
1455
1456 device->total_bytes = new_size;
8f18cf13
CM
1457 return btrfs_update_device(trans, device);
1458}
1459
7d9eb12c
CM
1460int btrfs_grow_device(struct btrfs_trans_handle *trans,
1461 struct btrfs_device *device, u64 new_size)
1462{
1463 int ret;
1464 lock_chunks(device->dev_root);
1465 ret = __btrfs_grow_device(trans, device, new_size);
1466 unlock_chunks(device->dev_root);
1467 return ret;
1468}
1469
8f18cf13
CM
1470static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
1471 struct btrfs_root *root,
1472 u64 chunk_tree, u64 chunk_objectid,
1473 u64 chunk_offset)
1474{
1475 int ret;
1476 struct btrfs_path *path;
1477 struct btrfs_key key;
1478
1479 root = root->fs_info->chunk_root;
1480 path = btrfs_alloc_path();
1481 if (!path)
1482 return -ENOMEM;
1483
1484 key.objectid = chunk_objectid;
1485 key.offset = chunk_offset;
1486 key.type = BTRFS_CHUNK_ITEM_KEY;
1487
1488 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1489 BUG_ON(ret);
1490
1491 ret = btrfs_del_item(trans, root, path);
1492 BUG_ON(ret);
1493
1494 btrfs_free_path(path);
1495 return 0;
1496}
1497
1498int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
1499 chunk_offset)
1500{
1501 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1502 struct btrfs_disk_key *disk_key;
1503 struct btrfs_chunk *chunk;
1504 u8 *ptr;
1505 int ret = 0;
1506 u32 num_stripes;
1507 u32 array_size;
1508 u32 len = 0;
1509 u32 cur;
1510 struct btrfs_key key;
1511
1512 array_size = btrfs_super_sys_array_size(super_copy);
1513
1514 ptr = super_copy->sys_chunk_array;
1515 cur = 0;
1516
1517 while (cur < array_size) {
1518 disk_key = (struct btrfs_disk_key *)ptr;
1519 btrfs_disk_key_to_cpu(&key, disk_key);
1520
1521 len = sizeof(*disk_key);
1522
1523 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1524 chunk = (struct btrfs_chunk *)(ptr + len);
1525 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1526 len += btrfs_chunk_item_size(num_stripes);
1527 } else {
1528 ret = -EIO;
1529 break;
1530 }
1531 if (key.objectid == chunk_objectid &&
1532 key.offset == chunk_offset) {
1533 memmove(ptr, ptr + len, array_size - (cur + len));
1534 array_size -= len;
1535 btrfs_set_super_sys_array_size(super_copy, array_size);
1536 } else {
1537 ptr += len;
1538 cur += len;
1539 }
1540 }
1541 return ret;
1542}
1543
8f18cf13
CM
1544int btrfs_relocate_chunk(struct btrfs_root *root,
1545 u64 chunk_tree, u64 chunk_objectid,
1546 u64 chunk_offset)
1547{
1548 struct extent_map_tree *em_tree;
1549 struct btrfs_root *extent_root;
1550 struct btrfs_trans_handle *trans;
1551 struct extent_map *em;
1552 struct map_lookup *map;
1553 int ret;
1554 int i;
1555
323da79c
CM
1556 printk("btrfs relocating chunk %llu\n",
1557 (unsigned long long)chunk_offset);
8f18cf13
CM
1558 root = root->fs_info->chunk_root;
1559 extent_root = root->fs_info->extent_root;
1560 em_tree = &root->fs_info->mapping_tree.map_tree;
1561
1562 /* step one, relocate all the extents inside this chunk */
1a40e23b 1563 ret = btrfs_relocate_block_group(extent_root, chunk_offset);
8f18cf13
CM
1564 BUG_ON(ret);
1565
1566 trans = btrfs_start_transaction(root, 1);
1567 BUG_ON(!trans);
1568
7d9eb12c
CM
1569 lock_chunks(root);
1570
8f18cf13
CM
1571 /*
1572 * step two, delete the device extents and the
1573 * chunk tree entries
1574 */
1575 spin_lock(&em_tree->lock);
1576 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1577 spin_unlock(&em_tree->lock);
1578
a061fc8d
CM
1579 BUG_ON(em->start > chunk_offset ||
1580 em->start + em->len < chunk_offset);
8f18cf13
CM
1581 map = (struct map_lookup *)em->bdev;
1582
1583 for (i = 0; i < map->num_stripes; i++) {
1584 ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
1585 map->stripes[i].physical);
1586 BUG_ON(ret);
a061fc8d 1587
dfe25020
CM
1588 if (map->stripes[i].dev) {
1589 ret = btrfs_update_device(trans, map->stripes[i].dev);
1590 BUG_ON(ret);
1591 }
8f18cf13
CM
1592 }
1593 ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
1594 chunk_offset);
1595
1596 BUG_ON(ret);
1597
1598 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
1599 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
1600 BUG_ON(ret);
8f18cf13
CM
1601 }
1602
2b82032c
YZ
1603 ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
1604 BUG_ON(ret);
1605
1606 spin_lock(&em_tree->lock);
1607 remove_extent_mapping(em_tree, em);
1608 spin_unlock(&em_tree->lock);
1609
1610 kfree(map);
1611 em->bdev = NULL;
1612
1613 /* once for the tree */
1614 free_extent_map(em);
1615 /* once for us */
1616 free_extent_map(em);
1617
1618 unlock_chunks(root);
1619 btrfs_end_transaction(trans, root);
1620 return 0;
1621}
1622
1623static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
1624{
1625 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
1626 struct btrfs_path *path;
1627 struct extent_buffer *leaf;
1628 struct btrfs_chunk *chunk;
1629 struct btrfs_key key;
1630 struct btrfs_key found_key;
1631 u64 chunk_tree = chunk_root->root_key.objectid;
1632 u64 chunk_type;
1633 int ret;
1634
1635 path = btrfs_alloc_path();
1636 if (!path)
1637 return -ENOMEM;
1638
1639 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1640 key.offset = (u64)-1;
1641 key.type = BTRFS_CHUNK_ITEM_KEY;
1642
1643 while (1) {
1644 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
1645 if (ret < 0)
1646 goto error;
1647 BUG_ON(ret == 0);
1648
1649 ret = btrfs_previous_item(chunk_root, path, key.objectid,
1650 key.type);
1651 if (ret < 0)
1652 goto error;
1653 if (ret > 0)
1654 break;
1a40e23b 1655
2b82032c
YZ
1656 leaf = path->nodes[0];
1657 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1a40e23b 1658
2b82032c
YZ
1659 chunk = btrfs_item_ptr(leaf, path->slots[0],
1660 struct btrfs_chunk);
1661 chunk_type = btrfs_chunk_type(leaf, chunk);
1662 btrfs_release_path(chunk_root, path);
8f18cf13 1663
2b82032c
YZ
1664 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
1665 ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
1666 found_key.objectid,
1667 found_key.offset);
1668 BUG_ON(ret);
1669 }
8f18cf13 1670
2b82032c
YZ
1671 if (found_key.offset == 0)
1672 break;
1673 key.offset = found_key.offset - 1;
1674 }
1675 ret = 0;
1676error:
1677 btrfs_free_path(path);
1678 return ret;
8f18cf13
CM
1679}
1680
ec44a35c
CM
1681static u64 div_factor(u64 num, int factor)
1682{
1683 if (factor == 10)
1684 return num;
1685 num *= factor;
1686 do_div(num, 10);
1687 return num;
1688}
1689
ec44a35c
CM
1690int btrfs_balance(struct btrfs_root *dev_root)
1691{
1692 int ret;
1693 struct list_head *cur;
1694 struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
1695 struct btrfs_device *device;
1696 u64 old_size;
1697 u64 size_to_free;
1698 struct btrfs_path *path;
1699 struct btrfs_key key;
1700 struct btrfs_chunk *chunk;
1701 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
1702 struct btrfs_trans_handle *trans;
1703 struct btrfs_key found_key;
1704
2b82032c
YZ
1705 if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
1706 return -EROFS;
ec44a35c 1707
7d9eb12c 1708 mutex_lock(&dev_root->fs_info->volume_mutex);
ec44a35c
CM
1709 dev_root = dev_root->fs_info->dev_root;
1710
ec44a35c
CM
1711 /* step one make some room on all the devices */
1712 list_for_each(cur, devices) {
1713 device = list_entry(cur, struct btrfs_device, dev_list);
1714 old_size = device->total_bytes;
1715 size_to_free = div_factor(old_size, 1);
1716 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
2b82032c
YZ
1717 if (!device->writeable ||
1718 device->total_bytes - device->bytes_used > size_to_free)
ec44a35c
CM
1719 continue;
1720
1721 ret = btrfs_shrink_device(device, old_size - size_to_free);
1722 BUG_ON(ret);
1723
1724 trans = btrfs_start_transaction(dev_root, 1);
1725 BUG_ON(!trans);
1726
1727 ret = btrfs_grow_device(trans, device, old_size);
1728 BUG_ON(ret);
1729
1730 btrfs_end_transaction(trans, dev_root);
1731 }
1732
1733 /* step two, relocate all the chunks */
1734 path = btrfs_alloc_path();
1735 BUG_ON(!path);
1736
1737 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1738 key.offset = (u64)-1;
1739 key.type = BTRFS_CHUNK_ITEM_KEY;
1740
1741 while(1) {
1742 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
1743 if (ret < 0)
1744 goto error;
1745
1746 /*
1747 * this shouldn't happen, it means the last relocate
1748 * failed
1749 */
1750 if (ret == 0)
1751 break;
1752
1753 ret = btrfs_previous_item(chunk_root, path, 0,
1754 BTRFS_CHUNK_ITEM_KEY);
7d9eb12c 1755 if (ret)
ec44a35c 1756 break;
7d9eb12c 1757
ec44a35c
CM
1758 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1759 path->slots[0]);
1760 if (found_key.objectid != key.objectid)
1761 break;
7d9eb12c 1762
ec44a35c
CM
1763 chunk = btrfs_item_ptr(path->nodes[0],
1764 path->slots[0],
1765 struct btrfs_chunk);
1766 key.offset = found_key.offset;
1767 /* chunk zero is special */
1768 if (key.offset == 0)
1769 break;
1770
7d9eb12c 1771 btrfs_release_path(chunk_root, path);
ec44a35c
CM
1772 ret = btrfs_relocate_chunk(chunk_root,
1773 chunk_root->root_key.objectid,
1774 found_key.objectid,
1775 found_key.offset);
1776 BUG_ON(ret);
ec44a35c
CM
1777 }
1778 ret = 0;
1779error:
1780 btrfs_free_path(path);
7d9eb12c 1781 mutex_unlock(&dev_root->fs_info->volume_mutex);
ec44a35c
CM
1782 return ret;
1783}
1784
8f18cf13
CM
1785/*
1786 * shrinking a device means finding all of the device extents past
1787 * the new size, and then following the back refs to the chunks.
1788 * The chunk relocation code actually frees the device extent
1789 */
1790int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1791{
1792 struct btrfs_trans_handle *trans;
1793 struct btrfs_root *root = device->dev_root;
1794 struct btrfs_dev_extent *dev_extent = NULL;
1795 struct btrfs_path *path;
1796 u64 length;
1797 u64 chunk_tree;
1798 u64 chunk_objectid;
1799 u64 chunk_offset;
1800 int ret;
1801 int slot;
1802 struct extent_buffer *l;
1803 struct btrfs_key key;
1804 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1805 u64 old_total = btrfs_super_total_bytes(super_copy);
1806 u64 diff = device->total_bytes - new_size;
1807
2b82032c
YZ
1808 if (new_size >= device->total_bytes)
1809 return -EINVAL;
8f18cf13
CM
1810
1811 path = btrfs_alloc_path();
1812 if (!path)
1813 return -ENOMEM;
1814
1815 trans = btrfs_start_transaction(root, 1);
1816 if (!trans) {
1817 ret = -ENOMEM;
1818 goto done;
1819 }
1820
1821 path->reada = 2;
1822
7d9eb12c
CM
1823 lock_chunks(root);
1824
8f18cf13 1825 device->total_bytes = new_size;
2b82032c
YZ
1826 if (device->writeable)
1827 device->fs_devices->total_rw_bytes -= diff;
8f18cf13
CM
1828 ret = btrfs_update_device(trans, device);
1829 if (ret) {
7d9eb12c 1830 unlock_chunks(root);
8f18cf13
CM
1831 btrfs_end_transaction(trans, root);
1832 goto done;
1833 }
1834 WARN_ON(diff > old_total);
1835 btrfs_set_super_total_bytes(super_copy, old_total - diff);
7d9eb12c 1836 unlock_chunks(root);
8f18cf13
CM
1837 btrfs_end_transaction(trans, root);
1838
1839 key.objectid = device->devid;
1840 key.offset = (u64)-1;
1841 key.type = BTRFS_DEV_EXTENT_KEY;
1842
1843 while (1) {
1844 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1845 if (ret < 0)
1846 goto done;
1847
1848 ret = btrfs_previous_item(root, path, 0, key.type);
1849 if (ret < 0)
1850 goto done;
1851 if (ret) {
1852 ret = 0;
1853 goto done;
1854 }
1855
1856 l = path->nodes[0];
1857 slot = path->slots[0];
1858 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1859
1860 if (key.objectid != device->devid)
1861 goto done;
1862
1863 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1864 length = btrfs_dev_extent_length(l, dev_extent);
1865
1866 if (key.offset + length <= new_size)
1867 goto done;
1868
1869 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1870 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1871 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1872 btrfs_release_path(root, path);
1873
1874 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
1875 chunk_offset);
1876 if (ret)
1877 goto done;
1878 }
1879
1880done:
1881 btrfs_free_path(path);
1882 return ret;
1883}
1884
0b86a832
CM
1885int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
1886 struct btrfs_root *root,
1887 struct btrfs_key *key,
1888 struct btrfs_chunk *chunk, int item_size)
1889{
1890 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1891 struct btrfs_disk_key disk_key;
1892 u32 array_size;
1893 u8 *ptr;
1894
1895 array_size = btrfs_super_sys_array_size(super_copy);
1896 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
1897 return -EFBIG;
1898
1899 ptr = super_copy->sys_chunk_array + array_size;
1900 btrfs_cpu_key_to_disk(&disk_key, key);
1901 memcpy(ptr, &disk_key, sizeof(disk_key));
1902 ptr += sizeof(disk_key);
1903 memcpy(ptr, chunk, item_size);
1904 item_size += sizeof(disk_key);
1905 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
1906 return 0;
1907}
1908
a1b32a59
CM
1909static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size,
1910 int num_stripes, int sub_stripes)
9b3f68b9
CM
1911{
1912 if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
1913 return calc_size;
1914 else if (type & BTRFS_BLOCK_GROUP_RAID10)
1915 return calc_size * (num_stripes / sub_stripes);
1916 else
1917 return calc_size * num_stripes;
1918}
1919
2b82032c
YZ
1920static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
1921 struct btrfs_root *extent_root,
1922 struct map_lookup **map_ret,
1923 u64 *num_bytes, u64 *stripe_size,
1924 u64 start, u64 type)
0b86a832 1925{
593060d7 1926 struct btrfs_fs_info *info = extent_root->fs_info;
0b86a832 1927 struct btrfs_device *device = NULL;
2b82032c 1928 struct btrfs_fs_devices *fs_devices = info->fs_devices;
6324fbf3 1929 struct list_head *cur;
2b82032c 1930 struct map_lookup *map = NULL;
0b86a832 1931 struct extent_map_tree *em_tree;
0b86a832 1932 struct extent_map *em;
2b82032c 1933 struct list_head private_devs;
a40a90a0 1934 int min_stripe_size = 1 * 1024 * 1024;
0b86a832 1935 u64 calc_size = 1024 * 1024 * 1024;
9b3f68b9
CM
1936 u64 max_chunk_size = calc_size;
1937 u64 min_free;
6324fbf3
CM
1938 u64 avail;
1939 u64 max_avail = 0;
2b82032c 1940 u64 dev_offset;
6324fbf3 1941 int num_stripes = 1;
a40a90a0 1942 int min_stripes = 1;
321aecc6 1943 int sub_stripes = 0;
6324fbf3 1944 int looped = 0;
0b86a832 1945 int ret;
6324fbf3 1946 int index;
593060d7 1947 int stripe_len = 64 * 1024;
0b86a832 1948
ec44a35c
CM
1949 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
1950 (type & BTRFS_BLOCK_GROUP_DUP)) {
1951 WARN_ON(1);
1952 type &= ~BTRFS_BLOCK_GROUP_DUP;
1953 }
2b82032c 1954 if (list_empty(&fs_devices->alloc_list))
6324fbf3 1955 return -ENOSPC;
593060d7 1956
a40a90a0 1957 if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
2b82032c 1958 num_stripes = fs_devices->rw_devices;
a40a90a0
CM
1959 min_stripes = 2;
1960 }
1961 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
611f0e00 1962 num_stripes = 2;
a40a90a0
CM
1963 min_stripes = 2;
1964 }
8790d502 1965 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
2b82032c 1966 num_stripes = min_t(u64, 2, fs_devices->rw_devices);
9b3f68b9
CM
1967 if (num_stripes < 2)
1968 return -ENOSPC;
a40a90a0 1969 min_stripes = 2;
8790d502 1970 }
321aecc6 1971 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2b82032c 1972 num_stripes = fs_devices->rw_devices;
321aecc6
CM
1973 if (num_stripes < 4)
1974 return -ENOSPC;
1975 num_stripes &= ~(u32)1;
1976 sub_stripes = 2;
a40a90a0 1977 min_stripes = 4;
321aecc6 1978 }
9b3f68b9
CM
1979
1980 if (type & BTRFS_BLOCK_GROUP_DATA) {
1981 max_chunk_size = 10 * calc_size;
a40a90a0 1982 min_stripe_size = 64 * 1024 * 1024;
9b3f68b9
CM
1983 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
1984 max_chunk_size = 4 * calc_size;
a40a90a0
CM
1985 min_stripe_size = 32 * 1024 * 1024;
1986 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
1987 calc_size = 8 * 1024 * 1024;
1988 max_chunk_size = calc_size * 2;
1989 min_stripe_size = 1 * 1024 * 1024;
9b3f68b9
CM
1990 }
1991
2b82032c
YZ
1992 /* we don't want a chunk larger than 10% of writeable space */
1993 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
1994 max_chunk_size);
9b3f68b9 1995
a40a90a0 1996again:
2b82032c
YZ
1997 if (!map || map->num_stripes != num_stripes) {
1998 kfree(map);
1999 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2000 if (!map)
2001 return -ENOMEM;
2002 map->num_stripes = num_stripes;
2003 }
2004
9b3f68b9
CM
2005 if (calc_size * num_stripes > max_chunk_size) {
2006 calc_size = max_chunk_size;
2007 do_div(calc_size, num_stripes);
2008 do_div(calc_size, stripe_len);
2009 calc_size *= stripe_len;
2010 }
2011 /* we don't want tiny stripes */
a40a90a0 2012 calc_size = max_t(u64, min_stripe_size, calc_size);
9b3f68b9 2013
9b3f68b9
CM
2014 do_div(calc_size, stripe_len);
2015 calc_size *= stripe_len;
2016
2b82032c 2017 cur = fs_devices->alloc_list.next;
6324fbf3 2018 index = 0;
611f0e00
CM
2019
2020 if (type & BTRFS_BLOCK_GROUP_DUP)
2021 min_free = calc_size * 2;
9b3f68b9
CM
2022 else
2023 min_free = calc_size;
611f0e00 2024
0f9dd46c
JB
2025 /*
2026 * we add 1MB because we never use the first 1MB of the device, unless
2027 * we've looped, then we are likely allocating the maximum amount of
2028 * space left already
2029 */
2030 if (!looped)
2031 min_free += 1024 * 1024;
ad5bd91e 2032
2b82032c 2033 INIT_LIST_HEAD(&private_devs);
6324fbf3 2034 while(index < num_stripes) {
b3075717 2035 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
2b82032c 2036 BUG_ON(!device->writeable);
dfe25020
CM
2037 if (device->total_bytes > device->bytes_used)
2038 avail = device->total_bytes - device->bytes_used;
2039 else
2040 avail = 0;
6324fbf3 2041 cur = cur->next;
8f18cf13 2042
dfe25020 2043 if (device->in_fs_metadata && avail >= min_free) {
2b82032c
YZ
2044 ret = find_free_dev_extent(trans, device,
2045 min_free, &dev_offset);
8f18cf13
CM
2046 if (ret == 0) {
2047 list_move_tail(&device->dev_alloc_list,
2048 &private_devs);
2b82032c
YZ
2049 map->stripes[index].dev = device;
2050 map->stripes[index].physical = dev_offset;
611f0e00 2051 index++;
2b82032c
YZ
2052 if (type & BTRFS_BLOCK_GROUP_DUP) {
2053 map->stripes[index].dev = device;
2054 map->stripes[index].physical =
2055 dev_offset + calc_size;
8f18cf13 2056 index++;
2b82032c 2057 }
8f18cf13 2058 }
dfe25020 2059 } else if (device->in_fs_metadata && avail > max_avail)
a40a90a0 2060 max_avail = avail;
2b82032c 2061 if (cur == &fs_devices->alloc_list)
6324fbf3
CM
2062 break;
2063 }
2b82032c 2064 list_splice(&private_devs, &fs_devices->alloc_list);
6324fbf3 2065 if (index < num_stripes) {
a40a90a0
CM
2066 if (index >= min_stripes) {
2067 num_stripes = index;
2068 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2069 num_stripes /= sub_stripes;
2070 num_stripes *= sub_stripes;
2071 }
2072 looped = 1;
2073 goto again;
2074 }
6324fbf3
CM
2075 if (!looped && max_avail > 0) {
2076 looped = 1;
2077 calc_size = max_avail;
2078 goto again;
2079 }
2b82032c 2080 kfree(map);
6324fbf3
CM
2081 return -ENOSPC;
2082 }
2b82032c
YZ
2083 map->sector_size = extent_root->sectorsize;
2084 map->stripe_len = stripe_len;
2085 map->io_align = stripe_len;
2086 map->io_width = stripe_len;
2087 map->type = type;
2088 map->num_stripes = num_stripes;
2089 map->sub_stripes = sub_stripes;
0b86a832 2090
2b82032c
YZ
2091 *map_ret = map;
2092 *stripe_size = calc_size;
2093 *num_bytes = chunk_bytes_by_type(type, calc_size,
2094 num_stripes, sub_stripes);
0b86a832 2095
2b82032c
YZ
2096 em = alloc_extent_map(GFP_NOFS);
2097 if (!em) {
2098 kfree(map);
593060d7
CM
2099 return -ENOMEM;
2100 }
2b82032c
YZ
2101 em->bdev = (struct block_device *)map;
2102 em->start = start;
2103 em->len = *num_bytes;
2104 em->block_start = 0;
2105 em->block_len = em->len;
593060d7 2106
2b82032c
YZ
2107 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
2108 spin_lock(&em_tree->lock);
2109 ret = add_extent_mapping(em_tree, em);
2110 spin_unlock(&em_tree->lock);
2111 BUG_ON(ret);
2112 free_extent_map(em);
0b86a832 2113
2b82032c
YZ
2114 ret = btrfs_make_block_group(trans, extent_root, 0, type,
2115 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2116 start, *num_bytes);
2117 BUG_ON(ret);
611f0e00 2118
2b82032c
YZ
2119 index = 0;
2120 while (index < map->num_stripes) {
2121 device = map->stripes[index].dev;
2122 dev_offset = map->stripes[index].physical;
0b86a832
CM
2123
2124 ret = btrfs_alloc_dev_extent(trans, device,
2b82032c
YZ
2125 info->chunk_root->root_key.objectid,
2126 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2127 start, dev_offset, calc_size);
0b86a832 2128 BUG_ON(ret);
2b82032c
YZ
2129 index++;
2130 }
2131
2132 return 0;
2133}
2134
2135static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
2136 struct btrfs_root *extent_root,
2137 struct map_lookup *map, u64 chunk_offset,
2138 u64 chunk_size, u64 stripe_size)
2139{
2140 u64 dev_offset;
2141 struct btrfs_key key;
2142 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
2143 struct btrfs_device *device;
2144 struct btrfs_chunk *chunk;
2145 struct btrfs_stripe *stripe;
2146 size_t item_size = btrfs_chunk_item_size(map->num_stripes);
2147 int index = 0;
2148 int ret;
2149
2150 chunk = kzalloc(item_size, GFP_NOFS);
2151 if (!chunk)
2152 return -ENOMEM;
2153
2154 index = 0;
2155 while (index < map->num_stripes) {
2156 device = map->stripes[index].dev;
2157 device->bytes_used += stripe_size;
0b86a832
CM
2158 ret = btrfs_update_device(trans, device);
2159 BUG_ON(ret);
2b82032c
YZ
2160 index++;
2161 }
2162
2163 index = 0;
2164 stripe = &chunk->stripe;
2165 while (index < map->num_stripes) {
2166 device = map->stripes[index].dev;
2167 dev_offset = map->stripes[index].physical;
0b86a832 2168
e17cade2
CM
2169 btrfs_set_stack_stripe_devid(stripe, device->devid);
2170 btrfs_set_stack_stripe_offset(stripe, dev_offset);
2171 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
2b82032c 2172 stripe++;
0b86a832
CM
2173 index++;
2174 }
2175
2b82032c 2176 btrfs_set_stack_chunk_length(chunk, chunk_size);
0b86a832 2177 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
2b82032c
YZ
2178 btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
2179 btrfs_set_stack_chunk_type(chunk, map->type);
2180 btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
2181 btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
2182 btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
0b86a832 2183 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
2b82032c 2184 btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
0b86a832 2185
2b82032c
YZ
2186 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2187 key.type = BTRFS_CHUNK_ITEM_KEY;
2188 key.offset = chunk_offset;
0b86a832 2189
2b82032c
YZ
2190 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
2191 BUG_ON(ret);
0b86a832 2192
2b82032c
YZ
2193 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
2194 ret = btrfs_add_system_chunk(trans, chunk_root, &key, chunk,
2195 item_size);
8f18cf13
CM
2196 BUG_ON(ret);
2197 }
0b86a832 2198 kfree(chunk);
2b82032c
YZ
2199 return 0;
2200}
0b86a832 2201
2b82032c
YZ
2202/*
2203 * Chunk allocation falls into two parts. The first part does works
2204 * that make the new allocated chunk useable, but not do any operation
2205 * that modifies the chunk tree. The second part does the works that
2206 * require modifying the chunk tree. This division is important for the
2207 * bootstrap process of adding storage to a seed btrfs.
2208 */
2209int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2210 struct btrfs_root *extent_root, u64 type)
2211{
2212 u64 chunk_offset;
2213 u64 chunk_size;
2214 u64 stripe_size;
2215 struct map_lookup *map;
2216 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
2217 int ret;
2218
2219 ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2220 &chunk_offset);
2221 if (ret)
2222 return ret;
2223
2224 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
2225 &stripe_size, chunk_offset, type);
2226 if (ret)
2227 return ret;
2228
2229 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
2230 chunk_size, stripe_size);
2231 BUG_ON(ret);
2232 return 0;
2233}
2234
2235static int noinline init_first_rw_device(struct btrfs_trans_handle *trans,
2236 struct btrfs_root *root,
2237 struct btrfs_device *device)
2238{
2239 u64 chunk_offset;
2240 u64 sys_chunk_offset;
2241 u64 chunk_size;
2242 u64 sys_chunk_size;
2243 u64 stripe_size;
2244 u64 sys_stripe_size;
2245 u64 alloc_profile;
2246 struct map_lookup *map;
2247 struct map_lookup *sys_map;
2248 struct btrfs_fs_info *fs_info = root->fs_info;
2249 struct btrfs_root *extent_root = fs_info->extent_root;
2250 int ret;
2251
2252 ret = find_next_chunk(fs_info->chunk_root,
2253 BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
2254 BUG_ON(ret);
2255
2256 alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
2257 (fs_info->metadata_alloc_profile &
2258 fs_info->avail_metadata_alloc_bits);
2259 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
2260
2261 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
2262 &stripe_size, chunk_offset, alloc_profile);
2263 BUG_ON(ret);
2264
2265 sys_chunk_offset = chunk_offset + chunk_size;
2266
2267 alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
2268 (fs_info->system_alloc_profile &
2269 fs_info->avail_system_alloc_bits);
2270 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
2271
2272 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
2273 &sys_chunk_size, &sys_stripe_size,
2274 sys_chunk_offset, alloc_profile);
2275 BUG_ON(ret);
2276
2277 ret = btrfs_add_device(trans, fs_info->chunk_root, device);
2278 BUG_ON(ret);
2279
2280 /*
2281 * Modifying chunk tree needs allocating new blocks from both
2282 * system block group and metadata block group. So we only can
2283 * do operations require modifying the chunk tree after both
2284 * block groups were created.
2285 */
2286 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
2287 chunk_size, stripe_size);
2288 BUG_ON(ret);
2289
2290 ret = __finish_chunk_alloc(trans, extent_root, sys_map,
2291 sys_chunk_offset, sys_chunk_size,
2292 sys_stripe_size);
b248a415 2293 BUG_ON(ret);
2b82032c
YZ
2294 return 0;
2295}
2296
2297int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2298{
2299 struct extent_map *em;
2300 struct map_lookup *map;
2301 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2302 int readonly = 0;
2303 int i;
2304
2305 spin_lock(&map_tree->map_tree.lock);
2306 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2307 spin_unlock(&map_tree->map_tree.lock);
2308 if (!em)
2309 return 1;
2310
2311 map = (struct map_lookup *)em->bdev;
2312 for (i = 0; i < map->num_stripes; i++) {
2313 if (!map->stripes[i].dev->writeable) {
2314 readonly = 1;
2315 break;
2316 }
2317 }
0b86a832 2318 free_extent_map(em);
2b82032c 2319 return readonly;
0b86a832
CM
2320}
2321
2322void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
2323{
2324 extent_map_tree_init(&tree->map_tree, GFP_NOFS);
2325}
2326
2327void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
2328{
2329 struct extent_map *em;
2330
2331 while(1) {
2332 spin_lock(&tree->map_tree.lock);
2333 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
2334 if (em)
2335 remove_extent_mapping(&tree->map_tree, em);
2336 spin_unlock(&tree->map_tree.lock);
2337 if (!em)
2338 break;
2339 kfree(em->bdev);
2340 /* once for us */
2341 free_extent_map(em);
2342 /* once for the tree */
2343 free_extent_map(em);
2344 }
2345}
2346
f188591e
CM
2347int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
2348{
2349 struct extent_map *em;
2350 struct map_lookup *map;
2351 struct extent_map_tree *em_tree = &map_tree->map_tree;
2352 int ret;
2353
2354 spin_lock(&em_tree->lock);
2355 em = lookup_extent_mapping(em_tree, logical, len);
b248a415 2356 spin_unlock(&em_tree->lock);
f188591e
CM
2357 BUG_ON(!em);
2358
2359 BUG_ON(em->start > logical || em->start + em->len < logical);
2360 map = (struct map_lookup *)em->bdev;
2361 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
2362 ret = map->num_stripes;
321aecc6
CM
2363 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
2364 ret = map->sub_stripes;
f188591e
CM
2365 else
2366 ret = 1;
2367 free_extent_map(em);
f188591e
CM
2368 return ret;
2369}
2370
dfe25020
CM
2371static int find_live_mirror(struct map_lookup *map, int first, int num,
2372 int optimal)
2373{
2374 int i;
2375 if (map->stripes[optimal].dev->bdev)
2376 return optimal;
2377 for (i = first; i < first + num; i++) {
2378 if (map->stripes[i].dev->bdev)
2379 return i;
2380 }
2381 /* we couldn't find one that doesn't fail. Just return something
2382 * and the io error handling code will clean up eventually
2383 */
2384 return optimal;
2385}
2386
f2d8d74d
CM
2387static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2388 u64 logical, u64 *length,
2389 struct btrfs_multi_bio **multi_ret,
2390 int mirror_num, struct page *unplug_page)
0b86a832
CM
2391{
2392 struct extent_map *em;
2393 struct map_lookup *map;
2394 struct extent_map_tree *em_tree = &map_tree->map_tree;
2395 u64 offset;
593060d7
CM
2396 u64 stripe_offset;
2397 u64 stripe_nr;
cea9e445 2398 int stripes_allocated = 8;
321aecc6 2399 int stripes_required = 1;
593060d7 2400 int stripe_index;
cea9e445 2401 int i;
f2d8d74d 2402 int num_stripes;
a236aed1 2403 int max_errors = 0;
cea9e445 2404 struct btrfs_multi_bio *multi = NULL;
0b86a832 2405
cea9e445
CM
2406 if (multi_ret && !(rw & (1 << BIO_RW))) {
2407 stripes_allocated = 1;
2408 }
2409again:
2410 if (multi_ret) {
2411 multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
2412 GFP_NOFS);
2413 if (!multi)
2414 return -ENOMEM;
a236aed1
CM
2415
2416 atomic_set(&multi->error, 0);
cea9e445 2417 }
0b86a832
CM
2418
2419 spin_lock(&em_tree->lock);
2420 em = lookup_extent_mapping(em_tree, logical, *length);
b248a415 2421 spin_unlock(&em_tree->lock);
f2d8d74d
CM
2422
2423 if (!em && unplug_page)
2424 return 0;
2425
3b951516 2426 if (!em) {
a061fc8d 2427 printk("unable to find logical %Lu len %Lu\n", logical, *length);
f2d8d74d 2428 BUG();
3b951516 2429 }
0b86a832
CM
2430
2431 BUG_ON(em->start > logical || em->start + em->len < logical);
2432 map = (struct map_lookup *)em->bdev;
2433 offset = logical - em->start;
593060d7 2434
f188591e
CM
2435 if (mirror_num > map->num_stripes)
2436 mirror_num = 0;
2437
cea9e445 2438 /* if our multi bio struct is too small, back off and try again */
321aecc6
CM
2439 if (rw & (1 << BIO_RW)) {
2440 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
2441 BTRFS_BLOCK_GROUP_DUP)) {
2442 stripes_required = map->num_stripes;
a236aed1 2443 max_errors = 1;
321aecc6
CM
2444 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2445 stripes_required = map->sub_stripes;
a236aed1 2446 max_errors = 1;
321aecc6
CM
2447 }
2448 }
2449 if (multi_ret && rw == WRITE &&
2450 stripes_allocated < stripes_required) {
cea9e445 2451 stripes_allocated = map->num_stripes;
cea9e445
CM
2452 free_extent_map(em);
2453 kfree(multi);
2454 goto again;
2455 }
593060d7
CM
2456 stripe_nr = offset;
2457 /*
2458 * stripe_nr counts the total number of stripes we have to stride
2459 * to get to this block
2460 */
2461 do_div(stripe_nr, map->stripe_len);
2462
2463 stripe_offset = stripe_nr * map->stripe_len;
2464 BUG_ON(offset < stripe_offset);
2465
2466 /* stripe_offset is the offset of this block in its stripe*/
2467 stripe_offset = offset - stripe_offset;
2468
cea9e445 2469 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
321aecc6 2470 BTRFS_BLOCK_GROUP_RAID10 |
cea9e445
CM
2471 BTRFS_BLOCK_GROUP_DUP)) {
2472 /* we limit the length of each bio to what fits in a stripe */
2473 *length = min_t(u64, em->len - offset,
2474 map->stripe_len - stripe_offset);
2475 } else {
2476 *length = em->len - offset;
2477 }
f2d8d74d
CM
2478
2479 if (!multi_ret && !unplug_page)
cea9e445
CM
2480 goto out;
2481
f2d8d74d 2482 num_stripes = 1;
cea9e445 2483 stripe_index = 0;
8790d502 2484 if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
f2d8d74d
CM
2485 if (unplug_page || (rw & (1 << BIO_RW)))
2486 num_stripes = map->num_stripes;
2fff734f 2487 else if (mirror_num)
f188591e 2488 stripe_index = mirror_num - 1;
dfe25020
CM
2489 else {
2490 stripe_index = find_live_mirror(map, 0,
2491 map->num_stripes,
2492 current->pid % map->num_stripes);
2493 }
2fff734f 2494
611f0e00 2495 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
cea9e445 2496 if (rw & (1 << BIO_RW))
f2d8d74d 2497 num_stripes = map->num_stripes;
f188591e
CM
2498 else if (mirror_num)
2499 stripe_index = mirror_num - 1;
2fff734f 2500
321aecc6
CM
2501 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2502 int factor = map->num_stripes / map->sub_stripes;
321aecc6
CM
2503
2504 stripe_index = do_div(stripe_nr, factor);
2505 stripe_index *= map->sub_stripes;
2506
f2d8d74d
CM
2507 if (unplug_page || (rw & (1 << BIO_RW)))
2508 num_stripes = map->sub_stripes;
321aecc6
CM
2509 else if (mirror_num)
2510 stripe_index += mirror_num - 1;
dfe25020
CM
2511 else {
2512 stripe_index = find_live_mirror(map, stripe_index,
2513 map->sub_stripes, stripe_index +
2514 current->pid % map->sub_stripes);
2515 }
8790d502
CM
2516 } else {
2517 /*
2518 * after this do_div call, stripe_nr is the number of stripes
2519 * on this device we have to walk to find the data, and
2520 * stripe_index is the number of our device in the stripe array
2521 */
2522 stripe_index = do_div(stripe_nr, map->num_stripes);
2523 }
593060d7 2524 BUG_ON(stripe_index >= map->num_stripes);
cea9e445 2525
f2d8d74d
CM
2526 for (i = 0; i < num_stripes; i++) {
2527 if (unplug_page) {
2528 struct btrfs_device *device;
2529 struct backing_dev_info *bdi;
2530
2531 device = map->stripes[stripe_index].dev;
dfe25020
CM
2532 if (device->bdev) {
2533 bdi = blk_get_backing_dev_info(device->bdev);
2534 if (bdi->unplug_io_fn) {
2535 bdi->unplug_io_fn(bdi, unplug_page);
2536 }
f2d8d74d
CM
2537 }
2538 } else {
2539 multi->stripes[i].physical =
2540 map->stripes[stripe_index].physical +
2541 stripe_offset + stripe_nr * map->stripe_len;
2542 multi->stripes[i].dev = map->stripes[stripe_index].dev;
2543 }
cea9e445 2544 stripe_index++;
593060d7 2545 }
f2d8d74d
CM
2546 if (multi_ret) {
2547 *multi_ret = multi;
2548 multi->num_stripes = num_stripes;
a236aed1 2549 multi->max_errors = max_errors;
f2d8d74d 2550 }
cea9e445 2551out:
0b86a832 2552 free_extent_map(em);
0b86a832
CM
2553 return 0;
2554}
2555
f2d8d74d
CM
2556int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2557 u64 logical, u64 *length,
2558 struct btrfs_multi_bio **multi_ret, int mirror_num)
2559{
2560 return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
2561 mirror_num, NULL);
2562}
2563
2564int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
2565 u64 logical, struct page *page)
2566{
2567 u64 length = PAGE_CACHE_SIZE;
2568 return __btrfs_map_block(map_tree, READ, logical, &length,
2569 NULL, 0, page);
2570}
2571
2572
8790d502 2573static void end_bio_multi_stripe(struct bio *bio, int err)
8790d502 2574{
cea9e445 2575 struct btrfs_multi_bio *multi = bio->bi_private;
7d2b4daa 2576 int is_orig_bio = 0;
8790d502 2577
8790d502 2578 if (err)
a236aed1 2579 atomic_inc(&multi->error);
8790d502 2580
7d2b4daa
CM
2581 if (bio == multi->orig_bio)
2582 is_orig_bio = 1;
2583
cea9e445 2584 if (atomic_dec_and_test(&multi->stripes_pending)) {
7d2b4daa
CM
2585 if (!is_orig_bio) {
2586 bio_put(bio);
2587 bio = multi->orig_bio;
2588 }
8790d502
CM
2589 bio->bi_private = multi->private;
2590 bio->bi_end_io = multi->end_io;
a236aed1
CM
2591 /* only send an error to the higher layers if it is
2592 * beyond the tolerance of the multi-bio
2593 */
1259ab75 2594 if (atomic_read(&multi->error) > multi->max_errors) {
a236aed1 2595 err = -EIO;
1259ab75
CM
2596 } else if (err) {
2597 /*
2598 * this bio is actually up to date, we didn't
2599 * go over the max number of errors
2600 */
2601 set_bit(BIO_UPTODATE, &bio->bi_flags);
a236aed1 2602 err = 0;
1259ab75 2603 }
8790d502
CM
2604 kfree(multi);
2605
2606 bio_endio(bio, err);
7d2b4daa 2607 } else if (!is_orig_bio) {
8790d502
CM
2608 bio_put(bio);
2609 }
8790d502
CM
2610}
2611
8b712842
CM
/*
 * Groups a bio with its rw flags, the owning fs_info and a btrfs_work item.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; presumably it hands a bio to a worker thread -- confirm against
 * the callers.
 */
struct async_sched {
	struct bio *bio;
	int rw;
	struct btrfs_fs_info *info;
	struct btrfs_work work;
};
2618
2619/*
2620 * see run_scheduled_bios for a description of why bios are collected for
2621 * async submit.
2622 *
2623 * This will add one bio to the pending list for a device and make sure
2624 * the work struct is scheduled.
2625 */
a1b32a59
CM
2626static int noinline schedule_bio(struct btrfs_root *root,
2627 struct btrfs_device *device,
2628 int rw, struct bio *bio)
8b712842
CM
2629{
2630 int should_queue = 1;
2631
2632 /* don't bother with additional async steps for reads, right now */
2633 if (!(rw & (1 << BIO_RW))) {
492bb6de 2634 bio_get(bio);
8b712842 2635 submit_bio(rw, bio);
492bb6de 2636 bio_put(bio);
8b712842
CM
2637 return 0;
2638 }
2639
2640 /*
0986fe9e 2641 * nr_async_bios allows us to reliably return congestion to the
8b712842
CM
2642 * higher layers. Otherwise, the async bio makes it appear we have
2643 * made progress against dirty pages when we've really just put it
2644 * on a queue for later
2645 */
0986fe9e 2646 atomic_inc(&root->fs_info->nr_async_bios);
492bb6de 2647 WARN_ON(bio->bi_next);
8b712842
CM
2648 bio->bi_next = NULL;
2649 bio->bi_rw |= rw;
2650
2651 spin_lock(&device->io_lock);
2652
2653 if (device->pending_bio_tail)
2654 device->pending_bio_tail->bi_next = bio;
2655
2656 device->pending_bio_tail = bio;
2657 if (!device->pending_bios)
2658 device->pending_bios = bio;
2659 if (device->running_pending)
2660 should_queue = 0;
2661
2662 spin_unlock(&device->io_lock);
2663
2664 if (should_queue)
1cc127b5
CM
2665 btrfs_queue_worker(&root->fs_info->submit_workers,
2666 &device->work);
8b712842
CM
2667 return 0;
2668}
2669
f188591e 2670int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
8b712842 2671 int mirror_num, int async_submit)
0b86a832
CM
2672{
2673 struct btrfs_mapping_tree *map_tree;
2674 struct btrfs_device *dev;
8790d502 2675 struct bio *first_bio = bio;
a62b9401 2676 u64 logical = (u64)bio->bi_sector << 9;
0b86a832
CM
2677 u64 length = 0;
2678 u64 map_length;
cea9e445 2679 struct btrfs_multi_bio *multi = NULL;
0b86a832 2680 int ret;
8790d502
CM
2681 int dev_nr = 0;
2682 int total_devs = 1;
0b86a832 2683
f2d8d74d 2684 length = bio->bi_size;
0b86a832
CM
2685 map_tree = &root->fs_info->mapping_tree;
2686 map_length = length;
cea9e445 2687
f188591e
CM
2688 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
2689 mirror_num);
cea9e445
CM
2690 BUG_ON(ret);
2691
2692 total_devs = multi->num_stripes;
2693 if (map_length < length) {
2694 printk("mapping failed logical %Lu bio len %Lu "
2695 "len %Lu\n", logical, length, map_length);
2696 BUG();
2697 }
2698 multi->end_io = first_bio->bi_end_io;
2699 multi->private = first_bio->bi_private;
7d2b4daa 2700 multi->orig_bio = first_bio;
cea9e445
CM
2701 atomic_set(&multi->stripes_pending, multi->num_stripes);
2702
8790d502 2703 while(dev_nr < total_devs) {
8790d502 2704 if (total_devs > 1) {
8790d502
CM
2705 if (dev_nr < total_devs - 1) {
2706 bio = bio_clone(first_bio, GFP_NOFS);
2707 BUG_ON(!bio);
2708 } else {
2709 bio = first_bio;
2710 }
2711 bio->bi_private = multi;
2712 bio->bi_end_io = end_bio_multi_stripe;
2713 }
cea9e445
CM
2714 bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
2715 dev = multi->stripes[dev_nr].dev;
2b82032c 2716 BUG_ON(rw == WRITE && !dev->writeable);
dfe25020
CM
2717 if (dev && dev->bdev) {
2718 bio->bi_bdev = dev->bdev;
8b712842
CM
2719 if (async_submit)
2720 schedule_bio(root, dev, rw, bio);
2721 else
2722 submit_bio(rw, bio);
dfe25020
CM
2723 } else {
2724 bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
2725 bio->bi_sector = logical >> 9;
dfe25020 2726 bio_endio(bio, -EIO);
dfe25020 2727 }
8790d502
CM
2728 dev_nr++;
2729 }
cea9e445
CM
2730 if (total_devs == 1)
2731 kfree(multi);
0b86a832
CM
2732 return 0;
2733}
2734
a443755f 2735struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
2b82032c 2736 u8 *uuid, u8 *fsid)
0b86a832 2737{
2b82032c
YZ
2738 struct btrfs_device *device;
2739 struct btrfs_fs_devices *cur_devices;
2740
2741 cur_devices = root->fs_info->fs_devices;
2742 while (cur_devices) {
2743 if (!fsid ||
2744 !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
2745 device = __find_device(&cur_devices->devices,
2746 devid, uuid);
2747 if (device)
2748 return device;
2749 }
2750 cur_devices = cur_devices->seed;
2751 }
2752 return NULL;
0b86a832
CM
2753}
2754
dfe25020
CM
2755static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
2756 u64 devid, u8 *dev_uuid)
2757{
2758 struct btrfs_device *device;
2759 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2760
2761 device = kzalloc(sizeof(*device), GFP_NOFS);
7cbd8a83 2762 if (!device)
2763 return NULL;
dfe25020
CM
2764 list_add(&device->dev_list,
2765 &fs_devices->devices);
dfe25020
CM
2766 device->barriers = 1;
2767 device->dev_root = root->fs_info->dev_root;
2768 device->devid = devid;
8b712842 2769 device->work.func = pending_bios_fn;
dfe25020
CM
2770 fs_devices->num_devices++;
2771 spin_lock_init(&device->io_lock);
2772 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
2773 return device;
2774}
2775
0b86a832
CM
2776static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
2777 struct extent_buffer *leaf,
2778 struct btrfs_chunk *chunk)
2779{
2780 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2781 struct map_lookup *map;
2782 struct extent_map *em;
2783 u64 logical;
2784 u64 length;
2785 u64 devid;
a443755f 2786 u8 uuid[BTRFS_UUID_SIZE];
593060d7 2787 int num_stripes;
0b86a832 2788 int ret;
593060d7 2789 int i;
0b86a832 2790
e17cade2
CM
2791 logical = key->offset;
2792 length = btrfs_chunk_length(leaf, chunk);
a061fc8d 2793
0b86a832
CM
2794 spin_lock(&map_tree->map_tree.lock);
2795 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
b248a415 2796 spin_unlock(&map_tree->map_tree.lock);
0b86a832
CM
2797
2798 /* already mapped? */
2799 if (em && em->start <= logical && em->start + em->len > logical) {
2800 free_extent_map(em);
0b86a832
CM
2801 return 0;
2802 } else if (em) {
2803 free_extent_map(em);
2804 }
0b86a832
CM
2805
2806 map = kzalloc(sizeof(*map), GFP_NOFS);
2807 if (!map)
2808 return -ENOMEM;
2809
2810 em = alloc_extent_map(GFP_NOFS);
2811 if (!em)
2812 return -ENOMEM;
593060d7
CM
2813 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
2814 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
0b86a832
CM
2815 if (!map) {
2816 free_extent_map(em);
2817 return -ENOMEM;
2818 }
2819
2820 em->bdev = (struct block_device *)map;
2821 em->start = logical;
2822 em->len = length;
2823 em->block_start = 0;
c8b97818 2824 em->block_len = em->len;
0b86a832 2825
593060d7
CM
2826 map->num_stripes = num_stripes;
2827 map->io_width = btrfs_chunk_io_width(leaf, chunk);
2828 map->io_align = btrfs_chunk_io_align(leaf, chunk);
2829 map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
2830 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
2831 map->type = btrfs_chunk_type(leaf, chunk);
321aecc6 2832 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
593060d7
CM
2833 for (i = 0; i < num_stripes; i++) {
2834 map->stripes[i].physical =
2835 btrfs_stripe_offset_nr(leaf, chunk, i);
2836 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
a443755f
CM
2837 read_extent_buffer(leaf, uuid, (unsigned long)
2838 btrfs_stripe_dev_uuid_nr(chunk, i),
2839 BTRFS_UUID_SIZE);
2b82032c
YZ
2840 map->stripes[i].dev = btrfs_find_device(root, devid, uuid,
2841 NULL);
dfe25020 2842 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
593060d7
CM
2843 kfree(map);
2844 free_extent_map(em);
2845 return -EIO;
2846 }
dfe25020
CM
2847 if (!map->stripes[i].dev) {
2848 map->stripes[i].dev =
2849 add_missing_dev(root, devid, uuid);
2850 if (!map->stripes[i].dev) {
2851 kfree(map);
2852 free_extent_map(em);
2853 return -EIO;
2854 }
2855 }
2856 map->stripes[i].dev->in_fs_metadata = 1;
0b86a832
CM
2857 }
2858
2859 spin_lock(&map_tree->map_tree.lock);
2860 ret = add_extent_mapping(&map_tree->map_tree, em);
0b86a832 2861 spin_unlock(&map_tree->map_tree.lock);
b248a415 2862 BUG_ON(ret);
0b86a832
CM
2863 free_extent_map(em);
2864
2865 return 0;
2866}
2867
2868static int fill_device_from_item(struct extent_buffer *leaf,
2869 struct btrfs_dev_item *dev_item,
2870 struct btrfs_device *device)
2871{
2872 unsigned long ptr;
0b86a832
CM
2873
2874 device->devid = btrfs_device_id(leaf, dev_item);
2875 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2876 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2877 device->type = btrfs_device_type(leaf, dev_item);
2878 device->io_align = btrfs_device_io_align(leaf, dev_item);
2879 device->io_width = btrfs_device_io_width(leaf, dev_item);
2880 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
0b86a832
CM
2881
2882 ptr = (unsigned long)btrfs_device_uuid(dev_item);
e17cade2 2883 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
0b86a832 2884
0b86a832
CM
2885 return 0;
2886}
2887
2b82032c
YZ
2888static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
2889{
2890 struct btrfs_fs_devices *fs_devices;
2891 int ret;
2892
2893 mutex_lock(&uuid_mutex);
2894
2895 fs_devices = root->fs_info->fs_devices->seed;
2896 while (fs_devices) {
2897 if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
2898 ret = 0;
2899 goto out;
2900 }
2901 fs_devices = fs_devices->seed;
2902 }
2903
2904 fs_devices = find_fsid(fsid);
2905 if (!fs_devices) {
2906 ret = -ENOENT;
2907 goto out;
2908 }
2909 if (fs_devices->opened) {
2910 ret = -EBUSY;
2911 goto out;
2912 }
2913
15916de8
CM
2914 ret = __btrfs_open_devices(fs_devices, MS_RDONLY,
2915 root->fs_info->bdev_holder);
2b82032c
YZ
2916 if (ret)
2917 goto out;
2918
2919 if (!fs_devices->seeding) {
2920 __btrfs_close_devices(fs_devices);
2921 ret = -EINVAL;
2922 goto out;
2923 }
2924
2925 fs_devices->seed = root->fs_info->fs_devices->seed;
2926 root->fs_info->fs_devices->seed = fs_devices;
2927 fs_devices->sprouted = 1;
2928out:
2929 mutex_unlock(&uuid_mutex);
2930 return ret;
2931}
2932
0d81ba5d 2933static int read_one_dev(struct btrfs_root *root,
0b86a832
CM
2934 struct extent_buffer *leaf,
2935 struct btrfs_dev_item *dev_item)
2936{
2937 struct btrfs_device *device;
2938 u64 devid;
2939 int ret;
2b82032c
YZ
2940 int seed_devices = 0;
2941 u8 fs_uuid[BTRFS_UUID_SIZE];
a443755f
CM
2942 u8 dev_uuid[BTRFS_UUID_SIZE];
2943
0b86a832 2944 devid = btrfs_device_id(leaf, dev_item);
a443755f
CM
2945 read_extent_buffer(leaf, dev_uuid,
2946 (unsigned long)btrfs_device_uuid(dev_item),
2947 BTRFS_UUID_SIZE);
2b82032c
YZ
2948 read_extent_buffer(leaf, fs_uuid,
2949 (unsigned long)btrfs_device_fsid(dev_item),
2950 BTRFS_UUID_SIZE);
2951
2952 if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
2953 ret = open_seed_devices(root, fs_uuid);
2954 if (ret)
2955 return ret;
2956 seed_devices = 1;
2957 }
2958
2959 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
2960 if (!device || !device->bdev) {
2961 if (!btrfs_test_opt(root, DEGRADED) || seed_devices)
2962 return -EIO;
2963
2964 if (!device) {
2965 printk("warning devid %Lu missing\n", devid);
2966 device = add_missing_dev(root, devid, dev_uuid);
2967 if (!device)
2968 return -ENOMEM;
2969 }
2970 }
2971
2972 if (device->fs_devices != root->fs_info->fs_devices) {
2973 BUG_ON(device->writeable);
2974 if (device->generation !=
2975 btrfs_device_generation(leaf, dev_item))
2976 return -EINVAL;
6324fbf3 2977 }
0b86a832
CM
2978
2979 fill_device_from_item(leaf, dev_item, device);
2980 device->dev_root = root->fs_info->dev_root;
dfe25020 2981 device->in_fs_metadata = 1;
2b82032c
YZ
2982 if (device->writeable)
2983 device->fs_devices->total_rw_bytes += device->total_bytes;
0b86a832
CM
2984 ret = 0;
2985#if 0
2986 ret = btrfs_open_device(device);
2987 if (ret) {
2988 kfree(device);
2989 }
2990#endif
2991 return ret;
2992}
2993
0d81ba5d
CM
2994int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
2995{
2996 struct btrfs_dev_item *dev_item;
2997
2998 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
2999 dev_item);
3000 return read_one_dev(root, buf, dev_item);
3001}
3002
0b86a832
CM
3003int btrfs_read_sys_array(struct btrfs_root *root)
3004{
3005 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
a061fc8d 3006 struct extent_buffer *sb;
0b86a832 3007 struct btrfs_disk_key *disk_key;
0b86a832 3008 struct btrfs_chunk *chunk;
84eed90f
CM
3009 u8 *ptr;
3010 unsigned long sb_ptr;
3011 int ret = 0;
0b86a832
CM
3012 u32 num_stripes;
3013 u32 array_size;
3014 u32 len = 0;
0b86a832 3015 u32 cur;
84eed90f 3016 struct btrfs_key key;
0b86a832 3017
a061fc8d
CM
3018 sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
3019 BTRFS_SUPER_INFO_SIZE);
3020 if (!sb)
3021 return -ENOMEM;
3022 btrfs_set_buffer_uptodate(sb);
3023 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
0b86a832
CM
3024 array_size = btrfs_super_sys_array_size(super_copy);
3025
0b86a832
CM
3026 ptr = super_copy->sys_chunk_array;
3027 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
3028 cur = 0;
3029
3030 while (cur < array_size) {
3031 disk_key = (struct btrfs_disk_key *)ptr;
3032 btrfs_disk_key_to_cpu(&key, disk_key);
3033
a061fc8d 3034 len = sizeof(*disk_key); ptr += len;
0b86a832
CM
3035 sb_ptr += len;
3036 cur += len;
3037
0d81ba5d 3038 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
0b86a832 3039 chunk = (struct btrfs_chunk *)sb_ptr;
0d81ba5d 3040 ret = read_one_chunk(root, &key, sb, chunk);
84eed90f
CM
3041 if (ret)
3042 break;
0b86a832
CM
3043 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
3044 len = btrfs_chunk_item_size(num_stripes);
3045 } else {
84eed90f
CM
3046 ret = -EIO;
3047 break;
0b86a832
CM
3048 }
3049 ptr += len;
3050 sb_ptr += len;
3051 cur += len;
3052 }
a061fc8d 3053 free_extent_buffer(sb);
84eed90f 3054 return ret;
0b86a832
CM
3055}
3056
3057int btrfs_read_chunk_tree(struct btrfs_root *root)
3058{
3059 struct btrfs_path *path;
3060 struct extent_buffer *leaf;
3061 struct btrfs_key key;
3062 struct btrfs_key found_key;
3063 int ret;
3064 int slot;
3065
3066 root = root->fs_info->chunk_root;
3067
3068 path = btrfs_alloc_path();
3069 if (!path)
3070 return -ENOMEM;
3071
3072 /* first we search for all of the device items, and then we
3073 * read in all of the chunk items. This way we can create chunk
3074 * mappings that reference all of the devices that are afound
3075 */
3076 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
3077 key.offset = 0;
3078 key.type = 0;
3079again:
3080 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3081 while(1) {
3082 leaf = path->nodes[0];
3083 slot = path->slots[0];
3084 if (slot >= btrfs_header_nritems(leaf)) {
3085 ret = btrfs_next_leaf(root, path);
3086 if (ret == 0)
3087 continue;
3088 if (ret < 0)
3089 goto error;
3090 break;
3091 }
3092 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3093 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
3094 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
3095 break;
3096 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
3097 struct btrfs_dev_item *dev_item;
3098 dev_item = btrfs_item_ptr(leaf, slot,
3099 struct btrfs_dev_item);
0d81ba5d 3100 ret = read_one_dev(root, leaf, dev_item);
2b82032c
YZ
3101 if (ret)
3102 goto error;
0b86a832
CM
3103 }
3104 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
3105 struct btrfs_chunk *chunk;
3106 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3107 ret = read_one_chunk(root, &found_key, leaf, chunk);
2b82032c
YZ
3108 if (ret)
3109 goto error;
0b86a832
CM
3110 }
3111 path->slots[0]++;
3112 }
3113 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
3114 key.objectid = 0;
3115 btrfs_release_path(root, path);
3116 goto again;
3117 }
0b86a832
CM
3118 ret = 0;
3119error:
2b82032c 3120 btrfs_free_path(path);
0b86a832
CM
3121 return ret;
3122}