/*
 * Source: net-next-2.6 git mirror (bbs.cooldavid.org), fs/btrfs/extent-tree.c
 * at commit "Btrfs: Leaf reference cache update".
 */
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include "hash.h"
22 #include "crc32c.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "volumes.h"
28 #include "locking.h"
29 #include "ref-cache.h"
30
31 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
32 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
33 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
34
35 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
36
37 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
38                                  btrfs_root *extent_root);
39 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
40                                btrfs_root *extent_root);
41 static struct btrfs_block_group_cache *
42 __btrfs_find_block_group(struct btrfs_root *root,
43                          struct btrfs_block_group_cache *hint,
44                          u64 search_start, int data, int owner);
45
46 void maybe_lock_mutex(struct btrfs_root *root)
47 {
48         if (root != root->fs_info->extent_root &&
49             root != root->fs_info->chunk_root &&
50             root != root->fs_info->dev_root) {
51                 mutex_lock(&root->fs_info->alloc_mutex);
52         }
53 }
54
55 void maybe_unlock_mutex(struct btrfs_root *root)
56 {
57         if (root != root->fs_info->extent_root &&
58             root != root->fs_info->chunk_root &&
59             root != root->fs_info->dev_root) {
60                 mutex_unlock(&root->fs_info->alloc_mutex);
61         }
62 }
63
64 static int cache_block_group(struct btrfs_root *root,
65                              struct btrfs_block_group_cache *block_group)
66 {
67         struct btrfs_path *path;
68         int ret;
69         struct btrfs_key key;
70         struct extent_buffer *leaf;
71         struct extent_io_tree *free_space_cache;
72         int slot;
73         u64 last = 0;
74         u64 hole_size;
75         u64 first_free;
76         int found = 0;
77
78         if (!block_group)
79                 return 0;
80
81         root = root->fs_info->extent_root;
82         free_space_cache = &root->fs_info->free_space_cache;
83
84         if (block_group->cached)
85                 return 0;
86
87         path = btrfs_alloc_path();
88         if (!path)
89                 return -ENOMEM;
90
91         path->reada = 2;
92         /*
93          * we get into deadlocks with paths held by callers of this function.
94          * since the alloc_mutex is protecting things right now, just
95          * skip the locking here
96          */
97         path->skip_locking = 1;
98         first_free = block_group->key.objectid;
99         key.objectid = block_group->key.objectid;
100         key.offset = 0;
101         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
102         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
103         if (ret < 0)
104                 return ret;
105         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
106         if (ret < 0)
107                 return ret;
108         if (ret == 0) {
109                 leaf = path->nodes[0];
110                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
111                 if (key.objectid + key.offset > first_free)
112                         first_free = key.objectid + key.offset;
113         }
114         while(1) {
115                 leaf = path->nodes[0];
116                 slot = path->slots[0];
117                 if (slot >= btrfs_header_nritems(leaf)) {
118                         ret = btrfs_next_leaf(root, path);
119                         if (ret < 0)
120                                 goto err;
121                         if (ret == 0) {
122                                 continue;
123                         } else {
124                                 break;
125                         }
126                 }
127                 btrfs_item_key_to_cpu(leaf, &key, slot);
128                 if (key.objectid < block_group->key.objectid) {
129                         goto next;
130                 }
131                 if (key.objectid >= block_group->key.objectid +
132                     block_group->key.offset) {
133                         break;
134                 }
135
136                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
137                         if (!found) {
138                                 last = first_free;
139                                 found = 1;
140                         }
141                         if (key.objectid > last) {
142                                 hole_size = key.objectid - last;
143                                 set_extent_dirty(free_space_cache, last,
144                                                  last + hole_size - 1,
145                                                  GFP_NOFS);
146                         }
147                         last = key.objectid + key.offset;
148                 }
149 next:
150                 path->slots[0]++;
151         }
152
153         if (!found)
154                 last = first_free;
155         if (block_group->key.objectid +
156             block_group->key.offset > last) {
157                 hole_size = block_group->key.objectid +
158                         block_group->key.offset - last;
159                 set_extent_dirty(free_space_cache, last,
160                                  last + hole_size - 1, GFP_NOFS);
161         }
162         block_group->cached = 1;
163 err:
164         btrfs_free_path(path);
165         return 0;
166 }
167
168 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
169                                                        btrfs_fs_info *info,
170                                                          u64 bytenr)
171 {
172         struct extent_io_tree *block_group_cache;
173         struct btrfs_block_group_cache *block_group = NULL;
174         u64 ptr;
175         u64 start;
176         u64 end;
177         int ret;
178
179         bytenr = max_t(u64, bytenr,
180                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
181         block_group_cache = &info->block_group_cache;
182         ret = find_first_extent_bit(block_group_cache,
183                                     bytenr, &start, &end,
184                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
185                                     BLOCK_GROUP_SYSTEM);
186         if (ret) {
187                 return NULL;
188         }
189         ret = get_state_private(block_group_cache, start, &ptr);
190         if (ret)
191                 return NULL;
192
193         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
194         return block_group;
195 }
196
197 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
198                                                          btrfs_fs_info *info,
199                                                          u64 bytenr)
200 {
201         struct extent_io_tree *block_group_cache;
202         struct btrfs_block_group_cache *block_group = NULL;
203         u64 ptr;
204         u64 start;
205         u64 end;
206         int ret;
207
208         bytenr = max_t(u64, bytenr,
209                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
210         block_group_cache = &info->block_group_cache;
211         ret = find_first_extent_bit(block_group_cache,
212                                     bytenr, &start, &end,
213                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
214                                     BLOCK_GROUP_SYSTEM);
215         if (ret) {
216                 return NULL;
217         }
218         ret = get_state_private(block_group_cache, start, &ptr);
219         if (ret)
220                 return NULL;
221
222         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
223         if (block_group->key.objectid <= bytenr && bytenr <
224             block_group->key.objectid + block_group->key.offset)
225                 return block_group;
226         return NULL;
227 }
228
229 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
230 {
231         return (cache->flags & bits) == bits;
232 }
233
/*
 * Scan the free-space cache for a run of at least @num free bytes,
 * starting the search at *start_ret inside the group *cache_ret.
 *
 * On success returns 0 with *start_ret set to the found offset and
 * *cache_ret updated to the group that offset lives in.  When the
 * current group has no large-enough hole, the search moves to the next
 * candidate group (via btrfs_find_block_group), wrapping around once to
 * the original start before failing with -ENOSPC.
 *
 * Caller must hold fs_info->alloc_mutex (asserted below); the
 * free_space_cache spinlock is held only while walking extent states.
 */
static int noinline find_search_start(struct btrfs_root *root,
			      struct btrfs_block_group_cache **cache_ret,
			      u64 *start_ret, u64 num, int data)
{
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
	struct extent_io_tree *free_space_cache;
	struct extent_state *state;
	u64 last;
	u64 start = 0;
	u64 cache_miss = 0;	/* first offset with no cached free-space state */
	u64 total_fs_bytes;
	u64 search_start = *start_ret;
	int wrapped = 0;

	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
	free_space_cache = &root->fs_info->free_space_cache;

	if (!cache)
		goto out;

again:
	ret = cache_block_group(root, cache);
	if (ret) {
		goto out;
	}

	last = max(search_start, cache->key.objectid);
	/* wrong-type or read-only groups can never satisfy this request */
	if (!block_group_bits(cache, data) || cache->ro)
		goto new_group;

	spin_lock_irq(&free_space_cache->lock);
	state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
	while(1) {
		if (!state) {
			if (!cache_miss)
				cache_miss = last;
			spin_unlock_irq(&free_space_cache->lock);
			goto new_group;
		}

		start = max(last, state->start);
		last = state->end + 1;
		if (last - start < num) {
			/* hole too small: advance to the next dirty state */
			do {
				state = extent_state_next(state);
			} while(state && !(state->state & EXTENT_DIRTY));
			continue;
		}
		spin_unlock_irq(&free_space_cache->lock);
		/* re-validate the group after dropping the spinlock */
		if (cache->ro) {
			goto new_group;
		}
		if (start + num > cache->key.objectid + cache->key.offset)
			goto new_group;
		if (!block_group_bits(cache, data)) {
			printk("block group bits don't match %Lu %d\n", cache->flags, data);
		}
		*start_ret = start;
		return 0;
	}
out:
	cache = btrfs_lookup_block_group(root->fs_info, search_start);
	if (!cache) {
		printk("Unable to find block group for %Lu\n", search_start);
		WARN_ON(1);
	}
	return -ENOSPC;

new_group:
	/* move past the group we just exhausted */
	last = cache->key.objectid + cache->key.offset;
wrapped:
	cache = btrfs_lookup_first_block_group(root->fs_info, last);
	if (!cache || cache->key.objectid >= total_fs_bytes) {
no_cache:
		/* ran off the end of the fs: wrap to search_start once */
		if (!wrapped) {
			wrapped = 1;
			last = search_start;
			goto wrapped;
		}
		goto out;
	}
	if (cache_miss && !cache->cached) {
		/* a miss earlier may now be cacheable; retry from there */
		cache_block_group(root, cache);
		last = cache_miss;
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
	}
	cache_miss = 0;
	cache = btrfs_find_block_group(root, cache, last, data, 0);
	if (!cache)
		goto no_cache;
	*cache_ret = cache;
	goto again;
}
329
330 static u64 div_factor(u64 num, int factor)
331 {
332         if (factor == 10)
333                 return num;
334         num *= factor;
335         do_div(num, 10);
336         return num;
337 }
338
339 static int block_group_state_bits(u64 flags)
340 {
341         int bits = 0;
342         if (flags & BTRFS_BLOCK_GROUP_DATA)
343                 bits |= BLOCK_GROUP_DATA;
344         if (flags & BTRFS_BLOCK_GROUP_METADATA)
345                 bits |= BLOCK_GROUP_METADATA;
346         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
347                 bits |= BLOCK_GROUP_SYSTEM;
348         return bits;
349 }
350
/*
 * Pick a block group to allocate from.
 *
 * Preference order: the group containing @search_start, then @hint,
 * then a linear scan of the block_group_cache tree starting past the
 * hint.  A group qualifies when it matches the requested @data flags,
 * is not read-only, and its used+pinned bytes are below factor/10 of
 * its size (factor starts at 9 for metadata so those groups keep some
 * slack).  The scan wraps to @search_start once, then retries with
 * factor = 10 before giving up and returning NULL.
 *
 * @owner is unused in this function.
 */
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
			 struct btrfs_block_group_cache *hint,
			 u64 search_start, int data, int owner)
{
	struct btrfs_block_group_cache *cache;
	struct extent_io_tree *block_group_cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
	u64 last = 0;
	u64 start;
	u64 end;
	u64 free_check;
	u64 ptr;
	int bit;
	int ret;
	int full_search = 0;
	int factor = 10;
	int wrapped = 0;

	block_group_cache = &info->block_group_cache;

	/* keep metadata groups under 90% full on the first pass */
	if (data & BTRFS_BLOCK_GROUP_METADATA)
		factor = 9;

	bit = block_group_state_bits(data);

	if (search_start) {
		/* fast path: try the group containing search_start */
		struct btrfs_block_group_cache *shint;
		shint = btrfs_lookup_first_block_group(info, search_start);
		if (shint && block_group_bits(shint, data) && !shint->ro) {
			spin_lock(&shint->lock);
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
				spin_unlock(&shint->lock);
				return shint;
			}
			spin_unlock(&shint->lock);
		}
	}
	if (hint && !hint->ro && block_group_bits(hint, data)) {
		spin_lock(&hint->lock);
		used = btrfs_block_group_used(&hint->item);
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
			spin_unlock(&hint->lock);
			return hint;
		}
		spin_unlock(&hint->lock);
		/* hint was too full: scan onward from its end */
		last = hint->key.objectid + hint->key.offset;
	} else {
		if (hint)
			last = max(hint->key.objectid, search_start);
		else
			last = search_start;
	}
again:
	while(1) {
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, bit);
		if (ret)
			break;

		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret) {
			last = end + 1;
			continue;
		}

		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (!cache->ro && block_group_bits(cache, data)) {
			free_check = div_factor(cache->key.offset, factor);
			if (used + cache->pinned < free_check) {
				found_group = cache;
				spin_unlock(&cache->lock);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		cond_resched();
	}
	if (!wrapped) {
		/* reached the end of the fs: wrap to search_start once */
		last = search_start;
		wrapped = 1;
		goto again;
	}
	/* last resort: accept any matching group with free space */
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return found_group;
}
452
453 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
454                                                  struct btrfs_block_group_cache
455                                                  *hint, u64 search_start,
456                                                  int data, int owner)
457 {
458
459         struct btrfs_block_group_cache *ret;
460         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
461         return ret;
462 }
463 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
464                            u64 owner, u64 owner_offset)
465 {
466         u32 high_crc = ~(u32)0;
467         u32 low_crc = ~(u32)0;
468         __le64 lenum;
469         lenum = cpu_to_le64(root_objectid);
470         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
471         lenum = cpu_to_le64(ref_generation);
472         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
473         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
474                 lenum = cpu_to_le64(owner);
475                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
476                 lenum = cpu_to_le64(owner_offset);
477                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
478         }
479         return ((u64)high_crc << 32) | (u64)low_crc;
480 }
481
482 static int match_extent_ref(struct extent_buffer *leaf,
483                             struct btrfs_extent_ref *disk_ref,
484                             struct btrfs_extent_ref *cpu_ref)
485 {
486         int ret;
487         int len;
488
489         if (cpu_ref->objectid)
490                 len = sizeof(*cpu_ref);
491         else
492                 len = 2 * sizeof(u64);
493         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
494                                    len);
495         return ret == 0;
496 }
497
/*
 * Find the EXTENT_REF item for (@root_objectid, @ref_generation,
 * @owner, @owner_offset) on the extent at @bytenr and leave @path
 * pointing at it.
 *
 * The key offset is a hash of the ref fields, so collisions are
 * possible: on a mismatch the search advances through subsequent
 * EXTENT_REF items for the same bytenr until the stored fields match.
 * When @del is set, the slot is searched with intent to delete
 * (ins_len = -1, cow = 1) and re-searched after each step so the final
 * path is usable for deletion.
 *
 * Returns 0 when the matching ref is positioned, nonzero when it does
 * not exist, or a negative errno.
 */
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, u64 bytenr,
					  u64 root_objectid,
					  u64 ref_generation, u64 owner,
					  u64 owner_offset, int del)
{
	u64 hash;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_extent_ref ref;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *disk_ref;
	int ret;
	int ret2;

	/* stack copy used for byte-wise comparison against disk refs */
	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path,
					del ? -1 : 0, del);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		if (ret != 0) {
			/* landed just past the key: step to the next item */
			u32 nritems = btrfs_header_nritems(leaf);
			if (path->slots[0] >= nritems) {
				ret2 = btrfs_next_leaf(root, path);
				if (ret2)
					goto out;
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			if (found_key.objectid != bytenr ||
			    found_key.type != BTRFS_EXTENT_REF_KEY)
				goto out;
			key.offset = found_key.offset;
			if (del) {
				/* redo the search so the path is delete-ready */
				btrfs_release_path(root, path);
				continue;
			}
		}
		disk_ref = btrfs_item_ptr(path->nodes[0],
					  path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
			ret = 0;
			goto out;
		}
		/* hash collision: try the next offset for this bytenr */
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		key.offset = found_key.offset + 1;
		btrfs_release_path(root, path);
	}
out:
	return ret;
}
563
564 /*
565  * Back reference rules.  Back refs have three main goals:
566  *
567  * 1) differentiate between all holders of references to an extent so that
568  *    when a reference is dropped we can make sure it was a valid reference
569  *    before freeing the extent.
570  *
571  * 2) Provide enough information to quickly find the holders of an extent
572  *    if we notice a given block is corrupted or bad.
573  *
574  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
575  *    maintenance.  This is actually the same as #2, but with a slightly
576  *    different use case.
577  *
578  * File extents can be referenced by:
579  *
580  * - multiple snapshots, subvolumes, or different generations in one subvol
581  * - different files inside a single subvolume (in theory, not implemented yet)
582  * - different offsets inside a file (bookend extents in file.c)
583  *
584  * The extent ref structure has fields for:
585  *
586  * - Objectid of the subvolume root
587  * - Generation number of the tree holding the reference
588  * - objectid of the file holding the reference
589  * - offset in the file corresponding to the key holding the reference
590  *
591  * When a file extent is allocated the fields are filled in:
592  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
593  *
594  * When a leaf is cow'd new references are added for every file extent found
595  * in the leaf.  It looks the same as the create case, but trans->transid
596  * will be different when the block is cow'd.
597  *
598  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
599  *
600  * When a file extent is removed either during snapshot deletion or file
601  * truncation, the corresponding back reference is found
602  * by searching for:
603  *
604  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
605  *      inode objectid, offset in file)
606  *
607  * Btree extents can be referenced by:
608  *
609  * - Different subvolumes
610  * - Different generations of the same subvolume
611  *
612  * Storing sufficient information for a full reverse mapping of a btree
613  * block would require storing the lowest key of the block in the backref,
614  * and it would require updating that lowest key either before write out or
615  * every time it changed.  Instead, the objectid of the lowest key is stored
616  * along with the level of the tree block.  This provides a hint
617  * about where in the btree the block can be found.  Searches through the
618  * btree only need to look for a pointer to that block, so they stop one
619  * level higher than the level recorded in the backref.
620  *
621  * Some btrees do not do reference counting on their extents.  These
622  * include the extent tree and the tree of tree roots.  Backrefs for these
623  * trees always have a generation of zero.
624  *
625  * When a tree block is created, back references are inserted:
626  *
627  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
628  *
629  * When a tree block is cow'd in a reference counted root,
630  * new back references are added for all the blocks it points to.
631  * These are of the form (trans->transid will have increased since creation):
632  *
633  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
634  *
635  * Because the lowest_key_objectid and the level are just hints
636  * they are not used when backrefs are deleted.  When a backref is deleted:
637  *
638  * if backref was for a tree root:
639  *     root_objectid = root->root_key.objectid
640  * else
641  *     root_objectid = btrfs_header_owner(parent)
642  *
643  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
644  *
645  * Back Reference Key hashing:
646  *
 * Back references have four fields, each 64 bits long.  Unfortunately,
 * this is hashed into a single 64 bit number and placed into the key offset.
 * The key objectid corresponds to the first byte in the extent, and the
 * key type is set to BTRFS_EXTENT_REF_KEY.
651  */
652 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
653                                  struct btrfs_root *root,
654                                  struct btrfs_path *path, u64 bytenr,
655                                  u64 root_objectid, u64 ref_generation,
656                                  u64 owner, u64 owner_offset)
657 {
658         u64 hash;
659         struct btrfs_key key;
660         struct btrfs_extent_ref ref;
661         struct btrfs_extent_ref *disk_ref;
662         int ret;
663
664         btrfs_set_stack_ref_root(&ref, root_objectid);
665         btrfs_set_stack_ref_generation(&ref, ref_generation);
666         btrfs_set_stack_ref_objectid(&ref, owner);
667         btrfs_set_stack_ref_offset(&ref, owner_offset);
668
669         hash = hash_extent_ref(root_objectid, ref_generation, owner,
670                                owner_offset);
671         key.offset = hash;
672         key.objectid = bytenr;
673         key.type = BTRFS_EXTENT_REF_KEY;
674
675         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
676         while (ret == -EEXIST) {
677                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
678                                           struct btrfs_extent_ref);
679                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
680                         goto out;
681                 key.offset++;
682                 btrfs_release_path(root, path);
683                 ret = btrfs_insert_empty_item(trans, root, path, &key,
684                                               sizeof(ref));
685         }
686         if (ret)
687                 goto out;
688         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
689                                   struct btrfs_extent_ref);
690         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
691                             sizeof(ref));
692         btrfs_mark_buffer_dirty(path->nodes[0]);
693 out:
694         btrfs_release_path(root, path);
695         return ret;
696 }
697
698 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
699                                 struct btrfs_root *root,
700                                 u64 bytenr, u64 num_bytes,
701                                 u64 root_objectid, u64 ref_generation,
702                                 u64 owner, u64 owner_offset)
703 {
704         struct btrfs_path *path;
705         int ret;
706         struct btrfs_key key;
707         struct extent_buffer *l;
708         struct btrfs_extent_item *item;
709         u32 refs;
710
711         WARN_ON(num_bytes < root->sectorsize);
712         path = btrfs_alloc_path();
713         if (!path)
714                 return -ENOMEM;
715
716         path->reada = 1;
717         key.objectid = bytenr;
718         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
719         key.offset = num_bytes;
720         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
721                                 0, 1);
722         if (ret < 0)
723                 return ret;
724         if (ret != 0) {
725                 BUG();
726         }
727         BUG_ON(ret != 0);
728         l = path->nodes[0];
729         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
730         refs = btrfs_extent_refs(l, item);
731         btrfs_set_extent_refs(l, item, refs + 1);
732         btrfs_mark_buffer_dirty(path->nodes[0]);
733
734         btrfs_release_path(root->fs_info->extent_root, path);
735
736         path->reada = 1;
737         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
738                                           path, bytenr, root_objectid,
739                                           ref_generation, owner, owner_offset);
740         BUG_ON(ret);
741         finish_current_insert(trans, root->fs_info->extent_root);
742         del_pending_extents(trans, root->fs_info->extent_root);
743
744         btrfs_free_path(path);
745         return 0;
746 }
747
748 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
749                                 struct btrfs_root *root,
750                                 u64 bytenr, u64 num_bytes,
751                                 u64 root_objectid, u64 ref_generation,
752                                 u64 owner, u64 owner_offset)
753 {
754         int ret;
755
756         mutex_lock(&root->fs_info->alloc_mutex);
757         ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
758                                      root_objectid, ref_generation,
759                                      owner, owner_offset);
760         mutex_unlock(&root->fs_info->alloc_mutex);
761         return ret;
762 }
763
764 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
765                          struct btrfs_root *root)
766 {
767         finish_current_insert(trans, root->fs_info->extent_root);
768         del_pending_extents(trans, root->fs_info->extent_root);
769         return 0;
770 }
771
772 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
773                              struct btrfs_root *root, u64 bytenr,
774                              u64 num_bytes, u32 *refs)
775 {
776         struct btrfs_path *path;
777         int ret;
778         struct btrfs_key key;
779         struct extent_buffer *l;
780         struct btrfs_extent_item *item;
781
782         WARN_ON(num_bytes < root->sectorsize);
783         path = btrfs_alloc_path();
784         path->reada = 1;
785         key.objectid = bytenr;
786         key.offset = num_bytes;
787         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
788         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
789                                 0, 0);
790         if (ret < 0)
791                 goto out;
792         if (ret != 0) {
793                 btrfs_print_leaf(root, path->nodes[0]);
794                 printk("failed to find block number %Lu\n", bytenr);
795                 BUG();
796         }
797         l = path->nodes[0];
798         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
799         *refs = btrfs_extent_refs(l, item);
800 out:
801         btrfs_free_path(path);
802         return 0;
803 }
804
/*
 * Count whether the extents along @count_path are referenced by roots
 * other than @root.
 *
 * Starting at the data extent @first_extent (level -1) and then moving
 * up through the node blocks recorded in @count_path, scan the
 * EXTENT_REF backref items stored after each block's extent item.
 *
 * Return values:
 *   0 - no references found for some block on the path
 *   1 - only this root references the path
 *   2 - shared: a backref from a different root objectid, a data ref
 *       with an owner other than @expected_owner, or more than one
 *       ref left on the data extent
 *
 * NOTE(review): the unconditional BUG() below makes this function trap
 * if it is ever called -- presumably disabled until the locking
 * mentioned in the FIXME is sorted out.
 */
u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
                                  struct btrfs_path *count_path,
                                  u64 expected_owner,
                                  u64 first_extent)
{
        struct btrfs_root *extent_root = root->fs_info->extent_root;
        struct btrfs_path *path;
        u64 bytenr;
        u64 found_objectid;
        u64 found_owner;
        u64 root_objectid = root->root_key.objectid;
        u32 total_count = 0;
        u32 extent_refs;
        u32 cur_count;
        u32 nritems;
        int ret;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *l;
        struct btrfs_extent_item *item;
        struct btrfs_extent_ref *ref_item;
        int level = -1;

        /* FIXME, needs locking */
        BUG();

        mutex_lock(&root->fs_info->alloc_mutex);
        path = btrfs_alloc_path();
again:
        /* level -1 is the data extent itself; >= 0 are path node blocks */
        if (level == -1)
                bytenr = first_extent;
        else
                bytenr = count_path->nodes[level]->start;

        cur_count = 0;
        key.objectid = bytenr;
        key.offset = 0;

        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
        BUG_ON(ret == 0);

        l = path->nodes[0];
        btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);

        if (found_key.objectid != bytenr ||
            found_key.type != BTRFS_EXTENT_ITEM_KEY) {
                goto out;
        }

        item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
        extent_refs = btrfs_extent_refs(l, item);
        /* walk the backref items that follow this block's extent item */
        while (1) {
                l = path->nodes[0];
                nritems = btrfs_header_nritems(l);
                if (path->slots[0] >= nritems) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret == 0)
                                continue;
                        break;
                }
                btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
                if (found_key.objectid != bytenr)
                        break;

                if (found_key.type != BTRFS_EXTENT_REF_KEY) {
                        path->slots[0]++;
                        continue;
                }

                cur_count++;
                ref_item = btrfs_item_ptr(l, path->slots[0],
                                          struct btrfs_extent_ref);
                found_objectid = btrfs_ref_root(l, ref_item);

                /* a ref from another root means the path is shared */
                if (found_objectid != root_objectid) {
                        total_count = 2;
                        goto out;
                }
                if (level == -1) {
                        found_owner = btrfs_ref_objectid(l, ref_item);
                        if (found_owner != expected_owner) {
                                total_count = 2;
                                goto out;
                        }
                        /*
                         * nasty.  we don't count a reference held by
                         * the running transaction.  This allows nodatacow
                         * to avoid cow most of the time
                         */
                        if (found_owner >= BTRFS_FIRST_FREE_OBJECTID &&
                            btrfs_ref_generation(l, ref_item) ==
                            root->fs_info->generation) {
                                extent_refs--;
                        }
                }
                total_count = 1;
                path->slots[0]++;
        }
        /*
         * if there is more than one reference against a data extent,
         * we have to assume the other ref is another snapshot
         */
        if (level == -1 && extent_refs > 1) {
                total_count = 2;
                goto out;
        }
        if (cur_count == 0) {
                total_count = 0;
                goto out;
        }
        /* stop once we've walked up to this root's own top node */
        if (level >= 0 && root->node == count_path->nodes[level])
                goto out;
        level++;
        btrfs_release_path(root, path);
        goto again;

out:
        btrfs_free_path(path);
        mutex_unlock(&root->fs_info->alloc_mutex);
        return total_count;
}
929
/*
 * Take a reference on every extent pointed to by the buffer @buf.
 *
 * For a leaf (level 0) that is one ref per regular file extent item --
 * inline extents and holes (disk_bytenr == 0) are skipped; for a node
 * it is one ref per child block pointer.  Used when @buf is COWed so
 * the new copy shares all of the extents of the original.
 *
 * When @cache_ref is set and @buf is a leaf other than the commit
 * root, the file extents found are also recorded in the per-root leaf
 * ref cache (root->ref_tree) so later drops can avoid re-reading the
 * leaf.
 *
 * Returns 0 on success (including the no-op case for roots with
 * ref_cows unset).  The undo path for a mid-loop failure is stubbed
 * out (#if 0) and currently only warns.
 */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                  struct extent_buffer *buf, int cache_ref)
{
        u64 bytenr;
        u32 nritems;
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        int i;
        int level;
        int ret;
        int faili;
        int nr_file_extents = 0;

        /* roots that never share blocks don't track refs this way */
        if (!root->ref_cows)
                return 0;

        level = btrfs_header_level(buf);
        nritems = btrfs_header_nritems(buf);
        for (i = 0; i < nritems; i++) {
                cond_resched();
                if (level == 0) {
                        u64 disk_bytenr;
                        btrfs_item_key_to_cpu(buf, &key, i);
                        if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
                                continue;
                        fi = btrfs_item_ptr(buf, i,
                                            struct btrfs_file_extent_item);
                        /* inline extents live in the leaf, no backref */
                        if (btrfs_file_extent_type(buf, fi) ==
                            BTRFS_FILE_EXTENT_INLINE)
                                continue;
                        disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
                        /* disk_bytenr 0 marks a hole */
                        if (disk_bytenr == 0)
                                continue;

                        if (buf != root->commit_root)
                                nr_file_extents++;

                        mutex_lock(&root->fs_info->alloc_mutex);
                        ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
                                    btrfs_file_extent_disk_num_bytes(buf, fi),
                                    root->root_key.objectid, trans->transid,
                                    key.objectid, key.offset);
                        mutex_unlock(&root->fs_info->alloc_mutex);
                        if (ret) {
                                faili = i;
                                WARN_ON(1);
                                goto fail;
                        }
                } else {
                        bytenr = btrfs_node_blockptr(buf, i);
                        btrfs_node_key_to_cpu(buf, &key, i);

                        mutex_lock(&root->fs_info->alloc_mutex);
                        ret = __btrfs_inc_extent_ref(trans, root, bytenr,
                                           btrfs_level_size(root, level - 1),
                                           root->root_key.objectid,
                                           trans->transid,
                                           level - 1, key.objectid);
                        mutex_unlock(&root->fs_info->alloc_mutex);
                        if (ret) {
                                faili = i;
                                WARN_ON(1);
                                goto fail;
                        }
                }
        }
        /* cache the original leaf block's references */
        if (level == 0 && cache_ref && buf != root->commit_root) {
                struct btrfs_leaf_ref *ref;
                struct btrfs_extent_info *info;

                ref = btrfs_alloc_leaf_ref(nr_file_extents);
                if (!ref) {
                        WARN_ON(1);
                        goto out;
                }

                ref->bytenr = buf->start;
                ref->owner = btrfs_header_owner(buf);
                ref->generation = btrfs_header_generation(buf);
                ref->nritems = nr_file_extents;
                info = ref->extents;

                /* second pass: record each real file extent in the ref */
                for (i = 0; nr_file_extents > 0 && i < nritems; i++) {
                        u64 disk_bytenr;
                        btrfs_item_key_to_cpu(buf, &key, i);
                        if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
                                continue;
                        fi = btrfs_item_ptr(buf, i,
                                            struct btrfs_file_extent_item);
                        if (btrfs_file_extent_type(buf, fi) ==
                            BTRFS_FILE_EXTENT_INLINE)
                                continue;
                        disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
                        if (disk_bytenr == 0)
                                continue;

                        info->bytenr = disk_bytenr;
                        info->num_bytes =
                                btrfs_file_extent_disk_num_bytes(buf, fi);
                        info->objectid = key.objectid;
                        info->offset = key.offset;
                        info++;
                }

                BUG_ON(!root->ref_tree);
                ret = btrfs_add_leaf_ref(root, ref);
                WARN_ON(ret);
                btrfs_free_leaf_ref(ref);
        }
out:
        return 0;
fail:
        WARN_ON(1);
#if 0
        for (i =0; i < faili; i++) {
                if (level == 0) {
                        u64 disk_bytenr;
                        btrfs_item_key_to_cpu(buf, &key, i);
                        if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
                                continue;
                        fi = btrfs_item_ptr(buf, i,
                                            struct btrfs_file_extent_item);
                        if (btrfs_file_extent_type(buf, fi) ==
                            BTRFS_FILE_EXTENT_INLINE)
                                continue;
                        disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
                        if (disk_bytenr == 0)
                                continue;
                        err = btrfs_free_extent(trans, root, disk_bytenr,
                                    btrfs_file_extent_disk_num_bytes(buf,
                                                                      fi), 0);
                        BUG_ON(err);
                } else {
                        bytenr = btrfs_node_blockptr(buf, i);
                        err = btrfs_free_extent(trans, root, bytenr,
                                        btrfs_level_size(root, level - 1), 0);
                        BUG_ON(err);
                }
        }
#endif
        return ret;
}
1073
1074 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1075                                  struct btrfs_root *root,
1076                                  struct btrfs_path *path,
1077                                  struct btrfs_block_group_cache *cache)
1078 {
1079         int ret;
1080         int pending_ret;
1081         struct btrfs_root *extent_root = root->fs_info->extent_root;
1082         unsigned long bi;
1083         struct extent_buffer *leaf;
1084
1085         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1086         if (ret < 0)
1087                 goto fail;
1088         BUG_ON(ret);
1089
1090         leaf = path->nodes[0];
1091         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1092         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1093         btrfs_mark_buffer_dirty(leaf);
1094         btrfs_release_path(extent_root, path);
1095 fail:
1096         finish_current_insert(trans, extent_root);
1097         pending_ret = del_pending_extents(trans, extent_root);
1098         if (ret)
1099                 return ret;
1100         if (pending_ret)
1101                 return pending_ret;
1102         return 0;
1103
1104 }
1105
1106 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1107                                    struct btrfs_root *root)
1108 {
1109         struct extent_io_tree *block_group_cache;
1110         struct btrfs_block_group_cache *cache;
1111         int ret;
1112         int err = 0;
1113         int werr = 0;
1114         struct btrfs_path *path;
1115         u64 last = 0;
1116         u64 start;
1117         u64 end;
1118         u64 ptr;
1119
1120         block_group_cache = &root->fs_info->block_group_cache;
1121         path = btrfs_alloc_path();
1122         if (!path)
1123                 return -ENOMEM;
1124
1125         mutex_lock(&root->fs_info->alloc_mutex);
1126         while(1) {
1127                 ret = find_first_extent_bit(block_group_cache, last,
1128                                             &start, &end, BLOCK_GROUP_DIRTY);
1129                 if (ret)
1130                         break;
1131
1132                 last = end + 1;
1133                 ret = get_state_private(block_group_cache, start, &ptr);
1134                 if (ret)
1135                         break;
1136                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
1137                 err = write_one_cache_group(trans, root,
1138                                             path, cache);
1139                 /*
1140                  * if we fail to write the cache group, we want
1141                  * to keep it marked dirty in hopes that a later
1142                  * write will work
1143                  */
1144                 if (err) {
1145                         werr = err;
1146                         continue;
1147                 }
1148                 clear_extent_bits(block_group_cache, start, end,
1149                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
1150         }
1151         btrfs_free_path(path);
1152         mutex_unlock(&root->fs_info->alloc_mutex);
1153         return werr;
1154 }
1155
1156 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1157                                                   u64 flags)
1158 {
1159         struct list_head *head = &info->space_info;
1160         struct list_head *cur;
1161         struct btrfs_space_info *found;
1162         list_for_each(cur, head) {
1163                 found = list_entry(cur, struct btrfs_space_info, list);
1164                 if (found->flags == flags)
1165                         return found;
1166         }
1167         return NULL;
1168
1169 }
1170
1171 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1172                              u64 total_bytes, u64 bytes_used,
1173                              struct btrfs_space_info **space_info)
1174 {
1175         struct btrfs_space_info *found;
1176
1177         found = __find_space_info(info, flags);
1178         if (found) {
1179                 found->total_bytes += total_bytes;
1180                 found->bytes_used += bytes_used;
1181                 found->full = 0;
1182                 WARN_ON(found->total_bytes < found->bytes_used);
1183                 *space_info = found;
1184                 return 0;
1185         }
1186         found = kmalloc(sizeof(*found), GFP_NOFS);
1187         if (!found)
1188                 return -ENOMEM;
1189
1190         list_add(&found->list, &info->space_info);
1191         found->flags = flags;
1192         found->total_bytes = total_bytes;
1193         found->bytes_used = bytes_used;
1194         found->bytes_pinned = 0;
1195         found->full = 0;
1196         found->force_alloc = 0;
1197         *space_info = found;
1198         return 0;
1199 }
1200
1201 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1202 {
1203         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1204                                    BTRFS_BLOCK_GROUP_RAID1 |
1205                                    BTRFS_BLOCK_GROUP_RAID10 |
1206                                    BTRFS_BLOCK_GROUP_DUP);
1207         if (extra_flags) {
1208                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1209                         fs_info->avail_data_alloc_bits |= extra_flags;
1210                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1211                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1212                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1213                         fs_info->avail_system_alloc_bits |= extra_flags;
1214         }
1215 }
1216
1217 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1218 {
1219         u64 num_devices = root->fs_info->fs_devices->num_devices;
1220
1221         if (num_devices == 1)
1222                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1223         if (num_devices < 4)
1224                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1225
1226         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1227             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1228                       BTRFS_BLOCK_GROUP_RAID10))) {
1229                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1230         }
1231
1232         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1233             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1234                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1235         }
1236
1237         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1238             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1239              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1240              (flags & BTRFS_BLOCK_GROUP_DUP)))
1241                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1242         return flags;
1243 }
1244
/*
 * Allocate a new chunk (and matching block group) for the profile in
 * @flags when space is getting tight or @force is set.
 *
 * The allocation is skipped when the space_info is already marked full
 * or when used+pinned+@alloc_bytes is still under ~60% of the total.
 * -ENOSPC from the chunk allocator is not an error here: it just
 * marks the space_info full so future callers skip the attempt.
 * Always returns 0.
 */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 alloc_bytes,
                          u64 flags, int force)
{
        struct btrfs_space_info *space_info;
        u64 thresh;
        u64 start;
        u64 num_bytes;
        int ret;

        flags = reduce_alloc_profile(extent_root, flags);

        space_info = __find_space_info(extent_root->fs_info, flags);
        if (!space_info) {
                /* first allocation with this profile: create the pool */
                ret = update_space_info(extent_root->fs_info, flags,
                                        0, 0, &space_info);
                BUG_ON(ret);
        }
        BUG_ON(!space_info);

        /* honor a one-shot forced allocation queued by an earlier caller */
        if (space_info->force_alloc) {
                force = 1;
                space_info->force_alloc = 0;
        }
        if (space_info->full)
                goto out;

        /* only allocate early once usage would cross 60% of the pool */
        thresh = div_factor(space_info->total_bytes, 6);
        if (!force &&
           (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
            thresh)
                goto out;

        mutex_lock(&extent_root->fs_info->chunk_mutex);
        ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
        if (ret == -ENOSPC) {
printk("space info full %Lu\n", flags);
                space_info->full = 1;
                goto out_unlock;
        }
        BUG_ON(ret);

        ret = btrfs_make_block_group(trans, extent_root, 0, flags,
                     BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
        BUG_ON(ret);
out_unlock:
        mutex_unlock(&extent_root->fs_info->chunk_mutex);
out:
        return 0;
}
1295
/*
 * Adjust the used-byte accounting of the block group(s) covering
 * [@bytenr, @bytenr + @num_bytes).
 *
 * @alloc selects whether the bytes are being allocated or freed; when
 * freeing, @mark_free also returns the range to the free space cache.
 * Every touched group is flagged BLOCK_GROUP_DIRTY so its item gets
 * written out later.  A range may span multiple groups, hence the
 * loop.  Returns -1 if no block group covers @bytenr, 0 otherwise.
 * Caller must hold alloc_mutex.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc,
                              int mark_free)
{
        struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *info = root->fs_info;
        u64 total = num_bytes;
        u64 old_val;
        u64 byte_in_group;
        u64 start;
        u64 end;

        WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
        while(total) {
                cache = btrfs_lookup_block_group(info, bytenr);
                if (!cache) {
                        return -1;
                }
                byte_in_group = bytenr - cache->key.objectid;
                WARN_ON(byte_in_group > cache->key.offset);
                start = cache->key.objectid;
                end = start + cache->key.offset - 1;
                /* remember to write this group's item back to disk */
                set_extent_bits(&info->block_group_cache, start, end,
                                BLOCK_GROUP_DIRTY, GFP_NOFS);

                spin_lock(&cache->lock);
                old_val = btrfs_block_group_used(&cache->item);
                /* clamp to what actually lies inside this group */
                num_bytes = min(total, cache->key.offset - byte_in_group);
                if (alloc) {
                        old_val += num_bytes;
                        cache->space_info->bytes_used += num_bytes;
                        btrfs_set_block_group_used(&cache->item, old_val);
                        spin_unlock(&cache->lock);
                } else {
                        old_val -= num_bytes;
                        cache->space_info->bytes_used -= num_bytes;
                        btrfs_set_block_group_used(&cache->item, old_val);
                        spin_unlock(&cache->lock);
                        if (mark_free) {
                                set_extent_dirty(&info->free_space_cache,
                                                 bytenr, bytenr + num_bytes - 1,
                                                 GFP_NOFS);
                        }
                }
                total -= num_bytes;
                bytenr += num_bytes;
        }
        return 0;
}
1346
1347 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1348 {
1349         u64 start;
1350         u64 end;
1351         int ret;
1352         ret = find_first_extent_bit(&root->fs_info->block_group_cache,
1353                                     search_start, &start, &end,
1354                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
1355                                     BLOCK_GROUP_SYSTEM);
1356         if (ret)
1357                 return 0;
1358         return start;
1359 }
1360
1361
/*
 * Pin or unpin the byte range [@bytenr, @bytenr + @num), keeping the
 * per-block-group and per-space-info pinned counters in sync with the
 * fs_info->pinned_extents tree.
 *
 * The range may span several block groups, hence the loop.  For a
 * stretch with no covering block group, only fs_info->total_pinned is
 * adjusted and the walk skips ahead to the next group.  Caller must
 * hold alloc_mutex.  Always returns 0.
 */
static int update_pinned_extents(struct btrfs_root *root,
                                u64 bytenr, u64 num, int pin)
{
        u64 len;
        struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *fs_info = root->fs_info;

        WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
        if (pin) {
                set_extent_dirty(&fs_info->pinned_extents,
                                bytenr, bytenr + num - 1, GFP_NOFS);
        } else {
                clear_extent_dirty(&fs_info->pinned_extents,
                                bytenr, bytenr + num - 1, GFP_NOFS);
        }
        while (num > 0) {
                cache = btrfs_lookup_block_group(fs_info, bytenr);
                if (!cache) {
                        /* hole in the groups: jump to the next one */
                        u64 first = first_logical_byte(root, bytenr);
                        WARN_ON(first < bytenr);
                        len = min(first - bytenr, num);
                } else {
                        len = min(num, cache->key.offset -
                                  (bytenr - cache->key.objectid));
                }
                if (pin) {
                        if (cache) {
                                spin_lock(&cache->lock);
                                cache->pinned += len;
                                cache->space_info->bytes_pinned += len;
                                spin_unlock(&cache->lock);
                        }
                        fs_info->total_pinned += len;
                } else {
                        if (cache) {
                                spin_lock(&cache->lock);
                                cache->pinned -= len;
                                cache->space_info->bytes_pinned -= len;
                                spin_unlock(&cache->lock);
                        }
                        fs_info->total_pinned -= len;
                }
                bytenr += len;
                num -= len;
        }
        return 0;
}
1409
1410 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1411 {
1412         u64 last = 0;
1413         u64 start;
1414         u64 end;
1415         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1416         int ret;
1417
1418         while(1) {
1419                 ret = find_first_extent_bit(pinned_extents, last,
1420                                             &start, &end, EXTENT_DIRTY);
1421                 if (ret)
1422                         break;
1423                 set_extent_dirty(copy, start, end, GFP_NOFS);
1424                 last = end + 1;
1425         }
1426         return 0;
1427 }
1428
/*
 * After a transaction commit, unpin every extent recorded in @unpin
 * and hand the space back to the free space cache.
 *
 * The alloc_mutex is dropped around cond_resched() so other
 * allocators can make progress during long unpin runs.  Always
 * returns 0.
 */
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               struct extent_io_tree *unpin)
{
        u64 start;
        u64 end;
        int ret;
        struct extent_io_tree *free_space_cache;
        free_space_cache = &root->fs_info->free_space_cache;

        mutex_lock(&root->fs_info->alloc_mutex);
        while(1) {
                ret = find_first_extent_bit(unpin, 0, &start, &end,
                                            EXTENT_DIRTY);
                if (ret)
                        break;
                update_pinned_extents(root, start, end + 1 - start, 0);
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
                set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
                if (need_resched()) {
                        mutex_unlock(&root->fs_info->alloc_mutex);
                        cond_resched();
                        mutex_lock(&root->fs_info->alloc_mutex);
                }
        }
        mutex_unlock(&root->fs_info->alloc_mutex);
        return 0;
}
1457
/*
 * Insert an extent item (refs == 1) plus one backref for every range
 * queued in fs_info->extent_ins.
 *
 * Delayed inserts are recorded as EXTENT_LOCKED ranges whose start is
 * the block's bytenr and whose length is its size.  For each one,
 * insert the extent item, then peek at the block's first key to seed
 * the backref.  The alloc_mutex is dropped around blocking reads and
 * cond_resched() to let other allocators run.  Caller must hold
 * alloc_mutex.  Always returns 0.
 *
 * NOTE(review): err from btrfs_insert_item() is overwritten below
 * without being checked, so insertion failures are silently ignored.
 */
static int finish_current_insert(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *extent_root)
{
        u64 start;
        u64 end;
        struct btrfs_fs_info *info = extent_root->fs_info;
        struct extent_buffer *eb;
        struct btrfs_path *path;
        struct btrfs_key ins;
        struct btrfs_disk_key first;
        struct btrfs_extent_item extent_item;
        int ret;
        int level;
        int err = 0;

        WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
        btrfs_set_stack_extent_refs(&extent_item, 1);
        btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
        path = btrfs_alloc_path();

        while(1) {
                ret = find_first_extent_bit(&info->extent_ins, 0, &start,
                                            &end, EXTENT_LOCKED);
                if (ret)
                        break;

                ins.objectid = start;
                ins.offset = end + 1 - start;
                err = btrfs_insert_item(trans, extent_root, &ins,
                                        &extent_item, sizeof(extent_item));
                clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
                                  GFP_NOFS);

                eb = btrfs_find_tree_block(extent_root, ins.objectid,
                                           ins.offset);

                /* drop the mutex for a potentially blocking read */
                if (!btrfs_buffer_uptodate(eb, trans->transid)) {
                        mutex_unlock(&extent_root->fs_info->alloc_mutex);
                        btrfs_read_buffer(eb, trans->transid);
                        mutex_lock(&extent_root->fs_info->alloc_mutex);
                }

                btrfs_tree_lock(eb);
                level = btrfs_header_level(eb);
                if (level == 0) {
                        btrfs_item_key(eb, &first, 0);
                } else {
                        btrfs_node_key(eb, &first, 0);
                }
                btrfs_tree_unlock(eb);
                free_extent_buffer(eb);
                /*
                 * the first key is just a hint, so the race we've created
                 * against reading it is fine
                 */
                err = btrfs_insert_extent_backref(trans, extent_root, path,
                                          start, extent_root->root_key.objectid,
                                          0, level,
                                          btrfs_disk_key_objectid(&first));
                BUG_ON(err);
                if (need_resched()) {
                        mutex_unlock(&extent_root->fs_info->alloc_mutex);
                        cond_resched();
                        mutex_lock(&extent_root->fs_info->alloc_mutex);
                }
        }
        btrfs_free_path(path);
        return 0;
}
1527
/*
 * Mark the range [@bytenr, @bytenr + @num_bytes) as pinned, or queue
 * it in pending_del when @pending is set.
 *
 * Non-pending fast path: if the block is a cached tree block that was
 * allocated in the running transaction and never written to disk, it
 * can be cleaned and reused immediately -- return 1 so the caller
 * knows no pinning happened and the space is free right now.
 * Otherwise the range stays pinned until the transaction commits.
 * Caller must hold alloc_mutex.  Returns 0 when pinned/queued.
 */
static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
                          int pending)
{
        int err = 0;

        WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
        if (!pending) {
                struct extent_buffer *buf;
                buf = btrfs_find_tree_block(root, bytenr, num_bytes);
                if (buf) {
                        /* try-lock only: never block while holding alloc_mutex */
                        if (btrfs_buffer_uptodate(buf, 0) &&
                            btrfs_try_tree_lock(buf)) {
                                u64 transid =
                                    root->fs_info->running_transaction->transid;
                                u64 header_transid =
                                        btrfs_header_generation(buf);
                                /* born this transaction and never written: reuse */
                                if (header_transid == transid &&
                                    !btrfs_header_flag(buf,
                                               BTRFS_HEADER_FLAG_WRITTEN)) {
                                        clean_tree_block(NULL, root, buf);
                                        btrfs_tree_unlock(buf);
                                        free_extent_buffer(buf);
                                        return 1;
                                }
                                btrfs_tree_unlock(buf);
                        }
                        free_extent_buffer(buf);
                }
                update_pinned_extents(root, bytenr, num_bytes, 1);
        } else {
                set_extent_bits(&root->fs_info->pending_del,
                                bytenr, bytenr + num_bytes - 1,
                                EXTENT_LOCKED, GFP_NOFS);
        }
        BUG_ON(err < 0);
        return 0;
}
1565
1566 /*
1567  * remove an extent from the root, returns 0 on success
1568  */
1569 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1570                          *root, u64 bytenr, u64 num_bytes,
1571                          u64 root_objectid, u64 ref_generation,
1572                          u64 owner_objectid, u64 owner_offset, int pin,
1573                          int mark_free)
1574 {
1575         struct btrfs_path *path;
1576         struct btrfs_key key;
1577         struct btrfs_fs_info *info = root->fs_info;
1578         struct btrfs_root *extent_root = info->extent_root;
1579         struct extent_buffer *leaf;
1580         int ret;
1581         int extent_slot = 0;
1582         int found_extent = 0;
1583         int num_to_del = 1;
1584         struct btrfs_extent_item *ei;
1585         u32 refs;
1586
1587         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1588         key.objectid = bytenr;
1589         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1590         key.offset = num_bytes;
1591         path = btrfs_alloc_path();
1592         if (!path)
1593                 return -ENOMEM;
1594
1595         path->reada = 1;
1596         ret = lookup_extent_backref(trans, extent_root, path,
1597                                     bytenr, root_objectid,
1598                                     ref_generation,
1599                                     owner_objectid, owner_offset, 1);
1600         if (ret == 0) {
1601                 struct btrfs_key found_key;
1602                 extent_slot = path->slots[0];
1603                 while(extent_slot > 0) {
1604                         extent_slot--;
1605                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1606                                               extent_slot);
1607                         if (found_key.objectid != bytenr)
1608                                 break;
1609                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1610                             found_key.offset == num_bytes) {
1611                                 found_extent = 1;
1612                                 break;
1613                         }
1614                         if (path->slots[0] - extent_slot > 5)
1615                                 break;
1616                 }
1617                 if (!found_extent)
1618                         ret = btrfs_del_item(trans, extent_root, path);
1619         } else {
1620                 btrfs_print_leaf(extent_root, path->nodes[0]);
1621                 WARN_ON(1);
1622                 printk("Unable to find ref byte nr %Lu root %Lu "
1623                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1624                        root_objectid, ref_generation, owner_objectid,
1625                        owner_offset);
1626         }
1627         if (!found_extent) {
1628                 btrfs_release_path(extent_root, path);
1629                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1630                 if (ret < 0)
1631                         return ret;
1632                 BUG_ON(ret);
1633                 extent_slot = path->slots[0];
1634         }
1635
1636         leaf = path->nodes[0];
1637         ei = btrfs_item_ptr(leaf, extent_slot,
1638                             struct btrfs_extent_item);
1639         refs = btrfs_extent_refs(leaf, ei);
1640         BUG_ON(refs == 0);
1641         refs -= 1;
1642         btrfs_set_extent_refs(leaf, ei, refs);
1643
1644         btrfs_mark_buffer_dirty(leaf);
1645
1646         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1647                 /* if the back ref and the extent are next to each other
1648                  * they get deleted below in one shot
1649                  */
1650                 path->slots[0] = extent_slot;
1651                 num_to_del = 2;
1652         } else if (found_extent) {
1653                 /* otherwise delete the extent back ref */
1654                 ret = btrfs_del_item(trans, extent_root, path);
1655                 BUG_ON(ret);
1656                 /* if refs are 0, we need to setup the path for deletion */
1657                 if (refs == 0) {
1658                         btrfs_release_path(extent_root, path);
1659                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1660                                                 -1, 1);
1661                         if (ret < 0)
1662                                 return ret;
1663                         BUG_ON(ret);
1664                 }
1665         }
1666
1667         if (refs == 0) {
1668                 u64 super_used;
1669                 u64 root_used;
1670
1671                 if (pin) {
1672                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1673                         if (ret > 0)
1674                                 mark_free = 1;
1675                         BUG_ON(ret < 0);
1676                 }
1677
1678                 /* block accounting for super block */
1679                 spin_lock_irq(&info->delalloc_lock);
1680                 super_used = btrfs_super_bytes_used(&info->super_copy);
1681                 btrfs_set_super_bytes_used(&info->super_copy,
1682                                            super_used - num_bytes);
1683                 spin_unlock_irq(&info->delalloc_lock);
1684
1685                 /* block accounting for root item */
1686                 root_used = btrfs_root_used(&root->root_item);
1687                 btrfs_set_root_used(&root->root_item,
1688                                            root_used - num_bytes);
1689                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1690                                       num_to_del);
1691                 if (ret) {
1692                         return ret;
1693                 }
1694                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1695                                          mark_free);
1696                 BUG_ON(ret);
1697         }
1698         btrfs_free_path(path);
1699         finish_current_insert(trans, extent_root);
1700         return ret;
1701 }
1702
1703 /*
1704  * find all the blocks marked as pending in the radix tree and remove
1705  * them from the extent map
1706  */
1707 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1708                                btrfs_root *extent_root)
1709 {
1710         int ret;
1711         int err = 0;
1712         u64 start;
1713         u64 end;
1714         struct extent_io_tree *pending_del;
1715         struct extent_io_tree *pinned_extents;
1716
1717         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1718         pending_del = &extent_root->fs_info->pending_del;
1719         pinned_extents = &extent_root->fs_info->pinned_extents;
1720
1721         while(1) {
1722                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1723                                             EXTENT_LOCKED);
1724                 if (ret)
1725                         break;
1726                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1727                                   GFP_NOFS);
1728                 if (!test_range_bit(&extent_root->fs_info->extent_ins,
1729                                     start, end, EXTENT_LOCKED, 0)) {
1730                         update_pinned_extents(extent_root, start,
1731                                               end + 1 - start, 1);
1732                         ret = __free_extent(trans, extent_root,
1733                                              start, end + 1 - start,
1734                                              extent_root->root_key.objectid,
1735                                              0, 0, 0, 0, 0);
1736                 } else {
1737                         clear_extent_bits(&extent_root->fs_info->extent_ins,
1738                                           start, end, EXTENT_LOCKED, GFP_NOFS);
1739                 }
1740                 if (ret)
1741                         err = ret;
1742
1743                 if (need_resched()) {
1744                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
1745                         cond_resched();
1746                         mutex_lock(&extent_root->fs_info->alloc_mutex);
1747                 }
1748         }
1749         return err;
1750 }
1751
1752 /*
1753  * remove an extent from the root, returns 0 on success
1754  */
1755 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
1756                                struct btrfs_root *root, u64 bytenr,
1757                                u64 num_bytes, u64 root_objectid,
1758                                u64 ref_generation, u64 owner_objectid,
1759                                u64 owner_offset, int pin)
1760 {
1761         struct btrfs_root *extent_root = root->fs_info->extent_root;
1762         int pending_ret;
1763         int ret;
1764
1765         WARN_ON(num_bytes < root->sectorsize);
1766         if (!root->ref_cows)
1767                 ref_generation = 0;
1768
1769         if (root == extent_root) {
1770                 pin_down_bytes(root, bytenr, num_bytes, 1);
1771                 return 0;
1772         }
1773         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1774                             ref_generation, owner_objectid, owner_offset,
1775                             pin, pin == 0);
1776
1777         finish_current_insert(trans, root->fs_info->extent_root);
1778         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1779         return ret ? ret : pending_ret;
1780 }
1781
1782 int btrfs_free_extent(struct btrfs_trans_handle *trans,
1783                       struct btrfs_root *root, u64 bytenr,
1784                       u64 num_bytes, u64 root_objectid,
1785                       u64 ref_generation, u64 owner_objectid,
1786                       u64 owner_offset, int pin)
1787 {
1788         int ret;
1789
1790         maybe_lock_mutex(root);
1791         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes,
1792                                   root_objectid, ref_generation,
1793                                   owner_objectid, owner_offset, pin);
1794         maybe_unlock_mutex(root);
1795         return ret;
1796 }
1797
1798 static u64 stripe_align(struct btrfs_root *root, u64 val)
1799 {
1800         u64 mask = ((u64)root->stripesize - 1);
1801         u64 ret = (val + mask) & ~mask;
1802         return ret;
1803 }
1804
/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 *
 * The search starts from hint_byte (or a cached per-profile last
 * allocation), and on failure widens in stages via the new_group /
 * enospc / check_failed labels: first wrapping back to the original
 * start, then doing a full scan that may also force a chunk allocation.
 */
static int noinline find_free_extent(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *orig_root,
                                     u64 num_bytes, u64 empty_size,
                                     u64 search_start, u64 search_end,
                                     u64 hint_byte, struct btrfs_key *ins,
                                     u64 exclude_start, u64 exclude_nr,
                                     int data)
{
        int ret;
        u64 orig_search_start;
        struct btrfs_root * root = orig_root->fs_info->extent_root;
        struct btrfs_fs_info *info = root->fs_info;
        u64 total_needed = num_bytes;
        /* cached end of the last allocation for this profile, if any */
        u64 *last_ptr = NULL;
        struct btrfs_block_group_cache *block_group;
        int full_scan = 0;
        int wrapped = 0;
        int chunk_alloc_done = 0;
        int empty_cluster = 2 * 1024 * 1024;
        int allowed_chunk_alloc = 0;

        WARN_ON(num_bytes < root->sectorsize);
        btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

        if (orig_root->ref_cows || empty_size)
                allowed_chunk_alloc = 1;

        /* metadata allocations cluster around the last metadata alloc */
        if (data & BTRFS_BLOCK_GROUP_METADATA) {
                last_ptr = &root->fs_info->last_alloc;
                empty_cluster = 256 * 1024;
        }

        /* data allocations only cluster when the SSD option is set */
        if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
                last_ptr = &root->fs_info->last_data_alloc;
        }

        if (last_ptr) {
                if (*last_ptr)
                        hint_byte = *last_ptr;
                else {
                        /* no cached spot yet: ask for extra slack instead */
                        empty_size += empty_cluster;
                }
        }

        search_start = max(search_start, first_logical_byte(root, 0));
        orig_search_start = search_start;

        if (search_end == (u64)-1)
                search_end = btrfs_super_total_bytes(&info->super_copy);

        /* pick a starting block group from the hint, or the trans default */
        if (hint_byte) {
                block_group = btrfs_lookup_first_block_group(info, hint_byte);
                if (!block_group)
                        hint_byte = search_start;
                block_group = btrfs_find_block_group(root, block_group,
                                                     hint_byte, data, 1);
                if (last_ptr && *last_ptr == 0 && block_group)
                        hint_byte = block_group->key.objectid;
        } else {
                block_group = btrfs_find_block_group(root,
                                                     trans->block_group,
                                                     search_start, data, 1);
        }
        search_start = max(search_start, hint_byte);

        total_needed += empty_size;

check_failed:
        if (!block_group) {
                block_group = btrfs_lookup_first_block_group(info,
                                                             search_start);
                if (!block_group)
                        block_group = btrfs_lookup_first_block_group(info,
                                                       orig_search_start);
        }
        /* on the full scan, try (once) to grow the free space first */
        if (full_scan && !chunk_alloc_done) {
                if (allowed_chunk_alloc) {
                        do_chunk_alloc(trans, root,
                                     num_bytes + 2 * 1024 * 1024, data, 1);
                        allowed_chunk_alloc = 0;
                } else if (block_group && block_group_bits(block_group, data)) {
                        block_group->space_info->force_alloc = 1;
                }
                chunk_alloc_done = 1;
        }
        ret = find_search_start(root, &block_group, &search_start,
                                total_needed, data);
        /* the cached cluster may be exhausted; drop it and retry once */
        if (ret == -ENOSPC && last_ptr && *last_ptr) {
                *last_ptr = 0;
                block_group = btrfs_lookup_first_block_group(info,
                                                             orig_search_start);
                search_start = orig_search_start;
                ret = find_search_start(root, &block_group, &search_start,
                                        total_needed, data);
        }
        if (ret == -ENOSPC)
                goto enospc;
        if (ret)
                goto error;

        /*
         * we moved away from the cluster hint: invalidate it and retry
         * with cluster-sized slack so the new spot can host a cluster
         */
        if (last_ptr && *last_ptr && search_start != *last_ptr) {
                *last_ptr = 0;
                if (!empty_size) {
                        empty_size += empty_cluster;
                        total_needed += empty_size;
                }
                block_group = btrfs_lookup_first_block_group(info,
                                                       orig_search_start);
                search_start = orig_search_start;
                ret = find_search_start(root, &block_group,
                                        &search_start, total_needed, data);
                if (ret == -ENOSPC)
                        goto enospc;
                if (ret)
                        goto error;
        }

        search_start = stripe_align(root, search_start);
        ins->objectid = search_start;
        ins->offset = num_bytes;

        if (ins->objectid + num_bytes >= search_end)
                goto enospc;

        /* candidate must not cross the block group boundary */
        if (ins->objectid + num_bytes >
            block_group->key.objectid + block_group->key.offset) {
                search_start = block_group->key.objectid +
                        block_group->key.offset;
                goto new_group;
        }

        /* skip ranges with a pending extent insert */
        if (test_range_bit(&info->extent_ins, ins->objectid,
                           ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
                search_start = ins->objectid + num_bytes;
                goto new_group;
        }

        /* skip ranges pinned until the transaction commits */
        if (test_range_bit(&info->pinned_extents, ins->objectid,
                           ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
                search_start = ins->objectid + num_bytes;
                goto new_group;
        }

        /* skip the caller's excluded window */
        if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
            ins->objectid < exclude_start + exclude_nr)) {
                search_start = exclude_start + exclude_nr;
                goto new_group;
        }

        /* remember the winning group for future metadata allocations */
        if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
                block_group = btrfs_lookup_block_group(info, ins->objectid);
                if (block_group)
                        trans->block_group = block_group;
        }
        ins->offset = num_bytes;
        if (last_ptr) {
                *last_ptr = ins->objectid + ins->offset;
                if (*last_ptr ==
                    btrfs_super_total_bytes(&root->fs_info->super_copy)) {
                        *last_ptr = 0;
                }
        }
        return 0;

new_group:
        if (search_start + num_bytes >= search_end) {
enospc:
                /* wrap to the beginning; escalate to a full scan on the
                 * second wrap, and give up if already full scanning */
                search_start = orig_search_start;
                if (full_scan) {
                        ret = -ENOSPC;
                        goto error;
                }
                if (wrapped) {
                        if (!full_scan)
                                total_needed -= empty_size;
                        full_scan = 1;
                } else
                        wrapped = 1;
        }
        block_group = btrfs_lookup_first_block_group(info, search_start);
        cond_resched();
        block_group = btrfs_find_block_group(root, block_group,
                                             search_start, data, 0);
        goto check_failed;

error:
        return ret;
}
2001
/*
 * reserve space for an extent, recording the result in ins.
 *
 * The allocation profile is derived from the caller: data allocations
 * use the data profile, the chunk root uses the system profile, and
 * everything else is metadata.  When find_free_extent returns -ENOSPC
 * the request is halved (never below min_alloc_size), a chunk
 * allocation is forced, and the search retries; any other failure is
 * fatal (BUG).
 */
static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
                                  u64 num_bytes, u64 min_alloc_size,
                                  u64 empty_size, u64 hint_byte,
                                  u64 search_end, struct btrfs_key *ins,
                                  u64 data)
{
        int ret;
        u64 search_start = 0;
        u64 alloc_profile;
        struct btrfs_fs_info *info = root->fs_info;

        /* turn the data flag into a full block group type + raid profile */
        if (data) {
                alloc_profile = info->avail_data_alloc_bits &
                                info->data_alloc_profile;
                data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
        } else if (root == root->fs_info->chunk_root) {
                alloc_profile = info->avail_system_alloc_bits &
                                info->system_alloc_profile;
                data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
        } else {
                alloc_profile = info->avail_metadata_alloc_bits &
                                info->metadata_alloc_profile;
                data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
        }
again:
        data = reduce_alloc_profile(root, data);
        /*
         * the only place that sets empty_size is btrfs_realloc_node, which
         * is not called recursively on allocations
         */
        if (empty_size || root->ref_cows) {
                /* keep some metadata room available even for data allocs */
                if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                     2 * 1024 * 1024,
                                     BTRFS_BLOCK_GROUP_METADATA |
                                     (info->metadata_alloc_profile &
                                      info->avail_metadata_alloc_bits), 0);
                        BUG_ON(ret);
                }
                ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                     num_bytes + 2 * 1024 * 1024, data, 0);
                BUG_ON(ret);
        }

        WARN_ON(num_bytes < root->sectorsize);
        ret = find_free_extent(trans, root, num_bytes, empty_size,
                               search_start, search_end, hint_byte, ins,
                               trans->alloc_exclude_start,
                               trans->alloc_exclude_nr, data);

        /* out of space: shrink the request and force a chunk alloc */
        if (ret == -ENOSPC && num_bytes > min_alloc_size) {
                num_bytes = num_bytes >> 1;
                num_bytes = max(num_bytes, min_alloc_size);
                do_chunk_alloc(trans, root->fs_info->extent_root,
                               num_bytes, data, 1);
                goto again;
        }
        if (ret) {
                printk("allocation failed flags %Lu\n", data);
                BUG();
        }
        /* the reserved range is no longer free space */
        clear_extent_dirty(&root->fs_info->free_space_cache,
                           ins->objectid, ins->objectid + ins->offset - 1,
                           GFP_NOFS);
        return 0;
}
2069
2070 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2071                                   struct btrfs_root *root,
2072                                   u64 num_bytes, u64 min_alloc_size,
2073                                   u64 empty_size, u64 hint_byte,
2074                                   u64 search_end, struct btrfs_key *ins,
2075                                   u64 data)
2076 {
2077         int ret;
2078         maybe_lock_mutex(root);
2079         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
2080                                      empty_size, hint_byte, search_end, ins,
2081                                      data);
2082         maybe_unlock_mutex(root);
2083         return ret;
2084 }
2085
/*
 * record a previously reserved extent (described by ins) in the extent
 * tree: bump the byte counters in the super block and the root item,
 * then insert the extent item and its back reference in one btree
 * operation, and finally update the block group accounting.
 *
 * When allocating for the extent root itself the insert cannot recurse
 * into the extent tree, so the range is only marked in extent_ins and
 * handled later by finish_current_insert.
 */
static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
                                         struct btrfs_root *root,
                                         u64 root_objectid, u64 ref_generation,
                                         u64 owner, u64 owner_offset,
                                         struct btrfs_key *ins)
{
        int ret;
        int pending_ret;
        u64 super_used;
        u64 root_used;
        u64 num_bytes = ins->offset;
        u32 sizes[2];
        struct btrfs_fs_info *info = root->fs_info;
        struct btrfs_root *extent_root = info->extent_root;
        struct btrfs_extent_item *extent_item;
        struct btrfs_extent_ref *ref;
        struct btrfs_path *path;
        struct btrfs_key keys[2];

        /* block accounting for super block */
        spin_lock_irq(&info->delalloc_lock);
        super_used = btrfs_super_bytes_used(&info->super_copy);
        btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
        spin_unlock_irq(&info->delalloc_lock);

        /* block accounting for root item */
        root_used = btrfs_root_used(&root->root_item);
        btrfs_set_root_used(&root->root_item, root_used + num_bytes);

        /* extent-root allocations are deferred to avoid recursion */
        if (root == extent_root) {
                set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
                                ins->objectid + ins->offset - 1,
                                EXTENT_LOCKED, GFP_NOFS);
                goto update_block;
        }

        /* keys[0]: the extent item; keys[1]: its hashed back reference */
        memcpy(&keys[0], ins, sizeof(*ins));
        keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
                                         owner, owner_offset);
        keys[1].objectid = ins->objectid;
        keys[1].type = BTRFS_EXTENT_REF_KEY;
        sizes[0] = sizeof(*extent_item);
        sizes[1] = sizeof(*ref);

        path = btrfs_alloc_path();
        BUG_ON(!path);

        /* insert both items adjacently in a single operation */
        ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
                                       sizes, 2);

        BUG_ON(ret);
        extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
                                     struct btrfs_extent_item);
        btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
        ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
                             struct btrfs_extent_ref);

        btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
        btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
        btrfs_set_ref_objectid(path->nodes[0], ref, owner);
        btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);

        btrfs_mark_buffer_dirty(path->nodes[0]);

        /* the reservation window is no longer needed */
        trans->alloc_exclude_start = 0;
        trans->alloc_exclude_nr = 0;
        btrfs_free_path(path);
        finish_current_insert(trans, extent_root);
        pending_ret = del_pending_extents(trans, extent_root);

        if (ret)
                goto out;
        if (pending_ret) {
                ret = pending_ret;
                goto out;
        }

update_block:
        ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
        if (ret) {
                printk("update block group failed for %Lu %Lu\n",
                       ins->objectid, ins->offset);
                BUG();
        }
out:
        return ret;
}
2173
2174 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2175                                 struct btrfs_root *root,
2176                                 u64 root_objectid, u64 ref_generation,
2177                                 u64 owner, u64 owner_offset,
2178                                 struct btrfs_key *ins)
2179 {
2180         int ret;
2181         maybe_lock_mutex(root);
2182         ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2183                                             ref_generation, owner,
2184                                             owner_offset, ins);
2185         maybe_unlock_mutex(root);
2186         return ret;
2187 }
2188 /*
2189  * finds a free extent and does all the dirty work required for allocation
2190  * returns the key for the extent through ins, and a tree buffer for
2191  * the first block of the extent through buf.
2192  *
2193  * returns 0 if everything worked, non-zero otherwise.
2194  */
2195 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2196                        struct btrfs_root *root,
2197                        u64 num_bytes, u64 min_alloc_size,
2198                        u64 root_objectid, u64 ref_generation,
2199                        u64 owner, u64 owner_offset,
2200                        u64 empty_size, u64 hint_byte,
2201                        u64 search_end, struct btrfs_key *ins, u64 data)
2202 {
2203         int ret;
2204
2205         maybe_lock_mutex(root);
2206
2207         ret = __btrfs_reserve_extent(trans, root, num_bytes,
2208                                      min_alloc_size, empty_size, hint_byte,
2209                                      search_end, ins, data);
2210         BUG_ON(ret);
2211         ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2212                                             ref_generation, owner,
2213                                             owner_offset, ins);
2214         BUG_ON(ret);
2215
2216         maybe_unlock_mutex(root);
2217         return ret;
2218 }
/*
 * helper function to allocate a block for a given tree
 *
 * Reserves a new extent of @blocksize bytes and materializes a tree
 * buffer for it.  On success the buffer is returned LOCKED
 * (btrfs_tree_lock) and recorded in the transaction's dirty_pages set;
 * on failure an ERR_PTR(-errno) is returned (never NULL) and any
 * reserved extent has been given back.
 */
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u32 blocksize,
					     u64 root_objectid,
					     u64 ref_generation,
					     u64 first_objectid,
					     int level,
					     u64 hint,
					     u64 empty_size)
{
	struct btrfs_key ins;
	int ret;
	struct extent_buffer *buf;

	ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
				 root_objectid, ref_generation,
				 level, first_objectid, empty_size, hint,
				 (u64)-1, &ins, 0);
	if (ret) {
		/* a positive return here would be a logic error upstream */
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
	}
	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
	if (!buf) {
		/* release the extent we just reserved before bailing out */
		btrfs_free_extent(trans, root, ins.objectid, blocksize,
				  root->root_key.objectid, ref_generation,
				  0, 0, 0);
		return ERR_PTR(-ENOMEM);
	}
	btrfs_set_header_generation(buf, trans->transid);
	btrfs_tree_lock(buf);
	clean_tree_block(trans, root, buf);
	btrfs_set_buffer_uptodate(buf);

	/* a freshly allocated block should never already be dirty */
	if (PageDirty(buf->first_page)) {
		printk("page %lu dirty\n", buf->first_page->index);
		WARN_ON(1);
	}

	/* make sure the transaction commit writes this block out */
	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
	trans->blocks_used++;
	return buf;
}
2267
/*
 * Drop one reference on every on-disk file extent pointed to by @leaf.
 *
 * Must be called with fs_info->alloc_mutex held.  The mutex is released
 * for the scan and re-taken only around each __btrfs_free_extent() call,
 * so other allocators are not starved while we walk the items; it is
 * held again when the function returns.  Always returns 0 — a failed
 * extent free is fatal (BUG_ON).
 */
static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct extent_buffer *leaf)
{
	u64 leaf_owner;
	u64 leaf_generation;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

	BUG_ON(!btrfs_is_leaf(leaf));
	nritems = btrfs_header_nritems(leaf);
	leaf_owner = btrfs_header_owner(leaf);
	leaf_generation = btrfs_header_generation(leaf);

	/* drop the allocator lock for the (possibly long) item scan */
	mutex_unlock(&root->fs_info->alloc_mutex);

	for (i = 0; i < nritems; i++) {
		u64 disk_bytenr;
		cond_resched();

		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		/* inline extents live inside the leaf; nothing to free */
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		/* disk_bytenr == 0: no on-disk extent backs this item */
		if (disk_bytenr == 0)
			continue;

		/* only hold the allocator lock for the actual free */
		mutex_lock(&root->fs_info->alloc_mutex);
		ret = __btrfs_free_extent(trans, root, disk_bytenr,
				btrfs_file_extent_disk_num_bytes(leaf, fi),
				leaf_owner, leaf_generation,
				key.objectid, key.offset, 0);
		mutex_unlock(&root->fs_info->alloc_mutex);
		BUG_ON(ret);
	}

	/* re-acquire before returning: caller expects the lock held */
	mutex_lock(&root->fs_info->alloc_mutex);
	return 0;
}
2318
/*
 * Like drop_leaf_ref_no_cache(), but driven by a cached btrfs_leaf_ref:
 * drop one reference on each extent recorded in @ref->extents, without
 * having to read the leaf itself.
 *
 * Must be called with fs_info->alloc_mutex held; the lock is released
 * between extents and re-taken for each free, and is held again on
 * return.  Always returns 0 — a failed free is fatal (BUG_ON).
 */
static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root,
					 struct btrfs_leaf_ref *ref)
{
	int i;
	int ret;
	struct btrfs_extent_info *info = ref->extents;

	mutex_unlock(&root->fs_info->alloc_mutex);
	for (i = 0; i < ref->nritems; i++) {
		/* hold the allocator lock only for the actual free */
		mutex_lock(&root->fs_info->alloc_mutex);
		ret = __btrfs_free_extent(trans, root,
					info->bytenr, info->num_bytes,
					ref->owner, ref->generation,
					info->objectid, info->offset, 0);
		mutex_unlock(&root->fs_info->alloc_mutex);
		BUG_ON(ret);
		info++;
	}
	mutex_lock(&root->fs_info->alloc_mutex);

	return 0;
}
2342
/*
 * Issue readahead for the children of @node from @slot onward, to warm
 * the cache before the snapshot-drop walk reads them for real.
 *
 * Blocks that would require a seek of more than 32k from the previous
 * read, and blocks whose refcount is not 1 (they will not be freed by
 * the walk), are skipped; after 32 skips we give up entirely.
 */
static void noinline reada_walk_down(struct btrfs_root *root,
				     struct extent_buffer *node,
				     int slot)
{
	u64 bytenr;
	u64 last = 0;
	u32 nritems;
	u32 refs;
	u32 blocksize;
	int ret;
	int i;
	int level;
	int skipped = 0;

	nritems = btrfs_header_nritems(node);
	level = btrfs_header_level(node);
	/*
	 * NOTE(review): this guard only lets level-0 nodes through, yet
	 * the loop below uses btrfs_node_blockptr() and `level - 1`,
	 * which only make sense for an internal node.  The condition
	 * looks inverted (or should perhaps be `level != 1`) — confirm
	 * against the call site in walk_down_tree before changing.
	 */
	if (level)
		return;

	for (i = slot; i < nritems && skipped < 32; i++) {
		bytenr = btrfs_node_blockptr(node, i);
		/* skip blocks more than 32k away in either direction */
		if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
			     (last > bytenr && last - bytenr > 32 * 1024))) {
			skipped++;
			continue;
		}
		blocksize = btrfs_level_size(root, level - 1);
		if (i != slot) {
			/* shared blocks won't be freed; don't read ahead */
			ret = lookup_extent_ref(NULL, root, bytenr,
						blocksize, &refs);
			BUG_ON(ret);
			if (refs != 1) {
				skipped++;
				continue;
			}
		}
		ret = readahead_tree_block(root, bytenr, blocksize,
					   btrfs_node_ptr_generation(node, i));
		last = bytenr + blocksize;
		cond_resched();
		if (ret)
			break;
	}
}
2387
/*
 * Look up the reference count on the extent [start, start + len) while
 * temporarily releasing fs_info->alloc_mutex.
 *
 * Must be called with alloc_mutex held; returns with it held again.
 * The count is stored in *refs; returns lookup_extent_ref()'s result.
 */
int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
			      u32 *refs)
{
	int ret;
	mutex_unlock(&root->fs_info->alloc_mutex);
	ret = lookup_extent_ref(NULL, root, start, len, refs);
	cond_resched();		/* be nice while the lock is dropped */
	mutex_lock(&root->fs_info->alloc_mutex);
	return ret;
}
2398
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 *
 * On entry, *level and path->nodes[*level] describe where the walk
 * stands.  We descend until we hit either a shared block (refs > 1),
 * whose reference we simply drop, or a leaf (possibly via its cached
 * leaf ref), whose file extents we release.  On the way out the block
 * we stopped at is freed and *level is incremented so walk_up_tree()
 * can take over.  fs_info->alloc_mutex is taken on entry and released
 * before returning.  Always returns 0 — failures BUG.
 */
static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	u64 bytenr;
	u64 ptr_gen;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	struct extent_buffer *parent;
	struct btrfs_leaf_ref *ref;
	u32 blocksize;
	int ret;
	u32 refs;

	mutex_lock(&root->fs_info->alloc_mutex);

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	/* if the starting block is shared, we only drop our one reference */
	ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
				path->nodes[*level]->len, &refs);
	BUG_ON(ret);
	if (refs > 1)
		goto out;

	/*
	 * walk down to the last node level and free all the leaves
	 */
	while(*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		/* no more slots in this node: let the caller walk back up */
		if (path->slots[*level] >=
		    btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			/* a leaf: release all the file extents it points to */
			ret = drop_leaf_ref_no_cache(trans, root, cur);
			BUG_ON(ret);
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);

		ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
		BUG_ON(ret);
		if (refs != 1) {
			/*
			 * this child is shared with another tree: drop our
			 * reference and move on to the next slot without
			 * descending into it
			 */
			parent = path->nodes[*level];
			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);
			path->slots[*level]++;
			ret = __btrfs_free_extent(trans, root, bytenr,
						blocksize, root_owner,
						root_gen, 0, 0, 1);
			BUG_ON(ret);
			continue;
		}

		if (*level == 1) {
			/*
			 * the child is a leaf: try the leaf-ref cache so we
			 * can free its extents without reading the leaf
			 */
			struct btrfs_key key;
			btrfs_node_key_to_cpu(cur, &key, path->slots[*level]);
			ref = btrfs_lookup_leaf_ref(root, bytenr);
			if (ref) {
				ret = drop_leaf_ref(trans, root, ref);
				BUG_ON(ret);
				btrfs_remove_leaf_ref(root, ref);
				btrfs_free_leaf_ref(ref);
				*level = 0;
				break;
			}
		}
		next = btrfs_find_tree_block(root, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			/* not cached: drop the lock and read it from disk */
			free_extent_buffer(next);
			mutex_unlock(&root->fs_info->alloc_mutex);

			if (path->slots[*level] == 0)
				reada_walk_down(root, cur, path->slots[*level]);
			next = read_tree_block(root, bytenr, blocksize,
					       ptr_gen);
			cond_resched();
			mutex_lock(&root->fs_info->alloc_mutex);

			/* we've dropped the lock, double check */
			ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
						&refs);
			BUG_ON(ret);
			if (refs != 1) {
				/* it became shared meanwhile; same as above */
				parent = path->nodes[*level];
				root_owner = btrfs_header_owner(parent);
				root_gen = btrfs_header_generation(parent);

				path->slots[*level]++;
				free_extent_buffer(next);
				ret = __btrfs_free_extent(trans, root, bytenr,
							blocksize,
							root_owner,
							root_gen, 0, 0, 1);
				BUG_ON(ret);
				continue;
			}
		}
		/* descend: make the child the new current node */
		WARN_ON(*level <= 0);
		if (path->nodes[*level-1])
			free_extent_buffer(path->nodes[*level-1]);
		path->nodes[*level-1] = next;
		*level = btrfs_header_level(next);
		path->slots[*level] = 0;
	}
out:
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	/*
	 * free the block the walk finished on, charging the free to the
	 * owner/generation of its parent (or of the root block itself)
	 */
	if (path->nodes[*level] == root->node) {
		parent = path->nodes[*level];
		bytenr = path->nodes[*level]->start;
	} else {
		parent = path->nodes[*level + 1];
		bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
	}

	blocksize = btrfs_level_size(root, *level);
	root_owner = btrfs_header_owner(parent);
	root_gen = btrfs_header_generation(parent);

	ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
				  root_owner, root_gen, 0, 0, 1);
	free_extent_buffer(path->nodes[*level]);
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	mutex_unlock(&root->fs_info->alloc_mutex);
	cond_resched();
	return 0;
}
2543
/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 *
 * When such a node is found, its slot is advanced, *level is updated,
 * and the resume point is saved in root_item->drop_progress/drop_level;
 * returns 0 so the caller walks back down from there.  Fully-processed
 * nodes are freed on the way up.  Returns 1 once the whole tree has
 * been consumed.
 */
static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	struct btrfs_root_item *root_item = &root->root_item;
	int i;
	int slot;
	int ret;

	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		slot = path->slots[i];
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			/*
			 * this node still has unvisited slots: advance,
			 * record where to resume, and hand control back
			 */
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;
			node = path->nodes[i];
			path->slots[i]++;
			*level = i;
			WARN_ON(*level == 0);
			btrfs_node_key(node, &disk_key, path->slots[i]);
			memcpy(&root_item->drop_progress,
			       &disk_key, sizeof(disk_key));
			root_item->drop_level = i;
			return 0;
		} else {
			/*
			 * node fully processed: free its extent, charging
			 * the free to its parent (or the root itself)
			 */
			if (path->nodes[*level] == root->node) {
				root_owner = root->root_key.objectid;
				root_gen =
				   btrfs_header_generation(path->nodes[*level]);
			} else {
				struct extent_buffer *node;
				node = path->nodes[*level + 1];
				root_owner = btrfs_header_owner(node);
				root_gen = btrfs_header_generation(node);
			}
			ret = btrfs_free_extent(trans, root,
						path->nodes[*level]->start,
						path->nodes[*level]->len,
						root_owner, root_gen, 0, 0, 1);
			BUG_ON(ret);
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			*level = i + 1;
		}
	}
	return 1;
}
2597
/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 *
 * Progress is recorded in root_item->drop_progress / drop_level so the
 * drop can be resumed after a crash or after -EAGAIN (returned when the
 * running transaction starts committing, to avoid holding it up).
 * Caller must hold fs_info->drop_mutex.
 */
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
			*root)
{
	int ret = 0;
	int wret;
	int level;
	struct btrfs_path *path;
	int i;
	int orig_level;
	struct btrfs_root_item *root_item = &root->root_item;

	WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
	path = btrfs_alloc_path();
	BUG_ON(!path);

	level = btrfs_header_level(root->node);
	orig_level = level;
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* fresh drop: start the walk at the tree root */
		path->nodes[level] = root->node;
		extent_buffer_get(root->node);
		path->slots[level] = 0;
	} else {
		/* resume a partially-completed drop at the saved key/level */
		struct btrfs_key key;
		struct btrfs_disk_key found_key;
		struct extent_buffer *node;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path->lowest_level = level;
		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (wret < 0) {
			ret = wret;
			goto out;
		}
		node = path->nodes[level];
		btrfs_node_key(node, &found_key, path->slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
		/*
		 * unlock our path, this is safe because only this
		 * function is allowed to delete this snapshot
		 */
		for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
			if (path->nodes[i] && path->locks[i]) {
				path->locks[i] = 0;
				btrfs_tree_unlock(path->nodes[i]);
			}
		}
	}
	/* alternate down/up walks until walk_up_tree reports completion */
	while(1) {
		wret = walk_down_tree(trans, root, path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;

		wret = walk_up_tree(trans, root, path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;
		if (trans->transaction->in_commit) {
			/* don't hold up a commit; caller retries later */
			ret = -EAGAIN;
			break;
		}
		wake_up(&root->fs_info->transaction_throttle);
	}
	/* release whatever buffers are still referenced by the path */
	for (i = 0; i <= orig_level; i++) {
		if (path->nodes[i]) {
			free_extent_buffer(path->nodes[i]);
			path->nodes[i] = NULL;
		}
	}
out:
	btrfs_free_path(path);
	return ret;
}
2680
2681 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2682 {
2683         u64 start;
2684         u64 end;
2685         u64 ptr;
2686         int ret;
2687
2688         mutex_lock(&info->alloc_mutex);
2689         while(1) {
2690                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2691                                             &start, &end, (unsigned int)-1);
2692                 if (ret)
2693                         break;
2694                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2695                 if (!ret)
2696                         kfree((void *)(unsigned long)ptr);
2697                 clear_extent_bits(&info->block_group_cache, start,
2698                                   end, (unsigned int)-1, GFP_NOFS);
2699         }
2700         while(1) {
2701                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2702                                             &start, &end, EXTENT_DIRTY);
2703                 if (ret)
2704                         break;
2705                 clear_extent_dirty(&info->free_space_cache, start,
2706                                    end, GFP_NOFS);
2707         }
2708         mutex_unlock(&info->alloc_mutex);
2709         return 0;
2710 }
2711
/*
 * Compute the last page index of a readahead window that begins at
 * @start, wants @nr pages, and must not run past @last.
 * Equivalent to min(last, start + nr - 1).
 */
static unsigned long calc_ra(unsigned long start, unsigned long last,
			     unsigned long nr)
{
	unsigned long want_end = start + nr - 1;

	return want_end < last ? want_end : last;
}
2717
2718 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2719                                          u64 len)
2720 {
2721         u64 page_start;
2722         u64 page_end;
2723         unsigned long last_index;
2724         unsigned long i;
2725         struct page *page;
2726         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2727         struct file_ra_state *ra;
2728         unsigned long total_read = 0;
2729         unsigned long ra_pages;
2730         struct btrfs_ordered_extent *ordered;
2731         struct btrfs_trans_handle *trans;
2732
2733         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2734
2735         mutex_lock(&inode->i_mutex);
2736         i = start >> PAGE_CACHE_SHIFT;
2737         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2738
2739         ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
2740
2741         file_ra_state_init(ra, inode->i_mapping);
2742
2743         for (; i <= last_index; i++) {
2744                 if (total_read % ra_pages == 0) {
2745                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2746                                        calc_ra(i, last_index, ra_pages));
2747                 }
2748                 total_read++;
2749 again:
2750                 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
2751                         goto truncate_racing;
2752                 page = grab_cache_page(inode->i_mapping, i);
2753                 if (!page) {
2754                         goto out_unlock;
2755                 }
2756                 if (!PageUptodate(page)) {
2757                         btrfs_readpage(NULL, page);
2758                         lock_page(page);
2759                         if (!PageUptodate(page)) {
2760                                 unlock_page(page);
2761                                 page_cache_release(page);
2762                                 goto out_unlock;
2763                         }
2764                 }
2765                 wait_on_page_writeback(page);
2766
2767                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2768                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2769                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2770
2771                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
2772                 if (ordered) {
2773                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2774                         unlock_page(page);
2775                         page_cache_release(page);
2776                         btrfs_start_ordered_extent(inode, ordered, 1);
2777                         btrfs_put_ordered_extent(ordered);
2778                         goto again;
2779                 }
2780                 set_page_extent_mapped(page);
2781
2782
2783                 set_extent_delalloc(io_tree, page_start,
2784                                     page_end, GFP_NOFS);
2785                 set_page_dirty(page);
2786
2787                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2788                 unlock_page(page);
2789                 page_cache_release(page);
2790         }
2791
2792 out_unlock:
2793         /* we have to start the IO in order to get the ordered extents
2794          * instantiated.  This allows the relocation to code to wait
2795          * for all the ordered extents to hit the disk.
2796          *
2797          * Otherwise, it would constantly loop over the same extents
2798          * because the old ones don't get deleted  until the IO is
2799          * started
2800          */
2801         btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1,
2802                                WB_SYNC_NONE);
2803         kfree(ra);
2804         trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2805         if (trans) {
2806                 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
2807                 mark_inode_dirty(inode);
2808         }
2809         mutex_unlock(&inode->i_mutex);
2810         return 0;
2811
2812 truncate_racing:
2813         vmtruncate(inode, inode->i_size);
2814         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2815                                            total_read);
2816         goto out_unlock;
2817 }
2818
/*
 * The back references tell us which tree holds a ref on a block,
 * but it is possible for the tree root field in the reference to
 * reflect the original root before a snapshot was made.  In this
 * case we should search through all the children of a given root
 * to find potential holders of references on a block.
 *
 * Instead, we do something a little less fancy and just search
 * all the roots for a given key/block combination.
 *
 * Starting from *found_root, each candidate root (iterated via
 * btrfs_search_root) is searched for @key0; a root "matches" when the
 * block found there is @bytenr — the disk bytenr of a regular file
 * extent when @file_key, otherwise the start of the node at @level.
 * Returns 0 with *found_root updated on success, nonzero otherwise.
 */
static int find_root_for_ref(struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct btrfs_key *key0,
			     int level,
			     int file_key,
			     struct btrfs_root **found_root,
			     u64 bytenr)
{
	struct btrfs_key root_location;
	struct btrfs_root *cur_root = *found_root;
	struct btrfs_file_extent_item *file_extent;
	u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
	u64 found_bytenr;
	int ret;

	root_location.offset = (u64)-1;
	root_location.type = BTRFS_ROOT_ITEM_KEY;
	path->lowest_level = level;
	path->reada = 0;
	while(1) {
		ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
		found_bytenr = 0;
		if (ret == 0 && file_key) {
			/* file extent: compare its on-disk bytenr */
			struct extent_buffer *leaf = path->nodes[0];
			file_extent = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(leaf, file_extent) ==
			    BTRFS_FILE_EXTENT_REG) {
				found_bytenr =
					btrfs_file_extent_disk_bytenr(leaf,
							       file_extent);
			}
		} else if (!file_key) {
			/* tree block: compare the node start at @level */
			if (path->nodes[level])
				found_bytenr = path->nodes[level]->start;
		}

		btrfs_release_path(cur_root, path);

		if (found_bytenr == bytenr) {
			*found_root = cur_root;
			ret = 0;
			goto out;
		}
		/* no match; advance to the next root in the tree of roots */
		ret = btrfs_search_root(root->fs_info->tree_root,
					root_search_start, &root_search_start);
		if (ret)
			break;

		root_location.objectid = root_search_start;
		cur_root = btrfs_read_fs_root_no_name(root->fs_info,
						      &root_location);
		if (!cur_root) {
			ret = 1;
			break;
		}
	}
out:
	/* restore the path default before handing it back */
	path->lowest_level = 0;
	return ret;
}
2890
2891 /*
2892  * note, this releases the path
2893  */
2894 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2895                                   struct btrfs_path *path,
2896                                   struct btrfs_key *extent_key,
2897                                   u64 *last_file_objectid,
2898                                   u64 *last_file_offset,
2899                                   u64 *last_file_root,
2900                                   u64 last_extent)
2901 {
2902         struct inode *inode;
2903         struct btrfs_root *found_root;
2904         struct btrfs_key root_location;
2905         struct btrfs_key found_key;
2906         struct btrfs_extent_ref *ref;
2907         u64 ref_root;
2908         u64 ref_gen;
2909         u64 ref_objectid;
2910         u64 ref_offset;
2911         int ret;
2912         int level;
2913
2914         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
2915
2916         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2917                              struct btrfs_extent_ref);
2918         ref_root = btrfs_ref_root(path->nodes[0], ref);
2919         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2920         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2921         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2922         btrfs_release_path(extent_root, path);
2923
2924         root_location.objectid = ref_root;
2925         if (ref_gen == 0)
2926                 root_location.offset = 0;
2927         else
2928                 root_location.offset = (u64)-1;
2929         root_location.type = BTRFS_ROOT_ITEM_KEY;
2930
2931         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2932                                                 &root_location);
2933         BUG_ON(!found_root);
2934         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2935
2936         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2937                 found_key.objectid = ref_objectid;
2938                 found_key.type = BTRFS_EXTENT_DATA_KEY;
2939                 found_key.offset = ref_offset;
2940                 level = 0;
2941
2942                 if (last_extent == extent_key->objectid &&
2943                     *last_file_objectid == ref_objectid &&
2944                     *last_file_offset == ref_offset &&
2945                     *last_file_root == ref_root)
2946                         goto out;
2947
2948                 ret = find_root_for_ref(extent_root, path, &found_key,
2949                                         level, 1, &found_root,
2950                                         extent_key->objectid);
2951
2952                 if (ret)
2953                         goto out;
2954
2955                 if (last_extent == extent_key->objectid &&
2956                     *last_file_objectid == ref_objectid &&
2957                     *last_file_offset == ref_offset &&
2958                     *last_file_root == ref_root)
2959                         goto out;
2960
2961                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2962                                           ref_objectid, found_root);
2963                 if (inode->i_state & I_NEW) {
2964                         /* the inode and parent dir are two different roots */
2965                         BTRFS_I(inode)->root = found_root;
2966                         BTRFS_I(inode)->location.objectid = ref_objectid;
2967                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2968                         BTRFS_I(inode)->location.offset = 0;
2969                         btrfs_read_locked_inode(inode);
2970                         unlock_new_inode(inode);
2971
2972                 }
2973                 /* this can happen if the reference is not against
2974                  * the latest version of the tree root
2975                  */
2976                 if (is_bad_inode(inode))
2977                         goto out;
2978
2979                 *last_file_objectid = inode->i_ino;
2980                 *last_file_root = found_root->root_key.objectid;
2981                 *last_file_offset = ref_offset;
2982
2983                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2984                 iput(inode);
2985         } else {
2986                 struct btrfs_trans_handle *trans;
2987                 struct extent_buffer *eb;
2988                 int needs_lock = 0;
2989
2990                 eb = read_tree_block(found_root, extent_key->objectid,
2991                                      extent_key->offset, 0);
2992                 btrfs_tree_lock(eb);
2993                 level = btrfs_header_level(eb);
2994
2995                 if (level == 0)
2996                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2997                 else
2998                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2999
3000                 btrfs_tree_unlock(eb);
3001                 free_extent_buffer(eb);
3002
3003                 ret = find_root_for_ref(extent_root, path, &found_key,
3004                                         level, 0, &found_root,
3005                                         extent_key->objectid);
3006
3007                 if (ret)
3008                         goto out;
3009
3010                 /*
3011                  * right here almost anything could happen to our key,
3012                  * but that's ok.  The cow below will either relocate it
3013                  * or someone else will have relocated it.  Either way,
3014                  * it is in a different spot than it was before and
3015                  * we're happy.
3016                  */
3017
3018                 trans = btrfs_start_transaction(found_root, 1);
3019
3020                 if (found_root == extent_root->fs_info->extent_root ||
3021                     found_root == extent_root->fs_info->chunk_root ||
3022                     found_root == extent_root->fs_info->dev_root) {
3023                         needs_lock = 1;
3024                         mutex_lock(&extent_root->fs_info->alloc_mutex);
3025                 }
3026
3027                 path->lowest_level = level;
3028                 path->reada = 2;
3029                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
3030                                         0, 1);
3031                 path->lowest_level = 0;
3032                 btrfs_release_path(found_root, path);
3033
3034                 if (found_root == found_root->fs_info->extent_root)
3035                         btrfs_extent_post_op(trans, found_root);
3036                 if (needs_lock)
3037                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
3038
3039                 btrfs_end_transaction(trans, found_root);
3040
3041         }
3042 out:
3043         mutex_lock(&extent_root->fs_info->alloc_mutex);
3044         return 0;
3045 }
3046
3047 static int noinline del_extent_zero(struct btrfs_root *extent_root,
3048                                     struct btrfs_path *path,
3049                                     struct btrfs_key *extent_key)
3050 {
3051         int ret;
3052         struct btrfs_trans_handle *trans;
3053
3054         trans = btrfs_start_transaction(extent_root, 1);
3055         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
3056         if (ret > 0) {
3057                 ret = -EIO;
3058                 goto out;
3059         }
3060         if (ret < 0)
3061                 goto out;
3062         ret = btrfs_del_item(trans, extent_root, path);
3063 out:
3064         btrfs_end_transaction(trans, extent_root);
3065         return ret;
3066 }
3067
3068 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
3069                                         struct btrfs_path *path,
3070                                         struct btrfs_key *extent_key)
3071 {
3072         struct btrfs_key key;
3073         struct btrfs_key found_key;
3074         struct extent_buffer *leaf;
3075         u64 last_file_objectid = 0;
3076         u64 last_file_root = 0;
3077         u64 last_file_offset = (u64)-1;
3078         u64 last_extent = 0;
3079         u32 nritems;
3080         u32 item_size;
3081         int ret = 0;
3082
3083         if (extent_key->objectid == 0) {
3084                 ret = del_extent_zero(extent_root, path, extent_key);
3085                 goto out;
3086         }
3087         key.objectid = extent_key->objectid;
3088         key.type = BTRFS_EXTENT_REF_KEY;
3089         key.offset = 0;
3090
3091         while(1) {
3092                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3093
3094                 if (ret < 0)
3095                         goto out;
3096
3097                 ret = 0;
3098                 leaf = path->nodes[0];
3099                 nritems = btrfs_header_nritems(leaf);
3100                 if (path->slots[0] == nritems) {
3101                         ret = btrfs_next_leaf(extent_root, path);
3102                         if (ret > 0) {
3103                                 ret = 0;
3104                                 goto out;
3105                         }
3106                         if (ret < 0)
3107                                 goto out;
3108                         leaf = path->nodes[0];
3109                 }
3110
3111                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3112                 if (found_key.objectid != extent_key->objectid) {
3113                         break;
3114                 }
3115
3116                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
3117                         break;
3118                 }
3119
3120                 key.offset = found_key.offset + 1;
3121                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3122
3123                 ret = relocate_one_reference(extent_root, path, extent_key,
3124                                              &last_file_objectid,
3125                                              &last_file_offset,
3126                                              &last_file_root, last_extent);
3127                 if (ret)
3128                         goto out;
3129                 last_extent = extent_key->objectid;
3130         }
3131         ret = 0;
3132 out:
3133         btrfs_release_path(extent_root, path);
3134         return ret;
3135 }
3136
3137 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
3138 {
3139         u64 num_devices;
3140         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
3141                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
3142
3143         num_devices = root->fs_info->fs_devices->num_devices;
3144         if (num_devices == 1) {
3145                 stripped |= BTRFS_BLOCK_GROUP_DUP;
3146                 stripped = flags & ~stripped;
3147
3148                 /* turn raid0 into single device chunks */
3149                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
3150                         return stripped;
3151
3152                 /* turn mirroring into duplication */
3153                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3154                              BTRFS_BLOCK_GROUP_RAID10))
3155                         return stripped | BTRFS_BLOCK_GROUP_DUP;
3156                 return flags;
3157         } else {
3158                 /* they already had raid on here, just return */
3159                 if (flags & stripped)
3160                         return flags;
3161
3162                 stripped |= BTRFS_BLOCK_GROUP_DUP;
3163                 stripped = flags & ~stripped;
3164
3165                 /* switch duplicated blocks with raid1 */
3166                 if (flags & BTRFS_BLOCK_GROUP_DUP)
3167                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
3168
3169                 /* turn single device chunks into raid0 */
3170                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
3171         }
3172         return flags;
3173 }
3174
/*
 * Allocate a replacement chunk so the data still in shrink_block_group
 * has somewhere to go during relocation.
 *
 * Called with root->fs_info->alloc_mutex held; the mutex is dropped and
 * reacquired around btrfs_start_transaction()/btrfs_end_transaction()
 * (presumably to avoid holding alloc_mutex while joining a transaction —
 * NOTE(review): confirm the intended lock ordering).  Callers must be
 * prepared for alloc_mutex to have been released and retaken.
 *
 * If the group is empty (used == 0) nothing is allocated.  @force is
 * passed straight through to do_chunk_alloc().  Always returns 0.
 */
int __alloc_chunk_for_shrink(struct btrfs_root *root,
                     struct btrfs_block_group_cache *shrink_block_group,
                     int force)
{
        struct btrfs_trans_handle *trans;
        u64 new_alloc_flags;
        u64 calc;

        spin_lock(&shrink_block_group->lock);
        if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
                spin_unlock(&shrink_block_group->lock);
                /* drop alloc_mutex before starting a transaction */
                mutex_unlock(&root->fs_info->alloc_mutex);

                trans = btrfs_start_transaction(root, 1);
                mutex_lock(&root->fs_info->alloc_mutex);
                spin_lock(&shrink_block_group->lock);

                /* the new chunk may use a different raid profile */
                new_alloc_flags = update_block_group_flags(root,
                                                   shrink_block_group->flags);
                if (new_alloc_flags != shrink_block_group->flags) {
                        /* profile changes: only need room for the used bytes */
                        calc =
                             btrfs_block_group_used(&shrink_block_group->item);
                } else {
                        /* same profile: reserve the full group size */
                        calc = shrink_block_group->key.offset;
                }
                spin_unlock(&shrink_block_group->lock);

                /* 2MB of slack on top of the computed size */
                do_chunk_alloc(trans, root->fs_info->extent_root,
                               calc + 2 * 1024 * 1024, new_alloc_flags, force);

                /* same dance in reverse to end the transaction */
                mutex_unlock(&root->fs_info->alloc_mutex);
                btrfs_end_transaction(trans, root);
                mutex_lock(&root->fs_info->alloc_mutex);
        } else
                spin_unlock(&shrink_block_group->lock);
        return 0;
}
3212
/*
 * Empty and remove the block group that starts at shrink_start.
 *
 * Repeatedly scans the extent tree for extent items inside the group,
 * relocating each one, then commits transactions and retries until a
 * pass finds nothing.  Finally the block group item, its cache entry and
 * its free-space-cache bits are deleted.
 *
 * Locking: called without alloc_mutex; takes it here and drops/retakes
 * it around every transaction start/commit (same ordering discipline as
 * __alloc_chunk_for_shrink).  Returns 0 on success or a negative error.
 */
int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
{
        struct btrfs_trans_handle *trans;
        struct btrfs_root *tree_root = root->fs_info->tree_root;
        struct btrfs_path *path;
        u64 cur_byte;
        u64 total_found;
        u64 shrink_last_byte;
        struct btrfs_block_group_cache *shrink_block_group;
        struct btrfs_fs_info *info = root->fs_info;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
        u32 nritems;
        int ret;
        int progress;

        mutex_lock(&root->fs_info->alloc_mutex);
        shrink_block_group = btrfs_lookup_block_group(root->fs_info,
                                                      shrink_start);
        BUG_ON(!shrink_block_group);

        /* first byte past the end of the group being removed */
        shrink_last_byte = shrink_block_group->key.objectid +
                shrink_block_group->key.offset;

        /* this group no longer counts toward available space */
        shrink_block_group->space_info->total_bytes -=
                shrink_block_group->key.offset;
        path = btrfs_alloc_path();
        /* all scanning below is against the extent tree */
        root = root->fs_info->extent_root;
        path->reada = 2;

        printk("btrfs relocating block group %llu flags %llu\n",
               (unsigned long long)shrink_start,
               (unsigned long long)shrink_block_group->flags);

        /* make sure relocated data has somewhere to go */
        __alloc_chunk_for_shrink(root, shrink_block_group, 1);

again:

        /* read-only keeps new allocations out of the group */
        shrink_block_group->ro = 1;

        total_found = 0;
        progress = 0;
        key.objectid = shrink_start;
        key.offset = 0;
        key.type = 0;
        cur_byte = key.objectid;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto out;

        /* back up to catch an extent that straddles shrink_start */
        ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
        if (ret < 0)
                goto out;

        if (ret == 0) {
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                if (found_key.objectid + found_key.offset > shrink_start &&
                    found_key.objectid < shrink_last_byte) {
                        cur_byte = found_key.objectid;
                        key.objectid = cur_byte;
                }
        }
        btrfs_release_path(root, path);

        /* walk every extent item inside the group and relocate it */
        while(1) {
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
                if (ret < 0)
                        goto out;

next:
                leaf = path->nodes[0];
                nritems = btrfs_header_nritems(leaf);
                if (path->slots[0] >= nritems) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                goto out;
                        if (ret == 1) {
                                ret = 0;
                                break;
                        }
                        leaf = path->nodes[0];
                        nritems = btrfs_header_nritems(leaf);
                }

                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

                if (found_key.objectid >= shrink_last_byte)
                        break;

                /* be scheduler-friendly; re-search after resched since
                 * the tree may have changed under us
                 */
                if (progress && need_resched()) {
                        memcpy(&key, &found_key, sizeof(key));
                        cond_resched();
                        btrfs_release_path(root, path);
                        btrfs_search_slot(NULL, root, &key, path, 0, 0);
                        progress = 0;
                        goto next;
                }
                progress = 1;

                /* skip non-extent items and extents already passed */
                if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
                    found_key.objectid + found_key.offset <= cur_byte) {
                        memcpy(&key, &found_key, sizeof(key));
                        key.offset++;
                        path->slots[0]++;
                        goto next;
                }

                total_found++;
                cur_byte = found_key.objectid + found_key.offset;
                key.objectid = cur_byte;
                btrfs_release_path(root, path);
                ret = relocate_one_extent(root, path, &found_key);
                __alloc_chunk_for_shrink(root, shrink_block_group, 0);
        }

        btrfs_release_path(root, path);

        if (total_found > 0) {
                printk("btrfs relocate found %llu last extent was %llu\n",
                       (unsigned long long)total_found,
                       (unsigned long long)found_key.objectid);
                /* commit, clean up old roots and ordered data, then
                 * rescan: relocation may have created new references
                 */
                mutex_unlock(&root->fs_info->alloc_mutex);
                trans = btrfs_start_transaction(tree_root, 1);
                btrfs_commit_transaction(trans, tree_root);

                btrfs_clean_old_snapshots(tree_root);

                btrfs_wait_ordered_extents(tree_root);

                trans = btrfs_start_transaction(tree_root, 1);
                btrfs_commit_transaction(trans, tree_root);
                mutex_lock(&root->fs_info->alloc_mutex);
                goto again;
        }

        /*
         * we've freed all the extents, now remove the block
         * group item from the tree
         */
        mutex_unlock(&root->fs_info->alloc_mutex);

        trans = btrfs_start_transaction(root, 1);

        mutex_lock(&root->fs_info->alloc_mutex);
        memcpy(&key, &shrink_block_group->key, sizeof(key));

        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret > 0)
                ret = -EIO;
        if (ret < 0) {
                btrfs_end_transaction(trans, root);
                goto out;
        }

        /* drop the in-memory caches covering the group's range */
        clear_extent_bits(&info->block_group_cache, key.objectid,
                          key.objectid + key.offset - 1,
                          (unsigned int)-1, GFP_NOFS);


        clear_extent_bits(&info->free_space_cache,
                           key.objectid, key.objectid + key.offset - 1,
                           (unsigned int)-1, GFP_NOFS);

        /* poison then free; nothing may reference this group now */
        memset(shrink_block_group, 0, sizeof(*shrink_block_group));
        kfree(shrink_block_group);

        btrfs_del_item(trans, root, path);
        btrfs_release_path(root, path);
        mutex_unlock(&root->fs_info->alloc_mutex);
        btrfs_commit_transaction(trans, root);

        mutex_lock(&root->fs_info->alloc_mutex);

        /* the code to unpin extents might set a few bits in the free
         * space cache for this range again
         */
        clear_extent_bits(&info->free_space_cache,
                           key.objectid, key.objectid + key.offset - 1,
                           (unsigned int)-1, GFP_NOFS);
out:
        btrfs_free_path(path);
        mutex_unlock(&root->fs_info->alloc_mutex);
        return ret;
}
3400
3401 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3402                            struct btrfs_key *key)
3403 {
3404         int ret = 0;
3405         struct btrfs_key found_key;
3406         struct extent_buffer *leaf;
3407         int slot;
3408
3409         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3410         if (ret < 0)
3411                 goto out;
3412
3413         while(1) {
3414                 slot = path->slots[0];
3415                 leaf = path->nodes[0];
3416                 if (slot >= btrfs_header_nritems(leaf)) {
3417                         ret = btrfs_next_leaf(root, path);
3418                         if (ret == 0)
3419                                 continue;
3420                         if (ret < 0)
3421                                 goto out;
3422                         break;
3423                 }
3424                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3425
3426                 if (found_key.objectid >= key->objectid &&
3427                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3428                         ret = 0;
3429                         goto out;
3430                 }
3431                 path->slots[0]++;
3432         }
3433         ret = -ENOENT;
3434 out:
3435         return ret;
3436 }
3437
3438 int btrfs_read_block_groups(struct btrfs_root *root)
3439 {
3440         struct btrfs_path *path;
3441         int ret;
3442         int bit;
3443         struct btrfs_block_group_cache *cache;
3444         struct btrfs_fs_info *info = root->fs_info;
3445         struct btrfs_space_info *space_info;
3446         struct extent_io_tree *block_group_cache;
3447         struct btrfs_key key;
3448         struct btrfs_key found_key;
3449         struct extent_buffer *leaf;
3450
3451         block_group_cache = &info->block_group_cache;
3452         root = info->extent_root;
3453         key.objectid = 0;
3454         key.offset = 0;
3455         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3456         path = btrfs_alloc_path();
3457         if (!path)
3458                 return -ENOMEM;
3459
3460         mutex_lock(&root->fs_info->alloc_mutex);
3461         while(1) {
3462                 ret = find_first_block_group(root, path, &key);
3463                 if (ret > 0) {
3464                         ret = 0;
3465                         goto error;
3466                 }
3467                 if (ret != 0)
3468                         goto error;
3469
3470                 leaf = path->nodes[0];
3471                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3472                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3473                 if (!cache) {
3474                         ret = -ENOMEM;
3475                         break;
3476                 }
3477
3478                 spin_lock_init(&cache->lock);
3479                 read_extent_buffer(leaf, &cache->item,
3480                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3481                                    sizeof(cache->item));
3482                 memcpy(&cache->key, &found_key, sizeof(found_key));
3483
3484                 key.objectid = found_key.objectid + found_key.offset;
3485                 btrfs_release_path(root, path);
3486                 cache->flags = btrfs_block_group_flags(&cache->item);
3487                 bit = 0;
3488                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3489                         bit = BLOCK_GROUP_DATA;
3490                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3491                         bit = BLOCK_GROUP_SYSTEM;
3492                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3493                         bit = BLOCK_GROUP_METADATA;
3494                 }
3495                 set_avail_alloc_bits(info, cache->flags);
3496
3497                 ret = update_space_info(info, cache->flags, found_key.offset,
3498                                         btrfs_block_group_used(&cache->item),
3499                                         &space_info);
3500                 BUG_ON(ret);
3501                 cache->space_info = space_info;
3502
3503                 /* use EXTENT_LOCKED to prevent merging */
3504                 set_extent_bits(block_group_cache, found_key.objectid,
3505                                 found_key.objectid + found_key.offset - 1,
3506                                 EXTENT_LOCKED, GFP_NOFS);
3507                 set_state_private(block_group_cache, found_key.objectid,
3508                                   (unsigned long)cache);
3509                 set_extent_bits(block_group_cache, found_key.objectid,
3510                                 found_key.objectid + found_key.offset - 1,
3511                                 bit | EXTENT_LOCKED, GFP_NOFS);
3512                 if (key.objectid >=
3513                     btrfs_super_total_bytes(&info->super_copy))
3514                         break;
3515         }
3516         ret = 0;
3517 error:
3518         btrfs_free_path(path);
3519         mutex_unlock(&root->fs_info->alloc_mutex);
3520         return ret;
3521 }
3522
3523 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3524                            struct btrfs_root *root, u64 bytes_used,
3525                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3526                            u64 size)
3527 {
3528         int ret;
3529         int bit = 0;
3530         struct btrfs_root *extent_root;
3531         struct btrfs_block_group_cache *cache;
3532         struct extent_io_tree *block_group_cache;
3533
3534         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
3535         extent_root = root->fs_info->extent_root;
3536         block_group_cache = &root->fs_info->block_group_cache;
3537
3538         cache = kzalloc(sizeof(*cache), GFP_NOFS);
3539         BUG_ON(!cache);
3540         cache->key.objectid = chunk_offset;
3541         cache->key.offset = size;
3542         spin_lock_init(&cache->lock);
3543         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3544
3545         btrfs_set_block_group_used(&cache->item, bytes_used);
3546         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3547         cache->flags = type;
3548         btrfs_set_block_group_flags(&cache->item, type);
3549
3550         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3551                                 &cache->space_info);
3552         BUG_ON(ret);
3553
3554         bit = block_group_state_bits(type);
3555         set_extent_bits(block_group_cache, chunk_offset,
3556                         chunk_offset + size - 1,
3557                         EXTENT_LOCKED, GFP_NOFS);
3558         set_state_private(block_group_cache, chunk_offset,
3559                           (unsigned long)cache);
3560         set_extent_bits(block_group_cache, chunk_offset,
3561                         chunk_offset + size - 1,
3562                         bit | EXTENT_LOCKED, GFP_NOFS);
3563
3564         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3565                                 sizeof(cache->item));
3566         BUG_ON(ret);
3567
3568         finish_current_insert(trans, extent_root);
3569         ret = del_pending_extents(trans, extent_root);
3570         BUG_ON(ret);
3571         set_avail_alloc_bits(extent_root->fs_info, type);
3572
3573         return 0;
3574 }