]>
Commit | Line | Data |
---|---|---|
0f9dd46c JB |
1 | /* |
2 | * Copyright (C) 2008 Red Hat. All rights reserved. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public | |
6 | * License v2 as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, | |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * You should have received a copy of the GNU General Public | |
14 | * License along with this program; if not, write to the | |
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
 * Boston, MA 02111-1307, USA.
17 | */ | |
18 | ||
96303081 | 19 | #include <linux/pagemap.h> |
0f9dd46c | 20 | #include <linux/sched.h> |
5a0e3ad6 | 21 | #include <linux/slab.h> |
96303081 | 22 | #include <linux/math64.h> |
0f9dd46c | 23 | #include "ctree.h" |
fa9c0d79 CM |
24 | #include "free-space-cache.h" |
25 | #include "transaction.h" | |
0af3d00b | 26 | #include "disk-io.h" |
fa9c0d79 | 27 | |
96303081 JB |
28 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
29 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) | |
0f9dd46c | 30 | |
0af3d00b JB |
/*
 * Look up the inode that backs the on-disk free space cache for
 * @block_group, caching it on the block group for later lookups.
 *
 * Returns the inode with an elevated reference count on success, or an
 * ERR_PTR on failure (-ENOENT when no cache item / inode exists, or the
 * error from the tree search or btrfs_iget).  The caller must iput() the
 * returned inode.  @path is released before returning.
 */
struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	/* fast path: reuse the inode already cached on the block group */
	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	/* free space headers are keyed by the block group's objectid */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		/* no header item: this block group has no cache yet */
		btrfs_release_path(root, path);
		return ERR_PTR(-ENOENT);
	}

	/* the header stores the key of the inode holding the cache data */
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(root, path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	/* cache the inode on the block group unless the fs is closing */
	spin_lock(&block_group->lock);
	if (!root->fs_info->closing) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}
88 | ||
/*
 * Create the inode that will hold the free space cache for @block_group,
 * then insert the matching free space header item that points at it.
 *
 * Returns 0 on success or a negative errno.  @path is released on all
 * return paths after it has been used.
 */
int create_free_space_inode(struct btrfs_root *root,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 objectid;
	int ret;

	ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
	if (ret < 0)
		return ret;

	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
	if (ret)
		return ret;

	/* fill in the freshly inserted inode item in place */
	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	/* the cache file must never be compressed or data-checksummed */
	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
			      BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item,
				    block_group->key.objectid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	/* now insert the header item that records the inode's key */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(root, path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	return 0;
}
151 | ||
/*
 * Throw away the current contents of the free space cache inode by
 * truncating it to zero, so it can be rewritten from scratch.
 *
 * Returns 0 on success or a negative errno from the reservation check,
 * the truncate, or the inode update.
 */
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	loff_t oldsize;
	int ret = 0;

	/* truncation needs metadata space; borrow the orphan block rsv */
	trans->block_rsv = root->orphan_block_rsv;
	ret = btrfs_block_rsv_check(trans, root,
				    root->orphan_block_rsv,
				    0, 5);
	if (ret)
		return ret;

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret) {
		WARN_ON(1);
		return ret;
	}

	return btrfs_update_inode(trans, root, inode);
}
184 | ||
96303081 JB |
185 | static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, |
186 | u64 offset) | |
0f9dd46c | 187 | { |
96303081 JB |
188 | BUG_ON(offset < bitmap_start); |
189 | offset -= bitmap_start; | |
190 | return (unsigned long)(div64_u64(offset, sectorsize)); | |
191 | } | |
0f9dd46c | 192 | |
96303081 JB |
193 | static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize) |
194 | { | |
195 | return (unsigned long)(div64_u64(bytes, sectorsize)); | |
196 | } | |
0f9dd46c | 197 | |
96303081 JB |
198 | static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group, |
199 | u64 offset) | |
200 | { | |
201 | u64 bitmap_start; | |
202 | u64 bytes_per_bitmap; | |
0f9dd46c | 203 | |
96303081 JB |
204 | bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize; |
205 | bitmap_start = offset - block_group->key.objectid; | |
206 | bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); | |
207 | bitmap_start *= bytes_per_bitmap; | |
208 | bitmap_start += block_group->key.objectid; | |
0f9dd46c | 209 | |
96303081 | 210 | return bitmap_start; |
0f9dd46c JB |
211 | } |
212 | ||
96303081 JB |
/*
 * Insert @node into the offset-sorted free space rb-tree @root at @offset.
 * @bitmap tells whether the entry being inserted is a bitmap entry, which
 * decides the tie-break direction when an extent entry and a bitmap entry
 * share the same offset.  Always returns 0.
 */
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset. If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap. So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically. If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				/* existing entry should be the extent */
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				/* existing entry should be the bitmap */
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}
257 | ||
/*
 * searches the tree for the given offset.
 *
 * bitmap_only - If set, only a bitmap entry at exactly @offset is
 * acceptable; extent entries are skipped over.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		/* exact-offset hit */
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			/* nothing before offset; fuzzy may take the next */
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		/* prefer a preceding extent entry that still covers offset */
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	/* fuzzy: walk forward to the first entry at or beyond offset */
	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}
391 | ||
392 | static void unlink_free_space(struct btrfs_block_group_cache *block_group, | |
393 | struct btrfs_free_space *info) | |
394 | { | |
395 | rb_erase(&info->offset_index, &block_group->free_space_offset); | |
96303081 | 396 | block_group->free_extents--; |
817d52f8 | 397 | block_group->free_space -= info->bytes; |
0f9dd46c JB |
398 | } |
399 | ||
400 | static int link_free_space(struct btrfs_block_group_cache *block_group, | |
401 | struct btrfs_free_space *info) | |
402 | { | |
403 | int ret = 0; | |
404 | ||
96303081 | 405 | BUG_ON(!info->bitmap && !info->bytes); |
0f9dd46c | 406 | ret = tree_insert_offset(&block_group->free_space_offset, info->offset, |
96303081 | 407 | &info->offset_index, (info->bitmap != NULL)); |
0f9dd46c JB |
408 | if (ret) |
409 | return ret; | |
410 | ||
817d52f8 | 411 | block_group->free_space += info->bytes; |
96303081 JB |
412 | block_group->free_extents++; |
413 | return ret; | |
414 | } | |
415 | ||
/*
 * Recompute block_group->extents_thresh, the number of extent entries we
 * allow before new free space starts going into bitmaps, so that total
 * tracking memory stays near MAX_CACHE_BYTES_PER_GIG per GiB of space.
 */
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	/* bitmaps already consume the whole budget: no room for extents */
	if (bitmap_bytes >= max_bytes) {
		block_group->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	block_group->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}
452 | ||
817d52f8 JB |
453 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, |
454 | struct btrfs_free_space *info, u64 offset, | |
455 | u64 bytes) | |
96303081 JB |
456 | { |
457 | unsigned long start, end; | |
458 | unsigned long i; | |
459 | ||
817d52f8 JB |
460 | start = offset_to_bit(info->offset, block_group->sectorsize, offset); |
461 | end = start + bytes_to_bits(bytes, block_group->sectorsize); | |
96303081 JB |
462 | BUG_ON(end > BITS_PER_BITMAP); |
463 | ||
464 | for (i = start; i < end; i++) | |
465 | clear_bit(i, info->bitmap); | |
466 | ||
467 | info->bytes -= bytes; | |
817d52f8 | 468 | block_group->free_space -= bytes; |
96303081 JB |
469 | } |
470 | ||
817d52f8 JB |
471 | static void bitmap_set_bits(struct btrfs_block_group_cache *block_group, |
472 | struct btrfs_free_space *info, u64 offset, | |
473 | u64 bytes) | |
96303081 JB |
474 | { |
475 | unsigned long start, end; | |
476 | unsigned long i; | |
477 | ||
817d52f8 JB |
478 | start = offset_to_bit(info->offset, block_group->sectorsize, offset); |
479 | end = start + bytes_to_bits(bytes, block_group->sectorsize); | |
96303081 JB |
480 | BUG_ON(end > BITS_PER_BITMAP); |
481 | ||
482 | for (i = start; i < end; i++) | |
483 | set_bit(i, info->bitmap); | |
484 | ||
485 | info->bytes += bytes; | |
817d52f8 | 486 | block_group->free_space += bytes; |
96303081 JB |
487 | } |
488 | ||
/*
 * Scan @bitmap_info for a run of set bits at least *@bytes long, starting
 * at or after *@offset.  On success returns 0 and rewrites *@offset to the
 * run's byte offset and *@bytes to the full run length found; returns -1
 * when no suitable run exists.
 */
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	/* clamp the starting point to the bitmap's own range */
	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	/* jump from one set-bit run to the next until one is big enough */
	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}
522 | ||
/*
 * Find a free space entry (extent or bitmap) holding at least *@bytes at
 * or after *@offset.  On success returns the entry and rewrites *@offset
 * and *@bytes to describe the space found; returns NULL when nothing
 * fits.  Caller holds tree_lock.  @debug is currently unused here.
 */
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	/* fuzzy search from the bitmap window containing *offset */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	/* walk forward in offset order until something big enough shows up */
	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			/* bitmap totals can be fragmented; verify a real run */
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}
559 | ||
/*
 * Turn the pre-allocated entry @info into a fresh (empty) bitmap entry
 * anchored at the bitmap window containing @offset, link it into the tree,
 * and bump the bitmap count/thresholds.  Caller holds tree_lock.
 */
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	/* ceiling-divide the group size to bound how many bitmaps fit */
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);
	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	info->bytes = 0;
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}
575 | ||
/*
 * Remove [*@offset, *@offset + *@bytes) from @bitmap_info, continuing into
 * following bitmap entries when the range spans more than one window.
 * Returns 0 on success, -EINVAL if the space to remove runs past the last
 * entry, or -EAGAIN when the caller must restart the lookup (the next
 * entry is an extent, or the remaining range was not found in the next
 * bitmap).  Caller holds tree_lock.
 */
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	/* last byte covered by this bitmap window */
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case somethings not
	 * working. Search the bitmap for the space we are trying to use to
	 * make sure its actually there. If its not there then we need to stop
	 * because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		/* range spills past this window: clear the tail we cover */
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		/* range fits entirely inside this window */
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			/* this bitmap is now empty; reclaim it */
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		/* done removing and the bitmap emptied out; reclaim it */
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}
665 | ||
/*
 * Try to record @info's range in bitmap form instead of as an extent
 * entry.  Returns 0 when the range should stay an extent entry (caller
 * keeps ownership of @info), 1 when the range was fully absorbed into
 * bitmaps (and @info was consumed/freed), or a negative errno.
 *
 * NOTE: this may temporarily drop and re-acquire block_group->tree_lock
 * around the GFP_NOFS allocations, so tree state can change underneath
 * the caller across this call.
 */
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		/* first pass must find or create a bitmap */
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		/* range spills into the next window; set what fits here */
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		/* reuse the caller's entry as the new bitmap entry */
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		/* must allocate; drop the spinlock around GFP_NOFS allocs */
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	/* any entry not linked into the tree is freed here */
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}
763 | ||
6226cb0a JB |
/*
 * Record [@offset, @offset + @bytes) as free space in @block_group,
 * merging with directly adjacent extent entries, or diverting the range
 * into a bitmap when no mergeable neighbor exists and the extent
 * threshold has been reached.  Returns 0 on success or a negative errno.
 */
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			/* fully absorbed into a bitmap; info was consumed */
			ret = 0;
			goto out;
		}
	}

	/* merge with the following extent entry, if any */
	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	/* merge with the preceding extent entry when it abuts exactly */
	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		/* duplicate free space entries indicate accounting corruption */
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
837 | ||
6226cb0a JB |
/*
 * Remove the range [offset, offset + bytes) from @block_group's in-memory
 * free space cache.
 *
 * The range may be described by a plain extent entry, by a bitmap entry,
 * or start in one entry and continue into the next.  Returns 0 on success,
 * -EINVAL if the cache does not actually cover the requested range.
 */
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	/* first look for an extent entry containing @offset */
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	/*
	 * The entry we found is too small, so part of the range must live in
	 * the next entry.  Sanity check that the next entry really covers the
	 * remainder before switching to it.
	 */
	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
					     struct btrfs_free_space,
					     offset_index);

		/* a bitmap entry spans BITS_PER_BITMAP sectors from its offset */
		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	/* exact match: drop the whole entry (and its bitmap, if any) */
	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	/* removing from the front of an extent entry: just trim it */
	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	/* removing from the middle (or tail) of an extent entry */
	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		/* drop the lock: btrfs_add_free_space takes it itself */
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	/*
	 * Bitmap case.  remove_from_bitmap() returns -EAGAIN when the range
	 * continues past this bitmap; it advances offset/bytes so we retry
	 * the search for the remainder.
	 */
	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}
954 | ||
0f9dd46c JB |
955 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, |
956 | u64 bytes) | |
957 | { | |
958 | struct btrfs_free_space *info; | |
959 | struct rb_node *n; | |
960 | int count = 0; | |
961 | ||
962 | for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) { | |
963 | info = rb_entry(n, struct btrfs_free_space, offset_index); | |
964 | if (info->bytes >= bytes) | |
965 | count++; | |
96303081 | 966 | printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", |
21380931 | 967 | (unsigned long long)info->offset, |
96303081 JB |
968 | (unsigned long long)info->bytes, |
969 | (info->bitmap) ? "yes" : "no"); | |
0f9dd46c | 970 | } |
96303081 JB |
971 | printk(KERN_INFO "block group has cluster?: %s\n", |
972 | list_empty(&block_group->cluster_list) ? "no" : "yes"); | |
0f9dd46c JB |
973 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" |
974 | "\n", count); | |
975 | } | |
976 | ||
977 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) | |
978 | { | |
979 | struct btrfs_free_space *info; | |
980 | struct rb_node *n; | |
981 | u64 ret = 0; | |
982 | ||
983 | for (n = rb_first(&block_group->free_space_offset); n; | |
984 | n = rb_next(n)) { | |
985 | info = rb_entry(n, struct btrfs_free_space, offset_index); | |
986 | ret += info->bytes; | |
987 | } | |
988 | ||
989 | return ret; | |
990 | } | |
991 | ||
/*
 * for a given cluster, put all of its extents back into the free
 * space cache. If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already. In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	/* detach the cluster from the block group before moving entries */
	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	/*
	 * a bitmap cluster never moved entries out of the free space tree,
	 * so there is nothing to put back
	 */
	if (bitmap)
		goto out;

	/* move every extent entry back into the block group's offset tree */
	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	/*
	 * drop the ref the cluster held on the block group.
	 * NOTE(review): this put also happens on the raced (mismatch) path
	 * above — confirm every caller expects a reference to be consumed
	 * even when the cluster no longer points at @block_group.
	 */
	btrfs_put_block_group(block_group);
	return 0;
}
1036 | ||
/*
 * Tear down @block_group's entire free space cache: return all clusters
 * to the cache, then free every extent and bitmap entry.  The tree_lock
 * is dropped periodically (via need_resched) to keep latency down while
 * walking what may be a large tree.
 */
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	/* first return every cluster's entries to the free space tree */
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	/* now free every entry; rb_last so unlink doesn't rebalance under us */
	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}
1074 | ||
/*
 * Allocate @bytes of free space (searching for @bytes + @empty_size of
 * room) from @block_group's cache, starting the search at @offset.
 *
 * Returns the logical start of the allocated range, or 0 if nothing
 * suitable was found.
 */
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	/* find_free_space updates @offset to the start of the free area */
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		/* clear the allocated bits; drop the bitmap once it's empty */
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		/* trim the allocation off the front of the extent entry */
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}
fa9c0d79 CM |
1112 | |
1113 | /* | |
1114 | * given a cluster, put all of its extents back into the free space | |
1115 | * cache. If a block group is passed, this function will only free | |
1116 | * a cluster that belongs to the passed block group. | |
1117 | * | |
1118 | * Otherwise, it'll get a reference on the block group pointed to by the | |
1119 | * cluster and remove the cluster from it. | |
1120 | */ | |
1121 | int btrfs_return_cluster_to_free_space( | |
1122 | struct btrfs_block_group_cache *block_group, | |
1123 | struct btrfs_free_cluster *cluster) | |
1124 | { | |
1125 | int ret; | |
1126 | ||
1127 | /* first, get a safe pointer to the block group */ | |
1128 | spin_lock(&cluster->lock); | |
1129 | if (!block_group) { | |
1130 | block_group = cluster->block_group; | |
1131 | if (!block_group) { | |
1132 | spin_unlock(&cluster->lock); | |
1133 | return 0; | |
1134 | } | |
1135 | } else if (cluster->block_group != block_group) { | |
1136 | /* someone else has already freed it don't redo their work */ | |
1137 | spin_unlock(&cluster->lock); | |
1138 | return 0; | |
1139 | } | |
1140 | atomic_inc(&block_group->count); | |
1141 | spin_unlock(&cluster->lock); | |
1142 | ||
1143 | /* now return any extents the cluster had on it */ | |
1144 | spin_lock(&block_group->tree_lock); | |
1145 | ret = __btrfs_return_cluster_to_free_space(block_group, cluster); | |
1146 | spin_unlock(&block_group->tree_lock); | |
1147 | ||
1148 | /* finally drop our ref */ | |
1149 | btrfs_put_block_group(block_group); | |
1150 | return ret; | |
1151 | } | |
1152 | ||
/*
 * Allocate @bytes from a cluster that points at a bitmap entry rather
 * than at a list of extent entries.  Returns the start offset of the
 * allocation (at or above @min_start), or 0 on failure.
 */
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	/* tree_lock before cluster->lock: same order as the other paths */
	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	/* cluster was stolen/freed by someone else */
	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	/* search_bitmap updates search_start to the found free run */
	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}
1200 | ||
/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	/* bitmap-backed clusters are handled by a separate path */
	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	/* max_size is the largest single extent in the cluster */
	if (bytes > cluster->max_size)
		goto out;

	/* someone raced in and freed/reused the cluster */
	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);

	while(1) {
		/* skip entries that are too small or start below min_start */
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		/* carve the allocation off the front of this entry */
		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}
1258 | ||
/*
 * Try to build a cluster out of a single bitmap entry: look for a window
 * of free bits totalling at least @bytes, where each free run is at least
 * @min_bytes.  On success the cluster is pointed at the bitmap
 * (points_to_bitmap = true, window_start set) and 0 is returned;
 * returns -1 if no suitable window exists.
 */
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;	/* min run length, in bits */
	unsigned long total_bits;	/* total amount wanted, in bits */
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	/* start scanning at @offset (clamped to the bitmap's start) */
	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	/* find the next run of set (free) bits of at least search_bits */
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	/* remember where the window began */
	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		/*
		 * window has grown too sparse: restart it from the current
		 * position (heuristic: more than 2x the wanted size wide)
		 */
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}
1321 | ||
fa9c0d79 CM |
1322 | /* |
1323 | * here we try to find a cluster of blocks in a block group. The goal | |
1324 | * is to find at least bytes free and up to empty_size + bytes free. | |
1325 | * We might not find them all in one contiguous area. | |
1326 | * | |
1327 | * returns zero and sets up cluster if things worked out, otherwise | |
1328 | * it returns -enospc | |
1329 | */ | |
1330 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |
451d7585 | 1331 | struct btrfs_root *root, |
fa9c0d79 CM |
1332 | struct btrfs_block_group_cache *block_group, |
1333 | struct btrfs_free_cluster *cluster, | |
1334 | u64 offset, u64 bytes, u64 empty_size) | |
1335 | { | |
1336 | struct btrfs_free_space *entry = NULL; | |
1337 | struct rb_node *node; | |
1338 | struct btrfs_free_space *next; | |
96303081 | 1339 | struct btrfs_free_space *last = NULL; |
fa9c0d79 CM |
1340 | u64 min_bytes; |
1341 | u64 window_start; | |
1342 | u64 window_free; | |
1343 | u64 max_extent = 0; | |
96303081 | 1344 | bool found_bitmap = false; |
fa9c0d79 CM |
1345 | int ret; |
1346 | ||
1347 | /* for metadata, allow allocates with more holes */ | |
451d7585 CM |
1348 | if (btrfs_test_opt(root, SSD_SPREAD)) { |
1349 | min_bytes = bytes + empty_size; | |
1350 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | |
fa9c0d79 CM |
1351 | /* |
1352 | * we want to do larger allocations when we are | |
1353 | * flushing out the delayed refs, it helps prevent | |
1354 | * making more work as we go along. | |
1355 | */ | |
1356 | if (trans->transaction->delayed_refs.flushing) | |
1357 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | |
1358 | else | |
1359 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | |
1360 | } else | |
1361 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | |
1362 | ||
1363 | spin_lock(&block_group->tree_lock); | |
1364 | spin_lock(&cluster->lock); | |
1365 | ||
1366 | /* someone already found a cluster, hooray */ | |
1367 | if (cluster->block_group) { | |
1368 | ret = 0; | |
1369 | goto out; | |
1370 | } | |
1371 | again: | |
96303081 | 1372 | entry = tree_search_offset(block_group, offset, found_bitmap, 1); |
fa9c0d79 CM |
1373 | if (!entry) { |
1374 | ret = -ENOSPC; | |
1375 | goto out; | |
1376 | } | |
96303081 JB |
1377 | |
1378 | /* | |
1379 | * If found_bitmap is true, we exhausted our search for extent entries, | |
1380 | * and we just want to search all of the bitmaps that we can find, and | |
1381 | * ignore any extent entries we find. | |
1382 | */ | |
1383 | while (entry->bitmap || found_bitmap || | |
1384 | (!entry->bitmap && entry->bytes < min_bytes)) { | |
1385 | struct rb_node *node = rb_next(&entry->offset_index); | |
1386 | ||
1387 | if (entry->bitmap && entry->bytes > bytes + empty_size) { | |
1388 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, | |
1389 | offset, bytes + empty_size, | |
1390 | min_bytes); | |
1391 | if (!ret) | |
1392 | goto got_it; | |
1393 | } | |
1394 | ||
1395 | if (!node) { | |
1396 | ret = -ENOSPC; | |
1397 | goto out; | |
1398 | } | |
1399 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | |
1400 | } | |
1401 | ||
1402 | /* | |
1403 | * We already searched all the extent entries from the passed in offset | |
1404 | * to the end and didn't find enough space for the cluster, and we also | |
1405 | * didn't find any bitmaps that met our criteria, just go ahead and exit | |
1406 | */ | |
1407 | if (found_bitmap) { | |
1408 | ret = -ENOSPC; | |
1409 | goto out; | |
1410 | } | |
1411 | ||
1412 | cluster->points_to_bitmap = false; | |
fa9c0d79 CM |
1413 | window_start = entry->offset; |
1414 | window_free = entry->bytes; | |
1415 | last = entry; | |
1416 | max_extent = entry->bytes; | |
1417 | ||
96303081 | 1418 | while (1) { |
fa9c0d79 CM |
1419 | /* out window is just right, lets fill it */ |
1420 | if (window_free >= bytes + empty_size) | |
1421 | break; | |
1422 | ||
1423 | node = rb_next(&last->offset_index); | |
1424 | if (!node) { | |
96303081 JB |
1425 | if (found_bitmap) |
1426 | goto again; | |
fa9c0d79 CM |
1427 | ret = -ENOSPC; |
1428 | goto out; | |
1429 | } | |
1430 | next = rb_entry(node, struct btrfs_free_space, offset_index); | |
1431 | ||
96303081 JB |
1432 | /* |
1433 | * we found a bitmap, so if this search doesn't result in a | |
1434 | * cluster, we know to go and search again for the bitmaps and | |
1435 | * start looking for space there | |
1436 | */ | |
1437 | if (next->bitmap) { | |
1438 | if (!found_bitmap) | |
1439 | offset = next->offset; | |
1440 | found_bitmap = true; | |
1441 | last = next; | |
1442 | continue; | |
1443 | } | |
1444 | ||
fa9c0d79 CM |
1445 | /* |
1446 | * we haven't filled the empty size and the window is | |
1447 | * very large. reset and try again | |
1448 | */ | |
c6044801 CM |
1449 | if (next->offset - (last->offset + last->bytes) > 128 * 1024 || |
1450 | next->offset - window_start > (bytes + empty_size) * 2) { | |
fa9c0d79 CM |
1451 | entry = next; |
1452 | window_start = entry->offset; | |
1453 | window_free = entry->bytes; | |
1454 | last = entry; | |
01dea1ef | 1455 | max_extent = entry->bytes; |
fa9c0d79 CM |
1456 | } else { |
1457 | last = next; | |
1458 | window_free += next->bytes; | |
1459 | if (entry->bytes > max_extent) | |
1460 | max_extent = entry->bytes; | |
1461 | } | |
1462 | } | |
1463 | ||
1464 | cluster->window_start = entry->offset; | |
1465 | ||
1466 | /* | |
1467 | * now we've found our entries, pull them out of the free space | |
1468 | * cache and put them into the cluster rbtree | |
1469 | * | |
1470 | * The cluster includes an rbtree, but only uses the offset index | |
1471 | * of each free space cache entry. | |
1472 | */ | |
96303081 | 1473 | while (1) { |
fa9c0d79 | 1474 | node = rb_next(&entry->offset_index); |
96303081 JB |
1475 | if (entry->bitmap && node) { |
1476 | entry = rb_entry(node, struct btrfs_free_space, | |
1477 | offset_index); | |
1478 | continue; | |
1479 | } else if (entry->bitmap && !node) { | |
1480 | break; | |
1481 | } | |
1482 | ||
1483 | rb_erase(&entry->offset_index, &block_group->free_space_offset); | |
fa9c0d79 | 1484 | ret = tree_insert_offset(&cluster->root, entry->offset, |
96303081 | 1485 | &entry->offset_index, 0); |
fa9c0d79 CM |
1486 | BUG_ON(ret); |
1487 | ||
1488 | if (!node || entry == last) | |
1489 | break; | |
1490 | ||
1491 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | |
1492 | } | |
96303081 | 1493 | |
fa9c0d79 | 1494 | cluster->max_size = max_extent; |
96303081 JB |
1495 | got_it: |
1496 | ret = 0; | |
fa9c0d79 CM |
1497 | atomic_inc(&block_group->count); |
1498 | list_add_tail(&cluster->block_group_list, &block_group->cluster_list); | |
1499 | cluster->block_group = block_group; | |
1500 | out: | |
1501 | spin_unlock(&cluster->lock); | |
1502 | spin_unlock(&block_group->tree_lock); | |
1503 | ||
1504 | return ret; | |
1505 | } | |
1506 | ||
1507 | /* | |
1508 | * simple code to zero out a cluster | |
1509 | */ | |
1510 | void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | |
1511 | { | |
1512 | spin_lock_init(&cluster->lock); | |
1513 | spin_lock_init(&cluster->refill_lock); | |
6bef4d31 | 1514 | cluster->root = RB_ROOT; |
fa9c0d79 | 1515 | cluster->max_size = 0; |
96303081 | 1516 | cluster->points_to_bitmap = false; |
fa9c0d79 CM |
1517 | INIT_LIST_HEAD(&cluster->block_group_list); |
1518 | cluster->block_group = NULL; | |
1519 | } | |
1520 |