]> bbs.cooldavid.org Git - net-next-2.6.git/blame - fs/btrfs/ctree.c
Btrfs: Add sparse checking to Makefile
[net-next-2.6.git] / fs / btrfs / ctree.c
CommitLineData
be0e5c09
CM
1#include <stdio.h>
2#include <stdlib.h>
3#include "kerncompat.h"
eb60ceac
CM
4#include "radix-tree.h"
5#include "ctree.h"
6#include "disk-io.h"
5de08d7d 7#include "print-tree.h"
9a8dd150 8
5c680ed6
CM
9int split_node(struct ctree_root *root, struct ctree_path *path, int level);
10int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size);
5de08d7d
CM
11int push_node_left(struct ctree_root *root, struct ctree_path *path, int level);
12int push_node_right(struct ctree_root *root,
13 struct ctree_path *path, int level);
14int del_ptr(struct ctree_root *root, struct ctree_path *path, int level);
d97e63b6 15
5de08d7d 16inline void init_path(struct ctree_path *p)
be0e5c09
CM
17{
18 memset(p, 0, sizeof(*p));
19}
20
5de08d7d 21void release_path(struct ctree_root *root, struct ctree_path *p)
eb60ceac
CM
22{
23 int i;
24 for (i = 0; i < MAX_LEVEL; i++) {
25 if (!p->nodes[i])
26 break;
27 tree_block_release(root, p->nodes[i]);
28 }
29}
30
74123bd7
CM
31/*
32 * The leaf data grows from end-to-front in the node.
33 * this returns the address of the start of the last item,
34 * which is the stop of the leaf data stack
35 */
be0e5c09
CM
36static inline unsigned int leaf_data_end(struct leaf *leaf)
37{
38 unsigned int nr = leaf->header.nritems;
39 if (nr == 0)
d97e63b6 40 return sizeof(leaf->data);
be0e5c09
CM
41 return leaf->items[nr-1].offset;
42}
43
74123bd7
CM
44/*
45 * The space between the end of the leaf items and
46 * the start of the leaf data. IOW, how much room
47 * the leaf has left for both items and data
48 */
5de08d7d 49int leaf_free_space(struct leaf *leaf)
be0e5c09
CM
50{
51 int data_end = leaf_data_end(leaf);
52 int nritems = leaf->header.nritems;
53 char *items_end = (char *)(leaf->items + nritems + 1);
54 return (char *)(leaf->data + data_end) - (char *)items_end;
55}
56
74123bd7
CM
57/*
58 * compare two keys in a memcmp fashion
59 */
be0e5c09
CM
60int comp_keys(struct key *k1, struct key *k2)
61{
62 if (k1->objectid > k2->objectid)
63 return 1;
64 if (k1->objectid < k2->objectid)
65 return -1;
66 if (k1->flags > k2->flags)
67 return 1;
68 if (k1->flags < k2->flags)
69 return -1;
70 if (k1->offset > k2->offset)
71 return 1;
72 if (k1->offset < k2->offset)
73 return -1;
74 return 0;
75}
74123bd7
CM
76
77/*
78 * search for key in the array p. items p are item_size apart
79 * and there are 'max' items in p
80 * the slot in the array is returned via slot, and it points to
81 * the place where you would insert key if it is not found in
82 * the array.
83 *
84 * slot may point to max if the key is bigger than all of the keys
85 */
be0e5c09
CM
86int generic_bin_search(char *p, int item_size, struct key *key,
87 int max, int *slot)
88{
89 int low = 0;
90 int high = max;
91 int mid;
92 int ret;
93 struct key *tmp;
94
95 while(low < high) {
96 mid = (low + high) / 2;
97 tmp = (struct key *)(p + mid * item_size);
98 ret = comp_keys(tmp, key);
99
100 if (ret < 0)
101 low = mid + 1;
102 else if (ret > 0)
103 high = mid;
104 else {
105 *slot = mid;
106 return 0;
107 }
108 }
109 *slot = low;
110 return 1;
111}
112
97571fd0
CM
113/*
114 * simple bin_search frontend that does the right thing for
115 * leaves vs nodes
116 */
be0e5c09
CM
117int bin_search(struct node *c, struct key *key, int *slot)
118{
119 if (is_leaf(c->header.flags)) {
120 struct leaf *l = (struct leaf *)c;
121 return generic_bin_search((void *)l->items, sizeof(struct item),
122 key, c->header.nritems, slot);
123 } else {
124 return generic_bin_search((void *)c->keys, sizeof(struct key),
125 key, c->header.nritems, slot);
126 }
127 return -1;
128}
129
74123bd7
CM
130/*
131 * look for key in the tree. path is filled in with nodes along the way
132 * if key is found, we return zero and you can find the item in the leaf
133 * level of the path (level 0)
134 *
135 * If the key isn't found, the path points to the slot where it should
136 * be inserted.
97571fd0
CM
137 *
138 * if ins_len > 0, nodes and leaves will be split as we walk down the
139 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
140 * possible)
74123bd7 141 */
5de08d7d
CM
142int search_slot(struct ctree_root *root, struct key *key,
143 struct ctree_path *p, int ins_len)
be0e5c09 144{
eb60ceac
CM
145 struct tree_buffer *b = root->node;
146 struct node *c;
be0e5c09
CM
147 int slot;
148 int ret;
149 int level;
5c680ed6 150
eb60ceac
CM
151 b->count++;
152 while (b) {
153 c = &b->node;
be0e5c09 154 level = node_level(c->header.flags);
eb60ceac 155 p->nodes[level] = b;
be0e5c09
CM
156 ret = bin_search(c, key, &slot);
157 if (!is_leaf(c->header.flags)) {
158 if (ret && slot > 0)
159 slot -= 1;
160 p->slots[level] = slot;
5de08d7d
CM
161 if (ins_len > 0 &&
162 c->header.nritems == NODEPTRS_PER_BLOCK) {
5c680ed6
CM
163 int sret = split_node(root, p, level);
164 BUG_ON(sret > 0);
165 if (sret)
166 return sret;
167 b = p->nodes[level];
168 c = &b->node;
169 slot = p->slots[level];
170 }
eb60ceac 171 b = read_tree_block(root, c->blockptrs[slot]);
be0e5c09
CM
172 continue;
173 } else {
5c680ed6 174 struct leaf *l = (struct leaf *)c;
be0e5c09 175 p->slots[level] = slot;
5de08d7d
CM
176 if (ins_len > 0 && leaf_free_space(l) <
177 sizeof(struct item) + ins_len) {
5c680ed6
CM
178 int sret = split_leaf(root, p, ins_len);
179 BUG_ON(sret > 0);
180 if (sret)
181 return sret;
182 }
be0e5c09
CM
183 return ret;
184 }
185 }
186 return -1;
187}
188
74123bd7
CM
189/*
190 * adjust the pointers going up the tree, starting at level
191 * making sure the right key of each node is points to 'key'.
192 * This is used after shifting pointers to the left, so it stops
193 * fixing up pointers when a given leaf/node is not in slot 0 of the
194 * higher levels
195 */
eb60ceac
CM
196static void fixup_low_keys(struct ctree_root *root,
197 struct ctree_path *path, struct key *key,
198 int level)
be0e5c09
CM
199{
200 int i;
be0e5c09 201 for (i = level; i < MAX_LEVEL; i++) {
eb60ceac 202 struct node *t;
be0e5c09 203 int tslot = path->slots[i];
eb60ceac 204 if (!path->nodes[i])
be0e5c09 205 break;
eb60ceac 206 t = &path->nodes[i]->node;
be0e5c09 207 memcpy(t->keys + tslot, key, sizeof(*key));
eb60ceac 208 write_tree_block(root, path->nodes[i]);
be0e5c09
CM
209 if (tslot != 0)
210 break;
211 }
212}
213
74123bd7
CM
214/*
215 * try to push data from one node into the next node left in the
216 * tree. The src node is found at specified level in the path.
217 * If some bytes were pushed, return 0, otherwise return 1.
218 *
219 * Lower nodes/leaves in the path are not touched, higher nodes may
220 * be modified to reflect the push.
221 *
222 * The path is altered to reflect the push.
223 */
be0e5c09
CM
224int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
225{
226 int slot;
227 struct node *left;
228 struct node *right;
229 int push_items = 0;
230 int left_nritems;
231 int right_nritems;
eb60ceac
CM
232 struct tree_buffer *t;
233 struct tree_buffer *right_buf;
be0e5c09
CM
234
235 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
236 return 1;
237 slot = path->slots[level + 1];
238 if (slot == 0)
239 return 1;
240
eb60ceac
CM
241 t = read_tree_block(root,
242 path->nodes[level + 1]->node.blockptrs[slot - 1]);
243 left = &t->node;
244 right_buf = path->nodes[level];
245 right = &right_buf->node;
be0e5c09
CM
246 left_nritems = left->header.nritems;
247 right_nritems = right->header.nritems;
248 push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
eb60ceac
CM
249 if (push_items <= 0) {
250 tree_block_release(root, t);
be0e5c09 251 return 1;
eb60ceac 252 }
be0e5c09
CM
253
254 if (right_nritems < push_items)
255 push_items = right_nritems;
256 memcpy(left->keys + left_nritems, right->keys,
257 push_items * sizeof(struct key));
258 memcpy(left->blockptrs + left_nritems, right->blockptrs,
259 push_items * sizeof(u64));
260 memmove(right->keys, right->keys + push_items,
261 (right_nritems - push_items) * sizeof(struct key));
262 memmove(right->blockptrs, right->blockptrs + push_items,
263 (right_nritems - push_items) * sizeof(u64));
264 right->header.nritems -= push_items;
265 left->header.nritems += push_items;
266
267 /* adjust the pointers going up the tree */
eb60ceac
CM
268 fixup_low_keys(root, path, right->keys, level + 1);
269
270 write_tree_block(root, t);
271 write_tree_block(root, right_buf);
be0e5c09
CM
272
273 /* then fixup the leaf pointer in the path */
274 if (path->slots[level] < push_items) {
275 path->slots[level] += left_nritems;
eb60ceac
CM
276 tree_block_release(root, path->nodes[level]);
277 path->nodes[level] = t;
be0e5c09
CM
278 path->slots[level + 1] -= 1;
279 } else {
280 path->slots[level] -= push_items;
eb60ceac 281 tree_block_release(root, t);
be0e5c09
CM
282 }
283 return 0;
284}
285
74123bd7
CM
286/*
287 * try to push data from one node into the next node right in the
288 * tree. The src node is found at specified level in the path.
289 * If some bytes were pushed, return 0, otherwise return 1.
290 *
291 * Lower nodes/leaves in the path are not touched, higher nodes may
292 * be modified to reflect the push.
293 *
294 * The path is altered to reflect the push.
295 */
be0e5c09
CM
296int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
297{
298 int slot;
eb60ceac
CM
299 struct tree_buffer *t;
300 struct tree_buffer *src_buffer;
be0e5c09
CM
301 struct node *dst;
302 struct node *src;
303 int push_items = 0;
304 int dst_nritems;
305 int src_nritems;
306
74123bd7 307 /* can't push from the root */
be0e5c09
CM
308 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
309 return 1;
74123bd7
CM
310
311 /* only try to push inside the node higher up */
be0e5c09
CM
312 slot = path->slots[level + 1];
313 if (slot == NODEPTRS_PER_BLOCK - 1)
314 return 1;
315
eb60ceac 316 if (slot >= path->nodes[level + 1]->node.header.nritems -1)
be0e5c09
CM
317 return 1;
318
eb60ceac
CM
319 t = read_tree_block(root,
320 path->nodes[level + 1]->node.blockptrs[slot + 1]);
321 dst = &t->node;
322 src_buffer = path->nodes[level];
323 src = &src_buffer->node;
be0e5c09
CM
324 dst_nritems = dst->header.nritems;
325 src_nritems = src->header.nritems;
326 push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
eb60ceac
CM
327 if (push_items <= 0) {
328 tree_block_release(root, t);
be0e5c09 329 return 1;
eb60ceac 330 }
be0e5c09
CM
331
332 if (src_nritems < push_items)
333 push_items = src_nritems;
334 memmove(dst->keys + push_items, dst->keys,
335 dst_nritems * sizeof(struct key));
336 memcpy(dst->keys, src->keys + src_nritems - push_items,
337 push_items * sizeof(struct key));
338
339 memmove(dst->blockptrs + push_items, dst->blockptrs,
340 dst_nritems * sizeof(u64));
341 memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
342 push_items * sizeof(u64));
343
344 src->header.nritems -= push_items;
345 dst->header.nritems += push_items;
346
347 /* adjust the pointers going up the tree */
eb60ceac 348 memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
be0e5c09 349 dst->keys, sizeof(struct key));
eb60ceac
CM
350
351 write_tree_block(root, path->nodes[level + 1]);
352 write_tree_block(root, t);
353 write_tree_block(root, src_buffer);
354
74123bd7 355 /* then fixup the pointers in the path */
be0e5c09
CM
356 if (path->slots[level] >= src->header.nritems) {
357 path->slots[level] -= src->header.nritems;
eb60ceac
CM
358 tree_block_release(root, path->nodes[level]);
359 path->nodes[level] = t;
be0e5c09 360 path->slots[level + 1] += 1;
eb60ceac
CM
361 } else {
362 tree_block_release(root, t);
be0e5c09
CM
363 }
364 return 0;
365}
366
97571fd0
CM
367/*
368 * helper function to insert a new root level in the tree.
369 * A new node is allocated, and a single item is inserted to
370 * point to the existing root
371 */
5de08d7d
CM
372static int insert_new_root(struct ctree_root *root,
373 struct ctree_path *path, int level)
5c680ed6
CM
374{
375 struct tree_buffer *t;
376 struct node *lower;
377 struct node *c;
378 struct key *lower_key;
379
380 BUG_ON(path->nodes[level]);
381 BUG_ON(path->nodes[level-1] != root->node);
382
383 t = alloc_free_block(root);
384 c = &t->node;
385 memset(c, 0, sizeof(c));
386 c->header.nritems = 1;
387 c->header.flags = node_level(level);
388 c->header.blocknr = t->blocknr;
389 c->header.parentid = root->node->node.header.parentid;
390 lower = &path->nodes[level-1]->node;
391 if (is_leaf(lower->header.flags))
392 lower_key = &((struct leaf *)lower)->items[0].key;
393 else
394 lower_key = lower->keys;
395 memcpy(c->keys, lower_key, sizeof(struct key));
396 c->blockptrs[0] = path->nodes[level-1]->blocknr;
397 /* the super has an extra ref to root->node */
398 tree_block_release(root, root->node);
399 root->node = t;
400 t->count++;
401 write_tree_block(root, t);
402 path->nodes[level] = t;
403 path->slots[level] = 0;
404 return 0;
405}
406
74123bd7
CM
407/*
408 * worker function to insert a single pointer in a node.
409 * the node should have enough room for the pointer already
97571fd0 410 *
74123bd7
CM
411 * slot and level indicate where you want the key to go, and
412 * blocknr is the block the key points to.
413 */
5c680ed6 414int insert_ptr(struct ctree_root *root,
74123bd7
CM
415 struct ctree_path *path, struct key *key,
416 u64 blocknr, int slot, int level)
417{
74123bd7 418 struct node *lower;
74123bd7 419 int nritems;
5c680ed6
CM
420
421 BUG_ON(!path->nodes[level]);
74123bd7
CM
422 lower = &path->nodes[level]->node;
423 nritems = lower->header.nritems;
424 if (slot > nritems)
425 BUG();
426 if (nritems == NODEPTRS_PER_BLOCK)
427 BUG();
428 if (slot != nritems) {
429 memmove(lower->keys + slot + 1, lower->keys + slot,
430 (nritems - slot) * sizeof(struct key));
431 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
432 (nritems - slot) * sizeof(u64));
433 }
434 memcpy(lower->keys + slot, key, sizeof(struct key));
435 lower->blockptrs[slot] = blocknr;
436 lower->header.nritems++;
437 if (lower->keys[1].objectid == 0)
438 BUG();
439 write_tree_block(root, path->nodes[level]);
440 return 0;
441}
442
97571fd0
CM
443/*
444 * split the node at the specified level in path in two.
445 * The path is corrected to point to the appropriate node after the split
446 *
447 * Before splitting this tries to make some room in the node by pushing
448 * left and right, if either one works, it returns right away.
449 */
5c680ed6 450int split_node(struct ctree_root *root, struct ctree_path *path, int level)
be0e5c09 451{
5c680ed6
CM
452 struct tree_buffer *t;
453 struct node *c;
454 struct tree_buffer *split_buffer;
455 struct node *split;
be0e5c09 456 int mid;
5c680ed6 457 int ret;
eb60ceac 458
5c680ed6
CM
459 ret = push_node_left(root, path, level);
460 if (!ret)
461 return 0;
462 ret = push_node_right(root, path, level);
463 if (!ret)
464 return 0;
465 t = path->nodes[level];
466 c = &t->node;
467 if (t == root->node) {
468 /* trying to split the root, lets make a new one */
469 ret = insert_new_root(root, path, level + 1);
470 if (ret)
471 return ret;
be0e5c09 472 }
5c680ed6
CM
473 split_buffer = alloc_free_block(root);
474 split = &split_buffer->node;
475 split->header.flags = c->header.flags;
476 split->header.blocknr = split_buffer->blocknr;
477 split->header.parentid = root->node->node.header.parentid;
478 mid = (c->header.nritems + 1) / 2;
479 memcpy(split->keys, c->keys + mid,
480 (c->header.nritems - mid) * sizeof(struct key));
481 memcpy(split->blockptrs, c->blockptrs + mid,
482 (c->header.nritems - mid) * sizeof(u64));
483 split->header.nritems = c->header.nritems - mid;
484 c->header.nritems = mid;
485 write_tree_block(root, t);
486 write_tree_block(root, split_buffer);
487 insert_ptr(root, path, split->keys, split_buffer->blocknr,
488 path->slots[level + 1] + 1, level + 1);
5de08d7d 489 if (path->slots[level] >= mid) {
5c680ed6
CM
490 path->slots[level] -= mid;
491 tree_block_release(root, t);
492 path->nodes[level] = split_buffer;
493 path->slots[level + 1] += 1;
494 } else {
495 tree_block_release(root, split_buffer);
be0e5c09 496 }
5c680ed6 497 return 0;
be0e5c09
CM
498}
499
74123bd7
CM
500/*
501 * how many bytes are required to store the items in a leaf. start
502 * and nr indicate which items in the leaf to check. This totals up the
503 * space used both by the item structs and the item data
504 */
be0e5c09
CM
505int leaf_space_used(struct leaf *l, int start, int nr)
506{
507 int data_len;
508 int end = start + nr - 1;
509
510 if (!nr)
511 return 0;
512 data_len = l->items[start].offset + l->items[start].size;
513 data_len = data_len - l->items[end].offset;
514 data_len += sizeof(struct item) * nr;
515 return data_len;
516}
517
00ec4c51
CM
518/*
519 * push some data in the path leaf to the right, trying to free up at
520 * least data_size bytes. returns zero if the push worked, nonzero otherwise
521 */
522int push_leaf_right(struct ctree_root *root, struct ctree_path *path,
523 int data_size)
524{
525 struct tree_buffer *left_buf = path->nodes[0];
526 struct leaf *left = &left_buf->leaf;
527 struct leaf *right;
528 struct tree_buffer *right_buf;
529 struct tree_buffer *upper;
530 int slot;
531 int i;
532 int free_space;
533 int push_space = 0;
534 int push_items = 0;
535 struct item *item;
536
537 slot = path->slots[1];
538 if (!path->nodes[1]) {
539 return 1;
540 }
541 upper = path->nodes[1];
542 if (slot >= upper->node.header.nritems - 1) {
543 return 1;
544 }
545 right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]);
546 right = &right_buf->leaf;
547 free_space = leaf_free_space(right);
548 if (free_space < data_size + sizeof(struct item)) {
549 tree_block_release(root, right_buf);
550 return 1;
551 }
552 for (i = left->header.nritems - 1; i >= 0; i--) {
553 item = left->items + i;
554 if (path->slots[0] == i)
555 push_space += data_size + sizeof(*item);
556 if (item->size + sizeof(*item) + push_space > free_space)
557 break;
558 push_items++;
559 push_space += item->size + sizeof(*item);
560 }
561 if (push_items == 0) {
562 tree_block_release(root, right_buf);
563 return 1;
564 }
565 /* push left to right */
566 push_space = left->items[left->header.nritems - push_items].offset +
567 left->items[left->header.nritems - push_items].size;
568 push_space -= leaf_data_end(left);
569 /* make room in the right data area */
570 memmove(right->data + leaf_data_end(right) - push_space,
571 right->data + leaf_data_end(right),
572 LEAF_DATA_SIZE - leaf_data_end(right));
573 /* copy from the left data area */
574 memcpy(right->data + LEAF_DATA_SIZE - push_space,
575 left->data + leaf_data_end(left),
576 push_space);
577 memmove(right->items + push_items, right->items,
578 right->header.nritems * sizeof(struct item));
579 /* copy the items from left to right */
580 memcpy(right->items, left->items + left->header.nritems - push_items,
581 push_items * sizeof(struct item));
582
583 /* update the item pointers */
584 right->header.nritems += push_items;
585 push_space = LEAF_DATA_SIZE;
586 for (i = 0; i < right->header.nritems; i++) {
587 right->items[i].offset = push_space - right->items[i].size;
588 push_space = right->items[i].offset;
589 }
590 left->header.nritems -= push_items;
591
592 write_tree_block(root, left_buf);
593 write_tree_block(root, right_buf);
594 memcpy(upper->node.keys + slot + 1,
595 &right->items[0].key, sizeof(struct key));
596 write_tree_block(root, upper);
597 /* then fixup the leaf pointer in the path */
00ec4c51
CM
598 if (path->slots[0] >= left->header.nritems) {
599 path->slots[0] -= left->header.nritems;
600 tree_block_release(root, path->nodes[0]);
601 path->nodes[0] = right_buf;
602 path->slots[1] += 1;
603 } else {
604 tree_block_release(root, right_buf);
605 }
606 return 0;
607}
74123bd7
CM
608/*
609 * push some data in the path leaf to the left, trying to free up at
610 * least data_size bytes. returns zero if the push worked, nonzero otherwise
611 */
be0e5c09
CM
612int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
613 int data_size)
614{
eb60ceac
CM
615 struct tree_buffer *right_buf = path->nodes[0];
616 struct leaf *right = &right_buf->leaf;
617 struct tree_buffer *t;
be0e5c09
CM
618 struct leaf *left;
619 int slot;
620 int i;
621 int free_space;
622 int push_space = 0;
623 int push_items = 0;
624 struct item *item;
625 int old_left_nritems;
626
627 slot = path->slots[1];
628 if (slot == 0) {
629 return 1;
630 }
631 if (!path->nodes[1]) {
632 return 1;
633 }
eb60ceac
CM
634 t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
635 left = &t->leaf;
be0e5c09
CM
636 free_space = leaf_free_space(left);
637 if (free_space < data_size + sizeof(struct item)) {
eb60ceac 638 tree_block_release(root, t);
be0e5c09
CM
639 return 1;
640 }
641 for (i = 0; i < right->header.nritems; i++) {
642 item = right->items + i;
643 if (path->slots[0] == i)
644 push_space += data_size + sizeof(*item);
645 if (item->size + sizeof(*item) + push_space > free_space)
646 break;
647 push_items++;
648 push_space += item->size + sizeof(*item);
649 }
650 if (push_items == 0) {
eb60ceac 651 tree_block_release(root, t);
be0e5c09
CM
652 return 1;
653 }
654 /* push data from right to left */
655 memcpy(left->items + left->header.nritems,
656 right->items, push_items * sizeof(struct item));
657 push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
658 memcpy(left->data + leaf_data_end(left) - push_space,
659 right->data + right->items[push_items - 1].offset,
660 push_space);
661 old_left_nritems = left->header.nritems;
eb60ceac
CM
662 BUG_ON(old_left_nritems < 0);
663
be0e5c09
CM
664 for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
665 left->items[i].offset -= LEAF_DATA_SIZE -
666 left->items[old_left_nritems -1].offset;
667 }
668 left->header.nritems += push_items;
669
670 /* fixup right node */
671 push_space = right->items[push_items-1].offset - leaf_data_end(right);
672 memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
673 leaf_data_end(right), push_space);
674 memmove(right->items, right->items + push_items,
675 (right->header.nritems - push_items) * sizeof(struct item));
676 right->header.nritems -= push_items;
677 push_space = LEAF_DATA_SIZE;
eb60ceac 678
be0e5c09
CM
679 for (i = 0; i < right->header.nritems; i++) {
680 right->items[i].offset = push_space - right->items[i].size;
681 push_space = right->items[i].offset;
682 }
eb60ceac
CM
683
684 write_tree_block(root, t);
685 write_tree_block(root, right_buf);
686
687 fixup_low_keys(root, path, &right->items[0].key, 1);
be0e5c09
CM
688
689 /* then fixup the leaf pointer in the path */
690 if (path->slots[0] < push_items) {
691 path->slots[0] += old_left_nritems;
eb60ceac
CM
692 tree_block_release(root, path->nodes[0]);
693 path->nodes[0] = t;
be0e5c09
CM
694 path->slots[1] -= 1;
695 } else {
eb60ceac 696 tree_block_release(root, t);
be0e5c09
CM
697 path->slots[0] -= push_items;
698 }
eb60ceac 699 BUG_ON(path->slots[0] < 0);
be0e5c09
CM
700 return 0;
701}
702
74123bd7
CM
703/*
704 * split the path's leaf in two, making sure there is at least data_size
705 * available for the resulting leaf level of the path.
706 */
be0e5c09
CM
707int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
708{
eb60ceac
CM
709 struct tree_buffer *l_buf = path->nodes[0];
710 struct leaf *l = &l_buf->leaf;
711 int nritems;
712 int mid;
713 int slot;
be0e5c09 714 struct leaf *right;
eb60ceac 715 struct tree_buffer *right_buffer;
be0e5c09
CM
716 int space_needed = data_size + sizeof(struct item);
717 int data_copy_size;
718 int rt_data_off;
719 int i;
720 int ret;
721
00ec4c51
CM
722 if (push_leaf_left(root, path, data_size) == 0 ||
723 push_leaf_right(root, path, data_size) == 0) {
eb60ceac
CM
724 l_buf = path->nodes[0];
725 l = &l_buf->leaf;
726 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
727 return 0;
be0e5c09 728 }
5c680ed6
CM
729 if (!path->nodes[1]) {
730 ret = insert_new_root(root, path, 1);
731 if (ret)
732 return ret;
733 }
eb60ceac
CM
734 slot = path->slots[0];
735 nritems = l->header.nritems;
736 mid = (nritems + 1)/ 2;
737
738 right_buffer = alloc_free_block(root);
739 BUG_ON(!right_buffer);
740 BUG_ON(mid == nritems);
741 right = &right_buffer->leaf;
be0e5c09
CM
742 memset(right, 0, sizeof(*right));
743 if (mid <= slot) {
97571fd0 744 /* FIXME, just alloc a new leaf here */
be0e5c09
CM
745 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
746 LEAF_DATA_SIZE)
747 BUG();
748 } else {
97571fd0 749 /* FIXME, just alloc a new leaf here */
be0e5c09
CM
750 if (leaf_space_used(l, 0, mid + 1) + space_needed >
751 LEAF_DATA_SIZE)
752 BUG();
753 }
754 right->header.nritems = nritems - mid;
eb60ceac
CM
755 right->header.blocknr = right_buffer->blocknr;
756 right->header.flags = node_level(0);
cfaa7295 757 right->header.parentid = root->node->node.header.parentid;
be0e5c09
CM
758 data_copy_size = l->items[mid].offset + l->items[mid].size -
759 leaf_data_end(l);
760 memcpy(right->items, l->items + mid,
761 (nritems - mid) * sizeof(struct item));
762 memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
763 l->data + leaf_data_end(l), data_copy_size);
764 rt_data_off = LEAF_DATA_SIZE -
765 (l->items[mid].offset + l->items[mid].size);
74123bd7
CM
766
767 for (i = 0; i < right->header.nritems; i++)
be0e5c09 768 right->items[i].offset += rt_data_off;
74123bd7 769
be0e5c09
CM
770 l->header.nritems = mid;
771 ret = insert_ptr(root, path, &right->items[0].key,
5c680ed6 772 right_buffer->blocknr, path->slots[1] + 1, 1);
eb60ceac
CM
773 write_tree_block(root, right_buffer);
774 write_tree_block(root, l_buf);
775
776 BUG_ON(path->slots[0] != slot);
be0e5c09 777 if (mid <= slot) {
eb60ceac
CM
778 tree_block_release(root, path->nodes[0]);
779 path->nodes[0] = right_buffer;
be0e5c09
CM
780 path->slots[0] -= mid;
781 path->slots[1] += 1;
eb60ceac
CM
782 } else
783 tree_block_release(root, right_buffer);
784 BUG_ON(path->slots[0] < 0);
be0e5c09
CM
785 return ret;
786}
787
74123bd7
CM
788/*
789 * Given a key and some data, insert an item into the tree.
790 * This does all the path init required, making room in the tree if needed.
791 */
be0e5c09
CM
792int insert_item(struct ctree_root *root, struct key *key,
793 void *data, int data_size)
794{
795 int ret;
796 int slot;
eb60ceac 797 int slot_orig;
be0e5c09 798 struct leaf *leaf;
eb60ceac 799 struct tree_buffer *leaf_buf;
be0e5c09
CM
800 unsigned int nritems;
801 unsigned int data_end;
802 struct ctree_path path;
803
74123bd7 804 /* create a root if there isn't one */
5c680ed6 805 if (!root->node)
cfaa7295 806 BUG();
be0e5c09 807 init_path(&path);
5c680ed6 808 ret = search_slot(root, key, &path, data_size);
eb60ceac
CM
809 if (ret == 0) {
810 release_path(root, &path);
be0e5c09 811 return -EEXIST;
eb60ceac 812 }
be0e5c09 813
eb60ceac
CM
814 slot_orig = path.slots[0];
815 leaf_buf = path.nodes[0];
816 leaf = &leaf_buf->leaf;
74123bd7 817
be0e5c09
CM
818 nritems = leaf->header.nritems;
819 data_end = leaf_data_end(leaf);
eb60ceac 820
be0e5c09
CM
821 if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
822 BUG();
823
824 slot = path.slots[0];
eb60ceac 825 BUG_ON(slot < 0);
be0e5c09
CM
826 if (slot != nritems) {
827 int i;
828 unsigned int old_data = leaf->items[slot].offset +
829 leaf->items[slot].size;
830
831 /*
832 * item0..itemN ... dataN.offset..dataN.size .. data0.size
833 */
834 /* first correct the data pointers */
835 for (i = slot; i < nritems; i++)
836 leaf->items[i].offset -= data_size;
837
838 /* shift the items */
839 memmove(leaf->items + slot + 1, leaf->items + slot,
840 (nritems - slot) * sizeof(struct item));
841
842 /* shift the data */
843 memmove(leaf->data + data_end - data_size, leaf->data +
844 data_end, old_data - data_end);
845 data_end = old_data;
846 }
74123bd7 847 /* copy the new data in */
be0e5c09
CM
848 memcpy(&leaf->items[slot].key, key, sizeof(struct key));
849 leaf->items[slot].offset = data_end - data_size;
850 leaf->items[slot].size = data_size;
851 memcpy(leaf->data + data_end - data_size, data, data_size);
852 leaf->header.nritems += 1;
eb60ceac 853 write_tree_block(root, leaf_buf);
8e19f2cd
CM
854 if (slot == 0)
855 fixup_low_keys(root, &path, key, 1);
be0e5c09
CM
856 if (leaf_free_space(leaf) < 0)
857 BUG();
eb60ceac 858 release_path(root, &path);
be0e5c09
CM
859 return 0;
860}
861
74123bd7 862/*
5de08d7d 863 * delete the pointer from a given node.
74123bd7
CM
864 *
865 * If the delete empties a node, the node is removed from the tree,
866 * continuing all the way the root if required. The root is converted into
867 * a leaf if all the nodes are emptied.
868 */
be0e5c09
CM
869int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
870{
871 int slot;
eb60ceac 872 struct tree_buffer *t;
be0e5c09
CM
873 struct node *node;
874 int nritems;
9a8dd150 875 u64 blocknr;
be0e5c09
CM
876
877 while(1) {
eb60ceac
CM
878 t = path->nodes[level];
879 if (!t)
be0e5c09 880 break;
eb60ceac 881 node = &t->node;
be0e5c09
CM
882 slot = path->slots[level];
883 nritems = node->header.nritems;
884
885 if (slot != nritems -1) {
886 memmove(node->keys + slot, node->keys + slot + 1,
887 sizeof(struct key) * (nritems - slot - 1));
888 memmove(node->blockptrs + slot,
889 node->blockptrs + slot + 1,
890 sizeof(u64) * (nritems - slot - 1));
891 }
892 node->header.nritems--;
9a8dd150 893 blocknr = t->blocknr;
8e19f2cd 894 write_tree_block(root, t);
be0e5c09 895 if (node->header.nritems != 0) {
8e19f2cd 896 int tslot;
be0e5c09 897 if (slot == 0)
eb60ceac
CM
898 fixup_low_keys(root, path, node->keys,
899 level + 1);
8e19f2cd
CM
900 tslot = path->slots[level + 1];
901 t->count++;
902 if (push_node_left(root, path, level))
903 push_node_right(root, path, level);
904 path->slots[level + 1] = tslot;
905 if (node->header.nritems != 0) {
906 tree_block_release(root, t);
907 break;
908 }
909 tree_block_release(root, t);
be0e5c09 910 }
eb60ceac
CM
911 if (t == root->node) {
912 /* just turn the root into a leaf and break */
913 root->node->node.header.flags = node_level(0);
914 write_tree_block(root, t);
be0e5c09
CM
915 break;
916 }
917 level++;
9a8dd150 918 free_extent(root, blocknr, 1);
be0e5c09
CM
919 if (!path->nodes[level])
920 BUG();
be0e5c09
CM
921 }
922 return 0;
923}
924
74123bd7
CM
925/*
926 * delete the item at the leaf level in path. If that empties
927 * the leaf, remove it from the tree
928 */
4920c9ac 929int del_item(struct ctree_root *root, struct ctree_path *path)
be0e5c09 930{
be0e5c09
CM
931 int slot;
932 struct leaf *leaf;
eb60ceac 933 struct tree_buffer *leaf_buf;
be0e5c09
CM
934 int doff;
935 int dsize;
936
eb60ceac
CM
937 leaf_buf = path->nodes[0];
938 leaf = &leaf_buf->leaf;
4920c9ac 939 slot = path->slots[0];
be0e5c09
CM
940 doff = leaf->items[slot].offset;
941 dsize = leaf->items[slot].size;
942
943 if (slot != leaf->header.nritems - 1) {
944 int i;
945 int data_end = leaf_data_end(leaf);
946 memmove(leaf->data + data_end + dsize,
947 leaf->data + data_end,
948 doff - data_end);
949 for (i = slot + 1; i < leaf->header.nritems; i++)
950 leaf->items[i].offset += dsize;
951 memmove(leaf->items + slot, leaf->items + slot + 1,
952 sizeof(struct item) *
953 (leaf->header.nritems - slot - 1));
954 }
955 leaf->header.nritems -= 1;
74123bd7 956 /* delete the leaf if we've emptied it */
be0e5c09 957 if (leaf->header.nritems == 0) {
eb60ceac
CM
958 if (leaf_buf == root->node) {
959 leaf->header.flags = node_level(0);
960 write_tree_block(root, leaf_buf);
9a8dd150 961 } else {
4920c9ac 962 del_ptr(root, path, 1);
9a8dd150
CM
963 free_extent(root, leaf_buf->blocknr, 1);
964 }
be0e5c09 965 } else {
5de08d7d 966 int used = leaf_space_used(leaf, 0, leaf->header.nritems);
be0e5c09 967 if (slot == 0)
eb60ceac
CM
968 fixup_low_keys(root, path, &leaf->items[0].key, 1);
969 write_tree_block(root, leaf_buf);
74123bd7 970 /* delete the leaf if it is mostly empty */
5de08d7d 971 if (used < LEAF_DATA_SIZE / 3) {
be0e5c09
CM
972 /* push_leaf_left fixes the path.
973 * make sure the path still points to our leaf
974 * for possible call to del_ptr below
975 */
4920c9ac 976 slot = path->slots[1];
eb60ceac 977 leaf_buf->count++;
4920c9ac 978 push_leaf_left(root, path, 1);
00ec4c51
CM
979 if (leaf->header.nritems)
980 push_leaf_right(root, path, 1);
be0e5c09 981 if (leaf->header.nritems == 0) {
5de08d7d 982 u64 blocknr = leaf_buf->blocknr;
4920c9ac
CM
983 path->slots[1] = slot;
984 del_ptr(root, path, 1);
5de08d7d
CM
985 tree_block_release(root, leaf_buf);
986 free_extent(root, blocknr, 1);
987 } else {
988 tree_block_release(root, leaf_buf);
be0e5c09
CM
989 }
990 }
991 }
992 return 0;
993}
994
97571fd0
CM
995/*
996 * walk up the tree as far as required to find the next leaf.
997 * returns 0 if it found something or -1 if there are no greater leaves.
998 */
d97e63b6
CM
999int next_leaf(struct ctree_root *root, struct ctree_path *path)
1000{
1001 int slot;
1002 int level = 1;
1003 u64 blocknr;
1004 struct tree_buffer *c;
cfaa7295 1005 struct tree_buffer *next = NULL;
d97e63b6
CM
1006
1007 while(level < MAX_LEVEL) {
1008 if (!path->nodes[level])
1009 return -1;
1010 slot = path->slots[level] + 1;
1011 c = path->nodes[level];
1012 if (slot >= c->node.header.nritems) {
1013 level++;
1014 continue;
1015 }
1016 blocknr = c->node.blockptrs[slot];
cfaa7295
CM
1017 if (next)
1018 tree_block_release(root, next);
d97e63b6
CM
1019 next = read_tree_block(root, blocknr);
1020 break;
1021 }
1022 path->slots[level] = slot;
1023 while(1) {
1024 level--;
1025 c = path->nodes[level];
1026 tree_block_release(root, c);
1027 path->nodes[level] = next;
1028 path->slots[level] = 0;
1029 if (!level)
1030 break;
1031 next = read_tree_block(root, next->node.blockptrs[0]);
1032 }
1033 return 0;
1034}
1035
fec577fb
CM
1036/* some sample code to insert,search & delete items */
1037#if 0
be0e5c09
CM
1038/* for testing only */
1039int next_key(int i, int max_key) {
5de08d7d 1040 return rand() % max_key;
00ec4c51 1041 //return i;
be0e5c09 1042}
be0e5c09 1043int main() {
be0e5c09 1044 struct key ins;
4920c9ac 1045 struct key last = { (u64)-1, 0, 0};
be0e5c09
CM
1046 char *buf;
1047 int i;
1048 int num;
1049 int ret;
5de08d7d
CM
1050 int run_size = 20000000;
1051 int max_key = 100000000;
be0e5c09
CM
1052 int tree_size = 0;
1053 struct ctree_path path;
cfaa7295 1054 struct ctree_super_block super;
fec577fb 1055 struct ctree_root *root;
be0e5c09 1056
eb60ceac
CM
1057 radix_tree_init();
1058
1059
cfaa7295 1060 root = open_ctree("dbfile", &super);
be0e5c09 1061 srand(55);
be0e5c09
CM
1062 for (i = 0; i < run_size; i++) {
1063 buf = malloc(64);
1064 num = next_key(i, max_key);
1065 // num = i;
1066 sprintf(buf, "string-%d", num);
5de08d7d 1067 if (i % 10000 == 0)
00ec4c51 1068 fprintf(stderr, "insert %d:%d\n", num, i);
be0e5c09
CM
1069 ins.objectid = num;
1070 ins.offset = 0;
1071 ins.flags = 0;
eb60ceac 1072 ret = insert_item(root, &ins, buf, strlen(buf));
be0e5c09
CM
1073 if (!ret)
1074 tree_size++;
5de08d7d 1075 free(buf);
be0e5c09 1076 }
cfaa7295 1077 write_ctree_super(root, &super);
eb60ceac 1078 close_ctree(root);
cfaa7295
CM
1079
1080 root = open_ctree("dbfile", &super);
eb60ceac 1081 printf("starting search\n");
be0e5c09
CM
1082 srand(55);
1083 for (i = 0; i < run_size; i++) {
1084 num = next_key(i, max_key);
1085 ins.objectid = num;
be0e5c09 1086 init_path(&path);
5de08d7d 1087 if (i % 10000 == 0)
00ec4c51 1088 fprintf(stderr, "search %d:%d\n", num, i);
5c680ed6 1089 ret = search_slot(root, &ins, &path, 0);
be0e5c09 1090 if (ret) {
eb60ceac 1091 print_tree(root, root->node);
be0e5c09
CM
1092 printf("unable to find %d\n", num);
1093 exit(1);
1094 }
eb60ceac
CM
1095 release_path(root, &path);
1096 }
cfaa7295 1097 write_ctree_super(root, &super);
eb60ceac 1098 close_ctree(root);
cfaa7295 1099 root = open_ctree("dbfile", &super);
eb60ceac
CM
1100 printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
1101 node_level(root->node->node.header.flags),
1102 root->node->node.header.nritems,
1103 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
1104 printf("all searches good, deleting some items\n");
be0e5c09
CM
1105 i = 0;
1106 srand(55);
4920c9ac
CM
1107 for (i = 0 ; i < run_size/4; i++) {
1108 num = next_key(i, max_key);
1109 ins.objectid = num;
1110 init_path(&path);
5de08d7d
CM
1111 ret = search_slot(root, &ins, &path, -1);
1112 if (!ret) {
1113 if (i % 10000 == 0)
00ec4c51 1114 fprintf(stderr, "del %d:%d\n", num, i);
5de08d7d
CM
1115 ret = del_item(root, &path);
1116 if (ret != 0)
1117 BUG();
1118 tree_size--;
1119 }
eb60ceac 1120 release_path(root, &path);
4920c9ac 1121 }
5de08d7d
CM
1122 write_ctree_super(root, &super);
1123 close_ctree(root);
1124 root = open_ctree("dbfile", &super);
4920c9ac 1125 srand(128);
be0e5c09 1126 for (i = 0; i < run_size; i++) {
4920c9ac 1127 buf = malloc(64);
be0e5c09 1128 num = next_key(i, max_key);
4920c9ac 1129 sprintf(buf, "string-%d", num);
be0e5c09 1130 ins.objectid = num;
5de08d7d 1131 if (i % 10000 == 0)
00ec4c51 1132 fprintf(stderr, "insert %d:%d\n", num, i);
eb60ceac 1133 ret = insert_item(root, &ins, buf, strlen(buf));
4920c9ac
CM
1134 if (!ret)
1135 tree_size++;
5de08d7d 1136 free(buf);
4920c9ac 1137 }
cfaa7295 1138 write_ctree_super(root, &super);
eb60ceac 1139 close_ctree(root);
cfaa7295 1140 root = open_ctree("dbfile", &super);
eb60ceac 1141 srand(128);
9a8dd150 1142 printf("starting search2\n");
eb60ceac
CM
1143 for (i = 0; i < run_size; i++) {
1144 num = next_key(i, max_key);
1145 ins.objectid = num;
1146 init_path(&path);
5de08d7d 1147 if (i % 10000 == 0)
00ec4c51 1148 fprintf(stderr, "search %d:%d\n", num, i);
5c680ed6 1149 ret = search_slot(root, &ins, &path, 0);
eb60ceac
CM
1150 if (ret) {
1151 print_tree(root, root->node);
1152 printf("unable to find %d\n", num);
1153 exit(1);
1154 }
1155 release_path(root, &path);
1156 }
1157 printf("starting big long delete run\n");
1158 while(root->node && root->node->node.header.nritems > 0) {
4920c9ac
CM
1159 struct leaf *leaf;
1160 int slot;
1161 ins.objectid = (u64)-1;
1162 init_path(&path);
5de08d7d 1163 ret = search_slot(root, &ins, &path, -1);
4920c9ac
CM
1164 if (ret == 0)
1165 BUG();
1166
eb60ceac 1167 leaf = &path.nodes[0]->leaf;
4920c9ac
CM
1168 slot = path.slots[0];
1169 if (slot != leaf->header.nritems)
1170 BUG();
1171 while(path.slots[0] > 0) {
1172 path.slots[0] -= 1;
1173 slot = path.slots[0];
eb60ceac 1174 leaf = &path.nodes[0]->leaf;
4920c9ac
CM
1175
1176 if (comp_keys(&last, &leaf->items[slot].key) <= 0)
1177 BUG();
1178 memcpy(&last, &leaf->items[slot].key, sizeof(last));
5de08d7d
CM
1179 if (tree_size % 10000 == 0)
1180 printf("big del %d:%d\n", tree_size, i);
eb60ceac
CM
1181 ret = del_item(root, &path);
1182 if (ret != 0) {
1183 printf("del_item returned %d\n", ret);
4920c9ac 1184 BUG();
eb60ceac 1185 }
4920c9ac
CM
1186 tree_size--;
1187 }
eb60ceac 1188 release_path(root, &path);
be0e5c09 1189 }
4920c9ac 1190 printf("tree size is now %d\n", tree_size);
9a8dd150 1191 printf("map tree\n");
00ec4c51 1192 print_tree(root->extent_root, root->extent_root->node);
5de08d7d
CM
1193 write_ctree_super(root, &super);
1194 close_ctree(root);
be0e5c09
CM
1195 return 0;
1196}
fec577fb 1197#endif