]> bbs.cooldavid.org Git - net-next-2.6.git/blame - fs/reiserfs/stree.c
reiserfs: rename p_s_sb to sb
[net-next-2.6.git] / fs / reiserfs / stree.c
CommitLineData
1da177e4
LT
1/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5/*
6 * Written by Anatoly P. Pinchuk pap@namesys.botik.ru
7 * Programm System Institute
8 * Pereslavl-Zalessky Russia
9 */
10
11/*
12 * This file contains functions dealing with S+tree
13 *
14 * B_IS_IN_TREE
15 * copy_item_head
16 * comp_short_keys
17 * comp_keys
18 * comp_short_le_keys
19 * le_key2cpu_key
20 * comp_le_keys
21 * bin_search
22 * get_lkey
23 * get_rkey
24 * key_in_buffer
25 * decrement_bcount
1da177e4
LT
26 * reiserfs_check_path
27 * pathrelse_and_restore
28 * pathrelse
29 * search_by_key_reada
30 * search_by_key
31 * search_for_position_by_key
32 * comp_items
33 * prepare_for_direct_item
34 * prepare_for_direntry_item
35 * prepare_for_delete_or_cut
36 * calc_deleted_bytes_number
37 * init_tb_struct
38 * padd_item
39 * reiserfs_delete_item
40 * reiserfs_delete_solid_item
41 * reiserfs_delete_object
42 * maybe_indirect_to_direct
43 * indirect_to_direct_roll_back
44 * reiserfs_cut_from_item
45 * truncate_directory
46 * reiserfs_do_truncate
47 * reiserfs_paste_into_item
48 * reiserfs_insert_item
49 */
50
1da177e4
LT
51#include <linux/time.h>
52#include <linux/string.h>
53#include <linux/pagemap.h>
54#include <linux/reiserfs_fs.h>
1da177e4
LT
55#include <linux/buffer_head.h>
56#include <linux/quotaops.h>
57
58/* Does the buffer contain a disk block which is in the tree. */
bd4c625c 59inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh)
1da177e4
LT
60{
61
bd4c625c
LT
62 RFALSE(B_LEVEL(p_s_bh) > MAX_HEIGHT,
63 "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh);
1da177e4 64
bd4c625c 65 return (B_LEVEL(p_s_bh) != FREE_LEVEL);
1da177e4
LT
66}
67
68//
69// to gets item head in le form
70//
bd4c625c
LT
71inline void copy_item_head(struct item_head *p_v_to,
72 const struct item_head *p_v_from)
1da177e4 73{
bd4c625c 74 memcpy(p_v_to, p_v_from, IH_SIZE);
1da177e4
LT
75}
76
1da177e4
LT
77/* k1 is pointer to on-disk structure which is stored in little-endian
78 form. k2 is pointer to cpu variable. For key of items of the same
79 object this returns 0.
0222e657 80 Returns: -1 if key1 < key2
1da177e4
LT
81 0 if key1 == key2
82 1 if key1 > key2 */
bd4c625c
LT
83inline int comp_short_keys(const struct reiserfs_key *le_key,
84 const struct cpu_key *cpu_key)
1da177e4 85{
bd4c625c
LT
86 __u32 n;
87 n = le32_to_cpu(le_key->k_dir_id);
88 if (n < cpu_key->on_disk_key.k_dir_id)
89 return -1;
90 if (n > cpu_key->on_disk_key.k_dir_id)
91 return 1;
92 n = le32_to_cpu(le_key->k_objectid);
93 if (n < cpu_key->on_disk_key.k_objectid)
94 return -1;
95 if (n > cpu_key->on_disk_key.k_objectid)
96 return 1;
97 return 0;
1da177e4
LT
98}
99
1da177e4
LT
100/* k1 is pointer to on-disk structure which is stored in little-endian
101 form. k2 is pointer to cpu variable.
102 Compare keys using all 4 key fields.
103 Returns: -1 if key1 < key2 0
104 if key1 = key2 1 if key1 > key2 */
bd4c625c
LT
105static inline int comp_keys(const struct reiserfs_key *le_key,
106 const struct cpu_key *cpu_key)
1da177e4 107{
bd4c625c
LT
108 int retval;
109
110 retval = comp_short_keys(le_key, cpu_key);
111 if (retval)
112 return retval;
113 if (le_key_k_offset(le_key_version(le_key), le_key) <
114 cpu_key_k_offset(cpu_key))
115 return -1;
116 if (le_key_k_offset(le_key_version(le_key), le_key) >
117 cpu_key_k_offset(cpu_key))
118 return 1;
119
120 if (cpu_key->key_length == 3)
121 return 0;
122
123 /* this part is needed only when tail conversion is in progress */
124 if (le_key_k_type(le_key_version(le_key), le_key) <
125 cpu_key_k_type(cpu_key))
126 return -1;
127
128 if (le_key_k_type(le_key_version(le_key), le_key) >
129 cpu_key_k_type(cpu_key))
130 return 1;
1da177e4 131
bd4c625c 132 return 0;
1da177e4
LT
133}
134
bd4c625c
LT
135inline int comp_short_le_keys(const struct reiserfs_key *key1,
136 const struct reiserfs_key *key2)
1da177e4 137{
bd4c625c
LT
138 __u32 *p_s_1_u32, *p_s_2_u32;
139 int n_key_length = REISERFS_SHORT_KEY_LEN;
140
141 p_s_1_u32 = (__u32 *) key1;
142 p_s_2_u32 = (__u32 *) key2;
143 for (; n_key_length--; ++p_s_1_u32, ++p_s_2_u32) {
144 if (le32_to_cpu(*p_s_1_u32) < le32_to_cpu(*p_s_2_u32))
145 return -1;
146 if (le32_to_cpu(*p_s_1_u32) > le32_to_cpu(*p_s_2_u32))
147 return 1;
148 }
149 return 0;
1da177e4
LT
150}
151
bd4c625c 152inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
1da177e4 153{
bd4c625c
LT
154 int version;
155 to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
156 to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
157
158 // find out version of the key
159 version = le_key_version(from);
160 to->version = version;
161 to->on_disk_key.k_offset = le_key_k_offset(version, from);
162 to->on_disk_key.k_type = le_key_k_type(version, from);
1da177e4
LT
163}
164
1da177e4
LT
165// this does not say which one is bigger, it only returns 1 if keys
166// are not equal, 0 otherwise
bd4c625c
LT
167inline int comp_le_keys(const struct reiserfs_key *k1,
168 const struct reiserfs_key *k2)
1da177e4 169{
bd4c625c 170 return memcmp(k1, k2, sizeof(struct reiserfs_key));
1da177e4
LT
171}
172
173/**************************************************************************
174 * Binary search toolkit function *
175 * Search for an item in the array by the item key *
176 * Returns: 1 if found, 0 if not found; *
177 * *p_n_pos = number of the searched element if found, else the *
178 * number of the first element that is larger than p_v_key. *
179 **************************************************************************/
180/* For those not familiar with binary search: n_lbound is the leftmost item that it
181 could be, n_rbound the rightmost item that it could be. We examine the item
182 halfway between n_lbound and n_rbound, and that tells us either that we can increase
183 n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound <= n_rbound that
184 there are no possible items, and we have not found it. With each examination we
185 cut the number of possible items it could be by one more than half rounded down,
186 or we find it. */
bd4c625c
LT
187static inline int bin_search(const void *p_v_key, /* Key to search for. */
188 const void *p_v_base, /* First item in the array. */
189 int p_n_num, /* Number of items in the array. */
190 int p_n_width, /* Item size in the array.
191 searched. Lest the reader be
192 confused, note that this is crafted
193 as a general function, and when it
194 is applied specifically to the array
195 of item headers in a node, p_n_width
196 is actually the item header size not
197 the item size. */
198 int *p_n_pos /* Number of the searched for element. */
199 )
200{
201 int n_rbound, n_lbound, n_j;
202
203 for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2;
204 n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2)
205 switch (comp_keys
206 ((struct reiserfs_key *)((char *)p_v_base +
207 n_j * p_n_width),
208 (struct cpu_key *)p_v_key)) {
209 case -1:
210 n_lbound = n_j + 1;
211 continue;
212 case 1:
213 n_rbound = n_j - 1;
214 continue;
215 case 0:
216 *p_n_pos = n_j;
217 return ITEM_FOUND; /* Key found in the array. */
218 }
219
220 /* bin_search did not find given key, it returns position of key,
221 that is minimal and greater than the given one. */
222 *p_n_pos = n_lbound;
223 return ITEM_NOT_FOUND;
1da177e4
LT
224}
225
226#ifdef CONFIG_REISERFS_CHECK
bd4c625c 227extern struct tree_balance *cur_tb;
1da177e4
LT
228#endif
229
1da177e4 230/* Minimal possible key. It is never in the tree. */
bd4c625c 231const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
1da177e4
LT
232
233/* Maximal possible key. It is never in the tree. */
bd4c625c 234static const struct reiserfs_key MAX_KEY = {
3e8962be
AV
235 __constant_cpu_to_le32(0xffffffff),
236 __constant_cpu_to_le32(0xffffffff),
237 {{__constant_cpu_to_le32(0xffffffff),
bd4c625c 238 __constant_cpu_to_le32(0xffffffff)},}
3e8962be 239};
1da177e4 240
1da177e4
LT
241/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
242 of the path, and going upwards. We must check the path's validity at each step. If the key is not in
243 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
244 case we return a special key, either MIN_KEY or MAX_KEY. */
fec6d055 245static inline const struct reiserfs_key *get_lkey(const struct treepath
bd4c625c
LT
246 *p_s_chk_path,
247 const struct super_block
a9dd3643 248 *sb)
bd4c625c
LT
249{
250 int n_position, n_path_offset = p_s_chk_path->path_length;
251 struct buffer_head *p_s_parent;
252
253 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
254 "PAP-5010: invalid offset in the path");
255
256 /* While not higher in path than first element. */
257 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
258
259 RFALSE(!buffer_uptodate
260 (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
261 "PAP-5020: parent is not uptodate");
262
263 /* Parent at the path is not in the tree now. */
264 if (!B_IS_IN_TREE
265 (p_s_parent =
266 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
267 return &MAX_KEY;
268 /* Check whether position in the parent is correct. */
269 if ((n_position =
270 PATH_OFFSET_POSITION(p_s_chk_path,
271 n_path_offset)) >
272 B_NR_ITEMS(p_s_parent))
273 return &MAX_KEY;
274 /* Check whether parent at the path really points to the child. */
275 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
276 PATH_OFFSET_PBUFFER(p_s_chk_path,
277 n_path_offset + 1)->b_blocknr)
278 return &MAX_KEY;
279 /* Return delimiting key if position in the parent is not equal to zero. */
280 if (n_position)
281 return B_N_PDELIM_KEY(p_s_parent, n_position - 1);
282 }
283 /* Return MIN_KEY if we are in the root of the buffer tree. */
284 if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
a9dd3643 285 b_blocknr == SB_ROOT_BLOCK(sb))
bd4c625c
LT
286 return &MIN_KEY;
287 return &MAX_KEY;
1da177e4
LT
288}
289
1da177e4 290/* Get delimiting key of the buffer at the path and its right neighbor. */
fec6d055 291inline const struct reiserfs_key *get_rkey(const struct treepath *p_s_chk_path,
a9dd3643 292 const struct super_block *sb)
bd4c625c
LT
293{
294 int n_position, n_path_offset = p_s_chk_path->path_length;
295 struct buffer_head *p_s_parent;
296
297 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
298 "PAP-5030: invalid offset in the path");
299
300 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
301
302 RFALSE(!buffer_uptodate
303 (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
304 "PAP-5040: parent is not uptodate");
305
306 /* Parent at the path is not in the tree now. */
307 if (!B_IS_IN_TREE
308 (p_s_parent =
309 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
310 return &MIN_KEY;
311 /* Check whether position in the parent is correct. */
312 if ((n_position =
313 PATH_OFFSET_POSITION(p_s_chk_path,
314 n_path_offset)) >
315 B_NR_ITEMS(p_s_parent))
316 return &MIN_KEY;
317 /* Check whether parent at the path really points to the child. */
318 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
319 PATH_OFFSET_PBUFFER(p_s_chk_path,
320 n_path_offset + 1)->b_blocknr)
321 return &MIN_KEY;
322 /* Return delimiting key if position in the parent is not the last one. */
323 if (n_position != B_NR_ITEMS(p_s_parent))
324 return B_N_PDELIM_KEY(p_s_parent, n_position);
325 }
326 /* Return MAX_KEY if we are in the root of the buffer tree. */
327 if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
a9dd3643 328 b_blocknr == SB_ROOT_BLOCK(sb))
bd4c625c
LT
329 return &MAX_KEY;
330 return &MIN_KEY;
1da177e4
LT
331}
332
1da177e4
LT
333/* Check whether a key is contained in the tree rooted from a buffer at a path. */
334/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
335 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the
336 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
337 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
fec6d055 338static inline int key_in_buffer(struct treepath *p_s_chk_path, /* Path which should be checked. */
bd4c625c 339 const struct cpu_key *p_s_key, /* Key which should be checked. */
a9dd3643 340 struct super_block *sb /* Super block pointer. */
bd4c625c
LT
341 )
342{
1da177e4 343
bd4c625c
LT
344 RFALSE(!p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
345 || p_s_chk_path->path_length > MAX_HEIGHT,
346 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
347 p_s_key, p_s_chk_path->path_length);
348 RFALSE(!PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
349 "PAP-5060: device must not be NODEV");
350
a9dd3643 351 if (comp_keys(get_lkey(p_s_chk_path, sb), p_s_key) == 1)
bd4c625c
LT
352 /* left delimiting key is bigger, that the key we look for */
353 return 0;
a9dd3643
JM
354 // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, sb)) != -1 )
355 if (comp_keys(get_rkey(p_s_chk_path, sb), p_s_key) != 1)
bd4c625c
LT
356 /* p_s_key must be less than right delimitiing key */
357 return 0;
358 return 1;
1da177e4
LT
359}
360
fec6d055 361int reiserfs_check_path(struct treepath *p)
bd4c625c
LT
362{
363 RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
364 "path not properly relsed");
365 return 0;
366}
1da177e4 367
3cd6dbe6
JM
368/* Drop the reference to each buffer in a path and restore
369 * dirty bits clean when preparing the buffer for the log.
370 * This version should only be called from fix_nodes() */
371void pathrelse_and_restore(struct super_block *sb,
372 struct treepath *p_s_search_path)
bd4c625c
LT
373{
374 int n_path_offset = p_s_search_path->path_length;
375
376 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
377 "clm-4000: invalid path offset");
378
379 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
3cd6dbe6
JM
380 struct buffer_head *bh;
381 bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
382 reiserfs_restore_prepared_buffer(sb, bh);
383 brelse(bh);
bd4c625c
LT
384 }
385 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
1da177e4
LT
386}
387
3cd6dbe6 388/* Drop the reference to each buffer in a path */
fec6d055 389void pathrelse(struct treepath *p_s_search_path)
bd4c625c
LT
390{
391 int n_path_offset = p_s_search_path->path_length;
1da177e4 392
bd4c625c
LT
393 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
394 "PAP-5090: invalid path offset");
1da177e4 395
bd4c625c
LT
396 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
397 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
1da177e4 398
bd4c625c
LT
399 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
400}
1da177e4 401
bd4c625c
LT
402static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
403{
404 struct block_head *blkh;
405 struct item_head *ih;
406 int used_space;
407 int prev_location;
408 int i;
409 int nr;
410
411 blkh = (struct block_head *)buf;
412 if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
45b03d5e
JM
413 reiserfs_warning(NULL, "reiserfs-5080",
414 "this should be caught earlier");
bd4c625c 415 return 0;
1da177e4 416 }
bd4c625c
LT
417
418 nr = blkh_nr_item(blkh);
419 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
420 /* item number is too big or too small */
45b03d5e
JM
421 reiserfs_warning(NULL, "reiserfs-5081",
422 "nr_item seems wrong: %z", bh);
bd4c625c 423 return 0;
1da177e4 424 }
bd4c625c
LT
425 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
426 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
427 if (used_space != blocksize - blkh_free_space(blkh)) {
428 /* free space does not match to calculated amount of use space */
45b03d5e
JM
429 reiserfs_warning(NULL, "reiserfs-5082",
430 "free space seems wrong: %z", bh);
bd4c625c 431 return 0;
1da177e4 432 }
bd4c625c
LT
433 // FIXME: it is_leaf will hit performance too much - we may have
434 // return 1 here
435
436 /* check tables of item heads */
437 ih = (struct item_head *)(buf + BLKH_SIZE);
438 prev_location = blocksize;
439 for (i = 0; i < nr; i++, ih++) {
440 if (le_ih_k_type(ih) == TYPE_ANY) {
45b03d5e
JM
441 reiserfs_warning(NULL, "reiserfs-5083",
442 "wrong item type for item %h",
bd4c625c
LT
443 ih);
444 return 0;
445 }
446 if (ih_location(ih) >= blocksize
447 || ih_location(ih) < IH_SIZE * nr) {
45b03d5e
JM
448 reiserfs_warning(NULL, "reiserfs-5084",
449 "item location seems wrong: %h",
bd4c625c
LT
450 ih);
451 return 0;
452 }
453 if (ih_item_len(ih) < 1
454 || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
45b03d5e
JM
455 reiserfs_warning(NULL, "reiserfs-5085",
456 "item length seems wrong: %h",
bd4c625c
LT
457 ih);
458 return 0;
459 }
460 if (prev_location - ih_location(ih) != ih_item_len(ih)) {
45b03d5e
JM
461 reiserfs_warning(NULL, "reiserfs-5086",
462 "item location seems wrong "
463 "(second one): %h", ih);
bd4c625c
LT
464 return 0;
465 }
466 prev_location = ih_location(ih);
1da177e4 467 }
1da177e4 468
bd4c625c
LT
469 // one may imagine much more checks
470 return 1;
1da177e4
LT
471}
472
1da177e4 473/* returns 1 if buf looks like an internal node, 0 otherwise */
bd4c625c 474static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
1da177e4 475{
bd4c625c
LT
476 struct block_head *blkh;
477 int nr;
478 int used_space;
479
480 blkh = (struct block_head *)buf;
481 nr = blkh_level(blkh);
482 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
483 /* this level is not possible for internal nodes */
45b03d5e
JM
484 reiserfs_warning(NULL, "reiserfs-5087",
485 "this should be caught earlier");
bd4c625c
LT
486 return 0;
487 }
1da177e4 488
bd4c625c
LT
489 nr = blkh_nr_item(blkh);
490 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
491 /* for internal which is not root we might check min number of keys */
45b03d5e
JM
492 reiserfs_warning(NULL, "reiserfs-5088",
493 "number of key seems wrong: %z", bh);
bd4c625c
LT
494 return 0;
495 }
1da177e4 496
bd4c625c
LT
497 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
498 if (used_space != blocksize - blkh_free_space(blkh)) {
45b03d5e
JM
499 reiserfs_warning(NULL, "reiserfs-5089",
500 "free space seems wrong: %z", bh);
bd4c625c
LT
501 return 0;
502 }
503 // one may imagine much more checks
504 return 1;
1da177e4
LT
505}
506
1da177e4
LT
507// make sure that bh contains formatted node of reiserfs tree of
508// 'level'-th level
bd4c625c 509static int is_tree_node(struct buffer_head *bh, int level)
1da177e4 510{
bd4c625c 511 if (B_LEVEL(bh) != level) {
45b03d5e
JM
512 reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
513 "not match to the expected one %d",
bd4c625c
LT
514 B_LEVEL(bh), level);
515 return 0;
516 }
517 if (level == DISK_LEAF_NODE_LEVEL)
518 return is_leaf(bh->b_data, bh->b_size, bh);
1da177e4 519
bd4c625c 520 return is_internal(bh->b_data, bh->b_size, bh);
1da177e4
LT
521}
522
1da177e4
LT
523#define SEARCH_BY_KEY_READA 16
524
525/* The function is NOT SCHEDULE-SAFE! */
bd4c625c
LT
526static void search_by_key_reada(struct super_block *s,
527 struct buffer_head **bh,
3ee16670 528 b_blocknr_t *b, int num)
1da177e4 529{
bd4c625c
LT
530 int i, j;
531
532 for (i = 0; i < num; i++) {
533 bh[i] = sb_getblk(s, b[i]);
534 }
535 for (j = 0; j < i; j++) {
536 /*
537 * note, this needs attention if we are getting rid of the BKL
538 * you have to make sure the prepared bit isn't set on this buffer
539 */
540 if (!buffer_uptodate(bh[j]))
541 ll_rw_block(READA, 1, bh + j);
542 brelse(bh[j]);
543 }
1da177e4
LT
544}
545
546/**************************************************************************
547 * Algorithm SearchByKey *
548 * look for item in the Disk S+Tree by its key *
a9dd3643 549 * Input: sb - super block *
1da177e4
LT
550 * p_s_key - pointer to the key to search *
551 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR *
552 * p_s_search_path - path from the root to the needed leaf *
553 **************************************************************************/
554
555/* This function fills up the path from the root to the leaf as it
556 descends the tree looking for the key. It uses reiserfs_bread to
557 try to find buffers in the cache given their block number. If it
558 does not find them in the cache it reads them from disk. For each
559 node search_by_key finds using reiserfs_bread it then uses
560 bin_search to look through that node. bin_search will find the
561 position of the block_number of the next node if it is looking
562 through an internal node. If it is looking through a leaf node
563 bin_search will find the position of the item which has key either
564 equal to given key, or which is the maximal key less than the given
565 key. search_by_key returns a path that must be checked for the
566 correctness of the top of the path but need not be checked for the
567 correctness of the bottom of the path */
568/* The function is NOT SCHEDULE-SAFE! */
a9dd3643 569int search_by_key(struct super_block *sb, const struct cpu_key *p_s_key, /* Key to search. */
fec6d055 570 struct treepath *p_s_search_path,/* This structure was
bd4c625c
LT
571 allocated and initialized
572 by the calling
573 function. It is filled up
574 by this function. */
575 int n_stop_level /* How far down the tree to search. To
576 stop at leaf level - set to
577 DISK_LEAF_NODE_LEVEL */
578 )
579{
3ee16670 580 b_blocknr_t n_block_number;
bd4c625c
LT
581 int expected_level;
582 struct buffer_head *p_s_bh;
583 struct path_element *p_s_last_element;
584 int n_node_level, n_retval;
585 int right_neighbor_of_leaf_node;
586 int fs_gen;
587 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
3ee16670 588 b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
bd4c625c 589 int reada_count = 0;
1da177e4
LT
590
591#ifdef CONFIG_REISERFS_CHECK
bd4c625c 592 int n_repeat_counter = 0;
1da177e4 593#endif
1da177e4 594
a9dd3643 595 PROC_INFO_INC(sb, search_by_key);
bd4c625c
LT
596
597 /* As we add each node to a path we increase its count. This means that
598 we must be careful to release all nodes in a path before we either
599 discard the path struct or re-use the path struct, as we do here. */
1da177e4 600
3cd6dbe6 601 pathrelse(p_s_search_path);
1da177e4 602
bd4c625c
LT
603 right_neighbor_of_leaf_node = 0;
604
605 /* With each iteration of this loop we search through the items in the
606 current node, and calculate the next current node(next path element)
607 for the next iteration of this loop.. */
a9dd3643 608 n_block_number = SB_ROOT_BLOCK(sb);
bd4c625c
LT
609 expected_level = -1;
610 while (1) {
1da177e4
LT
611
612#ifdef CONFIG_REISERFS_CHECK
bd4c625c 613 if (!(++n_repeat_counter % 50000))
a9dd3643 614 reiserfs_warning(sb, "PAP-5100",
45b03d5e
JM
615 "%s: there were %d iterations of "
616 "while loop looking for key %K",
bd4c625c
LT
617 current->comm, n_repeat_counter,
618 p_s_key);
1da177e4
LT
619#endif
620
bd4c625c
LT
621 /* prep path to have another element added to it. */
622 p_s_last_element =
623 PATH_OFFSET_PELEMENT(p_s_search_path,
624 ++p_s_search_path->path_length);
a9dd3643 625 fs_gen = get_generation(sb);
bd4c625c
LT
626
627 /* Read the next tree node, and set the last element in the path to
628 have a pointer to it. */
629 if ((p_s_bh = p_s_last_element->pe_buffer =
a9dd3643 630 sb_getblk(sb, n_block_number))) {
bd4c625c 631 if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
a9dd3643 632 search_by_key_reada(sb, reada_bh,
bd4c625c
LT
633 reada_blocks, reada_count);
634 }
635 ll_rw_block(READ, 1, &p_s_bh);
636 wait_on_buffer(p_s_bh);
637 if (!buffer_uptodate(p_s_bh))
638 goto io_error;
639 } else {
640 io_error:
641 p_s_search_path->path_length--;
642 pathrelse(p_s_search_path);
643 return IO_ERROR;
644 }
645 reada_count = 0;
646 if (expected_level == -1)
a9dd3643 647 expected_level = SB_TREE_HEIGHT(sb);
bd4c625c
LT
648 expected_level--;
649
650 /* It is possible that schedule occurred. We must check whether the key
651 to search is still in the tree rooted from the current buffer. If
652 not then repeat search from the root. */
a9dd3643 653 if (fs_changed(fs_gen, sb) &&
bd4c625c
LT
654 (!B_IS_IN_TREE(p_s_bh) ||
655 B_LEVEL(p_s_bh) != expected_level ||
a9dd3643
JM
656 !key_in_buffer(p_s_search_path, p_s_key, sb))) {
657 PROC_INFO_INC(sb, search_by_key_fs_changed);
658 PROC_INFO_INC(sb, search_by_key_restarted);
659 PROC_INFO_INC(sb,
bd4c625c 660 sbk_restarted[expected_level - 1]);
3cd6dbe6 661 pathrelse(p_s_search_path);
bd4c625c
LT
662
663 /* Get the root block number so that we can repeat the search
664 starting from the root. */
a9dd3643 665 n_block_number = SB_ROOT_BLOCK(sb);
bd4c625c
LT
666 expected_level = -1;
667 right_neighbor_of_leaf_node = 0;
668
669 /* repeat search from the root */
670 continue;
671 }
1da177e4 672
bd4c625c
LT
673 /* only check that the key is in the buffer if p_s_key is not
674 equal to the MAX_KEY. Latter case is only possible in
675 "finish_unfinished()" processing during mount. */
676 RFALSE(comp_keys(&MAX_KEY, p_s_key) &&
a9dd3643 677 !key_in_buffer(p_s_search_path, p_s_key, sb),
bd4c625c 678 "PAP-5130: key is not in the buffer");
1da177e4 679#ifdef CONFIG_REISERFS_CHECK
bd4c625c
LT
680 if (cur_tb) {
681 print_cur_tb("5140");
a9dd3643 682 reiserfs_panic(sb, "PAP-5140",
c3a9c210 683 "schedule occurred in do_balance!");
bd4c625c 684 }
1da177e4
LT
685#endif
686
bd4c625c
LT
687 // make sure, that the node contents look like a node of
688 // certain level
689 if (!is_tree_node(p_s_bh, expected_level)) {
a9dd3643 690 reiserfs_error(sb, "vs-5150",
0030b645
JM
691 "invalid format found in block %ld. "
692 "Fsck?", p_s_bh->b_blocknr);
bd4c625c
LT
693 pathrelse(p_s_search_path);
694 return IO_ERROR;
695 }
1da177e4 696
bd4c625c
LT
697 /* ok, we have acquired next formatted node in the tree */
698 n_node_level = B_LEVEL(p_s_bh);
1da177e4 699
a9dd3643 700 PROC_INFO_BH_STAT(sb, p_s_bh, n_node_level - 1);
1da177e4 701
bd4c625c
LT
702 RFALSE(n_node_level < n_stop_level,
703 "vs-5152: tree level (%d) is less than stop level (%d)",
704 n_node_level, n_stop_level);
1da177e4 705
bd4c625c
LT
706 n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
707 B_NR_ITEMS(p_s_bh),
708 (n_node_level ==
709 DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
710 KEY_SIZE,
711 &(p_s_last_element->pe_position));
712 if (n_node_level == n_stop_level) {
713 return n_retval;
714 }
1da177e4 715
bd4c625c
LT
716 /* we are not in the stop level */
717 if (n_retval == ITEM_FOUND)
718 /* item has been found, so we choose the pointer which is to the right of the found one */
719 p_s_last_element->pe_position++;
720
721 /* if item was not found we choose the position which is to
722 the left of the found item. This requires no code,
723 bin_search did it already. */
724
725 /* So we have chosen a position in the current node which is
726 an internal node. Now we calculate child block number by
727 position in the node. */
728 n_block_number =
729 B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
730
731 /* if we are going to read leaf nodes, try for read ahead as well */
732 if ((p_s_search_path->reada & PATH_READA) &&
733 n_node_level == DISK_LEAF_NODE_LEVEL + 1) {
734 int pos = p_s_last_element->pe_position;
735 int limit = B_NR_ITEMS(p_s_bh);
736 struct reiserfs_key *le_key;
737
738 if (p_s_search_path->reada & PATH_READA_BACK)
739 limit = 0;
740 while (reada_count < SEARCH_BY_KEY_READA) {
741 if (pos == limit)
742 break;
743 reada_blocks[reada_count++] =
744 B_N_CHILD_NUM(p_s_bh, pos);
745 if (p_s_search_path->reada & PATH_READA_BACK)
746 pos--;
747 else
748 pos++;
749
750 /*
751 * check to make sure we're in the same object
752 */
753 le_key = B_N_PDELIM_KEY(p_s_bh, pos);
754 if (le32_to_cpu(le_key->k_objectid) !=
755 p_s_key->on_disk_key.k_objectid) {
756 break;
757 }
758 }
1da177e4 759 }
bd4c625c 760 }
1da177e4
LT
761}
762
1da177e4
LT
763/* Form the path to an item and position in this item which contains
764 file byte defined by p_s_key. If there is no such item
765 corresponding to the key, we point the path to the item with
766 maximal key less than p_s_key, and *p_n_pos_in_item is set to one
767 past the last entry/byte in the item. If searching for entry in a
768 directory item, and it is not found, *p_n_pos_in_item is set to one
769 entry more than the entry with maximal key which is less than the
770 sought key.
771
772 Note that if there is no entry in this same node which is one more,
773 then we point to an imaginary entry. for direct items, the
774 position is in units of bytes, for indirect items the position is
775 in units of blocknr entries, for directory items the position is in
776 units of directory entries. */
777
778/* The function is NOT SCHEDULE-SAFE! */
a9dd3643 779int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */
bd4c625c 780 const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */
fec6d055 781 struct treepath *p_s_search_path /* Filled up by this function. */
bd4c625c
LT
782 )
783{
784 struct item_head *p_le_ih; /* pointer to on-disk structure */
785 int n_blk_size;
786 loff_t item_offset, offset;
787 struct reiserfs_dir_entry de;
788 int retval;
789
790 /* If searching for directory entry. */
791 if (is_direntry_cpu_key(p_cpu_key))
a9dd3643 792 return search_by_entry_key(sb, p_cpu_key, p_s_search_path,
bd4c625c
LT
793 &de);
794
795 /* If not searching for directory entry. */
796
797 /* If item is found. */
a9dd3643 798 retval = search_item(sb, p_cpu_key, p_s_search_path);
bd4c625c
LT
799 if (retval == IO_ERROR)
800 return retval;
801 if (retval == ITEM_FOUND) {
1da177e4 802
bd4c625c
LT
803 RFALSE(!ih_item_len
804 (B_N_PITEM_HEAD
805 (PATH_PLAST_BUFFER(p_s_search_path),
806 PATH_LAST_POSITION(p_s_search_path))),
807 "PAP-5165: item length equals zero");
1da177e4 808
bd4c625c
LT
809 pos_in_item(p_s_search_path) = 0;
810 return POSITION_FOUND;
811 }
1da177e4 812
bd4c625c
LT
813 RFALSE(!PATH_LAST_POSITION(p_s_search_path),
814 "PAP-5170: position equals zero");
1da177e4 815
bd4c625c
LT
816 /* Item is not found. Set path to the previous item. */
817 p_le_ih =
818 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
819 --PATH_LAST_POSITION(p_s_search_path));
a9dd3643 820 n_blk_size = sb->s_blocksize;
1da177e4 821
bd4c625c
LT
822 if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
823 return FILE_NOT_FOUND;
824 }
825 // FIXME: quite ugly this far
1da177e4 826
bd4c625c
LT
827 item_offset = le_ih_k_offset(p_le_ih);
828 offset = cpu_key_k_offset(p_cpu_key);
1da177e4 829
bd4c625c
LT
830 /* Needed byte is contained in the item pointed to by the path. */
831 if (item_offset <= offset &&
832 item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) {
833 pos_in_item(p_s_search_path) = offset - item_offset;
834 if (is_indirect_le_ih(p_le_ih)) {
835 pos_in_item(p_s_search_path) /= n_blk_size;
836 }
837 return POSITION_FOUND;
1da177e4 838 }
1da177e4 839
bd4c625c
LT
840 /* Needed byte is not contained in the item pointed to by the
841 path. Set pos_in_item out of the item. */
842 if (is_indirect_le_ih(p_le_ih))
843 pos_in_item(p_s_search_path) =
844 ih_item_len(p_le_ih) / UNFM_P_SIZE;
845 else
846 pos_in_item(p_s_search_path) = ih_item_len(p_le_ih);
847
848 return POSITION_NOT_FOUND;
849}
1da177e4
LT
850
851/* Compare given item and item pointed to by the path. */
fec6d055 852int comp_items(const struct item_head *stored_ih, const struct treepath *p_s_path)
1da177e4 853{
bd4c625c
LT
854 struct buffer_head *p_s_bh;
855 struct item_head *ih;
1da177e4 856
bd4c625c
LT
857 /* Last buffer at the path is not in the tree. */
858 if (!B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)))
859 return 1;
1da177e4 860
bd4c625c
LT
861 /* Last path position is invalid. */
862 if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh))
863 return 1;
1da177e4 864
bd4c625c
LT
865 /* we need only to know, whether it is the same item */
866 ih = get_ih(p_s_path);
867 return memcmp(stored_ih, ih, IH_SIZE);
1da177e4
LT
868}
869
1da177e4
LT
870/* unformatted nodes are not logged anymore, ever. This is safe
871** now
872*/
873#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
874
875// block can not be forgotten as it is in I/O or held by someone
876#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
877
1da177e4 878// prepare for delete or cut of direct item
fec6d055 879static inline int prepare_for_direct_item(struct treepath *path,
bd4c625c
LT
880 struct item_head *le_ih,
881 struct inode *inode,
882 loff_t new_file_length, int *cut_size)
1da177e4 883{
bd4c625c
LT
884 loff_t round_len;
885
886 if (new_file_length == max_reiserfs_offset(inode)) {
887 /* item has to be deleted */
888 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
889 return M_DELETE;
890 }
891 // new file gets truncated
892 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
0222e657 893 //
bd4c625c
LT
894 round_len = ROUND_UP(new_file_length);
895 /* this was n_new_file_length < le_ih ... */
896 if (round_len < le_ih_k_offset(le_ih)) {
897 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
898 return M_DELETE; /* Delete this item. */
899 }
900 /* Calculate first position and size for cutting from item. */
901 pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
902 *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
903
904 return M_CUT; /* Cut from this item. */
905 }
906
907 // old file: items may have any length
908
909 if (new_file_length < le_ih_k_offset(le_ih)) {
910 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
911 return M_DELETE; /* Delete this item. */
1da177e4
LT
912 }
913 /* Calculate first position and size for cutting from item. */
bd4c625c
LT
914 *cut_size = -(ih_item_len(le_ih) -
915 (pos_in_item(path) =
916 new_file_length + 1 - le_ih_k_offset(le_ih)));
917 return M_CUT; /* Cut from this item. */
1da177e4
LT
918}
919
fec6d055 920static inline int prepare_for_direntry_item(struct treepath *path,
bd4c625c
LT
921 struct item_head *le_ih,
922 struct inode *inode,
923 loff_t new_file_length,
924 int *cut_size)
1da177e4 925{
bd4c625c
LT
926 if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
927 new_file_length == max_reiserfs_offset(inode)) {
928 RFALSE(ih_entry_count(le_ih) != 2,
929 "PAP-5220: incorrect empty directory item (%h)", le_ih);
930 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
931 return M_DELETE; /* Delete the directory item containing "." and ".." entry. */
932 }
1da177e4 933
bd4c625c
LT
934 if (ih_entry_count(le_ih) == 1) {
935 /* Delete the directory item such as there is one record only
936 in this item */
937 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
938 return M_DELETE;
939 }
940
941 /* Cut one record from the directory item. */
942 *cut_size =
943 -(DEH_SIZE +
944 entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
945 return M_CUT;
946}
1da177e4 947
23f9e0f8
AZ
948#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
949
1da177e4
LT
950/* If the path points to a directory or direct item, calculate mode and the size cut, for balance.
951 If the path points to an indirect item, remove some number of its unformatted nodes.
952 In case of file truncate calculate whether this item must be deleted/truncated or last
953 unformatted node of this item will be converted to a direct item.
954 This function returns a determination of what balance mode the calling function should employ. */
fec6d055 955static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed, /* Number of unformatted nodes which were removed
bd4c625c
LT
956 from end of the file. */
957 int *p_n_cut_size, unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */
958 )
959{
a9dd3643 960 struct super_block *sb = inode->i_sb;
bd4c625c
LT
961 struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path);
962 struct buffer_head *p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1da177e4 963
bd4c625c 964 BUG_ON(!th->t_trans_id);
1da177e4 965
bd4c625c
LT
966 /* Stat_data item. */
967 if (is_statdata_le_ih(p_le_ih)) {
1da177e4 968
bd4c625c
LT
969 RFALSE(n_new_file_length != max_reiserfs_offset(inode),
970 "PAP-5210: mode must be M_DELETE");
1da177e4 971
bd4c625c
LT
972 *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
973 return M_DELETE;
974 }
1da177e4 975
bd4c625c
LT
976 /* Directory item. */
977 if (is_direntry_le_ih(p_le_ih))
978 return prepare_for_direntry_item(p_s_path, p_le_ih, inode,
979 n_new_file_length,
980 p_n_cut_size);
1da177e4 981
bd4c625c
LT
982 /* Direct item. */
983 if (is_direct_le_ih(p_le_ih))
984 return prepare_for_direct_item(p_s_path, p_le_ih, inode,
985 n_new_file_length, p_n_cut_size);
986
987 /* Case of an indirect item. */
988 {
a9dd3643 989 int blk_size = sb->s_blocksize;
23f9e0f8
AZ
990 struct item_head s_ih;
991 int need_re_search;
992 int delete = 0;
993 int result = M_CUT;
994 int pos = 0;
995
996 if ( n_new_file_length == max_reiserfs_offset (inode) ) {
997 /* prepare_for_delete_or_cut() is called by
998 * reiserfs_delete_item() */
999 n_new_file_length = 0;
1000 delete = 1;
1001 }
1002
1003 do {
1004 need_re_search = 0;
1005 *p_n_cut_size = 0;
1006 p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1007 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1008 pos = I_UNFM_NUM(&s_ih);
bd4c625c 1009
23f9e0f8 1010 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) {
87588dd6
AV
1011 __le32 *unfm;
1012 __u32 block;
bd4c625c 1013
23f9e0f8
AZ
1014 /* Each unformatted block deletion may involve one additional
1015 * bitmap block into the transaction, thereby the initial
1016 * journal space reservation might not be enough. */
1017 if (!delete && (*p_n_cut_size) != 0 &&
1018 reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1019 break;
1020 }
bd4c625c 1021
87588dd6 1022 unfm = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + pos - 1;
23f9e0f8 1023 block = get_block_num(unfm, 0);
bd4c625c 1024
23f9e0f8 1025 if (block != 0) {
a9dd3643 1026 reiserfs_prepare_for_journal(sb, p_s_bh, 1);
23f9e0f8 1027 put_block_num(unfm, 0, 0);
a9dd3643 1028 journal_mark_dirty (th, sb, p_s_bh);
23f9e0f8
AZ
1029 reiserfs_free_block(th, inode, block, 1);
1030 }
bd4c625c 1031
23f9e0f8 1032 cond_resched();
bd4c625c 1033
23f9e0f8
AZ
1034 if (item_moved (&s_ih, p_s_path)) {
1035 need_re_search = 1;
1036 break;
1037 }
1038
1039 pos --;
1040 (*p_n_removed) ++;
1041 (*p_n_cut_size) -= UNFM_P_SIZE;
1042
1043 if (pos == 0) {
1044 (*p_n_cut_size) -= IH_SIZE;
1045 result = M_DELETE;
1046 break;
1047 }
1048 }
1049 /* a trick. If the buffer has been logged, this will do nothing. If
1050 ** we've broken the loop without logging it, it will restore the
1051 ** buffer */
a9dd3643 1052 reiserfs_restore_prepared_buffer(sb, p_s_bh);
23f9e0f8 1053 } while (need_re_search &&
a9dd3643 1054 search_for_position_by_key(sb, p_s_item_key, p_s_path) == POSITION_FOUND);
23f9e0f8
AZ
1055 pos_in_item(p_s_path) = pos * UNFM_P_SIZE;
1056
1057 if (*p_n_cut_size == 0) {
1058 /* Nothing were cut. maybe convert last unformatted node to the
1059 * direct item? */
1060 result = M_CONVERT;
1061 }
1062 return result;
bd4c625c 1063 }
1da177e4
LT
1064}
1065
1066/* Calculate number of bytes which will be deleted or cut during balance */
bd4c625c
LT
1067static int calc_deleted_bytes_number(struct tree_balance *p_s_tb, char c_mode)
1068{
1069 int n_del_size;
1070 struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path);
1071
1072 if (is_statdata_le_ih(p_le_ih))
1073 return 0;
1074
1075 n_del_size =
1076 (c_mode ==
1077 M_DELETE) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
1078 if (is_direntry_le_ih(p_le_ih)) {
1079 // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */
1080 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1081 // empty size. ick. FIXME, is this right?
1082 //
1083 return n_del_size;
1084 }
1da177e4 1085
bd4c625c
LT
1086 if (is_indirect_le_ih(p_le_ih))
1087 n_del_size = (n_del_size / UNFM_P_SIZE) * (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size); // - get_ih_free_space (p_le_ih);
1088 return n_del_size;
1da177e4
LT
1089}
1090
bd4c625c
LT
1091static void init_tb_struct(struct reiserfs_transaction_handle *th,
1092 struct tree_balance *p_s_tb,
a9dd3643 1093 struct super_block *sb,
fec6d055 1094 struct treepath *p_s_path, int n_size)
bd4c625c 1095{
1da177e4 1096
bd4c625c 1097 BUG_ON(!th->t_trans_id);
1da177e4 1098
bd4c625c
LT
1099 memset(p_s_tb, '\0', sizeof(struct tree_balance));
1100 p_s_tb->transaction_handle = th;
a9dd3643 1101 p_s_tb->tb_sb = sb;
bd4c625c
LT
1102 p_s_tb->tb_path = p_s_path;
1103 PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1104 PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1105 p_s_tb->insert_size[0] = n_size;
1106}
1da177e4 1107
bd4c625c 1108void padd_item(char *item, int total_length, int length)
1da177e4 1109{
bd4c625c 1110 int i;
1da177e4 1111
bd4c625c
LT
1112 for (i = total_length; i > length;)
1113 item[--i] = 0;
1da177e4
LT
1114}
1115
1116#ifdef REISERQUOTA_DEBUG
1117char key2type(struct reiserfs_key *ih)
1118{
bd4c625c
LT
1119 if (is_direntry_le_key(2, ih))
1120 return 'd';
1121 if (is_direct_le_key(2, ih))
1122 return 'D';
1123 if (is_indirect_le_key(2, ih))
1124 return 'i';
1125 if (is_statdata_le_key(2, ih))
1126 return 's';
1127 return 'u';
1da177e4
LT
1128}
1129
1130char head2type(struct item_head *ih)
1131{
bd4c625c
LT
1132 if (is_direntry_le_ih(ih))
1133 return 'd';
1134 if (is_direct_le_ih(ih))
1135 return 'D';
1136 if (is_indirect_le_ih(ih))
1137 return 'i';
1138 if (is_statdata_le_ih(ih))
1139 return 's';
1140 return 'u';
1da177e4
LT
1141}
1142#endif
1143
1144/* Delete object item. */
fec6d055 1145int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_path, /* Path to the deleted item. */
bd4c625c
LT
1146 const struct cpu_key *p_s_item_key, /* Key to search for the deleted item. */
1147 struct inode *p_s_inode, /* inode is here just to update i_blocks and quotas */
1148 struct buffer_head *p_s_un_bh)
1149{ /* NULL or unformatted node pointer. */
a9dd3643 1150 struct super_block *sb = p_s_inode->i_sb;
bd4c625c
LT
1151 struct tree_balance s_del_balance;
1152 struct item_head s_ih;
1153 struct item_head *q_ih;
1154 int quota_cut_bytes;
1155 int n_ret_value, n_del_size, n_removed;
1da177e4
LT
1156
1157#ifdef CONFIG_REISERFS_CHECK
bd4c625c
LT
1158 char c_mode;
1159 int n_iter = 0;
1da177e4
LT
1160#endif
1161
bd4c625c 1162 BUG_ON(!th->t_trans_id);
1da177e4 1163
a9dd3643 1164 init_tb_struct(th, &s_del_balance, sb, p_s_path,
bd4c625c 1165 0 /*size is unknown */ );
1da177e4 1166
bd4c625c
LT
1167 while (1) {
1168 n_removed = 0;
1da177e4
LT
1169
1170#ifdef CONFIG_REISERFS_CHECK
bd4c625c
LT
1171 n_iter++;
1172 c_mode =
1da177e4 1173#endif
bd4c625c
LT
1174 prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1175 p_s_item_key, &n_removed,
1176 &n_del_size,
1177 max_reiserfs_offset(p_s_inode));
1178
1179 RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1180
1181 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1182 s_del_balance.insert_size[0] = n_del_size;
1183
1184 n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1185 if (n_ret_value != REPEAT_SEARCH)
1186 break;
1187
a9dd3643 1188 PROC_INFO_INC(sb, delete_item_restarted);
bd4c625c
LT
1189
1190 // file system changed, repeat search
1191 n_ret_value =
a9dd3643 1192 search_for_position_by_key(sb, p_s_item_key, p_s_path);
bd4c625c
LT
1193 if (n_ret_value == IO_ERROR)
1194 break;
1195 if (n_ret_value == FILE_NOT_FOUND) {
a9dd3643 1196 reiserfs_warning(sb, "vs-5340",
bd4c625c
LT
1197 "no items of the file %K found",
1198 p_s_item_key);
1199 break;
1200 }
1201 } /* while (1) */
1da177e4 1202
bd4c625c
LT
1203 if (n_ret_value != CARRY_ON) {
1204 unfix_nodes(&s_del_balance);
1205 return 0;
1206 }
1207 // reiserfs_delete_item returns item length when success
1208 n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1209 q_ih = get_ih(p_s_path);
1210 quota_cut_bytes = ih_item_len(q_ih);
1211
1212 /* hack so the quota code doesn't have to guess if the file
1213 ** has a tail. On tail insert, we allocate quota for 1 unformatted node.
1214 ** We test the offset because the tail might have been
1215 ** split into multiple items, and we only want to decrement for
1216 ** the unfm node once
1217 */
1218 if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(q_ih)) {
a9dd3643
JM
1219 if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
1220 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
bd4c625c
LT
1221 } else {
1222 quota_cut_bytes = 0;
1223 }
1da177e4 1224 }
1da177e4 1225
bd4c625c
LT
1226 if (p_s_un_bh) {
1227 int off;
1228 char *data;
1229
1230 /* We are in direct2indirect conversion, so move tail contents
1231 to the unformatted node */
1232 /* note, we do the copy before preparing the buffer because we
1233 ** don't care about the contents of the unformatted node yet.
1234 ** the only thing we really care about is the direct item's data
1235 ** is in the unformatted node.
1236 **
1237 ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1238 ** the unformatted node, which might schedule, meaning we'd have to
1239 ** loop all the way back up to the start of the while loop.
1240 **
1241 ** The unformatted node must be dirtied later on. We can't be
1242 ** sure here if the entire tail has been deleted yet.
1243 **
1244 ** p_s_un_bh is from the page cache (all unformatted nodes are
1245 ** from the page cache) and might be a highmem page. So, we
1246 ** can't use p_s_un_bh->b_data.
1247 ** -clm
1248 */
1249
1250 data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
1251 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1252 memcpy(data + off,
1253 B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih),
1254 n_ret_value);
1255 kunmap_atomic(data, KM_USER0);
1da177e4 1256 }
bd4c625c
LT
1257 /* Perform balancing after all resources have been collected at once. */
1258 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1da177e4
LT
1259
1260#ifdef REISERQUOTA_DEBUG
a9dd3643 1261 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
bd4c625c
LT
1262 "reiserquota delete_item(): freeing %u, id=%u type=%c",
1263 quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
1da177e4 1264#endif
bd4c625c 1265 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1da177e4 1266
bd4c625c
LT
1267 /* Return deleted body length */
1268 return n_ret_value;
1da177e4
LT
1269}
1270
1da177e4
LT
1271/* Summary Of Mechanisms For Handling Collisions Between Processes:
1272
1273 deletion of the body of the object is performed by iput(), with the
1274 result that if multiple processes are operating on a file, the
1275 deletion of the body of the file is deferred until the last process
1276 that has an open inode performs its iput().
1277
1278 writes and truncates are protected from collisions by use of
1279 semaphores.
1280
1281 creates, linking, and mknod are protected from collisions with other
1282 processes by making the reiserfs_add_entry() the last step in the
1283 creation, and then rolling back all changes if there was a collision.
1284 - Hans
1285*/
1286
1da177e4 1287/* this deletes item which never gets split */
bd4c625c
LT
1288void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1289 struct inode *inode, struct reiserfs_key *key)
1da177e4 1290{
bd4c625c
LT
1291 struct tree_balance tb;
1292 INITIALIZE_PATH(path);
1293 int item_len = 0;
1294 int tb_init = 0;
1295 struct cpu_key cpu_key;
1296 int retval;
1297 int quota_cut_bytes = 0;
1298
1299 BUG_ON(!th->t_trans_id);
1300
1301 le_key2cpu_key(&cpu_key, key);
1302
1303 while (1) {
1304 retval = search_item(th->t_super, &cpu_key, &path);
1305 if (retval == IO_ERROR) {
0030b645
JM
1306 reiserfs_error(th->t_super, "vs-5350",
1307 "i/o failure occurred trying "
1308 "to delete %K", &cpu_key);
bd4c625c
LT
1309 break;
1310 }
1311 if (retval != ITEM_FOUND) {
1312 pathrelse(&path);
1313 // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1314 if (!
1315 ((unsigned long long)
1316 GET_HASH_VALUE(le_key_k_offset
1317 (le_key_version(key), key)) == 0
1318 && (unsigned long long)
1319 GET_GENERATION_NUMBER(le_key_k_offset
1320 (le_key_version(key),
1321 key)) == 1))
45b03d5e
JM
1322 reiserfs_warning(th->t_super, "vs-5355",
1323 "%k not found", key);
bd4c625c
LT
1324 break;
1325 }
1326 if (!tb_init) {
1327 tb_init = 1;
1328 item_len = ih_item_len(PATH_PITEM_HEAD(&path));
1329 init_tb_struct(th, &tb, th->t_super, &path,
1330 -(IH_SIZE + item_len));
1331 }
1332 quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
1da177e4 1333
bd4c625c
LT
1334 retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1335 if (retval == REPEAT_SEARCH) {
1336 PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1337 continue;
1338 }
1da177e4 1339
bd4c625c
LT
1340 if (retval == CARRY_ON) {
1341 do_balance(&tb, NULL, NULL, M_DELETE);
1342 if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */
1da177e4 1343#ifdef REISERQUOTA_DEBUG
bd4c625c
LT
1344 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1345 "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1346 quota_cut_bytes, inode->i_uid,
1347 key2type(key));
1da177e4 1348#endif
bd4c625c
LT
1349 DQUOT_FREE_SPACE_NODIRTY(inode,
1350 quota_cut_bytes);
1351 }
1352 break;
1353 }
1354 // IO_ERROR, NO_DISK_SPACE, etc
45b03d5e 1355 reiserfs_warning(th->t_super, "vs-5360",
bd4c625c
LT
1356 "could not delete %K due to fix_nodes failure",
1357 &cpu_key);
1358 unfix_nodes(&tb);
1359 break;
1da177e4
LT
1360 }
1361
bd4c625c 1362 reiserfs_check_path(&path);
1da177e4
LT
1363}
1364
bd4c625c
LT
1365int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1366 struct inode *inode)
1da177e4 1367{
bd4c625c
LT
1368 int err;
1369 inode->i_size = 0;
1370 BUG_ON(!th->t_trans_id);
1371
1372 /* for directory this deletes item containing "." and ".." */
1373 err =
1374 reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1375 if (err)
1376 return err;
1377
1da177e4 1378#if defined( USE_INODE_GENERATION_COUNTER )
bd4c625c
LT
1379 if (!old_format_only(th->t_super)) {
1380 __le32 *inode_generation;
1381
1382 inode_generation =
1383 &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
9e902df6 1384 le32_add_cpu(inode_generation, 1);
bd4c625c 1385 }
1da177e4
LT
1386/* USE_INODE_GENERATION_COUNTER */
1387#endif
bd4c625c 1388 reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1da177e4 1389
bd4c625c 1390 return err;
1da177e4
LT
1391}
1392
bd4c625c
LT
1393static void unmap_buffers(struct page *page, loff_t pos)
1394{
1395 struct buffer_head *bh;
1396 struct buffer_head *head;
1397 struct buffer_head *next;
1398 unsigned long tail_index;
1399 unsigned long cur_index;
1400
1401 if (page) {
1402 if (page_has_buffers(page)) {
1403 tail_index = pos & (PAGE_CACHE_SIZE - 1);
1404 cur_index = 0;
1405 head = page_buffers(page);
1406 bh = head;
1407 do {
1408 next = bh->b_this_page;
1409
1410 /* we want to unmap the buffers that contain the tail, and
1411 ** all the buffers after it (since the tail must be at the
1412 ** end of the file). We don't want to unmap file data
1413 ** before the tail, since it might be dirty and waiting to
1414 ** reach disk
1415 */
1416 cur_index += bh->b_size;
1417 if (cur_index > tail_index) {
1418 reiserfs_unmap_buffer(bh);
1419 }
1420 bh = next;
1421 } while (bh != head);
1da177e4 1422 }
1da177e4 1423 }
1da177e4
LT
1424}
1425
bd4c625c
LT
1426static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1427 struct inode *p_s_inode,
1428 struct page *page,
fec6d055 1429 struct treepath *p_s_path,
bd4c625c
LT
1430 const struct cpu_key *p_s_item_key,
1431 loff_t n_new_file_size, char *p_c_mode)
1432{
a9dd3643
JM
1433 struct super_block *sb = p_s_inode->i_sb;
1434 int n_block_size = sb->s_blocksize;
bd4c625c
LT
1435 int cut_bytes;
1436 BUG_ON(!th->t_trans_id);
14a61442 1437 BUG_ON(n_new_file_size != p_s_inode->i_size);
1da177e4 1438
bd4c625c
LT
1439 /* the page being sent in could be NULL if there was an i/o error
1440 ** reading in the last block. The user will hit problems trying to
1441 ** read the file, but for now we just skip the indirect2direct
1442 */
1443 if (atomic_read(&p_s_inode->i_count) > 1 ||
1444 !tail_has_to_be_packed(p_s_inode) ||
1445 !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
0222e657 1446 /* leave tail in an unformatted node */
bd4c625c
LT
1447 *p_c_mode = M_SKIP_BALANCING;
1448 cut_bytes =
1449 n_block_size - (n_new_file_size & (n_block_size - 1));
1450 pathrelse(p_s_path);
1451 return cut_bytes;
1452 }
1453 /* Permorm the conversion to a direct_item. */
1454 /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); */
1455 return indirect2direct(th, p_s_inode, page, p_s_path, p_s_item_key,
1456 n_new_file_size, p_c_mode);
1457}
1da177e4
LT
1458
1459/* we did indirect_to_direct conversion. And we have inserted direct
1460 item successesfully, but there were no disk space to cut unfm
1461 pointer being converted. Therefore we have to delete inserted
1462 direct item(s) */
bd4c625c 1463static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
fec6d055 1464 struct inode *inode, struct treepath *path)
1da177e4 1465{
bd4c625c
LT
1466 struct cpu_key tail_key;
1467 int tail_len;
1468 int removed;
1469 BUG_ON(!th->t_trans_id);
1470
1471 make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); // !!!!
1472 tail_key.key_length = 4;
1473
1474 tail_len =
1475 (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1476 while (tail_len) {
1477 /* look for the last byte of the tail */
1478 if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1479 POSITION_NOT_FOUND)
c3a9c210
JM
1480 reiserfs_panic(inode->i_sb, "vs-5615",
1481 "found invalid item");
bd4c625c
LT
1482 RFALSE(path->pos_in_item !=
1483 ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1484 "vs-5616: appended bytes found");
1485 PATH_LAST_POSITION(path)--;
1486
1487 removed =
1488 reiserfs_delete_item(th, path, &tail_key, inode,
1489 NULL /*unbh not needed */ );
1490 RFALSE(removed <= 0
1491 || removed > tail_len,
1492 "vs-5617: there was tail %d bytes, removed item length %d bytes",
1493 tail_len, removed);
1494 tail_len -= removed;
1495 set_cpu_key_k_offset(&tail_key,
1496 cpu_key_k_offset(&tail_key) - removed);
1497 }
45b03d5e
JM
1498 reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1499 "conversion has been rolled back due to "
1500 "lack of disk space");
bd4c625c
LT
1501 //mark_file_without_tail (inode);
1502 mark_inode_dirty(inode);
1da177e4
LT
1503}
1504
1da177e4 1505/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
bd4c625c 1506int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
fec6d055 1507 struct treepath *p_s_path,
bd4c625c
LT
1508 struct cpu_key *p_s_item_key,
1509 struct inode *p_s_inode,
1510 struct page *page, loff_t n_new_file_size)
1da177e4 1511{
a9dd3643 1512 struct super_block *sb = p_s_inode->i_sb;
bd4c625c
LT
1513 /* Every function which is going to call do_balance must first
1514 create a tree_balance structure. Then it must fill up this
1515 structure by using the init_tb_struct and fix_nodes functions.
1516 After that we can make tree balancing. */
1517 struct tree_balance s_cut_balance;
1518 struct item_head *p_le_ih;
1519 int n_cut_size = 0, /* Amount to be cut. */
1520 n_ret_value = CARRY_ON, n_removed = 0, /* Number of the removed unformatted nodes. */
1521 n_is_inode_locked = 0;
1522 char c_mode; /* Mode of the balance. */
1523 int retval2 = -1;
1524 int quota_cut_bytes;
1525 loff_t tail_pos = 0;
1526
1527 BUG_ON(!th->t_trans_id);
1528
1529 init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path,
1530 n_cut_size);
1531
1532 /* Repeat this loop until we either cut the item without needing
1533 to balance, or we fix_nodes without schedule occurring */
1534 while (1) {
1535 /* Determine the balance mode, position of the first byte to
1536 be cut, and size to be cut. In case of the indirect item
1537 free unformatted nodes which are pointed to by the cut
1538 pointers. */
1539
1540 c_mode =
1541 prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1542 p_s_item_key, &n_removed,
1543 &n_cut_size, n_new_file_size);
1544 if (c_mode == M_CONVERT) {
1545 /* convert last unformatted node to direct item or leave
1546 tail in the unformatted node */
1547 RFALSE(n_ret_value != CARRY_ON,
1548 "PAP-5570: can not convert twice");
1549
1550 n_ret_value =
1551 maybe_indirect_to_direct(th, p_s_inode, page,
1552 p_s_path, p_s_item_key,
1553 n_new_file_size, &c_mode);
1554 if (c_mode == M_SKIP_BALANCING)
1555 /* tail has been left in the unformatted node */
1556 return n_ret_value;
1557
1558 n_is_inode_locked = 1;
1559
1560 /* removing of last unformatted node will change value we
1561 have to return to truncate. Save it */
1562 retval2 = n_ret_value;
a9dd3643 1563 /*retval2 = sb->s_blocksize - (n_new_file_size & (sb->s_blocksize - 1)); */
bd4c625c
LT
1564
1565 /* So, we have performed the first part of the conversion:
1566 inserting the new direct item. Now we are removing the
1567 last unformatted node pointer. Set key to search for
1568 it. */
1569 set_cpu_key_k_type(p_s_item_key, TYPE_INDIRECT);
1570 p_s_item_key->key_length = 4;
1571 n_new_file_size -=
a9dd3643 1572 (n_new_file_size & (sb->s_blocksize - 1));
bd4c625c
LT
1573 tail_pos = n_new_file_size;
1574 set_cpu_key_k_offset(p_s_item_key, n_new_file_size + 1);
1575 if (search_for_position_by_key
a9dd3643 1576 (sb, p_s_item_key,
bd4c625c
LT
1577 p_s_path) == POSITION_NOT_FOUND) {
1578 print_block(PATH_PLAST_BUFFER(p_s_path), 3,
1579 PATH_LAST_POSITION(p_s_path) - 1,
1580 PATH_LAST_POSITION(p_s_path) + 1);
a9dd3643 1581 reiserfs_panic(sb, "PAP-5580", "item to "
c3a9c210 1582 "convert does not exist (%K)",
bd4c625c
LT
1583 p_s_item_key);
1584 }
1585 continue;
1586 }
1587 if (n_cut_size == 0) {
1588 pathrelse(p_s_path);
1589 return 0;
1590 }
1591
1592 s_cut_balance.insert_size[0] = n_cut_size;
1593
1594 n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL);
1595 if (n_ret_value != REPEAT_SEARCH)
1596 break;
1597
a9dd3643 1598 PROC_INFO_INC(sb, cut_from_item_restarted);
bd4c625c
LT
1599
1600 n_ret_value =
a9dd3643 1601 search_for_position_by_key(sb, p_s_item_key, p_s_path);
bd4c625c
LT
1602 if (n_ret_value == POSITION_FOUND)
1603 continue;
1da177e4 1604
a9dd3643 1605 reiserfs_warning(sb, "PAP-5610", "item %K not found",
bd4c625c
LT
1606 p_s_item_key);
1607 unfix_nodes(&s_cut_balance);
1608 return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT;
1609 } /* while */
1610
1611 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1612 if (n_ret_value != CARRY_ON) {
1613 if (n_is_inode_locked) {
1614 // FIXME: this seems to be not needed: we are always able
1615 // to cut item
1616 indirect_to_direct_roll_back(th, p_s_inode, p_s_path);
1617 }
1618 if (n_ret_value == NO_DISK_SPACE)
a9dd3643 1619 reiserfs_warning(sb, "reiserfs-5092",
45b03d5e 1620 "NO_DISK_SPACE");
bd4c625c
LT
1621 unfix_nodes(&s_cut_balance);
1622 return -EIO;
1da177e4 1623 }
bd4c625c
LT
1624
1625 /* go ahead and perform balancing */
1626
1627 RFALSE(c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode");
1628
1629 /* Calculate number of bytes that need to be cut from the item. */
1630 quota_cut_bytes =
1631 (c_mode ==
1632 M_DELETE) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.
1633 insert_size[0];
1634 if (retval2 == -1)
1635 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode);
1636 else
1637 n_ret_value = retval2;
1638
1639 /* For direct items, we only change the quota when deleting the last
1640 ** item.
1641 */
1642 p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1643 if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1644 if (c_mode == M_DELETE &&
a9dd3643 1645 (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
bd4c625c
LT
1646 1) {
1647 // FIXME: this is to keep 3.5 happy
1648 REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX;
a9dd3643 1649 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
bd4c625c
LT
1650 } else {
1651 quota_cut_bytes = 0;
1652 }
1da177e4 1653 }
1da177e4 1654#ifdef CONFIG_REISERFS_CHECK
bd4c625c
LT
1655 if (n_is_inode_locked) {
1656 struct item_head *le_ih =
1657 PATH_PITEM_HEAD(s_cut_balance.tb_path);
1658 /* we are going to complete indirect2direct conversion. Make
1659 sure, that we exactly remove last unformatted node pointer
1660 of the item */
1661 if (!is_indirect_le_ih(le_ih))
a9dd3643 1662 reiserfs_panic(sb, "vs-5652",
bd4c625c
LT
1663 "item must be indirect %h", le_ih);
1664
1665 if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
a9dd3643 1666 reiserfs_panic(sb, "vs-5653", "completing "
c3a9c210
JM
1667 "indirect2direct conversion indirect "
1668 "item %h being deleted must be of "
1669 "4 byte long", le_ih);
bd4c625c
LT
1670
1671 if (c_mode == M_CUT
1672 && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
a9dd3643 1673 reiserfs_panic(sb, "vs-5654", "can not complete "
c3a9c210
JM
1674 "indirect2direct conversion of %h "
1675 "(CUT, insert_size==%d)",
bd4c625c
LT
1676 le_ih, s_cut_balance.insert_size[0]);
1677 }
1678 /* it would be useful to make sure, that right neighboring
1679 item is direct item of this file */
1da177e4 1680 }
1da177e4 1681#endif
bd4c625c
LT
1682
1683 do_balance(&s_cut_balance, NULL, NULL, c_mode);
1684 if (n_is_inode_locked) {
1685 /* we've done an indirect->direct conversion. when the data block
1686 ** was freed, it was removed from the list of blocks that must
1687 ** be flushed before the transaction commits, make sure to
1688 ** unmap and invalidate it
1689 */
1690 unmap_buffers(page, tail_pos);
1691 REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask;
1692 }
1da177e4 1693#ifdef REISERQUOTA_DEBUG
bd4c625c
LT
1694 reiserfs_debug(p_s_inode->i_sb, REISERFS_DEBUG_CODE,
1695 "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1696 quota_cut_bytes, p_s_inode->i_uid, '?');
1da177e4 1697#endif
bd4c625c
LT
1698 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1699 return n_ret_value;
1da177e4
LT
1700}
1701
bd4c625c
LT
1702static void truncate_directory(struct reiserfs_transaction_handle *th,
1703 struct inode *inode)
1da177e4 1704{
bd4c625c
LT
1705 BUG_ON(!th->t_trans_id);
1706 if (inode->i_nlink)
0030b645 1707 reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
bd4c625c
LT
1708
1709 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1710 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1711 reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1712 reiserfs_update_sd(th, inode);
1713 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1714 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1da177e4
LT
1715}
1716
bd4c625c
LT
1717/* Truncate file to the new size. Note, this must be called with a transaction
1718 already started */
1719int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, /* ->i_size contains new
1720 size */
1721 struct page *page, /* up to date for last block */
1722 int update_timestamps /* when it is called by
1723 file_release to convert
1724 the tail - no timestamps
1725 should be updated */
1726 )
1727{
1728 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1729 struct item_head *p_le_ih; /* Pointer to an item header. */
1730 struct cpu_key s_item_key; /* Key to search for a previous file item. */
1731 loff_t n_file_size, /* Old file size. */
1732 n_new_file_size; /* New file size. */
1733 int n_deleted; /* Number of deleted or truncated bytes. */
1734 int retval;
1735 int err = 0;
1736
1737 BUG_ON(!th->t_trans_id);
1738 if (!
1739 (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode)
1740 || S_ISLNK(p_s_inode->i_mode)))
1741 return 0;
1742
1743 if (S_ISDIR(p_s_inode->i_mode)) {
1744 // deletion of directory - no need to update timestamps
1745 truncate_directory(th, p_s_inode);
1746 return 0;
1747 }
1da177e4 1748
bd4c625c
LT
1749 /* Get new file size. */
1750 n_new_file_size = p_s_inode->i_size;
1da177e4 1751
bd4c625c
LT
1752 // FIXME: note, that key type is unimportant here
1753 make_cpu_key(&s_item_key, p_s_inode, max_reiserfs_offset(p_s_inode),
1754 TYPE_DIRECT, 3);
1da177e4 1755
bd4c625c
LT
1756 retval =
1757 search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1758 &s_search_path);
1759 if (retval == IO_ERROR) {
0030b645
JM
1760 reiserfs_error(p_s_inode->i_sb, "vs-5657",
1761 "i/o failure occurred trying to truncate %K",
1762 &s_item_key);
bd4c625c
LT
1763 err = -EIO;
1764 goto out;
1765 }
1766 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
0030b645
JM
1767 reiserfs_error(p_s_inode->i_sb, "PAP-5660",
1768 "wrong result %d of search for %K", retval,
1769 &s_item_key);
bd4c625c
LT
1770
1771 err = -EIO;
1772 goto out;
1773 }
1da177e4 1774
bd4c625c
LT
1775 s_search_path.pos_in_item--;
1776
1777 /* Get real file size (total length of all file items) */
1778 p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1779 if (is_statdata_le_ih(p_le_ih))
1780 n_file_size = 0;
1781 else {
1782 loff_t offset = le_ih_k_offset(p_le_ih);
1783 int bytes =
1784 op_bytes_number(p_le_ih, p_s_inode->i_sb->s_blocksize);
1785
1786 /* this may mismatch with real file size: if last direct item
1787 had no padding zeros and last unformatted node had no free
1788 space, this file would have this file size */
1789 n_file_size = offset + bytes - 1;
1790 }
1791 /*
1792 * are we doing a full truncate or delete, if so
1793 * kick in the reada code
1794 */
1795 if (n_new_file_size == 0)
1796 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1797
1798 if (n_file_size == 0 || n_file_size < n_new_file_size) {
1799 goto update_and_out;
1da177e4
LT
1800 }
1801
bd4c625c
LT
1802 /* Update key to search for the last file item. */
1803 set_cpu_key_k_offset(&s_item_key, n_file_size);
1804
1805 do {
1806 /* Cut or delete file item. */
1807 n_deleted =
1808 reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1809 p_s_inode, page, n_new_file_size);
1810 if (n_deleted < 0) {
45b03d5e
JM
1811 reiserfs_warning(p_s_inode->i_sb, "vs-5665",
1812 "reiserfs_cut_from_item failed");
bd4c625c
LT
1813 reiserfs_check_path(&s_search_path);
1814 return 0;
1815 }
1da177e4 1816
bd4c625c
LT
1817 RFALSE(n_deleted > n_file_size,
1818 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1819 n_deleted, n_file_size, &s_item_key);
1da177e4 1820
bd4c625c
LT
1821 /* Change key to search the last file item. */
1822 n_file_size -= n_deleted;
1da177e4 1823
bd4c625c 1824 set_cpu_key_k_offset(&s_item_key, n_file_size);
1da177e4 1825
bd4c625c
LT
1826 /* While there are bytes to truncate and previous file item is presented in the tree. */
1827
1828 /*
0222e657 1829 ** This loop could take a really long time, and could log
bd4c625c
LT
1830 ** many more blocks than a transaction can hold. So, we do a polite
1831 ** journal end here, and if the transaction needs ending, we make
1832 ** sure the file is consistent before ending the current trans
1833 ** and starting a new one
1834 */
23f9e0f8
AZ
1835 if (journal_transaction_should_end(th, 0) ||
1836 reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
bd4c625c 1837 int orig_len_alloc = th->t_blocks_allocated;
3cd6dbe6 1838 pathrelse(&s_search_path);
bd4c625c
LT
1839
1840 if (update_timestamps) {
1841 p_s_inode->i_mtime = p_s_inode->i_ctime =
1842 CURRENT_TIME_SEC;
1843 }
1844 reiserfs_update_sd(th, p_s_inode);
1845
1846 err = journal_end(th, p_s_inode->i_sb, orig_len_alloc);
1847 if (err)
1848 goto out;
1849 err = journal_begin(th, p_s_inode->i_sb,
23f9e0f8 1850 JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
bd4c625c
LT
1851 if (err)
1852 goto out;
1853 reiserfs_update_inode_transaction(p_s_inode);
1854 }
1855 } while (n_file_size > ROUND_UP(n_new_file_size) &&
1856 search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1857 &s_search_path) == POSITION_FOUND);
1858
1859 RFALSE(n_file_size > ROUND_UP(n_new_file_size),
1860 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1861 n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
1862
1863 update_and_out:
1864 if (update_timestamps) {
1865 // this is truncate, not file closing
1866 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1da177e4 1867 }
bd4c625c 1868 reiserfs_update_sd(th, p_s_inode);
1da177e4 1869
bd4c625c
LT
1870 out:
1871 pathrelse(&s_search_path);
1872 return err;
1873}
1da177e4
LT
1874
1875#ifdef CONFIG_REISERFS_CHECK
1876// this makes sure, that we __append__, not overwrite or add holes
fec6d055 1877static void check_research_for_paste(struct treepath *path,
bd4c625c 1878 const struct cpu_key *p_s_key)
1da177e4 1879{
bd4c625c
LT
1880 struct item_head *found_ih = get_ih(path);
1881
1882 if (is_direct_le_ih(found_ih)) {
1883 if (le_ih_k_offset(found_ih) +
1884 op_bytes_number(found_ih,
1885 get_last_bh(path)->b_size) !=
1886 cpu_key_k_offset(p_s_key)
1887 || op_bytes_number(found_ih,
1888 get_last_bh(path)->b_size) !=
1889 pos_in_item(path))
c3a9c210
JM
1890 reiserfs_panic(NULL, "PAP-5720", "found direct item "
1891 "%h or position (%d) does not match "
1892 "to key %K", found_ih,
1893 pos_in_item(path), p_s_key);
bd4c625c
LT
1894 }
1895 if (is_indirect_le_ih(found_ih)) {
1896 if (le_ih_k_offset(found_ih) +
1897 op_bytes_number(found_ih,
1898 get_last_bh(path)->b_size) !=
1899 cpu_key_k_offset(p_s_key)
1900 || I_UNFM_NUM(found_ih) != pos_in_item(path)
1901 || get_ih_free_space(found_ih) != 0)
c3a9c210
JM
1902 reiserfs_panic(NULL, "PAP-5730", "found indirect "
1903 "item (%h) or position (%d) does not "
1904 "match to key (%K)",
bd4c625c
LT
1905 found_ih, pos_in_item(path), p_s_key);
1906 }
1da177e4 1907}
bd4c625c 1908#endif /* config reiserfs check */
1da177e4
LT
1909
1910/* Paste bytes to the existing item. Returns bytes number pasted into the item. */
fec6d055 1911int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_search_path, /* Path to the pasted item. */
bd4c625c
LT
1912 const struct cpu_key *p_s_key, /* Key to search for the needed item. */
1913 struct inode *inode, /* Inode item belongs to */
1914 const char *p_c_body, /* Pointer to the bytes to paste. */
1915 int n_pasted_size)
1916{ /* Size of pasted bytes. */
1917 struct tree_balance s_paste_balance;
1918 int retval;
1919 int fs_gen;
1920
1921 BUG_ON(!th->t_trans_id);
1da177e4 1922
bd4c625c 1923 fs_gen = get_generation(inode->i_sb);
1da177e4
LT
1924
1925#ifdef REISERQUOTA_DEBUG
bd4c625c
LT
1926 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1927 "reiserquota paste_into_item(): allocating %u id=%u type=%c",
1928 n_pasted_size, inode->i_uid,
1929 key2type(&(p_s_key->on_disk_key)));
1da177e4
LT
1930#endif
1931
bd4c625c
LT
1932 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) {
1933 pathrelse(p_s_search_path);
1934 return -EDQUOT;
1935 }
1936 init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path,
1937 n_pasted_size);
1da177e4 1938#ifdef DISPLACE_NEW_PACKING_LOCALITIES
bd4c625c 1939 s_paste_balance.key = p_s_key->on_disk_key;
1da177e4
LT
1940#endif
1941
bd4c625c
LT
1942 /* DQUOT_* can schedule, must check before the fix_nodes */
1943 if (fs_changed(fs_gen, inode->i_sb)) {
1944 goto search_again;
1da177e4 1945 }
bd4c625c
LT
1946
1947 while ((retval =
1948 fix_nodes(M_PASTE, &s_paste_balance, NULL,
1949 p_c_body)) == REPEAT_SEARCH) {
1950 search_again:
1951 /* file system changed while we were in the fix_nodes */
1952 PROC_INFO_INC(th->t_super, paste_into_item_restarted);
1953 retval =
1954 search_for_position_by_key(th->t_super, p_s_key,
1955 p_s_search_path);
1956 if (retval == IO_ERROR) {
1957 retval = -EIO;
1958 goto error_out;
1959 }
1960 if (retval == POSITION_FOUND) {
45b03d5e
JM
1961 reiserfs_warning(inode->i_sb, "PAP-5710",
1962 "entry or pasted byte (%K) exists",
bd4c625c
LT
1963 p_s_key);
1964 retval = -EEXIST;
1965 goto error_out;
1966 }
1da177e4 1967#ifdef CONFIG_REISERFS_CHECK
bd4c625c 1968 check_research_for_paste(p_s_search_path, p_s_key);
1da177e4 1969#endif
bd4c625c 1970 }
1da177e4 1971
bd4c625c
LT
1972 /* Perform balancing after all resources are collected by fix_nodes, and
1973 accessing them will not risk triggering schedule. */
1974 if (retval == CARRY_ON) {
1975 do_balance(&s_paste_balance, NULL /*ih */ , p_c_body, M_PASTE);
1976 return 0;
1977 }
1978 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
1979 error_out:
1980 /* this also releases the path */
1981 unfix_nodes(&s_paste_balance);
1da177e4 1982#ifdef REISERQUOTA_DEBUG
bd4c625c
LT
1983 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1984 "reiserquota paste_into_item(): freeing %u id=%u type=%c",
1985 n_pasted_size, inode->i_uid,
1986 key2type(&(p_s_key->on_disk_key)));
1da177e4 1987#endif
bd4c625c
LT
1988 DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
1989 return retval;
1da177e4
LT
1990}
1991
1da177e4 1992/* Insert new item into the buffer at the path. */
fec6d055 1993int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_path, /* Path to the inserteded item. */
bd4c625c
LT
1994 const struct cpu_key *key, struct item_head *p_s_ih, /* Pointer to the item header to insert. */
1995 struct inode *inode, const char *p_c_body)
1996{ /* Pointer to the bytes to insert. */
1997 struct tree_balance s_ins_balance;
1998 int retval;
1999 int fs_gen = 0;
2000 int quota_bytes = 0;
2001
2002 BUG_ON(!th->t_trans_id);
2003
2004 if (inode) { /* Do we count quotas for item? */
2005 fs_gen = get_generation(inode->i_sb);
2006 quota_bytes = ih_item_len(p_s_ih);
2007
2008 /* hack so the quota code doesn't have to guess if the file has
2009 ** a tail, links are always tails, so there's no guessing needed
2010 */
2011 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_s_ih)) {
2012 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2013 }
1da177e4 2014#ifdef REISERQUOTA_DEBUG
bd4c625c
LT
2015 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2016 "reiserquota insert_item(): allocating %u id=%u type=%c",
2017 quota_bytes, inode->i_uid, head2type(p_s_ih));
1da177e4 2018#endif
bd4c625c
LT
2019 /* We can't dirty inode here. It would be immediately written but
2020 * appropriate stat item isn't inserted yet... */
2021 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) {
2022 pathrelse(p_s_path);
2023 return -EDQUOT;
2024 }
1da177e4 2025 }
bd4c625c
LT
2026 init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path,
2027 IH_SIZE + ih_item_len(p_s_ih));
1da177e4 2028#ifdef DISPLACE_NEW_PACKING_LOCALITIES
bd4c625c 2029 s_ins_balance.key = key->on_disk_key;
1da177e4 2030#endif
bd4c625c
LT
2031 /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2032 if (inode && fs_changed(fs_gen, inode->i_sb)) {
2033 goto search_again;
1da177e4 2034 }
bd4c625c
LT
2035
2036 while ((retval =
2037 fix_nodes(M_INSERT, &s_ins_balance, p_s_ih,
2038 p_c_body)) == REPEAT_SEARCH) {
2039 search_again:
2040 /* file system changed while we were in the fix_nodes */
2041 PROC_INFO_INC(th->t_super, insert_item_restarted);
2042 retval = search_item(th->t_super, key, p_s_path);
2043 if (retval == IO_ERROR) {
2044 retval = -EIO;
2045 goto error_out;
2046 }
2047 if (retval == ITEM_FOUND) {
45b03d5e 2048 reiserfs_warning(th->t_super, "PAP-5760",
bd4c625c
LT
2049 "key %K already exists in the tree",
2050 key);
2051 retval = -EEXIST;
2052 goto error_out;
2053 }
1da177e4 2054 }
1da177e4 2055
bd4c625c
LT
2056 /* make balancing after all resources will be collected at a time */
2057 if (retval == CARRY_ON) {
2058 do_balance(&s_ins_balance, p_s_ih, p_c_body, M_INSERT);
2059 return 0;
2060 }
1da177e4 2061
bd4c625c
LT
2062 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2063 error_out:
2064 /* also releases the path */
2065 unfix_nodes(&s_ins_balance);
1da177e4 2066#ifdef REISERQUOTA_DEBUG
bd4c625c
LT
2067 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2068 "reiserquota insert_item(): freeing %u id=%u type=%c",
2069 quota_bytes, inode->i_uid, head2type(p_s_ih));
1da177e4 2070#endif
bd4c625c
LT
2071 if (inode)
2072 DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
2073 return retval;
1da177e4 2074}