2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
6 * Now we have all buffers that must be used in balancing of the tree
7 * Further calculations can not cause schedule(), and thus the buffer
8 * tree will be stable until the balancing will be finished
9 * balance the tree according to the analysis made before,
10 * and using buffers obtained after all above.
13 #include <asm/uaccess.h>
14 #include <linux/time.h>
16 #include <linux/buffer_head.h>
17 #include <linux/kernel.h>
19 static inline void buffer_info_init_left(struct tree_balance
*tb
,
20 struct buffer_info
*bi
)
24 bi
->bi_parent
= tb
->FL
[0];
25 bi
->bi_position
= get_left_neighbor_position(tb
, 0);
28 static inline void buffer_info_init_right(struct tree_balance
*tb
,
29 struct buffer_info
*bi
)
33 bi
->bi_parent
= tb
->FR
[0];
34 bi
->bi_position
= get_right_neighbor_position(tb
, 0);
37 static inline void buffer_info_init_tbS0(struct tree_balance
*tb
,
38 struct buffer_info
*bi
)
41 bi
->bi_bh
= PATH_PLAST_BUFFER(tb
->tb_path
);
42 bi
->bi_parent
= PATH_H_PPARENT(tb
->tb_path
, 0);
43 bi
->bi_position
= PATH_H_POSITION(tb
->tb_path
, 1);
46 static inline void buffer_info_init_bh(struct tree_balance
*tb
,
47 struct buffer_info
*bi
,
48 struct buffer_head
*bh
)
56 inline void do_balance_mark_leaf_dirty(struct tree_balance
*tb
,
57 struct buffer_head
*bh
, int flag
)
59 journal_mark_dirty(tb
->transaction_handle
, bh
);
62 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
63 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
67 * if deleting something ( tb->insert_size[0] < 0 )
68 * return(balance_leaf_when_delete()); (flag d handled here)
70 * if lnum is larger than 0 we put items into the left node
71 * if rnum is larger than 0 we put items into the right node
72 * if snum1 is larger than 0 we put items into the new node s1
73 * if snum2 is larger than 0 we put items into the new node s2
74 * Note that all *num* count new items being created.
76 * It would be easier to read balance_leaf() if each of these summary
77 * lines was a separate procedure rather than being inlined. I think
78 * that there are many passages here and in balance_leaf_when_delete() in
79 * which two calls to one procedure can replace two passages, and it
80 * might save cache space and improve software maintenance costs to do so.
82 * Vladimir made the perceptive comment that we should offload most of
83 * the decision making in this function into fix_nodes/check_balance, and
84 * then create some sort of structure in tb that says what actions should
85 * be performed by do_balance.
91 * Balance leaf node in case of delete or cut: insert_size[0] < 0
93 * lnum, rnum can have values >= -1
94 * -1 means that the neighbor must be joined with S
95 * 0 means that nothing should be done with the neighbor
96 * >0 means to shift entirely or partly the specified number of items
99 static int balance_leaf_when_delete(struct tree_balance
*tb
, int flag
)
101 struct buffer_head
*tbS0
= PATH_PLAST_BUFFER(tb
->tb_path
);
102 int item_pos
= PATH_LAST_POSITION(tb
->tb_path
);
103 int pos_in_item
= tb
->tb_path
->pos_in_item
;
104 struct buffer_info bi
;
106 struct item_head
*ih
;
108 RFALSE(tb
->FR
[0] && B_LEVEL(tb
->FR
[0]) != DISK_LEAF_NODE_LEVEL
+ 1,
109 "vs- 12000: level: wrong FR %z", tb
->FR
[0]);
110 RFALSE(tb
->blknum
[0] > 1,
111 "PAP-12005: tb->blknum == %d, can not be > 1", tb
->blknum
[0]);
112 RFALSE(!tb
->blknum
[0] && !PATH_H_PPARENT(tb
->tb_path
, 0),
113 "PAP-12010: tree can not be empty");
115 ih
= item_head(tbS0
, item_pos
);
116 buffer_info_init_tbS0(tb
, &bi
);
118 /* Delete or truncate the item */
121 case M_DELETE
: /* delete item in S[0] */
123 RFALSE(ih_item_len(ih
) + IH_SIZE
!= -tb
->insert_size
[0],
124 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
125 -tb
->insert_size
[0], ih
);
127 leaf_delete_items(&bi
, 0, item_pos
, 1, -1);
129 if (!item_pos
&& tb
->CFL
[0]) {
130 if (B_NR_ITEMS(tbS0
)) {
131 replace_key(tb
, tb
->CFL
[0], tb
->lkey
[0], tbS0
,
134 if (!PATH_H_POSITION(tb
->tb_path
, 1))
135 replace_key(tb
, tb
->CFL
[0], tb
->lkey
[0],
136 PATH_H_PPARENT(tb
->tb_path
,
141 RFALSE(!item_pos
&& !tb
->CFL
[0],
142 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb
->CFL
[0],
147 case M_CUT
:{ /* cut item in S[0] */
148 if (is_direntry_le_ih(ih
)) {
151 * UFS unlink semantics are such that you
152 * can only delete one directory entry at
157 * when we cut a directory tb->insert_size[0]
158 * means number of entries to be cut (always 1)
160 tb
->insert_size
[0] = -1;
161 leaf_cut_from_buffer(&bi
, item_pos
, pos_in_item
,
162 -tb
->insert_size
[0]);
164 RFALSE(!item_pos
&& !pos_in_item
&& !tb
->CFL
[0],
165 "PAP-12030: can not change delimiting key. CFL[0]=%p",
168 if (!item_pos
&& !pos_in_item
&& tb
->CFL
[0]) {
169 replace_key(tb
, tb
->CFL
[0], tb
->lkey
[0],
173 leaf_cut_from_buffer(&bi
, item_pos
, pos_in_item
,
174 -tb
->insert_size
[0]);
176 RFALSE(!ih_item_len(ih
),
177 "PAP-12035: cut must leave non-zero dynamic length of item");
183 print_cur_tb("12040");
184 reiserfs_panic(tb
->tb_sb
, "PAP-12040",
185 "unexpected mode: %s(%d)",
187 M_PASTE
) ? "PASTE" : ((flag
==
188 M_INSERT
) ? "INSERT" :
193 * the rule is that no shifting occurs unless by shifting
194 * a node can be freed
196 n
= B_NR_ITEMS(tbS0
);
197 /* L[0] takes part in balancing */
199 /* L[0] must be joined with S[0] */
200 if (tb
->lnum
[0] == -1) {
201 /* R[0] must be also joined with S[0] */
202 if (tb
->rnum
[0] == -1) {
203 if (tb
->FR
[0] == PATH_H_PPARENT(tb
->tb_path
, 0)) {
205 * all contents of all the 3 buffers
208 if (PATH_H_POSITION(tb
->tb_path
, 1) == 0
209 && 1 < B_NR_ITEMS(tb
->FR
[0]))
210 replace_key(tb
, tb
->CFL
[0],
214 leaf_move_items(LEAF_FROM_S_TO_L
, tb
, n
,
216 leaf_move_items(LEAF_FROM_R_TO_L
, tb
,
217 B_NR_ITEMS(tb
->R
[0]),
220 reiserfs_invalidate_buffer(tb
, tbS0
);
221 reiserfs_invalidate_buffer(tb
,
227 * all contents of all the 3 buffers will
230 leaf_move_items(LEAF_FROM_S_TO_R
, tb
, n
, -1,
232 leaf_move_items(LEAF_FROM_L_TO_R
, tb
,
233 B_NR_ITEMS(tb
->L
[0]), -1, NULL
);
235 /* right_delimiting_key is correct in R[0] */
236 replace_key(tb
, tb
->CFR
[0], tb
->rkey
[0],
239 reiserfs_invalidate_buffer(tb
, tbS0
);
240 reiserfs_invalidate_buffer(tb
, tb
->L
[0]);
245 RFALSE(tb
->rnum
[0] != 0,
246 "PAP-12045: rnum must be 0 (%d)", tb
->rnum
[0]);
247 /* all contents of L[0] and S[0] will be in L[0] */
248 leaf_shift_left(tb
, n
, -1);
250 reiserfs_invalidate_buffer(tb
, tbS0
);
256 * a part of contents of S[0] will be in L[0] and the
257 * rest part of S[0] will be in R[0]
260 RFALSE((tb
->lnum
[0] + tb
->rnum
[0] < n
) ||
261 (tb
->lnum
[0] + tb
->rnum
[0] > n
+ 1),
262 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
263 tb
->rnum
[0], tb
->lnum
[0], n
);
264 RFALSE((tb
->lnum
[0] + tb
->rnum
[0] == n
) &&
265 (tb
->lbytes
!= -1 || tb
->rbytes
!= -1),
266 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
267 tb
->rbytes
, tb
->lbytes
);
268 RFALSE((tb
->lnum
[0] + tb
->rnum
[0] == n
+ 1) &&
269 (tb
->lbytes
< 1 || tb
->rbytes
!= -1),
270 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
271 tb
->rbytes
, tb
->lbytes
);
273 leaf_shift_left(tb
, tb
->lnum
[0], tb
->lbytes
);
274 leaf_shift_right(tb
, tb
->rnum
[0], tb
->rbytes
);
276 reiserfs_invalidate_buffer(tb
, tbS0
);
281 if (tb
->rnum
[0] == -1) {
282 /* all contents of R[0] and S[0] will be in R[0] */
283 leaf_shift_right(tb
, n
, -1);
284 reiserfs_invalidate_buffer(tb
, tbS0
);
289 "PAP-12065: bad rnum parameter must be 0 (%d)", tb
->rnum
[0]);
293 static int balance_leaf(struct tree_balance
*tb
, struct item_head
*ih
, /* item header of inserted item (this is on little endian) */
294 const char *body
, /* body of inserted item or bytes to paste */
295 int flag
, /* i - insert, d - delete, c - cut, p - paste
296 (see comment to do_balance) */
297 struct item_head
*insert_key
, /* in our processing of one level we sometimes determine what
298 must be inserted into the next higher level. This insertion
299 consists of a key or two keys and their corresponding
301 struct buffer_head
**insert_ptr
/* inserted node-ptrs for the next level */
304 struct buffer_head
*tbS0
= PATH_PLAST_BUFFER(tb
->tb_path
);
305 int item_pos
= PATH_LAST_POSITION(tb
->tb_path
); /* index into the array of item headers in S[0]
306 of the affected item */
307 struct buffer_info bi
;
308 struct buffer_head
*S_new
[2]; /* new nodes allocated to hold what could not fit into S */
309 int snum
[2]; /* number of items that will be placed
310 into S_new (includes partially shifted
312 int sbytes
[2]; /* if an item is partially shifted into S_new then
313 if it is a directory item
314 it is the number of entries from the item that are shifted into S_new
316 it is the number of bytes from the item that are shifted into S_new
323 PROC_INFO_INC(tb
->tb_sb
, balance_at
[0]);
325 /* Make balance in case insert_size[0] < 0 */
326 if (tb
->insert_size
[0] < 0)
327 return balance_leaf_when_delete(tb
, flag
);
330 if (flag
== M_INSERT
&& !body
)
331 zeros_num
= ih_item_len(ih
);
333 pos_in_item
= tb
->tb_path
->pos_in_item
;
334 /* for indirect item pos_in_item is measured in unformatted node
335 pointers. Recalculate to bytes */
337 && is_indirect_le_ih(item_head(tbS0
, item_pos
)))
338 pos_in_item
*= UNFM_P_SIZE
;
340 if (tb
->lnum
[0] > 0) {
341 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
342 if (item_pos
< tb
->lnum
[0]) {
343 /* new item or it part falls to L[0], shift it too */
344 n
= B_NR_ITEMS(tb
->L
[0]);
347 case M_INSERT
: /* insert item into L[0] */
349 if (item_pos
== tb
->lnum
[0] - 1 && tb
->lbytes
!= -1) {
350 /* part of new item falls into L[0] */
354 ret_val
= leaf_shift_left(tb
, tb
->lnum
[0] - 1, -1);
356 /* Calculate item length to insert to S[0] */
357 new_item_len
= ih_item_len(ih
) - tb
->lbytes
;
358 /* Calculate and check item length to insert to L[0] */
359 put_ih_item_len(ih
, ih_item_len(ih
) - new_item_len
);
361 RFALSE(ih_item_len(ih
) <= 0,
362 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
365 /* Insert new item into L[0] */
366 buffer_info_init_left(tb
, &bi
);
367 leaf_insert_into_buf(&bi
,
368 n
+ item_pos
- ret_val
, ih
, body
,
369 zeros_num
> ih_item_len(ih
) ? ih_item_len(ih
) : zeros_num
);
371 version
= ih_version(ih
);
373 /* Calculate key component, item length and body to insert into S[0] */
374 set_le_ih_k_offset(ih
, le_ih_k_offset(ih
) +
375 (tb
-> lbytes
<< (is_indirect_le_ih(ih
) ? tb
->tb_sb
-> s_blocksize_bits
- UNFM_P_SHIFT
: 0)));
377 put_ih_item_len(ih
, new_item_len
);
378 if (tb
->lbytes
> zeros_num
) {
379 body
+= (tb
->lbytes
- zeros_num
);
382 zeros_num
-= tb
->lbytes
;
384 RFALSE(ih_item_len(ih
) <= 0,
385 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
388 /* new item in whole falls into L[0] */
389 /* Shift lnum[0]-1 items to L[0] */
390 ret_val
= leaf_shift_left(tb
, tb
->lnum
[0] - 1, tb
->lbytes
);
391 /* Insert new item into L[0] */
392 buffer_info_init_left(tb
, &bi
);
393 leaf_insert_into_buf(&bi
, n
+ item_pos
- ret_val
, ih
, body
, zeros_num
);
394 tb
->insert_size
[0] = 0;
399 case M_PASTE
: /* append item in L[0] */
401 if (item_pos
== tb
->lnum
[0] - 1 && tb
->lbytes
!= -1) {
402 /* we must shift the part of the appended item */
403 if (is_direntry_le_ih(item_head(tbS0
, item_pos
))) {
406 "PAP-12090: invalid parameter in case of a directory");
408 if (tb
->lbytes
> pos_in_item
) {
409 /* new directory entry falls into L[0] */
410 struct item_head
*pasted
;
411 int l_pos_in_item
= pos_in_item
;
413 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
414 ret_val
= leaf_shift_left(tb
, tb
->lnum
[0], tb
->lbytes
-1);
415 if (ret_val
&& !item_pos
) {
416 pasted
= item_head(tb
->L
[0], B_NR_ITEMS(tb
->L
[0]) - 1);
417 l_pos_in_item
+= ih_entry_count(pasted
) - (tb
->lbytes
-1);
420 /* Append given directory entry to directory item */
421 buffer_info_init_left(tb
, &bi
);
422 leaf_paste_in_buffer(&bi
, n
+ item_pos
- ret_val
, l_pos_in_item
, tb
->insert_size
[0], body
, zeros_num
);
424 /* previous string prepared space for pasting new entry, following string pastes this entry */
426 /* when we have merge directory item, pos_in_item has been changed too */
428 /* paste new directory entry. 1 is entry number */
429 leaf_paste_entries(&bi
, n
+ item_pos
- ret_val
, l_pos_in_item
,
430 1, (struct reiserfs_de_head
*) body
,
431 body
+ DEH_SIZE
, tb
->insert_size
[0]);
432 tb
->insert_size
[0] = 0;
434 /* new directory item doesn't fall into L[0] */
435 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
436 leaf_shift_left(tb
, tb
->lnum
[0], tb
->lbytes
);
438 /* Calculate new position to append in item body */
439 pos_in_item
-= tb
->lbytes
;
442 RFALSE(tb
->lbytes
<= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb
->lbytes
);
443 RFALSE(pos_in_item
!= ih_item_len(item_head(tbS0
, item_pos
)),
444 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
445 ih_item_len(item_head(tbS0
, item_pos
)),pos_in_item
);
447 if (tb
->lbytes
>= pos_in_item
) {
448 /* appended item will be in L[0] in whole */
451 /* this bytes number must be appended to the last item of L[h] */
452 l_n
= tb
->lbytes
- pos_in_item
;
454 /* Calculate new insert_size[0] */
455 tb
->insert_size
[0] -= l_n
;
457 RFALSE(tb
->insert_size
[0] <= 0,
458 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
460 ret_val
= leaf_shift_left(tb
, tb
->lnum
[0], ih_item_len
461 (item_head(tbS0
, item_pos
)));
462 /* Append to body of item in L[0] */
463 buffer_info_init_left(tb
, &bi
);
465 (&bi
, n
+ item_pos
- ret_val
, ih_item_len
466 (item_head(tb
->L
[0], n
+ item_pos
- ret_val
)),
468 zeros_num
> l_n
? l_n
: zeros_num
);
469 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
474 RFALSE(ih_item_len(item_head(tbS0
, 0)),
475 "PAP-12106: item length must be 0");
476 RFALSE(comp_short_le_keys(leaf_key(tbS0
, 0), leaf_key
477 (tb
->L
[0], n
+ item_pos
- ret_val
)),
478 "PAP-12107: items must be of the same file");
479 if (is_indirect_le_ih(item_head(tb
->L
[0], n
+ item_pos
- ret_val
))) {
480 temp_l
= l_n
<< (tb
->tb_sb
-> s_blocksize_bits
- UNFM_P_SHIFT
);
482 /* update key of first item in S0 */
483 version
= ih_version(item_head(tbS0
, 0));
484 set_le_key_k_offset(version
, leaf_key(tbS0
, 0),
485 le_key_k_offset(version
,leaf_key(tbS0
, 0)) + temp_l
);
486 /* update left delimiting key */
487 set_le_key_k_offset(version
, internal_key(tb
->CFL
[0], tb
->lkey
[0]),
488 le_key_k_offset(version
, internal_key(tb
->CFL
[0], tb
->lkey
[0])) + temp_l
);
491 /* Calculate new body, position in item and insert_size[0] */
492 if (l_n
> zeros_num
) {
493 body
+= (l_n
- zeros_num
);
499 RFALSE(comp_short_le_keys(leaf_key(tbS0
, 0), leaf_key(tb
->L
[0], B_NR_ITEMS(tb
->L
[0]) - 1))
500 || !op_is_left_mergeable(leaf_key(tbS0
, 0), tbS0
->b_size
)
501 || !op_is_left_mergeable(internal_key(tb
->CFL
[0], tb
->lkey
[0]), tbS0
->b_size
),
502 "PAP-12120: item must be merge-able with left neighboring item");
503 } else { /* only part of the appended item will be in L[0] */
505 /* Calculate position in item for append in S[0] */
506 pos_in_item
-= tb
->lbytes
;
508 RFALSE(pos_in_item
<= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item
);
510 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
511 leaf_shift_left(tb
, tb
->lnum
[0], tb
->lbytes
);
514 } else { /* appended item will be in L[0] in whole */
516 struct item_head
*pasted
;
518 if (!item_pos
&& op_is_left_mergeable(leaf_key(tbS0
, 0), tbS0
->b_size
)) { /* if we paste into first item of S[0] and it is left mergable */
519 /* then increment pos_in_item by the size of the last item in L[0] */
520 pasted
= item_head(tb
->L
[0], n
- 1);
521 if (is_direntry_le_ih(pasted
))
522 pos_in_item
+= ih_entry_count(pasted
);
524 pos_in_item
+= ih_item_len(pasted
);
527 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
528 ret_val
= leaf_shift_left(tb
, tb
->lnum
[0], tb
->lbytes
);
529 /* Append to body of item in L[0] */
530 buffer_info_init_left(tb
, &bi
);
531 leaf_paste_in_buffer(&bi
, n
+ item_pos
- ret_val
,
536 /* if appended item is directory, paste entry */
537 pasted
= item_head(tb
->L
[0], n
+ item_pos
- ret_val
);
538 if (is_direntry_le_ih(pasted
))
539 leaf_paste_entries(&bi
, n
+ item_pos
- ret_val
,
541 (struct reiserfs_de_head
*) body
,
544 /* if appended item is indirect item, put unformatted node into un list */
545 if (is_indirect_le_ih(pasted
))
546 set_ih_free_space(pasted
, 0);
547 tb
->insert_size
[0] = 0;
551 default: /* cases d and t */
552 reiserfs_panic(tb
->tb_sb
, "PAP-12130",
553 "lnum > 0: unexpected mode: "
555 (flag
== M_DELETE
) ? "DELETE" : ((flag
== M_CUT
) ? "CUT" : "UNKNOWN"), flag
);
558 /* new item doesn't fall into L[0] */
559 leaf_shift_left(tb
, tb
->lnum
[0], tb
->lbytes
);
563 /* tb->lnum[0] > 0 */
564 /* Calculate new item position */
565 item_pos
-= (tb
->lnum
[0] - ((tb
->lbytes
!= -1) ? 1 : 0));
567 if (tb
->rnum
[0] > 0) {
568 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
569 n
= B_NR_ITEMS(tbS0
);
572 case M_INSERT
: /* insert item */
573 if (n
- tb
->rnum
[0] < item_pos
) { /* new item or its part falls to R[0] */
574 if (item_pos
== n
- tb
->rnum
[0] + 1 && tb
->rbytes
!= -1) { /* part of new item falls into R[0] */
575 loff_t old_key_comp
, old_len
, r_zeros_number
;
580 leaf_shift_right(tb
, tb
->rnum
[0] - 1, -1);
582 version
= ih_version(ih
);
583 /* Remember key component and item length */
584 old_key_comp
= le_ih_k_offset(ih
);
585 old_len
= ih_item_len(ih
);
587 /* Calculate key component and item length to insert into R[0] */
588 offset
= le_ih_k_offset(ih
) + ((old_len
- tb
->rbytes
) << (is_indirect_le_ih(ih
) ? tb
->tb_sb
->s_blocksize_bits
- UNFM_P_SHIFT
: 0));
589 set_le_ih_k_offset(ih
, offset
);
590 put_ih_item_len(ih
, tb
->rbytes
);
591 /* Insert part of the item into R[0] */
592 buffer_info_init_right(tb
, &bi
);
593 if ((old_len
- tb
->rbytes
) > zeros_num
) {
595 r_body
= body
+ (old_len
- tb
->rbytes
) - zeros_num
;
598 r_zeros_number
= zeros_num
- (old_len
- tb
->rbytes
);
599 zeros_num
-= r_zeros_number
;
602 leaf_insert_into_buf(&bi
, 0, ih
, r_body
,
605 /* Replace right delimiting key by first key in R[0] */
606 replace_key(tb
, tb
->CFR
[0], tb
->rkey
[0],
609 /* Calculate key component and item length to insert into S[0] */
610 set_le_ih_k_offset(ih
, old_key_comp
);
611 put_ih_item_len(ih
, old_len
- tb
->rbytes
);
613 tb
->insert_size
[0] -= tb
->rbytes
;
615 } else { /* whole new item falls into R[0] */
617 /* Shift rnum[0]-1 items to R[0] */
618 ret_val
= leaf_shift_right(tb
, tb
->rnum
[0] - 1, tb
->rbytes
);
619 /* Insert new item into R[0] */
620 buffer_info_init_right(tb
, &bi
);
621 leaf_insert_into_buf(&bi
, item_pos
- n
+ tb
->rnum
[0] - 1,
622 ih
, body
, zeros_num
);
624 if (item_pos
- n
+ tb
->rnum
[0] - 1 == 0) {
625 replace_key(tb
, tb
->CFR
[0],
630 zeros_num
= tb
->insert_size
[0] = 0;
632 } else { /* new item or part of it doesn't fall into R[0] */
634 leaf_shift_right(tb
, tb
->rnum
[0], tb
->rbytes
);
638 case M_PASTE
: /* append item */
640 if (n
- tb
->rnum
[0] <= item_pos
) { /* pasted item or part of it falls to R[0] */
641 if (item_pos
== n
- tb
->rnum
[0] && tb
->rbytes
!= -1) { /* we must shift the part of the appended item */
642 if (is_direntry_le_ih(item_head(tbS0
, item_pos
))) { /* we append to directory item */
646 "PAP-12145: invalid parameter in case of a directory");
647 entry_count
= ih_entry_count(item_head
649 if (entry_count
- tb
->rbytes
<
651 /* new directory entry falls into R[0] */
653 int paste_entry_position
;
655 RFALSE(tb
->rbytes
- 1 >= entry_count
|| !tb
-> insert_size
[0],
656 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
657 tb
->rbytes
, entry_count
);
658 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
659 leaf_shift_right(tb
, tb
->rnum
[0], tb
->rbytes
- 1);
660 /* Paste given directory entry to directory item */
661 paste_entry_position
= pos_in_item
- entry_count
+ tb
->rbytes
- 1;
662 buffer_info_init_right(tb
, &bi
);
663 leaf_paste_in_buffer(&bi
, 0, paste_entry_position
, tb
->insert_size
[0], body
, zeros_num
);
665 leaf_paste_entries(&bi
, 0, paste_entry_position
, 1,
666 (struct reiserfs_de_head
*) body
,
667 body
+ DEH_SIZE
, tb
->insert_size
[0]);
669 if (paste_entry_position
== 0) {
670 /* change delimiting keys */
671 replace_key(tb
, tb
->CFR
[0], tb
->rkey
[0], tb
->R
[0],0);
674 tb
->insert_size
[0] = 0;
676 } else { /* new directory entry doesn't fall into R[0] */
678 leaf_shift_right(tb
, tb
->rnum
[0], tb
->rbytes
);
680 } else { /* regular object */
682 int n_shift
, n_rem
, r_zeros_number
;
685 /* Calculate number of bytes which must be shifted from appended item */
686 if ((n_shift
= tb
->rbytes
- tb
->insert_size
[0]) < 0)
689 RFALSE(pos_in_item
!= ih_item_len
690 (item_head(tbS0
, item_pos
)),
691 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
692 pos_in_item
, ih_item_len
693 (item_head(tbS0
, item_pos
)));
695 leaf_shift_right(tb
, tb
->rnum
[0], n_shift
);
696 /* Calculate number of bytes which must remain in body after appending to R[0] */
697 if ((n_rem
= tb
->insert_size
[0] - tb
->rbytes
) < 0)
702 unsigned long temp_rem
= n_rem
;
704 version
= ih_version(item_head(tb
->R
[0], 0));
705 if (is_indirect_le_key(version
, leaf_key(tb
->R
[0], 0))) {
706 temp_rem
= n_rem
<< (tb
->tb_sb
->s_blocksize_bits
- UNFM_P_SHIFT
);
708 set_le_key_k_offset(version
, leaf_key(tb
->R
[0], 0),
709 le_key_k_offset(version
, leaf_key(tb
->R
[0], 0)) + temp_rem
);
710 set_le_key_k_offset(version
, internal_key(tb
->CFR
[0], tb
->rkey
[0]),
711 le_key_k_offset(version
, internal_key(tb
->CFR
[0], tb
->rkey
[0])) + temp_rem
);
713 /* k_offset (leaf_key(tb->R[0],0)) += n_rem;
714 k_offset (internal_key(tb->CFR[0],tb->rkey[0])) += n_rem;*/
715 do_balance_mark_internal_dirty(tb
, tb
->CFR
[0], 0);
717 /* Append part of body into R[0] */
718 buffer_info_init_right(tb
, &bi
);
719 if (n_rem
> zeros_num
) {
721 r_body
= body
+ n_rem
- zeros_num
;
724 r_zeros_number
= zeros_num
- n_rem
;
725 zeros_num
-= r_zeros_number
;
728 leaf_paste_in_buffer(&bi
, 0, n_shift
,
729 tb
->insert_size
[0] - n_rem
,
730 r_body
, r_zeros_number
);
732 if (is_indirect_le_ih(item_head(tb
->R
[0], 0))) {
735 "PAP-12160: paste more than one unformatted node pointer");
737 set_ih_free_space(item_head(tb
->R
[0], 0), 0);
739 tb
->insert_size
[0] = n_rem
;
743 } else { /* pasted item in whole falls into R[0] */
745 struct item_head
*pasted
;
747 ret_val
= leaf_shift_right(tb
, tb
->rnum
[0], tb
->rbytes
);
748 /* append item in R[0] */
749 if (pos_in_item
>= 0) {
750 buffer_info_init_right(tb
, &bi
);
751 leaf_paste_in_buffer(&bi
, item_pos
- n
+ tb
->rnum
[0], pos_in_item
,
752 tb
->insert_size
[0], body
, zeros_num
);
755 /* paste new entry, if item is directory item */
756 pasted
= item_head(tb
->R
[0], item_pos
- n
+ tb
->rnum
[0]);
757 if (is_direntry_le_ih(pasted
) && pos_in_item
>= 0) {
758 leaf_paste_entries(&bi
, item_pos
- n
+ tb
->rnum
[0],
760 (struct reiserfs_de_head
*) body
,
761 body
+ DEH_SIZE
, tb
->insert_size
[0]);
764 RFALSE(item_pos
- n
+ tb
->rnum
[0],
765 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
767 /* update delimiting keys */
768 replace_key(tb
, tb
->CFR
[0], tb
->rkey
[0], tb
->R
[0], 0);
772 if (is_indirect_le_ih(pasted
))
773 set_ih_free_space(pasted
, 0);
774 zeros_num
= tb
->insert_size
[0] = 0;
776 } else { /* new item doesn't fall into R[0] */
778 leaf_shift_right(tb
, tb
->rnum
[0], tb
->rbytes
);
781 default: /* cases d and t */
782 reiserfs_panic(tb
->tb_sb
, "PAP-12175",
783 "rnum > 0: unexpected mode: %s(%d)",
784 (flag
== M_DELETE
) ? "DELETE" : ((flag
== M_CUT
) ? "CUT" : "UNKNOWN"), flag
);
789 /* tb->rnum[0] > 0 */
790 RFALSE(tb
->blknum
[0] > 3,
791 "PAP-12180: blknum can not be %d. It must be <= 3", tb
->blknum
[0]);
792 RFALSE(tb
->blknum
[0] < 0,
793 "PAP-12185: blknum can not be %d. It must be >= 0", tb
->blknum
[0]);
795 /* if while adding to a node we discover that it is possible to split
796 it in two, and merge the left part into the left neighbor and the
797 right part into the right neighbor, eliminating the node */
798 if (tb
->blknum
[0] == 0) { /* node S[0] is empty now */
800 RFALSE(!tb
->lnum
[0] || !tb
->rnum
[0],
801 "PAP-12190: lnum and rnum must not be zero");
802 /* if insertion was done before 0-th position in R[0], right
803 delimiting key of the tb->L[0]'s and left delimiting key are
807 reiserfs_panic(tb
->tb_sb
, "vs-12195",
808 "CFR not initialized");
809 copy_key(internal_key(tb
->CFL
[0], tb
->lkey
[0]),
810 internal_key(tb
->CFR
[0], tb
->rkey
[0]));
811 do_balance_mark_internal_dirty(tb
, tb
->CFL
[0], 0);
814 reiserfs_invalidate_buffer(tb
, tbS0
);
818 /* Fill new nodes that appear in place of S[0] */
820 /* I am told that this copying is because we need an array to enable
821 the looping code. -Hans */
822 snum
[0] = tb
->s1num
, snum
[1] = tb
->s2num
;
823 sbytes
[0] = tb
->s1bytes
;
824 sbytes
[1] = tb
->s2bytes
;
825 for (i
= tb
->blknum
[0] - 2; i
>= 0; i
--) {
827 RFALSE(!snum
[i
], "PAP-12200: snum[%d] == %d. Must be > 0", i
,
830 /* here we shift from S to S_new nodes */
832 S_new
[i
] = get_FEB(tb
);
834 /* initialized block type and tree level */
835 set_blkh_level(B_BLK_HEAD(S_new
[i
]), DISK_LEAF_NODE_LEVEL
);
837 n
= B_NR_ITEMS(tbS0
);
840 case M_INSERT
: /* insert item */
842 if (n
- snum
[i
] < item_pos
) { /* new item or it's part falls to first new node S_new[i] */
843 if (item_pos
== n
- snum
[i
] + 1 && sbytes
[i
] != -1) { /* part of new item falls into S_new[i] */
844 int old_key_comp
, old_len
, r_zeros_number
;
848 /* Move snum[i]-1 items from S[0] to S_new[i] */
849 leaf_move_items(LEAF_FROM_S_TO_SNEW
, tb
,
852 /* Remember key component and item length */
853 version
= ih_version(ih
);
854 old_key_comp
= le_ih_k_offset(ih
);
855 old_len
= ih_item_len(ih
);
857 /* Calculate key component and item length to insert into S_new[i] */
858 set_le_ih_k_offset(ih
, le_ih_k_offset(ih
) +
859 ((old_len
- sbytes
[i
]) << (is_indirect_le_ih(ih
) ? tb
->tb_sb
-> s_blocksize_bits
- UNFM_P_SHIFT
: 0)));
861 put_ih_item_len(ih
, sbytes
[i
]);
863 /* Insert part of the item into S_new[i] before 0-th item */
864 buffer_info_init_bh(tb
, &bi
, S_new
[i
]);
866 if ((old_len
- sbytes
[i
]) > zeros_num
) {
868 r_body
= body
+ (old_len
- sbytes
[i
]) - zeros_num
;
871 r_zeros_number
= zeros_num
- (old_len
- sbytes
[i
]);
872 zeros_num
-= r_zeros_number
;
875 leaf_insert_into_buf(&bi
, 0, ih
, r_body
, r_zeros_number
);
877 /* Calculate key component and item length to insert into S[i] */
878 set_le_ih_k_offset(ih
, old_key_comp
);
879 put_ih_item_len(ih
, old_len
- sbytes
[i
]);
880 tb
->insert_size
[0] -= sbytes
[i
];
881 } else { /* whole new item falls into S_new[i] */
883 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
884 leaf_move_items(LEAF_FROM_S_TO_SNEW
, tb
,
885 snum
[i
] - 1, sbytes
[i
], S_new
[i
]);
887 /* Insert new item into S_new[i] */
888 buffer_info_init_bh(tb
, &bi
, S_new
[i
]);
889 leaf_insert_into_buf(&bi
, item_pos
- n
+ snum
[i
] - 1,
890 ih
, body
, zeros_num
);
892 zeros_num
= tb
->insert_size
[0] = 0;
896 else { /* new item or it part don't falls into S_new[i] */
898 leaf_move_items(LEAF_FROM_S_TO_SNEW
, tb
,
899 snum
[i
], sbytes
[i
], S_new
[i
]);
903 case M_PASTE
: /* append item */
905 if (n
- snum
[i
] <= item_pos
) { /* pasted item or part if it falls to S_new[i] */
906 if (item_pos
== n
- snum
[i
] && sbytes
[i
] != -1) { /* we must shift part of the appended item */
907 struct item_head
*aux_ih
;
909 RFALSE(ih
, "PAP-12210: ih must be 0");
911 aux_ih
= item_head(tbS0
, item_pos
);
912 if (is_direntry_le_ih(aux_ih
)) {
913 /* we append to directory item */
917 entry_count
= ih_entry_count(aux_ih
);
919 if (entry_count
- sbytes
[i
] < pos_in_item
&& pos_in_item
<= entry_count
) {
920 /* new directory entry falls into S_new[i] */
922 RFALSE(!tb
->insert_size
[0], "PAP-12215: insert_size is already 0");
923 RFALSE(sbytes
[i
] - 1 >= entry_count
,
924 "PAP-12220: there are no so much entries (%d), only %d",
925 sbytes
[i
] - 1, entry_count
);
927 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
928 leaf_move_items(LEAF_FROM_S_TO_SNEW
, tb
, snum
[i
], sbytes
[i
] - 1, S_new
[i
]);
929 /* Paste given directory entry to directory item */
930 buffer_info_init_bh(tb
, &bi
, S_new
[i
]);
931 leaf_paste_in_buffer(&bi
, 0, pos_in_item
- entry_count
+ sbytes
[i
] - 1,
932 tb
->insert_size
[0], body
, zeros_num
);
933 /* paste new directory entry */
934 leaf_paste_entries(&bi
, 0, pos_in_item
- entry_count
+ sbytes
[i
] - 1, 1,
935 (struct reiserfs_de_head
*) body
,
936 body
+ DEH_SIZE
, tb
->insert_size
[0]);
937 tb
->insert_size
[0] = 0;
939 } else { /* new directory entry doesn't fall into S_new[i] */
940 leaf_move_items(LEAF_FROM_S_TO_SNEW
,tb
, snum
[i
], sbytes
[i
], S_new
[i
]);
942 } else { /* regular object */
944 int n_shift
, n_rem
, r_zeros_number
;
947 RFALSE(pos_in_item
!= ih_item_len(item_head(tbS0
, item_pos
)) || tb
->insert_size
[0] <= 0,
948 "PAP-12225: item too short or insert_size <= 0");
950 /* Calculate number of bytes which must be shifted from appended item */
951 n_shift
= sbytes
[i
] - tb
->insert_size
[0];
954 leaf_move_items(LEAF_FROM_S_TO_SNEW
, tb
, snum
[i
], n_shift
, S_new
[i
]);
956 /* Calculate number of bytes which must remain in body after append to S_new[i] */
957 n_rem
= tb
->insert_size
[0] - sbytes
[i
];
960 /* Append part of body into S_new[0] */
961 buffer_info_init_bh(tb
, &bi
, S_new
[i
]);
962 if (n_rem
> zeros_num
) {
964 r_body
= body
+ n_rem
- zeros_num
;
967 r_zeros_number
= zeros_num
- n_rem
;
968 zeros_num
-= r_zeros_number
;
971 leaf_paste_in_buffer(&bi
, 0, n_shift
,
972 tb
->insert_size
[0] - n_rem
,
973 r_body
, r_zeros_number
);
975 struct item_head
*tmp
;
977 tmp
= item_head(S_new
[i
], 0);
978 if (is_indirect_le_ih
980 set_ih_free_space(tmp
, 0);
981 set_le_ih_k_offset(tmp
, le_ih_k_offset(tmp
) + (n_rem
<< (tb
->tb_sb
->s_blocksize_bits
- UNFM_P_SHIFT
)));
983 set_le_ih_k_offset(tmp
, le_ih_k_offset(tmp
) + n_rem
);
987 tb
->insert_size
[0] = n_rem
;
992 /* item falls wholly into S_new[i] */
995 struct item_head
*pasted
;
997 #ifdef CONFIG_REISERFS_CHECK
998 struct item_head
*ih_check
= item_head(tbS0
, item_pos
);
1000 if (!is_direntry_le_ih(ih_check
)
1001 && (pos_in_item
!= ih_item_len(ih_check
)
1002 || tb
->insert_size
[0] <= 0))
1003 reiserfs_panic(tb
->tb_sb
,
1008 #endif /* CONFIG_REISERFS_CHECK */
1010 leaf_mi
= leaf_move_items(LEAF_FROM_S_TO_SNEW
,
1016 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1019 /* paste into item */
1020 buffer_info_init_bh(tb
, &bi
, S_new
[i
]);
1021 leaf_paste_in_buffer(&bi
,
1022 item_pos
- n
+ snum
[i
],
1027 pasted
= item_head(S_new
[i
], item_pos
- n
+ snum
[i
]);
1028 if (is_direntry_le_ih(pasted
)) {
1029 leaf_paste_entries(&bi
,
1030 item_pos
- n
+ snum
[i
],
1032 (struct reiserfs_de_head
*)body
,
1038 /* if we paste to indirect item update ih_free_space */
1039 if (is_indirect_le_ih(pasted
))
1040 set_ih_free_space(pasted
, 0);
1041 zeros_num
= tb
->insert_size
[0] = 0;
1045 else { /* pasted item doesn't fall into S_new[i] */
1047 leaf_move_items(LEAF_FROM_S_TO_SNEW
, tb
,
1048 snum
[i
], sbytes
[i
], S_new
[i
]);
1051 default: /* cases d and t */
1052 reiserfs_panic(tb
->tb_sb
, "PAP-12245",
1053 "blknum > 2: unexpected mode: %s(%d)",
1054 (flag
== M_DELETE
) ? "DELETE" : ((flag
== M_CUT
) ? "CUT" : "UNKNOWN"), flag
);
1057 memcpy(insert_key
+ i
, leaf_key(S_new
[i
], 0), KEY_SIZE
);
1058 insert_ptr
[i
] = S_new
[i
];
1060 RFALSE(!buffer_journaled(S_new
[i
])
1061 || buffer_journal_dirty(S_new
[i
])
1062 || buffer_dirty(S_new
[i
]), "PAP-12247: S_new[%d] : (%b)",
1066 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1067 affected item which remains in S */
1068 if (0 <= item_pos
&& item_pos
< tb
->s0num
) { /* if we must insert or append into buffer S[0] */
1071 case M_INSERT
: /* insert item into S[0] */
1072 buffer_info_init_tbS0(tb
, &bi
);
1073 leaf_insert_into_buf(&bi
, item_pos
, ih
, body
,
1076 /* If we insert the first key change the delimiting key */
1077 if (item_pos
== 0) {
1078 if (tb
->CFL
[0]) /* can be 0 in reiserfsck */
1079 replace_key(tb
, tb
->CFL
[0], tb
->lkey
[0], tbS0
, 0);
1083 case M_PASTE
:{ /* append item in S[0] */
1084 struct item_head
*pasted
;
1086 pasted
= item_head(tbS0
, item_pos
);
1087 /* when directory, may be new entry already pasted */
1088 if (is_direntry_le_ih(pasted
)) {
1089 if (pos_in_item
>= 0 && pos_in_item
<= ih_entry_count(pasted
)) {
1091 RFALSE(!tb
->insert_size
[0],
1092 "PAP-12260: insert_size is 0 already");
1095 buffer_info_init_tbS0(tb
, &bi
);
1096 leaf_paste_in_buffer(&bi
, item_pos
, pos_in_item
,
1097 tb
->insert_size
[0], body
,
1101 leaf_paste_entries(&bi
, item_pos
, pos_in_item
, 1,
1102 (struct reiserfs_de_head
*)body
,
1104 tb
->insert_size
[0]);
1105 if (!item_pos
&& !pos_in_item
) {
1106 RFALSE(!tb
->CFL
[0] || !tb
->L
[0],
1107 "PAP-12270: CFL[0]/L[0] must be specified");
1109 replace_key(tb
, tb
->CFL
[0], tb
->lkey
[0], tbS0
, 0);
1111 tb
->insert_size
[0] = 0;
1113 } else { /* regular object */
1114 if (pos_in_item
== ih_item_len(pasted
)) {
1116 RFALSE(tb
->insert_size
[0] <= 0,
1117 "PAP-12275: insert size must not be %d",
1118 tb
->insert_size
[0]);
1119 buffer_info_init_tbS0(tb
, &bi
);
1120 leaf_paste_in_buffer(&bi
, item_pos
, pos_in_item
,
1121 tb
->insert_size
[0], body
, zeros_num
);
1123 if (is_indirect_le_ih(pasted
)) {
1128 "PAP-12280: insert_size for indirect item must be %d, not %d",
1133 set_ih_free_space(pasted
, 0);
1135 tb
->insert_size
[0] = 0;
1137 #ifdef CONFIG_REISERFS_CHECK
1139 if (tb
->insert_size
[0]) {
1140 print_cur_tb("12285");
1141 reiserfs_panic(tb
->tb_sb
,
1146 tb
->insert_size
[0]);
1149 #endif /* CONFIG_REISERFS_CHECK */
1152 } /* case M_PASTE: */
1155 #ifdef CONFIG_REISERFS_CHECK
1156 if (flag
== M_PASTE
&& tb
->insert_size
[0]) {
1157 print_cur_tb("12290");
1158 reiserfs_panic(tb
->tb_sb
,
1159 "PAP-12290", "insert_size is still not 0 (%d)",
1160 tb
->insert_size
[0]);
1162 #endif /* CONFIG_REISERFS_CHECK */
1164 } /* Leaf level of the tree is balanced (end of balance_leaf) */
1166 /* Make empty node */
1167 void make_empty_node(struct buffer_info
*bi
)
1169 struct block_head
*blkh
;
1171 RFALSE(bi
->bi_bh
== NULL
, "PAP-12295: pointer to the buffer is NULL");
1173 blkh
= B_BLK_HEAD(bi
->bi_bh
);
1174 set_blkh_nr_item(blkh
, 0);
1175 set_blkh_free_space(blkh
, MAX_CHILD_SIZE(bi
->bi_bh
));
1178 B_N_CHILD(bi
->bi_parent
, bi
->bi_position
)->dc_size
= 0; /* Endian safe if 0 */
1181 /* Get first empty buffer */
1182 struct buffer_head
*get_FEB(struct tree_balance
*tb
)
1185 struct buffer_info bi
;
1187 for (i
= 0; i
< MAX_FEB_SIZE
; i
++)
1188 if (tb
->FEB
[i
] != NULL
)
1191 if (i
== MAX_FEB_SIZE
)
1192 reiserfs_panic(tb
->tb_sb
, "vs-12300", "FEB list is empty");
1194 buffer_info_init_bh(tb
, &bi
, tb
->FEB
[i
]);
1195 make_empty_node(&bi
);
1196 set_buffer_uptodate(tb
->FEB
[i
]);
1197 tb
->used
[i
] = tb
->FEB
[i
];
1203 /* This is now used because reiserfs_free_block has to be able to schedule. */
1204 static void store_thrown(struct tree_balance
*tb
, struct buffer_head
*bh
)
1208 if (buffer_dirty(bh
))
1209 reiserfs_warning(tb
->tb_sb
, "reiserfs-12320",
1210 "called with dirty buffer");
1211 for (i
= 0; i
< ARRAY_SIZE(tb
->thrown
); i
++)
1212 if (!tb
->thrown
[i
]) {
1214 get_bh(bh
); /* free_thrown puts this */
1217 reiserfs_warning(tb
->tb_sb
, "reiserfs-12321",
1218 "too many thrown buffers");
1221 static void free_thrown(struct tree_balance
*tb
)
1224 b_blocknr_t blocknr
;
1225 for (i
= 0; i
< ARRAY_SIZE(tb
->thrown
); i
++) {
1226 if (tb
->thrown
[i
]) {
1227 blocknr
= tb
->thrown
[i
]->b_blocknr
;
1228 if (buffer_dirty(tb
->thrown
[i
]))
1229 reiserfs_warning(tb
->tb_sb
, "reiserfs-12322",
1230 "called with dirty buffer %d",
1232 brelse(tb
->thrown
[i
]); /* incremented in store_thrown */
1233 reiserfs_free_block(tb
->transaction_handle
, NULL
,
1239 void reiserfs_invalidate_buffer(struct tree_balance
*tb
, struct buffer_head
*bh
)
1241 struct block_head
*blkh
;
1242 blkh
= B_BLK_HEAD(bh
);
1243 set_blkh_level(blkh
, FREE_LEVEL
);
1244 set_blkh_nr_item(blkh
, 0);
1246 clear_buffer_dirty(bh
);
1247 store_thrown(tb
, bh
);
1250 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1251 void replace_key(struct tree_balance
*tb
, struct buffer_head
*dest
, int n_dest
,
1252 struct buffer_head
*src
, int n_src
)
1255 RFALSE(dest
== NULL
|| src
== NULL
,
1256 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1258 RFALSE(!B_IS_KEYS_LEVEL(dest
),
1259 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1261 RFALSE(n_dest
< 0 || n_src
< 0,
1262 "vs-12315: src(%d) or dest(%d) key number < 0", n_src
, n_dest
);
1263 RFALSE(n_dest
>= B_NR_ITEMS(dest
) || n_src
>= B_NR_ITEMS(src
),
1264 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1265 n_src
, B_NR_ITEMS(src
), n_dest
, B_NR_ITEMS(dest
));
1267 if (B_IS_ITEMS_LEVEL(src
))
1268 /* source buffer contains leaf node */
1269 memcpy(internal_key(dest
, n_dest
), item_head(src
, n_src
),
1272 memcpy(internal_key(dest
, n_dest
), internal_key(src
, n_src
),
1275 do_balance_mark_internal_dirty(tb
, dest
, 0);
1278 int get_left_neighbor_position(struct tree_balance
*tb
, int h
)
1280 int Sh_position
= PATH_H_POSITION(tb
->tb_path
, h
+ 1);
1282 RFALSE(PATH_H_PPARENT(tb
->tb_path
, h
) == NULL
|| tb
->FL
[h
] == NULL
,
1283 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1284 h
, tb
->FL
[h
], h
, PATH_H_PPARENT(tb
->tb_path
, h
));
1286 if (Sh_position
== 0)
1287 return B_NR_ITEMS(tb
->FL
[h
]);
1289 return Sh_position
- 1;
1292 int get_right_neighbor_position(struct tree_balance
*tb
, int h
)
1294 int Sh_position
= PATH_H_POSITION(tb
->tb_path
, h
+ 1);
1296 RFALSE(PATH_H_PPARENT(tb
->tb_path
, h
) == NULL
|| tb
->FR
[h
] == NULL
,
1297 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1298 h
, PATH_H_PPARENT(tb
->tb_path
, h
), h
, tb
->FR
[h
]);
1300 if (Sh_position
== B_NR_ITEMS(PATH_H_PPARENT(tb
->tb_path
, h
)))
1303 return Sh_position
+ 1;
1306 #ifdef CONFIG_REISERFS_CHECK
1308 int is_reusable(struct super_block
*s
, b_blocknr_t block
, int bit_value
);
1309 static void check_internal_node(struct super_block
*s
, struct buffer_head
*bh
,
1312 struct disk_child
*dc
;
1315 RFALSE(!bh
, "PAP-12336: bh == 0");
1317 if (!bh
|| !B_IS_IN_TREE(bh
))
1320 RFALSE(!buffer_dirty(bh
) &&
1321 !(buffer_journaled(bh
) || buffer_journal_dirty(bh
)),
1322 "PAP-12337: buffer (%b) must be dirty", bh
);
1323 dc
= B_N_CHILD(bh
, 0);
1325 for (i
= 0; i
<= B_NR_ITEMS(bh
); i
++, dc
++) {
1326 if (!is_reusable(s
, dc_block_number(dc
), 1)) {
1328 reiserfs_panic(s
, "PAP-12338",
1329 "invalid child pointer %y in %b",
1335 static int locked_or_not_in_tree(struct tree_balance
*tb
,
1336 struct buffer_head
*bh
, char *which
)
1338 if ((!buffer_journal_prepared(bh
) && buffer_locked(bh
)) ||
1339 !B_IS_IN_TREE(bh
)) {
1340 reiserfs_warning(tb
->tb_sb
, "vs-12339", "%s (%b)", which
, bh
);
1346 static int check_before_balancing(struct tree_balance
*tb
)
1350 if (REISERFS_SB(tb
->tb_sb
)->cur_tb
) {
1351 reiserfs_panic(tb
->tb_sb
, "vs-12335", "suspect that schedule "
1352 "occurred based on cur_tb not being null at "
1353 "this point in code. do_balance cannot properly "
1354 "handle concurrent tree accesses on a same "
1359 * double check that buffers that we will modify are unlocked.
1360 * (fix_nodes should already have prepped all of these for us).
1363 retval
|= locked_or_not_in_tree(tb
, tb
->L
[0], "L[0]");
1364 retval
|= locked_or_not_in_tree(tb
, tb
->FL
[0], "FL[0]");
1365 retval
|= locked_or_not_in_tree(tb
, tb
->CFL
[0], "CFL[0]");
1366 check_leaf(tb
->L
[0]);
1369 retval
|= locked_or_not_in_tree(tb
, tb
->R
[0], "R[0]");
1370 retval
|= locked_or_not_in_tree(tb
, tb
->FR
[0], "FR[0]");
1371 retval
|= locked_or_not_in_tree(tb
, tb
->CFR
[0], "CFR[0]");
1372 check_leaf(tb
->R
[0]);
1374 retval
|= locked_or_not_in_tree(tb
, PATH_PLAST_BUFFER(tb
->tb_path
),
1376 check_leaf(PATH_PLAST_BUFFER(tb
->tb_path
));
1381 static void check_after_balance_leaf(struct tree_balance
*tb
)
1384 if (B_FREE_SPACE(tb
->L
[0]) !=
1385 MAX_CHILD_SIZE(tb
->L
[0]) -
1387 (tb
->FL
[0], get_left_neighbor_position(tb
, 0)))) {
1388 print_cur_tb("12221");
1389 reiserfs_panic(tb
->tb_sb
, "PAP-12355",
1390 "shift to left was incorrect");
1394 if (B_FREE_SPACE(tb
->R
[0]) !=
1395 MAX_CHILD_SIZE(tb
->R
[0]) -
1397 (tb
->FR
[0], get_right_neighbor_position(tb
, 0)))) {
1398 print_cur_tb("12222");
1399 reiserfs_panic(tb
->tb_sb
, "PAP-12360",
1400 "shift to right was incorrect");
1403 if (PATH_H_PBUFFER(tb
->tb_path
, 1) &&
1404 (B_FREE_SPACE(PATH_H_PBUFFER(tb
->tb_path
, 0)) !=
1405 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb
->tb_path
, 0)) -
1406 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb
->tb_path
, 1),
1407 PATH_H_POSITION(tb
->tb_path
, 1)))))) {
1408 int left
= B_FREE_SPACE(PATH_H_PBUFFER(tb
->tb_path
, 0));
1409 int right
= (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb
->tb_path
, 0)) -
1410 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb
->tb_path
, 1),
1411 PATH_H_POSITION(tb
->tb_path
,
1413 print_cur_tb("12223");
1414 reiserfs_warning(tb
->tb_sb
, "reiserfs-12363",
1415 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1416 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1418 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb
->tb_path
, 0)),
1419 PATH_H_PBUFFER(tb
->tb_path
, 1),
1420 PATH_H_POSITION(tb
->tb_path
, 1),
1422 (PATH_H_PBUFFER(tb
->tb_path
, 1),
1423 PATH_H_POSITION(tb
->tb_path
, 1))),
1425 reiserfs_panic(tb
->tb_sb
, "PAP-12365", "S is incorrect");
1429 static void check_leaf_level(struct tree_balance
*tb
)
1431 check_leaf(tb
->L
[0]);
1432 check_leaf(tb
->R
[0]);
1433 check_leaf(PATH_PLAST_BUFFER(tb
->tb_path
));
1436 static void check_internal_levels(struct tree_balance
*tb
)
1440 /* check all internal nodes */
1441 for (h
= 1; tb
->insert_size
[h
]; h
++) {
1442 check_internal_node(tb
->tb_sb
, PATH_H_PBUFFER(tb
->tb_path
, h
),
1443 "BAD BUFFER ON PATH");
1445 check_internal_node(tb
->tb_sb
, tb
->L
[h
], "BAD L");
1447 check_internal_node(tb
->tb_sb
, tb
->R
[h
], "BAD R");
1455 * Now we have all of the buffers that must be used in balancing of
1456 * the tree. We rely on the assumption that schedule() will not occur
1457 * while do_balance works. ( Only interrupt handlers are acceptable.)
1458 * We balance the tree according to the analysis made before this,
1459 * using buffers already obtained. For SMP support it will someday be
1460 * necessary to add ordered locking of tb.
1464 * Some interesting rules of balancing:
1465 * we delete a maximum of two nodes per level per balancing: we never
1466 * delete R, when we delete two of three nodes L, S, R then we move
1469 * we only delete L if we are deleting two nodes, if we delete only
1470 * one node we delete S
1472 * if we shift leaves then we shift as much as we can: this is a
1473 * deliberate policy of extremism in node packing which results in
1474 * higher average utilization after repeated random balance operations
1475 * at the cost of more memory copies and more balancing as a result of
1476 * small insertions to full nodes.
1478 * if we shift internal nodes we try to evenly balance the node
1479 * utilization, with consequent less balancing at the cost of lower
1482 * one could argue that the policy for directories in leaves should be
1483 * that of internal nodes, but we will wait until another day to
1484 * evaluate this.... It would be nice to someday measure and prove
1485 * these assumptions as to what is optimal....
1488 static inline void do_balance_starts(struct tree_balance
*tb
)
1490 /* use print_cur_tb() to see initial state of struct tree_balance */
1492 /* store_print_tb (tb); */
1494 /* do not delete, just comment it out */
1496 print_tb(flag, PATH_LAST_POSITION(tb->tb_path),
1497 tb->tb_path->pos_in_item, tb, "check");
1499 RFALSE(check_before_balancing(tb
), "PAP-12340: locked buffers in TB");
1500 #ifdef CONFIG_REISERFS_CHECK
1501 REISERFS_SB(tb
->tb_sb
)->cur_tb
= tb
;
1505 static inline void do_balance_completed(struct tree_balance
*tb
)
1508 #ifdef CONFIG_REISERFS_CHECK
1509 check_leaf_level(tb
);
1510 check_internal_levels(tb
);
1511 REISERFS_SB(tb
->tb_sb
)->cur_tb
= NULL
;
1515 * reiserfs_free_block is no longer schedule safe. So, we need to
1516 * put the buffers we want freed on the thrown list during do_balance,
1517 * and then free them now
1520 REISERFS_SB(tb
->tb_sb
)->s_do_balance
++;
1522 /* release all nodes hold to perform the balancing */
1529 * do_balance - balance the tree
1531 * @tb: tree_balance structure
1532 * @ih: item header of inserted item
1533 * @body: body of inserted item or bytes to paste
1534 * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' paste
1536 * Cut means delete part of an item (includes removing an entry from a
1539 * Delete means delete whole item.
1541 * Insert means add a new item into the tree.
1543 * Paste means to append to the end of an existing file or to
1544 * insert a directory entry.
1546 void do_balance(struct tree_balance
*tb
, struct item_head
*ih
,
1547 const char *body
, int flag
)
1549 int child_pos
; /* position of a child node in its parent */
1550 int h
; /* level of the tree being processed */
1553 * in our processing of one level we sometimes determine what
1554 * must be inserted into the next higher level. This insertion
1555 * consists of a key or two keys and their corresponding
1558 struct item_head insert_key
[2];
1560 /* inserted node-ptrs for the next level */
1561 struct buffer_head
*insert_ptr
[2];
1564 tb
->need_balance_dirty
= 0;
1566 if (FILESYSTEM_CHANGED_TB(tb
)) {
1567 reiserfs_panic(tb
->tb_sb
, "clm-6000", "fs generation has "
1570 /* if we have no real work to do */
1571 if (!tb
->insert_size
[0]) {
1572 reiserfs_warning(tb
->tb_sb
, "PAP-12350",
1573 "insert_size == 0, mode == %c", flag
);
1578 atomic_inc(&(fs_generation(tb
->tb_sb
)));
1579 do_balance_starts(tb
);
1582 * balance_leaf returns 0 except if combining L R and S into
1583 * one node. see balance_internal() for explanation of this
1586 child_pos
= PATH_H_B_ITEM_ORDER(tb
->tb_path
, 0) +
1587 balance_leaf(tb
, ih
, body
, flag
, insert_key
, insert_ptr
);
1589 #ifdef CONFIG_REISERFS_CHECK
1590 check_after_balance_leaf(tb
);
1593 /* Balance internal level of the tree. */
1594 for (h
= 1; h
< MAX_HEIGHT
&& tb
->insert_size
[h
]; h
++)
1596 balance_internal(tb
, h
, child_pos
, insert_key
, insert_ptr
);
1598 do_balance_completed(tb
);
This page took 0.093264 seconds and 5 git commands to generate.