/* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */
5 /* Now we have all buffers that must be used in balancing of the tree */
6 /* Further calculations can not cause schedule(), and thus the buffer */
7 /* tree will be stable until the balancing will be finished */
8 /* balance the tree according to the analysis made before, */
9 /* and using buffers obtained after all above. */
12 ** balance_leaf_when_delete
18 #include <asm/uaccess.h>
19 #include <linux/time.h>
20 #include <linux/reiserfs_fs.h>
21 #include <linux/buffer_head.h>
22 #include <linux/kernel.h>
24 #ifdef CONFIG_REISERFS_CHECK
/* Detects whether more than one copy of tb exists as a means of
   checking whether schedule() is interrupting do_balance().  Only
   compiled in under CONFIG_REISERFS_CHECK. */
struct tree_balance *cur_tb = NULL;
/* Mark a leaf buffer dirty through the journal of the transaction this
   balance runs under.  The 'flag' parameter is unused here but kept for
   interface compatibility with callers that pass a balancing mode. */
inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
				       struct buffer_head *bh, int flag)
{
	journal_mark_dirty(tb->transaction_handle,
			   tb->transaction_handle->t_super, bh);
}
/* Internal-node and super-block dirtying go through the same
   journal-marking path as leaf dirtying. */
#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
43 if deleting something ( tb->insert_size[0] < 0 )
44 return(balance_leaf_when_delete()); (flag d handled here)
46 if lnum is larger than 0 we put items into the left node
47 if rnum is larger than 0 we put items into the right node
48 if snum1 is larger than 0 we put items into the new node s1
49 if snum2 is larger than 0 we put items into the new node s2
50 Note that all *num* count new items being created.
52 It would be easier to read balance_leaf() if each of these summary
53 lines was a separate procedure rather than being inlined. I think
54 that there are many passages here and in balance_leaf_when_delete() in
55 which two calls to one procedure can replace two passages, and it
56 might save cache space and improve software maintenance costs to do so.
58 Vladimir made the perceptive comment that we should offload most of
59 the decision making in this function into fix_nodes/check_balance, and
60 then create some sort of structure in tb that says what actions should
61 be performed by do_balance.
/* Balance leaf node in case of delete or cut: insert_size[0] < 0
 *
 * lnum, rnum can have values >= -1
 *	-1 means that the neighbor must be joined with S
 *	 0 means that nothing should be done with the neighbor
 *	>0 means to shift entirely or partly the specified number of
 *	   items to the neighbor
 *
 * NOTE(review): several short source lines (braces, single assignments,
 * break statements) were lost in extraction and have been restored here
 * from the surrounding token stream — verify against the tree.
 */
static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
{
	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
	int item_pos = PATH_LAST_POSITION(tb->tb_path);
	int pos_in_item = tb->tb_path->pos_in_item;
	struct buffer_info bi;
	int n;
	struct item_head *ih;

	RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
	       "vs- 12000: level: wrong FR %z", tb->FR[0]);
	RFALSE(tb->blknum[0] > 1,
	       "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
	RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
	       "PAP-12010: tree can not be empty");

	ih = B_N_PITEM_HEAD(tbS0, item_pos);

	/* Delete or truncate the item */

	switch (flag) {
	case M_DELETE:		/* delete item in S[0] */

		RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
		       "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
		       -tb->insert_size[0], ih);

		bi.tb = tb;
		bi.bi_bh = tbS0;
		bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
		bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
		leaf_delete_items(&bi, 0, item_pos, 1, -1);

		/* deleting the 0-th item changes S[0]'s smallest key, so
		   the left delimiting key in the common parent must be
		   updated (or, if S[0] became empty, taken from F[0]) */
		if (!item_pos && tb->CFL[0]) {
			if (B_NR_ITEMS(tbS0)) {
				replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
					    0);
			} else {
				if (!PATH_H_POSITION(tb->tb_path, 1))
					replace_key(tb, tb->CFL[0], tb->lkey[0],
						    PATH_H_PPARENT(tb->tb_path,
								   0), 0);
			}
		}

		RFALSE(!item_pos && !tb->CFL[0],
		       "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
		       tb->L[0]);

		break;

	case M_CUT:{		/* cut item in S[0] */
			bi.tb = tb;
			bi.bi_bh = tbS0;
			bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
			bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
			if (is_direntry_le_ih(ih)) {

				/* UFS unlink semantics are such that you can only delete one directory entry at a time. */
				/* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
				tb->insert_size[0] = -1;
				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
						     -tb->insert_size[0]);

				RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
				       "PAP-12030: can not change delimiting key. CFL[0]=%p",
				       tb->CFL[0]);

				/* cutting entry 0 of item 0 changes the
				   smallest key — fix the delimiting key */
				if (!item_pos && !pos_in_item && tb->CFL[0]) {
					replace_key(tb, tb->CFL[0], tb->lkey[0],
						    tbS0, 0);
				}
			} else {
				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
						     -tb->insert_size[0]);

				RFALSE(!ih_item_len(ih),
				       "PAP-12035: cut must leave non-zero dynamic length of item");
			}
			break;
		}

	default:
		print_cur_tb("12040");
		reiserfs_panic(tb->tb_sb,
			       "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
			       (flag == M_PASTE) ? "PASTE" : ((flag ==
							      M_INSERT) ? "INSERT"
							     : "UNKNOWN"), flag);
	}

	/* the rule is that no shifting occurs unless by shifting a node can be freed */
	n = B_NR_ITEMS(tbS0);
	if (tb->lnum[0]) {	/* L[0] takes part in balancing */
		if (tb->lnum[0] == -1) {	/* L[0] must be joined with S[0] */
			if (tb->rnum[0] == -1) {	/* R[0] must be also joined with S[0] */
				if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
					/* all contents of all the 3 buffers will be in L[0] */
					if (PATH_H_POSITION(tb->tb_path, 1) == 0
					    && 1 < B_NR_ITEMS(tb->FR[0]))
						replace_key(tb, tb->CFL[0],
							    tb->lkey[0],
							    tb->FR[0], 1);

					leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
							-1, NULL);
					leaf_move_items(LEAF_FROM_R_TO_L, tb,
							B_NR_ITEMS(tb->R[0]),
							-1, NULL);

					reiserfs_invalidate_buffer(tb, tbS0);
					reiserfs_invalidate_buffer(tb,
								   tb->R[0]);

					return 0;
				}
				/* all contents of all the 3 buffers will be in R[0] */
				leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
						NULL);
				leaf_move_items(LEAF_FROM_L_TO_R, tb,
						B_NR_ITEMS(tb->L[0]), -1, NULL);

				/* right_delimiting_key is correct in R[0] */
				replace_key(tb, tb->CFR[0], tb->rkey[0],
					    tb->R[0], 0);

				reiserfs_invalidate_buffer(tb, tbS0);
				reiserfs_invalidate_buffer(tb, tb->L[0]);

				return -1;
			}

			RFALSE(tb->rnum[0] != 0,
			       "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
			/* all contents of L[0] and S[0] will be in L[0] */
			leaf_shift_left(tb, n, -1);

			reiserfs_invalidate_buffer(tb, tbS0);

			return 0;
		}
		/* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */

		RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
		       (tb->lnum[0] + tb->rnum[0] > n + 1),
		       "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
		       tb->rnum[0], tb->lnum[0], n);
		RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
		       (tb->lbytes != -1 || tb->rbytes != -1),
		       "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
		       tb->rbytes, tb->lbytes);
		RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
		       (tb->lbytes < 1 || tb->rbytes != -1),
		       "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
		       tb->rbytes, tb->lbytes);

		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);

		reiserfs_invalidate_buffer(tb, tbS0);

		return 0;
	}

	if (tb->rnum[0] == -1) {
		/* all contents of R[0] and S[0] will be in R[0] */
		leaf_shift_right(tb, n, -1);
		reiserfs_invalidate_buffer(tb, tbS0);
		return 0;
	}

	RFALSE(tb->rnum[0],
	       "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
	return 0;
}
249 static int balance_leaf(struct tree_balance
*tb
, struct item_head
*ih
, /* item header of inserted item (this is on little endian) */
250 const char *body
, /* body of inserted item or bytes to paste */
251 int flag
, /* i - insert, d - delete, c - cut, p - paste
252 (see comment to do_balance) */
253 struct item_head
*insert_key
, /* in our processing of one level we sometimes determine what
254 must be inserted into the next higher level. This insertion
255 consists of a key or two keys and their corresponding
257 struct buffer_head
**insert_ptr
/* inserted node-ptrs for the next level */
260 struct buffer_head
*tbS0
= PATH_PLAST_BUFFER(tb
->tb_path
);
261 int item_pos
= PATH_LAST_POSITION(tb
->tb_path
); /* index into the array of item headers in S[0]
262 of the affected item */
263 struct buffer_info bi
;
264 struct buffer_head
*S_new
[2]; /* new nodes allocated to hold what could not fit into S */
265 int snum
[2]; /* number of items that will be placed
266 into S_new (includes partially shifted
268 int sbytes
[2]; /* if an item is partially shifted into S_new then
269 if it is a directory item
270 it is the number of entries from the item that are shifted into S_new
272 it is the number of bytes from the item that are shifted into S_new
279 PROC_INFO_INC(tb
->tb_sb
, balance_at
[0]);
281 /* Make balance in case insert_size[0] < 0 */
282 if (tb
->insert_size
[0] < 0)
283 return balance_leaf_when_delete(tb
, flag
);
286 if (flag
== M_INSERT
&& !body
)
287 zeros_num
= ih_item_len(ih
);
289 pos_in_item
= tb
->tb_path
->pos_in_item
;
290 /* for indirect item pos_in_item is measured in unformatted node
291 pointers. Recalculate to bytes */
293 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0
, item_pos
)))
294 pos_in_item
*= UNFM_P_SIZE
;
296 if (tb
->lnum
[0] > 0) {
297 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
298 if (item_pos
< tb
->lnum
[0]) {
299 /* new item or it part falls to L[0], shift it too */
300 n
= B_NR_ITEMS(tb
->L
[0]);
303 case M_INSERT
: /* insert item into L[0] */
305 if (item_pos
== tb
->lnum
[0] - 1
306 && tb
->lbytes
!= -1) {
307 /* part of new item falls into L[0] */
312 leaf_shift_left(tb
, tb
->lnum
[0] - 1,
315 /* Calculate item length to insert to S[0] */
317 ih_item_len(ih
) - tb
->lbytes
;
318 /* Calculate and check item length to insert to L[0] */
323 RFALSE(ih_item_len(ih
) <= 0,
324 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
327 /* Insert new item into L[0] */
330 bi
.bi_parent
= tb
->FL
[0];
332 get_left_neighbor_position(tb
, 0);
333 leaf_insert_into_buf(&bi
,
341 version
= ih_version(ih
);
343 /* Calculate key component, item length and body to insert into S[0] */
344 set_le_ih_k_offset(ih
,
354 put_ih_item_len(ih
, new_item_len
);
355 if (tb
->lbytes
> zeros_num
) {
357 (tb
->lbytes
- zeros_num
);
360 zeros_num
-= tb
->lbytes
;
362 RFALSE(ih_item_len(ih
) <= 0,
363 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
366 /* new item in whole falls into L[0] */
367 /* Shift lnum[0]-1 items to L[0] */
369 leaf_shift_left(tb
, tb
->lnum
[0] - 1,
371 /* Insert new item into L[0] */
374 bi
.bi_parent
= tb
->FL
[0];
376 get_left_neighbor_position(tb
, 0);
377 leaf_insert_into_buf(&bi
,
381 tb
->insert_size
[0] = 0;
386 case M_PASTE
: /* append item in L[0] */
388 if (item_pos
== tb
->lnum
[0] - 1
389 && tb
->lbytes
!= -1) {
390 /* we must shift the part of the appended item */
391 if (is_direntry_le_ih
392 (B_N_PITEM_HEAD(tbS0
, item_pos
))) {
395 "PAP-12090: invalid parameter in case of a directory");
397 if (tb
->lbytes
> pos_in_item
) {
398 /* new directory entry falls into L[0] */
404 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
431 /* Append given directory entry to directory item */
437 get_left_neighbor_position
447 /* previous string prepared space for pasting new entry, following string pastes this entry */
449 /* when we have merge directory item, pos_in_item has been changed too */
451 /* paste new directory entry. 1 is entry number */
452 leaf_paste_entries(&bi
,
470 tb
->insert_size
[0] = 0;
472 /* new directory item doesn't fall into L[0] */
473 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
480 /* Calculate new position to append in item body */
481 pos_in_item
-= tb
->lbytes
;
484 RFALSE(tb
->lbytes
<= 0,
485 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
487 RFALSE(pos_in_item
!=
491 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
497 if (tb
->lbytes
>= pos_in_item
) {
498 /* appended item will be in L[0] in whole */
501 /* this bytes number must be appended to the last item of L[h] */
506 /* Calculate new insert_size[0] */
507 tb
->insert_size
[0] -=
513 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
525 /* Append to body of item in L[0] */
531 get_left_neighbor_position
546 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
557 "PAP-12106: item length must be 0");
568 "PAP-12107: items must be of the same file");
569 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb
->L
[0], n
+ item_pos
- ret_val
))) {
579 /* update key of first item in S0 */
594 /* update left delimiting key */
612 /* Calculate new body, position in item and insert_size[0] */
613 if (l_n
> zeros_num
) {
632 !op_is_left_mergeable
636 !op_is_left_mergeable
641 "PAP-12120: item must be merge-able with left neighboring item");
642 } else { /* only part of the appended item will be in L[0] */
644 /* Calculate position in item for append in S[0] */
648 RFALSE(pos_in_item
<= 0,
649 "PAP-12125: no place for paste. pos_in_item=%d",
652 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
660 } else { /* appended item will be in L[0] in whole */
662 struct item_head
*pasted
;
664 if (!item_pos
&& op_is_left_mergeable(B_N_PKEY(tbS0
, 0), tbS0
->b_size
)) { /* if we paste into first item of S[0] and it is left mergable */
665 /* then increment pos_in_item by the size of the last item in L[0] */
667 B_N_PITEM_HEAD(tb
->L
[0],
669 if (is_direntry_le_ih(pasted
))
678 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
680 leaf_shift_left(tb
, tb
->lnum
[0],
682 /* Append to body of item in L[0] */
685 bi
.bi_parent
= tb
->FL
[0];
687 get_left_neighbor_position(tb
, 0);
688 leaf_paste_in_buffer(&bi
,
695 /* if appended item is directory, paste entry */
697 B_N_PITEM_HEAD(tb
->L
[0],
700 if (is_direntry_le_ih(pasted
))
701 leaf_paste_entries(&bi
,
716 /* if appended item is indirect item, put unformatted node into un list */
717 if (is_indirect_le_ih(pasted
))
718 set_ih_free_space(pasted
, 0);
719 tb
->insert_size
[0] = 0;
723 default: /* cases d and t */
724 reiserfs_panic(tb
->tb_sb
,
725 "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
727 M_DELETE
) ? "DELETE" : ((flag
==
735 /* new item doesn't fall into L[0] */
736 leaf_shift_left(tb
, tb
->lnum
[0], tb
->lbytes
);
740 /* tb->lnum[0] > 0 */
741 /* Calculate new item position */
742 item_pos
-= (tb
->lnum
[0] - ((tb
->lbytes
!= -1) ? 1 : 0));
744 if (tb
->rnum
[0] > 0) {
745 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
746 n
= B_NR_ITEMS(tbS0
);
749 case M_INSERT
: /* insert item */
750 if (n
- tb
->rnum
[0] < item_pos
) { /* new item or its part falls to R[0] */
751 if (item_pos
== n
- tb
->rnum
[0] + 1 && tb
->rbytes
!= -1) { /* part of new item falls into R[0] */
752 loff_t old_key_comp
, old_len
,
758 leaf_shift_right(tb
, tb
->rnum
[0] - 1,
761 version
= ih_version(ih
);
762 /* Remember key component and item length */
763 old_key_comp
= le_ih_k_offset(ih
);
764 old_len
= ih_item_len(ih
);
766 /* Calculate key component and item length to insert into R[0] */
771 rbytes
) << (is_indirect_le_ih(ih
)
775 set_le_ih_k_offset(ih
, offset
);
776 put_ih_item_len(ih
, tb
->rbytes
);
777 /* Insert part of the item into R[0] */
780 bi
.bi_parent
= tb
->FR
[0];
782 get_right_neighbor_position(tb
, 0);
783 if ((old_len
- tb
->rbytes
) > zeros_num
) {
792 zeros_num
- (old_len
-
794 zeros_num
-= r_zeros_number
;
797 leaf_insert_into_buf(&bi
, 0, ih
, r_body
,
800 /* Replace right delimiting key by first key in R[0] */
801 replace_key(tb
, tb
->CFR
[0], tb
->rkey
[0],
804 /* Calculate key component and item length to insert into S[0] */
805 set_le_ih_k_offset(ih
, old_key_comp
);
807 old_len
- tb
->rbytes
);
809 tb
->insert_size
[0] -= tb
->rbytes
;
811 } else { /* whole new item falls into R[0] */
813 /* Shift rnum[0]-1 items to R[0] */
818 /* Insert new item into R[0] */
821 bi
.bi_parent
= tb
->FR
[0];
823 get_right_neighbor_position(tb
, 0);
824 leaf_insert_into_buf(&bi
,
830 if (item_pos
- n
+ tb
->rnum
[0] - 1 == 0) {
831 replace_key(tb
, tb
->CFR
[0],
836 zeros_num
= tb
->insert_size
[0] = 0;
838 } else { /* new item or part of it doesn't fall into R[0] */
840 leaf_shift_right(tb
, tb
->rnum
[0], tb
->rbytes
);
844 case M_PASTE
: /* append item */
846 if (n
- tb
->rnum
[0] <= item_pos
) { /* pasted item or part of it falls to R[0] */
847 if (item_pos
== n
- tb
->rnum
[0] && tb
->rbytes
!= -1) { /* we must shift the part of the appended item */
848 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0
, item_pos
))) { /* we append to directory item */
852 "PAP-12145: invalid parameter in case of a directory");
854 I_ENTRY_COUNT(B_N_PITEM_HEAD
857 if (entry_count
- tb
->rbytes
<
859 /* new directory entry falls into R[0] */
861 int paste_entry_position
;
863 RFALSE(tb
->rbytes
- 1 >=
867 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
870 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
878 /* Paste given directory entry to directory item */
879 paste_entry_position
=
888 get_right_neighbor_position
892 paste_entry_position
,
896 leaf_paste_entries(&bi
,
898 paste_entry_position
,
912 if (paste_entry_position
914 /* change delimiting keys */
928 tb
->insert_size
[0] = 0;
930 } else { /* new directory entry doesn't fall into R[0] */
939 } else { /* regular object */
945 /* Calculate number of bytes which must be shifted from appended item */
948 tb
->insert_size
[0]) < 0)
951 RFALSE(pos_in_item
!=
955 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
964 /* Calculate number of bytes which must remain in body after appending to R[0] */
972 unsigned long temp_rem
=
979 if (is_indirect_le_key
1014 /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
1015 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
1016 do_balance_mark_internal_dirty
1017 (tb
, tb
->CFR
[0], 0);
1019 /* Append part of body into R[0] */
1021 bi
.bi_bh
= tb
->R
[0];
1022 bi
.bi_parent
= tb
->FR
[0];
1024 get_right_neighbor_position
1026 if (n_rem
> zeros_num
) {
1039 leaf_paste_in_buffer(&bi
, 0,
1048 if (is_indirect_le_ih
1053 "PAP-12160: paste more than one unformatted node pointer");
1059 tb
->insert_size
[0] = n_rem
;
1063 } else { /* pasted item in whole falls into R[0] */
1065 struct item_head
*pasted
;
1068 leaf_shift_right(tb
, tb
->rnum
[0],
1070 /* append item in R[0] */
1071 if (pos_in_item
>= 0) {
1073 bi
.bi_bh
= tb
->R
[0];
1074 bi
.bi_parent
= tb
->FR
[0];
1076 get_right_neighbor_position
1078 leaf_paste_in_buffer(&bi
,
1090 /* paste new entry, if item is directory item */
1092 B_N_PITEM_HEAD(tb
->R
[0],
1095 if (is_direntry_le_ih(pasted
)
1096 && pos_in_item
>= 0) {
1097 leaf_paste_entries(&bi
,
1114 RFALSE(item_pos
- n
+
1116 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
1118 /* update delimiting keys */
1127 if (is_indirect_le_ih(pasted
))
1128 set_ih_free_space(pasted
, 0);
1129 zeros_num
= tb
->insert_size
[0] = 0;
1131 } else { /* new item doesn't fall into R[0] */
1133 leaf_shift_right(tb
, tb
->rnum
[0], tb
->rbytes
);
1136 default: /* cases d and t */
1137 reiserfs_panic(tb
->tb_sb
,
1138 "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
1140 M_DELETE
) ? "DELETE" : ((flag
==
1148 /* tb->rnum[0] > 0 */
1149 RFALSE(tb
->blknum
[0] > 3,
1150 "PAP-12180: blknum can not be %d. It must be <= 3",
1152 RFALSE(tb
->blknum
[0] < 0,
1153 "PAP-12185: blknum can not be %d. It must be >= 0",
1156 /* if while adding to a node we discover that it is possible to split
1157 it in two, and merge the left part into the left neighbor and the
1158 right part into the right neighbor, eliminating the node */
1159 if (tb
->blknum
[0] == 0) { /* node S[0] is empty now */
1161 RFALSE(!tb
->lnum
[0] || !tb
->rnum
[0],
1162 "PAP-12190: lnum and rnum must not be zero");
1163 /* if insertion was done before 0-th position in R[0], right
1164 delimiting key of the tb->L[0]'s and left delimiting key are
1165 not set correctly */
1168 reiserfs_panic(tb
->tb_sb
,
1169 "vs-12195: balance_leaf: CFR not initialized");
1170 copy_key(B_N_PDELIM_KEY(tb
->CFL
[0], tb
->lkey
[0]),
1171 B_N_PDELIM_KEY(tb
->CFR
[0], tb
->rkey
[0]));
1172 do_balance_mark_internal_dirty(tb
, tb
->CFL
[0], 0);
1175 reiserfs_invalidate_buffer(tb
, tbS0
);
1179 /* Fill new nodes that appear in place of S[0] */
1181 /* I am told that this copying is because we need an array to enable
1182 the looping code. -Hans */
1183 snum
[0] = tb
->s1num
, snum
[1] = tb
->s2num
;
1184 sbytes
[0] = tb
->s1bytes
;
1185 sbytes
[1] = tb
->s2bytes
;
1186 for (i
= tb
->blknum
[0] - 2; i
>= 0; i
--) {
1188 RFALSE(!snum
[i
], "PAP-12200: snum[%d] == %d. Must be > 0", i
,
1191 /* here we shift from S to S_new nodes */
1193 S_new
[i
] = get_FEB(tb
);
1195 /* initialized block type and tree level */
1196 set_blkh_level(B_BLK_HEAD(S_new
[i
]), DISK_LEAF_NODE_LEVEL
);
1198 n
= B_NR_ITEMS(tbS0
);
1201 case M_INSERT
: /* insert item */
1203 if (n
- snum
[i
] < item_pos
) { /* new item or it's part falls to first new node S_new[i] */
1204 if (item_pos
== n
- snum
[i
] + 1 && sbytes
[i
] != -1) { /* part of new item falls into S_new[i] */
1205 int old_key_comp
, old_len
,
1210 /* Move snum[i]-1 items from S[0] to S_new[i] */
1211 leaf_move_items(LEAF_FROM_S_TO_SNEW
, tb
,
1214 /* Remember key component and item length */
1215 version
= ih_version(ih
);
1216 old_key_comp
= le_ih_k_offset(ih
);
1217 old_len
= ih_item_len(ih
);
1219 /* Calculate key component and item length to insert into S_new[i] */
1220 set_le_ih_k_offset(ih
,
1221 le_ih_k_offset(ih
) +
1230 put_ih_item_len(ih
, sbytes
[i
]);
1232 /* Insert part of the item into S_new[i] before 0-th item */
1234 bi
.bi_bh
= S_new
[i
];
1235 bi
.bi_parent
= NULL
;
1238 if ((old_len
- sbytes
[i
]) > zeros_num
) {
1247 zeros_num
- (old_len
-
1249 zeros_num
-= r_zeros_number
;
1252 leaf_insert_into_buf(&bi
, 0, ih
, r_body
,
1255 /* Calculate key component and item length to insert into S[i] */
1256 set_le_ih_k_offset(ih
, old_key_comp
);
1258 old_len
- sbytes
[i
]);
1259 tb
->insert_size
[0] -= sbytes
[i
];
1260 } else { /* whole new item falls into S_new[i] */
1262 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
1263 leaf_move_items(LEAF_FROM_S_TO_SNEW
, tb
,
1264 snum
[i
] - 1, sbytes
[i
],
1267 /* Insert new item into S_new[i] */
1269 bi
.bi_bh
= S_new
[i
];
1270 bi
.bi_parent
= NULL
;
1272 leaf_insert_into_buf(&bi
,
1277 zeros_num
= tb
->insert_size
[0] = 0;
1281 else { /* new item or it part don't falls into S_new[i] */
1283 leaf_move_items(LEAF_FROM_S_TO_SNEW
, tb
,
1284 snum
[i
], sbytes
[i
], S_new
[i
]);
1288 case M_PASTE
: /* append item */
1290 if (n
- snum
[i
] <= item_pos
) { /* pasted item or part if it falls to S_new[i] */
1291 if (item_pos
== n
- snum
[i
] && sbytes
[i
] != -1) { /* we must shift part of the appended item */
1292 struct item_head
*aux_ih
;
1294 RFALSE(ih
, "PAP-12210: ih must be 0");
1296 if (is_direntry_le_ih
1298 B_N_PITEM_HEAD(tbS0
, item_pos
))) {
1299 /* we append to directory item */
1304 ih_entry_count(aux_ih
);
1306 if (entry_count
- sbytes
[i
] <
1310 /* new directory entry falls into S_new[i] */
1314 "PAP-12215: insert_size is already 0");
1315 RFALSE(sbytes
[i
] - 1 >=
1317 "PAP-12220: there are no so much entries (%d), only %d",
1321 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
1323 (LEAF_FROM_S_TO_SNEW
,
1327 /* Paste given directory entry to directory item */
1329 bi
.bi_bh
= S_new
[i
];
1330 bi
.bi_parent
= NULL
;
1332 leaf_paste_in_buffer
1339 /* paste new directory entry */
1340 leaf_paste_entries(&bi
,
1360 tb
->insert_size
[0] = 0;
1362 } else { /* new directory entry doesn't fall into S_new[i] */
1364 (LEAF_FROM_S_TO_SNEW
,
1369 } else { /* regular object */
1375 RFALSE(pos_in_item
!=
1379 || tb
->insert_size
[0] <=
1381 "PAP-12225: item too short or insert_size <= 0");
1383 /* Calculate number of bytes which must be shifted from appended item */
1390 (LEAF_FROM_S_TO_SNEW
, tb
,
1394 /* Calculate number of bytes which must remain in body after append to S_new[i] */
1396 tb
->insert_size
[0] -
1400 /* Append part of body into S_new[0] */
1402 bi
.bi_bh
= S_new
[i
];
1403 bi
.bi_parent
= NULL
;
1406 if (n_rem
> zeros_num
) {
1419 leaf_paste_in_buffer(&bi
, 0,
1428 struct item_head
*tmp
;
1431 B_N_PITEM_HEAD(S_new
1434 if (is_indirect_le_ih
1457 tb
->insert_size
[0] = n_rem
;
1462 /* item falls wholly into S_new[i] */
1465 struct item_head
*pasted
;
1467 #ifdef CONFIG_REISERFS_CHECK
1468 struct item_head
*ih_check
=
1469 B_N_PITEM_HEAD(tbS0
, item_pos
);
1471 if (!is_direntry_le_ih(ih_check
)
1472 && (pos_in_item
!= ih_item_len(ih_check
)
1473 || tb
->insert_size
[0] <= 0))
1474 reiserfs_panic(tb
->tb_sb
,
1475 "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1476 #endif /* CONFIG_REISERFS_CHECK */
1479 leaf_move_items(LEAF_FROM_S_TO_SNEW
,
1485 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1488 /* paste into item */
1490 bi
.bi_bh
= S_new
[i
];
1491 bi
.bi_parent
= NULL
;
1493 leaf_paste_in_buffer(&bi
,
1501 B_N_PITEM_HEAD(S_new
[i
],
1504 if (is_direntry_le_ih(pasted
)) {
1505 leaf_paste_entries(&bi
,
1521 /* if we paste to indirect item update ih_free_space */
1522 if (is_indirect_le_ih(pasted
))
1523 set_ih_free_space(pasted
, 0);
1524 zeros_num
= tb
->insert_size
[0] = 0;
1528 else { /* pasted item doesn't fall into S_new[i] */
1530 leaf_move_items(LEAF_FROM_S_TO_SNEW
, tb
,
1531 snum
[i
], sbytes
[i
], S_new
[i
]);
1534 default: /* cases d and t */
1535 reiserfs_panic(tb
->tb_sb
,
1536 "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1538 M_DELETE
) ? "DELETE" : ((flag
==
1544 memcpy(insert_key
+ i
, B_N_PKEY(S_new
[i
], 0), KEY_SIZE
);
1545 insert_ptr
[i
] = S_new
[i
];
1547 RFALSE(!buffer_journaled(S_new
[i
])
1548 || buffer_journal_dirty(S_new
[i
])
1549 || buffer_dirty(S_new
[i
]), "PAP-12247: S_new[%d] : (%b)",
1553 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1554 affected item which remains in S */
1555 if (0 <= item_pos
&& item_pos
< tb
->s0num
) { /* if we must insert or append into buffer S[0] */
1558 case M_INSERT
: /* insert item into S[0] */
1561 bi
.bi_parent
= PATH_H_PPARENT(tb
->tb_path
, 0);
1562 bi
.bi_position
= PATH_H_POSITION(tb
->tb_path
, 1);
1563 leaf_insert_into_buf(&bi
, item_pos
, ih
, body
,
1566 /* If we insert the first key change the delimiting key */
1567 if (item_pos
== 0) {
1568 if (tb
->CFL
[0]) /* can be 0 in reiserfsck */
1569 replace_key(tb
, tb
->CFL
[0], tb
->lkey
[0],
1575 case M_PASTE
:{ /* append item in S[0] */
1576 struct item_head
*pasted
;
1578 pasted
= B_N_PITEM_HEAD(tbS0
, item_pos
);
1579 /* when directory, may be new entry already pasted */
1580 if (is_direntry_le_ih(pasted
)) {
1581 if (pos_in_item
>= 0 &&
1583 ih_entry_count(pasted
)) {
1585 RFALSE(!tb
->insert_size
[0],
1586 "PAP-12260: insert_size is 0 already");
1592 PATH_H_PPARENT(tb
->tb_path
,
1595 PATH_H_POSITION(tb
->tb_path
,
1597 leaf_paste_in_buffer(&bi
,
1606 leaf_paste_entries(&bi
,
1619 if (!item_pos
&& !pos_in_item
) {
1622 "PAP-12270: CFL[0]/L[0] must be specified");
1636 tb
->insert_size
[0] = 0;
1638 } else { /* regular object */
1639 if (pos_in_item
== ih_item_len(pasted
)) {
1641 RFALSE(tb
->insert_size
[0] <= 0,
1642 "PAP-12275: insert size must not be %d",
1643 tb
->insert_size
[0]);
1647 PATH_H_PPARENT(tb
->tb_path
,
1650 PATH_H_POSITION(tb
->tb_path
,
1652 leaf_paste_in_buffer(&bi
,
1660 if (is_indirect_le_ih(pasted
)) {
1665 "PAP-12280: insert_size for indirect item must be %d, not %d",
1673 tb
->insert_size
[0] = 0;
1675 #ifdef CONFIG_REISERFS_CHECK
1677 if (tb
->insert_size
[0]) {
1678 print_cur_tb("12285");
1681 "PAP-12285: balance_leaf: insert_size must be 0 (%d)",
1687 #endif /* CONFIG_REISERFS_CHECK */
1690 } /* case M_PASTE: */
1693 #ifdef CONFIG_REISERFS_CHECK
1694 if (flag
== M_PASTE
&& tb
->insert_size
[0]) {
1695 print_cur_tb("12290");
1696 reiserfs_panic(tb
->tb_sb
,
1697 "PAP-12290: balance_leaf: insert_size is still not 0 (%d)",
1698 tb
->insert_size
[0]);
1700 #endif /* CONFIG_REISERFS_CHECK */
1703 } /* Leaf level of the tree is balanced (end of balance_leaf) */
/* Make empty node: reset the node's block head so it contains zero
   items and full free space, and zero the parent's size record for it. */
void make_empty_node(struct buffer_info *bi)
{
	struct block_head *blkh;

	RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");

	blkh = B_BLK_HEAD(bi->bi_bh);
	set_blkh_nr_item(blkh, 0);
	set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));

	/* a parentless buffer (e.g. a fresh FEB) has no disk_child entry */
	if (bi->bi_parent)
		B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
}
/* Get first empty buffer: take a buffer from the pre-allocated
   free-empty-buffer (FEB) list, format it as an empty leaf node, and
   move it to the used list so do_balance can later account for it. */
struct buffer_head *get_FEB(struct tree_balance *tb)
{
	int i;
	struct buffer_head *first_b;
	struct buffer_info bi;

	/* find the first occupied slot in the FEB array */
	for (i = 0; i < MAX_FEB_SIZE; i++)
		if (tb->FEB[i] != NULL)
			break;

	if (i == MAX_FEB_SIZE)
		reiserfs_panic(tb->tb_sb,
			       "vs-12300: get_FEB: FEB list is empty");

	/* initialize the buffer as an empty node (no parent yet) */
	bi.tb = tb;
	bi.bi_bh = first_b = tb->FEB[i];
	bi.bi_parent = NULL;
	bi.bi_position = 0;
	make_empty_node(&bi);
	set_buffer_uptodate(first_b);

	/* move the buffer from the free list to the used list */
	tb->FEB[i] = NULL;
	tb->used[i] = first_b;

	return (first_b);
}
/* This is now used because reiserfs_free_block has to be able to
   schedule: buffers discarded during balancing are parked in
   tb->thrown[] and actually freed later by free_thrown(). */
static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
{
	int i;

	if (buffer_dirty(bh))
		reiserfs_warning(tb->tb_sb,
				 "store_thrown deals with dirty buffer");
	/* stash bh in the first free thrown slot, holding an extra ref */
	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
		if (!tb->thrown[i]) {
			tb->thrown[i] = bh;
			get_bh(bh);	/* free_thrown puts this */
			return;
		}
	reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers");
}
/* Release every buffer parked by store_thrown(): drop the extra
   reference and return the underlying disk block to the allocator. */
static void free_thrown(struct tree_balance *tb)
{
	int i;
	b_blocknr_t blocknr;
	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
		if (tb->thrown[i]) {
			blocknr = tb->thrown[i]->b_blocknr;
			/* a thrown buffer should already be clean; dirty
			   here indicates something re-dirtied it */
			if (buffer_dirty(tb->thrown[i]))
				reiserfs_warning(tb->tb_sb,
						 "free_thrown deals with dirty buffer %d",
						 blocknr);
			brelse(tb->thrown[i]);	/* incremented in store_thrown */
			reiserfs_free_block(tb->transaction_handle, NULL,
					    blocknr, 0);
		}
	}
}
/* Mark a node as removed from the tree: stamp its block head with
   FREE_LEVEL and zero items, clear the dirty bit (the block will be
   freed, not written), and park the buffer via store_thrown(). */
void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
{
	struct block_head *blkh;
	blkh = B_BLK_HEAD(bh);
	set_blkh_level(blkh, FREE_LEVEL);
	set_blkh_nr_item(blkh, 0);

	clear_buffer_dirty(bh);
	store_thrown(tb, bh);
}
/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.
   dest must be an internal (keys-level) node; src may be a leaf, in
   which case the key is copied out of the n_src'th item head. */
void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
		 struct buffer_head *src, int n_src)
{

	RFALSE(dest == NULL || src == NULL,
	       "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
	       src, dest);
	/* NOTE(review): the message below says "dest must be leaf" but the
	   check requires a keys-level (internal) node — message wording
	   looks wrong, behavior is as checked */
	RFALSE(!B_IS_KEYS_LEVEL(dest),
	       "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
	       dest);
	RFALSE(n_dest < 0 || n_src < 0,
	       "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
	RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
	       "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
	       n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));

	if (B_IS_ITEMS_LEVEL(src))
		/* source buffer contains leaf node */
		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
		       KEY_SIZE);
	else
		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
		       KEY_SIZE);

	do_balance_mark_internal_dirty(tb, dest, 0);
}
1823 int get_left_neighbor_position(struct tree_balance
*tb
, int h
)
1825 int Sh_position
= PATH_H_POSITION(tb
->tb_path
, h
+ 1);
1827 RFALSE(PATH_H_PPARENT(tb
->tb_path
, h
) == NULL
|| tb
->FL
[h
] == NULL
,
1828 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1829 h
, tb
->FL
[h
], h
, PATH_H_PPARENT(tb
->tb_path
, h
));
1831 if (Sh_position
== 0)
1832 return B_NR_ITEMS(tb
->FL
[h
]);
1834 return Sh_position
- 1;
1837 int get_right_neighbor_position(struct tree_balance
*tb
, int h
)
1839 int Sh_position
= PATH_H_POSITION(tb
->tb_path
, h
+ 1);
1841 RFALSE(PATH_H_PPARENT(tb
->tb_path
, h
) == NULL
|| tb
->FR
[h
] == NULL
,
1842 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1843 h
, PATH_H_PPARENT(tb
->tb_path
, h
), h
, tb
->FR
[h
]);
1845 if (Sh_position
== B_NR_ITEMS(PATH_H_PPARENT(tb
->tb_path
, h
)))
1848 return Sh_position
+ 1;
1851 #ifdef CONFIG_REISERFS_CHECK
1853 int is_reusable(struct super_block
*s
, b_blocknr_t block
, int bit_value
);
1854 static void check_internal_node(struct super_block
*s
, struct buffer_head
*bh
,
1857 struct disk_child
*dc
;
1860 RFALSE(!bh
, "PAP-12336: bh == 0");
1862 if (!bh
|| !B_IS_IN_TREE(bh
))
1865 RFALSE(!buffer_dirty(bh
) &&
1866 !(buffer_journaled(bh
) || buffer_journal_dirty(bh
)),
1867 "PAP-12337: buffer (%b) must be dirty", bh
);
1868 dc
= B_N_CHILD(bh
, 0);
1870 for (i
= 0; i
<= B_NR_ITEMS(bh
); i
++, dc
++) {
1871 if (!is_reusable(s
, dc_block_number(dc
), 1)) {
1874 "PAP-12338: check_internal_node: invalid child pointer %y in %b",
1880 static int locked_or_not_in_tree(struct buffer_head
*bh
, char *which
)
1882 if ((!buffer_journal_prepared(bh
) && buffer_locked(bh
)) ||
1883 !B_IS_IN_TREE(bh
)) {
1884 reiserfs_warning(NULL
,
1885 "vs-12339: locked_or_not_in_tree: %s (%b)",
1892 static int check_before_balancing(struct tree_balance
*tb
)
1897 reiserfs_panic(tb
->tb_sb
, "vs-12335: check_before_balancing: "
1898 "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1899 "do_balance cannot properly handle schedule occurring while it runs.");
1902 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1903 prepped all of these for us). */
1905 retval
|= locked_or_not_in_tree(tb
->L
[0], "L[0]");
1906 retval
|= locked_or_not_in_tree(tb
->FL
[0], "FL[0]");
1907 retval
|= locked_or_not_in_tree(tb
->CFL
[0], "CFL[0]");
1908 check_leaf(tb
->L
[0]);
1911 retval
|= locked_or_not_in_tree(tb
->R
[0], "R[0]");
1912 retval
|= locked_or_not_in_tree(tb
->FR
[0], "FR[0]");
1913 retval
|= locked_or_not_in_tree(tb
->CFR
[0], "CFR[0]");
1914 check_leaf(tb
->R
[0]);
1916 retval
|= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb
->tb_path
), "S[0]");
1917 check_leaf(PATH_PLAST_BUFFER(tb
->tb_path
));
1922 static void check_after_balance_leaf(struct tree_balance
*tb
)
1925 if (B_FREE_SPACE(tb
->L
[0]) !=
1926 MAX_CHILD_SIZE(tb
->L
[0]) -
1928 (tb
->FL
[0], get_left_neighbor_position(tb
, 0)))) {
1929 print_cur_tb("12221");
1930 reiserfs_panic(tb
->tb_sb
,
1931 "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
1935 if (B_FREE_SPACE(tb
->R
[0]) !=
1936 MAX_CHILD_SIZE(tb
->R
[0]) -
1938 (tb
->FR
[0], get_right_neighbor_position(tb
, 0)))) {
1939 print_cur_tb("12222");
1940 reiserfs_panic(tb
->tb_sb
,
1941 "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
1944 if (PATH_H_PBUFFER(tb
->tb_path
, 1) &&
1945 (B_FREE_SPACE(PATH_H_PBUFFER(tb
->tb_path
, 0)) !=
1946 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb
->tb_path
, 0)) -
1947 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb
->tb_path
, 1),
1948 PATH_H_POSITION(tb
->tb_path
, 1)))))) {
1949 int left
= B_FREE_SPACE(PATH_H_PBUFFER(tb
->tb_path
, 0));
1950 int right
= (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb
->tb_path
, 0)) -
1951 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb
->tb_path
, 1),
1952 PATH_H_POSITION(tb
->tb_path
,
1954 print_cur_tb("12223");
1955 reiserfs_warning(tb
->tb_sb
,
1956 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1957 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1959 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb
->tb_path
, 0)),
1960 PATH_H_PBUFFER(tb
->tb_path
, 1),
1961 PATH_H_POSITION(tb
->tb_path
, 1),
1963 (PATH_H_PBUFFER(tb
->tb_path
, 1),
1964 PATH_H_POSITION(tb
->tb_path
, 1))),
1966 reiserfs_panic(tb
->tb_sb
,
1967 "PAP-12365: check_after_balance_leaf: S is incorrect");
1971 static void check_leaf_level(struct tree_balance
*tb
)
1973 check_leaf(tb
->L
[0]);
1974 check_leaf(tb
->R
[0]);
1975 check_leaf(PATH_PLAST_BUFFER(tb
->tb_path
));
1978 static void check_internal_levels(struct tree_balance
*tb
)
1982 /* check all internal nodes */
1983 for (h
= 1; tb
->insert_size
[h
]; h
++) {
1984 check_internal_node(tb
->tb_sb
, PATH_H_PBUFFER(tb
->tb_path
, h
),
1985 "BAD BUFFER ON PATH");
1987 check_internal_node(tb
->tb_sb
, tb
->L
[h
], "BAD L");
1989 check_internal_node(tb
->tb_sb
, tb
->R
[h
], "BAD R");
/* Now we have all of the buffers that must be used in balancing of
   the tree.  We rely on the assumption that schedule() will not occur
   while do_balance works. ( Only interrupt handlers are acceptable.)
   We balance the tree according to the analysis made before this,
   using buffers already obtained.  For SMP support it will someday be
   necessary to add ordered locking of tb. */

/* Some interesting rules of balancing:

   we delete a maximum of two nodes per level per balancing: we never
   delete R, when we delete two of three nodes L, S, R then we move
   them into R.

   we only delete L if we are deleting two nodes, if we delete only
   one node we delete S

   if we shift leaves then we shift as much as we can: this is a
   deliberate policy of extremism in node packing which results in
   higher average utilization after repeated random balance operations
   at the cost of more memory copies and more balancing as a result of
   small insertions to full nodes.

   if we shift internal nodes we try to evenly balance the node
   utilization, with consequent less balancing at the cost of lower
   utilization.

   one could argue that the policy for directories in leaves should be
   that of internal nodes, but we will wait until another day to
   evaluate this....  It would be nice to someday measure and prove
   these assumptions as to what is optimal....
*/
/*
 * Entry hook run just before balancing proper: in CONFIG_REISERFS_CHECK
 * builds it verifies all target buffers are unlocked and in-tree, and
 * publishes tb through cur_tb so a schedule() during balancing can be
 * detected.
 */
static inline void do_balance_starts(struct tree_balance *tb)
{
	/* use print_cur_tb() to see initial state of struct
	   tree_balance */

	/* store_print_tb (tb); */

	/* do not delete, just comment it out */
	/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
	   "check");*/
	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
	cur_tb = tb;
#endif
}
2045 static inline void do_balance_completed(struct tree_balance
*tb
)
2048 #ifdef CONFIG_REISERFS_CHECK
2049 check_leaf_level(tb
);
2050 check_internal_levels(tb
);
2054 /* reiserfs_free_block is no longer schedule safe. So, we need to
2055 ** put the buffers we want freed on the thrown list during do_balance,
2056 ** and then free them now
2059 REISERFS_SB(tb
->tb_sb
)->s_do_balance
++;
2061 /* release all nodes hold to perform the balancing */
2067 void do_balance(struct tree_balance
*tb
, /* tree_balance structure */
2068 struct item_head
*ih
, /* item header of inserted item */
2069 const char *body
, /* body of inserted item or bytes to paste */
2071 { /* i - insert, d - delete
2074 Cut means delete part of an item
2075 (includes removing an entry from a
2078 Delete means delete whole item.
2080 Insert means add a new item into the
2083 Paste means to append to the end of an
2084 existing file or to insert a directory
2086 int child_pos
, /* position of a child node in its parent */
2087 h
; /* level of the tree being processed */
2088 struct item_head insert_key
[2]; /* in our processing of one level
2089 we sometimes determine what
2090 must be inserted into the next
2091 higher level. This insertion
2092 consists of a key or two keys
2093 and their corresponding
2095 struct buffer_head
*insert_ptr
[2]; /* inserted node-ptrs for the next
2099 tb
->need_balance_dirty
= 0;
2101 if (FILESYSTEM_CHANGED_TB(tb
)) {
2102 reiserfs_panic(tb
->tb_sb
,
2103 "clm-6000: do_balance, fs generation has changed\n");
2105 /* if we have no real work to do */
2106 if (!tb
->insert_size
[0]) {
2107 reiserfs_warning(tb
->tb_sb
,
2108 "PAP-12350: do_balance: insert_size == 0, mode == %c",
2114 atomic_inc(&(fs_generation(tb
->tb_sb
)));
2115 do_balance_starts(tb
);
2117 /* balance leaf returns 0 except if combining L R and S into
2118 one node. see balance_internal() for explanation of this
2120 child_pos
= PATH_H_B_ITEM_ORDER(tb
->tb_path
, 0) +
2121 balance_leaf(tb
, ih
, body
, flag
, insert_key
, insert_ptr
);
2123 #ifdef CONFIG_REISERFS_CHECK
2124 check_after_balance_leaf(tb
);
2127 /* Balance internal level of the tree. */
2128 for (h
= 1; h
< MAX_HEIGHT
&& tb
->insert_size
[h
]; h
++)
2130 balance_internal(tb
, h
, child_pos
, insert_key
, insert_ptr
);
2132 do_balance_completed(tb
);