reiserfs: use per-fs commit workqueues
[deliverable/linux.git] / fs / reiserfs / do_balan.c
CommitLineData
1da177e4
LT
1/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5/* Now we have all buffers that must be used in balancing of the tree */
6/* Further calculations can not cause schedule(), and thus the buffer */
7/* tree will be stable until the balancing will be finished */
8/* balance the tree according to the analysis made before, */
9/* and using buffers obtained after all above. */
10
1da177e4
LT
11/**
12 ** balance_leaf_when_delete
13 ** balance_leaf
14 ** do_balance
15 **
16 **/
17
1da177e4
LT
18#include <asm/uaccess.h>
19#include <linux/time.h>
f466c6fd 20#include "reiserfs.h"
1da177e4 21#include <linux/buffer_head.h>
79a81aef 22#include <linux/kernel.h>
1da177e4 23
fba4ebb5
JM
24static inline void buffer_info_init_left(struct tree_balance *tb,
25 struct buffer_info *bi)
26{
27 bi->tb = tb;
28 bi->bi_bh = tb->L[0];
29 bi->bi_parent = tb->FL[0];
30 bi->bi_position = get_left_neighbor_position(tb, 0);
31}
32
33static inline void buffer_info_init_right(struct tree_balance *tb,
34 struct buffer_info *bi)
35{
36 bi->tb = tb;
37 bi->bi_bh = tb->R[0];
38 bi->bi_parent = tb->FR[0];
39 bi->bi_position = get_right_neighbor_position(tb, 0);
40}
41
42static inline void buffer_info_init_tbS0(struct tree_balance *tb,
43 struct buffer_info *bi)
44{
45 bi->tb = tb;
46 bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
47 bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
48 bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
49}
50
51static inline void buffer_info_init_bh(struct tree_balance *tb,
52 struct buffer_info *bi,
53 struct buffer_head *bh)
54{
55 bi->tb = tb;
56 bi->bi_bh = bh;
57 bi->bi_parent = NULL;
58 bi->bi_position = 0;
59}
60
bd4c625c
LT
61inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
62 struct buffer_head *bh, int flag)
1da177e4 63{
bd4c625c
LT
64 journal_mark_dirty(tb->transaction_handle,
65 tb->transaction_handle->t_super, bh);
1da177e4
LT
66}
67
68#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
69#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
70
0222e657 71/* summary:
1da177e4
LT
72 if deleting something ( tb->insert_size[0] < 0 )
73 return(balance_leaf_when_delete()); (flag d handled here)
74 else
75 if lnum is larger than 0 we put items into the left node
76 if rnum is larger than 0 we put items into the right node
77 if snum1 is larger than 0 we put items into the new node s1
0222e657 78 if snum2 is larger than 0 we put items into the new node s2
1da177e4
LT
79Note that all *num* count new items being created.
80
81It would be easier to read balance_leaf() if each of these summary
82lines was a separate procedure rather than being inlined. I think
83that there are many passages here and in balance_leaf_when_delete() in
84which two calls to one procedure can replace two passages, and it
0222e657 85might save cache space and improve software maintenance costs to do so.
1da177e4
LT
86
87Vladimir made the perceptive comment that we should offload most of
88the decision making in this function into fix_nodes/check_balance, and
89then create some sort of structure in tb that says what actions should
90be performed by do_balance.
91
92-Hans */
93
1da177e4
LT
94/* Balance leaf node in case of delete or cut: insert_size[0] < 0
95 *
96 * lnum, rnum can have values >= -1
97 * -1 means that the neighbor must be joined with S
98 * 0 means that nothing should be done with the neighbor
99 * >0 means to shift entirely or partly the specified number of items to the neighbor
100 */
bd4c625c 101static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
1da177e4 102{
bd4c625c
LT
103 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
104 int item_pos = PATH_LAST_POSITION(tb->tb_path);
105 int pos_in_item = tb->tb_path->pos_in_item;
106 struct buffer_info bi;
107 int n;
108 struct item_head *ih;
1da177e4 109
bd4c625c
LT
110 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
111 "vs- 12000: level: wrong FR %z", tb->FR[0]);
112 RFALSE(tb->blknum[0] > 1,
113 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
114 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
115 "PAP-12010: tree can not be empty");
1da177e4 116
bd4c625c 117 ih = B_N_PITEM_HEAD(tbS0, item_pos);
fba4ebb5 118 buffer_info_init_tbS0(tb, &bi);
1da177e4 119
bd4c625c 120 /* Delete or truncate the item */
1da177e4 121
bd4c625c
LT
122 switch (flag) {
123 case M_DELETE: /* delete item in S[0] */
124
125 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
126 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
127 -tb->insert_size[0], ih);
128
bd4c625c
LT
129 leaf_delete_items(&bi, 0, item_pos, 1, -1);
130
131 if (!item_pos && tb->CFL[0]) {
132 if (B_NR_ITEMS(tbS0)) {
133 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
134 0);
135 } else {
136 if (!PATH_H_POSITION(tb->tb_path, 1))
137 replace_key(tb, tb->CFL[0], tb->lkey[0],
138 PATH_H_PPARENT(tb->tb_path,
139 0), 0);
140 }
141 }
1da177e4 142
bd4c625c
LT
143 RFALSE(!item_pos && !tb->CFL[0],
144 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
145 tb->L[0]);
1da177e4 146
bd4c625c
LT
147 break;
148
149 case M_CUT:{ /* cut item in S[0] */
bd4c625c
LT
150 if (is_direntry_le_ih(ih)) {
151
152 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
153 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
154 tb->insert_size[0] = -1;
155 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
156 -tb->insert_size[0]);
157
158 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
159 "PAP-12030: can not change delimiting key. CFL[0]=%p",
160 tb->CFL[0]);
161
162 if (!item_pos && !pos_in_item && tb->CFL[0]) {
163 replace_key(tb, tb->CFL[0], tb->lkey[0],
164 tbS0, 0);
165 }
166 } else {
167 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
168 -tb->insert_size[0]);
169
170 RFALSE(!ih_item_len(ih),
171 "PAP-12035: cut must leave non-zero dynamic length of item");
172 }
173 break;
1da177e4 174 }
1da177e4 175
bd4c625c
LT
176 default:
177 print_cur_tb("12040");
c3a9c210
JM
178 reiserfs_panic(tb->tb_sb, "PAP-12040",
179 "unexpected mode: %s(%d)",
bd4c625c
LT
180 (flag ==
181 M_PASTE) ? "PASTE" : ((flag ==
182 M_INSERT) ? "INSERT" :
183 "UNKNOWN"), flag);
184 }
1da177e4 185
bd4c625c
LT
186 /* the rule is that no shifting occurs unless by shifting a node can be freed */
187 n = B_NR_ITEMS(tbS0);
188 if (tb->lnum[0]) { /* L[0] takes part in balancing */
189 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */
190 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */
191 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
192 /* all contents of all the 3 buffers will be in L[0] */
193 if (PATH_H_POSITION(tb->tb_path, 1) == 0
194 && 1 < B_NR_ITEMS(tb->FR[0]))
195 replace_key(tb, tb->CFL[0],
196 tb->lkey[0],
197 tb->FR[0], 1);
198
199 leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
200 -1, NULL);
201 leaf_move_items(LEAF_FROM_R_TO_L, tb,
202 B_NR_ITEMS(tb->R[0]),
203 -1, NULL);
204
205 reiserfs_invalidate_buffer(tb, tbS0);
206 reiserfs_invalidate_buffer(tb,
207 tb->R[0]);
208
209 return 0;
210 }
211 /* all contents of all the 3 buffers will be in R[0] */
212 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
213 NULL);
214 leaf_move_items(LEAF_FROM_L_TO_R, tb,
215 B_NR_ITEMS(tb->L[0]), -1, NULL);
216
217 /* right_delimiting_key is correct in R[0] */
218 replace_key(tb, tb->CFR[0], tb->rkey[0],
219 tb->R[0], 0);
1da177e4 220
bd4c625c
LT
221 reiserfs_invalidate_buffer(tb, tbS0);
222 reiserfs_invalidate_buffer(tb, tb->L[0]);
1da177e4 223
bd4c625c
LT
224 return -1;
225 }
1da177e4 226
bd4c625c
LT
227 RFALSE(tb->rnum[0] != 0,
228 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
229 /* all contents of L[0] and S[0] will be in L[0] */
230 leaf_shift_left(tb, n, -1);
1da177e4 231
bd4c625c
LT
232 reiserfs_invalidate_buffer(tb, tbS0);
233
234 return 0;
235 }
236 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
237
238 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
239 (tb->lnum[0] + tb->rnum[0] > n + 1),
240 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
241 tb->rnum[0], tb->lnum[0], n);
242 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
243 (tb->lbytes != -1 || tb->rbytes != -1),
244 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
245 tb->rbytes, tb->lbytes);
246 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
247 (tb->lbytes < 1 || tb->rbytes != -1),
248 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
249 tb->rbytes, tb->lbytes);
250
251 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
252 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
253
254 reiserfs_invalidate_buffer(tb, tbS0);
255
256 return 0;
1da177e4 257 }
1da177e4 258
bd4c625c
LT
259 if (tb->rnum[0] == -1) {
260 /* all contents of R[0] and S[0] will be in R[0] */
261 leaf_shift_right(tb, n, -1);
262 reiserfs_invalidate_buffer(tb, tbS0);
263 return 0;
264 }
1da177e4 265
bd4c625c
LT
266 RFALSE(tb->rnum[0],
267 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
1da177e4 268 return 0;
1da177e4
LT
269}
270
bd4c625c
LT
271static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
272 const char *body, /* body of inserted item or bytes to paste */
273 int flag, /* i - insert, d - delete, c - cut, p - paste
274 (see comment to do_balance) */
275 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
276 must be inserted into the next higher level. This insertion
277 consists of a key or two keys and their corresponding
278 pointers */
279 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
1da177e4
LT
280 )
281{
bd4c625c 282 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
0222e657 283 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
bd4c625c
LT
284 of the affected item */
285 struct buffer_info bi;
286 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
287 int snum[2]; /* number of items that will be placed
288 into S_new (includes partially shifted
289 items) */
0222e657
JM
290 int sbytes[2]; /* if an item is partially shifted into S_new then
291 if it is a directory item
bd4c625c
LT
292 it is the number of entries from the item that are shifted into S_new
293 else
294 it is the number of bytes from the item that are shifted into S_new
295 */
296 int n, i;
297 int ret_val;
298 int pos_in_item;
299 int zeros_num;
300
301 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
302
303 /* Make balance in case insert_size[0] < 0 */
304 if (tb->insert_size[0] < 0)
305 return balance_leaf_when_delete(tb, flag);
306
307 zeros_num = 0;
9dce07f1 308 if (flag == M_INSERT && !body)
bd4c625c
LT
309 zeros_num = ih_item_len(ih);
310
311 pos_in_item = tb->tb_path->pos_in_item;
312 /* for indirect item pos_in_item is measured in unformatted node
313 pointers. Recalculate to bytes */
314 if (flag != M_INSERT
315 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
316 pos_in_item *= UNFM_P_SIZE;
317
318 if (tb->lnum[0] > 0) {
319 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
320 if (item_pos < tb->lnum[0]) {
321 /* new item or it part falls to L[0], shift it too */
322 n = B_NR_ITEMS(tb->L[0]);
323
324 switch (flag) {
325 case M_INSERT: /* insert item into L[0] */
326
416e2abd 327 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
bd4c625c
LT
328 /* part of new item falls into L[0] */
329 int new_item_len;
330 int version;
331
416e2abd 332 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
bd4c625c
LT
333
334 /* Calculate item length to insert to S[0] */
416e2abd 335 new_item_len = ih_item_len(ih) - tb->lbytes;
bd4c625c 336 /* Calculate and check item length to insert to L[0] */
416e2abd 337 put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
bd4c625c
LT
338
339 RFALSE(ih_item_len(ih) <= 0,
340 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
341 ih_item_len(ih));
342
343 /* Insert new item into L[0] */
fba4ebb5 344 buffer_info_init_left(tb, &bi);
bd4c625c 345 leaf_insert_into_buf(&bi,
416e2abd
DJ
346 n + item_pos - ret_val, ih, body,
347 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
bd4c625c
LT
348
349 version = ih_version(ih);
350
351 /* Calculate key component, item length and body to insert into S[0] */
416e2abd
DJ
352 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
353 (tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
bd4c625c
LT
354
355 put_ih_item_len(ih, new_item_len);
356 if (tb->lbytes > zeros_num) {
416e2abd 357 body += (tb->lbytes - zeros_num);
bd4c625c
LT
358 zeros_num = 0;
359 } else
360 zeros_num -= tb->lbytes;
361
362 RFALSE(ih_item_len(ih) <= 0,
363 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
364 ih_item_len(ih));
365 } else {
366 /* new item in whole falls into L[0] */
367 /* Shift lnum[0]-1 items to L[0] */
416e2abd 368 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
bd4c625c 369 /* Insert new item into L[0] */
fba4ebb5 370 buffer_info_init_left(tb, &bi);
416e2abd 371 leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
bd4c625c
LT
372 tb->insert_size[0] = 0;
373 zeros_num = 0;
1da177e4 374 }
bd4c625c
LT
375 break;
376
377 case M_PASTE: /* append item in L[0] */
378
416e2abd 379 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
bd4c625c 380 /* we must shift the part of the appended item */
416e2abd 381 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {
bd4c625c
LT
382
383 RFALSE(zeros_num,
384 "PAP-12090: invalid parameter in case of a directory");
385 /* directory item */
386 if (tb->lbytes > pos_in_item) {
387 /* new directory entry falls into L[0] */
416e2abd
DJ
388 struct item_head *pasted;
389 int l_pos_in_item = pos_in_item;
bd4c625c
LT
390
391 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
416e2abd
DJ
392 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
393 if (ret_val && !item_pos) {
394 pasted = B_N_PITEM_HEAD(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
395 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes -1);
bd4c625c
LT
396 }
397
398 /* Append given directory entry to directory item */
fba4ebb5 399 buffer_info_init_left(tb, &bi);
416e2abd 400 leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
bd4c625c
LT
401
402 /* previous string prepared space for pasting new entry, following string pastes this entry */
403
404 /* when we have merge directory item, pos_in_item has been changed too */
405
406 /* paste new directory entry. 1 is entry number */
416e2abd
DJ
407 leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
408 1, (struct reiserfs_de_head *) body,
409 body + DEH_SIZE, tb->insert_size[0]);
bd4c625c
LT
410 tb->insert_size[0] = 0;
411 } else {
412 /* new directory item doesn't fall into L[0] */
413 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
416e2abd 414 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
bd4c625c
LT
415 }
416 /* Calculate new position to append in item body */
417 pos_in_item -= tb->lbytes;
418 } else {
419 /* regular object */
416e2abd
DJ
420 RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
421 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
bd4c625c 422 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
416e2abd 423 ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),pos_in_item);
bd4c625c
LT
424
425 if (tb->lbytes >= pos_in_item) {
426 /* appended item will be in L[0] in whole */
427 int l_n;
428
429 /* this bytes number must be appended to the last item of L[h] */
416e2abd 430 l_n = tb->lbytes - pos_in_item;
bd4c625c
LT
431
432 /* Calculate new insert_size[0] */
416e2abd 433 tb->insert_size[0] -= l_n;
bd4c625c 434
416e2abd 435 RFALSE(tb->insert_size[0] <= 0,
bd4c625c 436 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
416e2abd
DJ
437 tb->insert_size[0]);
438 ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
439 (B_N_PITEM_HEAD(tbS0, item_pos)));
bd4c625c 440 /* Append to body of item in L[0] */
fba4ebb5 441 buffer_info_init_left(tb, &bi);
bd4c625c 442 leaf_paste_in_buffer
416e2abd
DJ
443 (&bi, n + item_pos - ret_val, ih_item_len
444 (B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)),
445 l_n, body,
446 zeros_num > l_n ? l_n : zeros_num);
bd4c625c
LT
447 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
448 {
449 int version;
416e2abd
DJ
450 int temp_l = l_n;
451
452 RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)),
bd4c625c 453 "PAP-12106: item length must be 0");
416e2abd
DJ
454 RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY
455 (tb->L[0], n + item_pos - ret_val)),
bd4c625c
LT
456 "PAP-12107: items must be of the same file");
457 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
416e2abd 458 temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
bd4c625c
LT
459 }
460 /* update key of first item in S0 */
416e2abd
DJ
461 version = ih_version(B_N_PITEM_HEAD(tbS0, 0));
462 set_le_key_k_offset(version, B_N_PKEY(tbS0, 0),
463 le_key_k_offset(version,B_N_PKEY(tbS0, 0)) + temp_l);
bd4c625c 464 /* update left delimiting key */
416e2abd
DJ
465 set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
466 le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l);
bd4c625c
LT
467 }
468
469 /* Calculate new body, position in item and insert_size[0] */
470 if (l_n > zeros_num) {
416e2abd 471 body += (l_n - zeros_num);
bd4c625c
LT
472 zeros_num = 0;
473 } else
416e2abd 474 zeros_num -= l_n;
bd4c625c
LT
475 pos_in_item = 0;
476
416e2abd
DJ
477 RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
478 || !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)
479 || !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
bd4c625c
LT
480 "PAP-12120: item must be merge-able with left neighboring item");
481 } else { /* only part of the appended item will be in L[0] */
482
483 /* Calculate position in item for append in S[0] */
416e2abd 484 pos_in_item -= tb->lbytes;
bd4c625c 485
416e2abd 486 RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
bd4c625c
LT
487
488 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
416e2abd 489 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
bd4c625c
LT
490 }
491 }
492 } else { /* appended item will be in L[0] in whole */
493
494 struct item_head *pasted;
495
496 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
497 /* then increment pos_in_item by the size of the last item in L[0] */
416e2abd 498 pasted = B_N_PITEM_HEAD(tb->L[0], n - 1);
bd4c625c 499 if (is_direntry_le_ih(pasted))
416e2abd 500 pos_in_item += ih_entry_count(pasted);
bd4c625c 501 else
416e2abd 502 pos_in_item += ih_item_len(pasted);
bd4c625c
LT
503 }
504
505 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
416e2abd 506 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
bd4c625c 507 /* Append to body of item in L[0] */
fba4ebb5 508 buffer_info_init_left(tb, &bi);
416e2abd 509 leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
bd4c625c
LT
510 pos_in_item,
511 tb->insert_size[0],
512 body, zeros_num);
513
514 /* if appended item is directory, paste entry */
416e2abd 515 pasted = B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val);
bd4c625c 516 if (is_direntry_le_ih(pasted))
416e2abd
DJ
517 leaf_paste_entries(&bi, n + item_pos - ret_val,
518 pos_in_item, 1,
519 (struct reiserfs_de_head *) body,
520 body + DEH_SIZE,
521 tb->insert_size[0]);
bd4c625c
LT
522 /* if appended item is indirect item, put unformatted node into un list */
523 if (is_indirect_le_ih(pasted))
524 set_ih_free_space(pasted, 0);
525 tb->insert_size[0] = 0;
526 zeros_num = 0;
527 }
528 break;
529 default: /* cases d and t */
c3a9c210
JM
530 reiserfs_panic(tb->tb_sb, "PAP-12130",
531 "lnum > 0: unexpected mode: "
532 " %s(%d)",
416e2abd 533 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1da177e4 534 }
bd4c625c
LT
535 } else {
536 /* new item doesn't fall into L[0] */
537 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
1da177e4 538 }
1da177e4 539 }
1da177e4 540
bd4c625c
LT
541 /* tb->lnum[0] > 0 */
542 /* Calculate new item position */
543 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
544
545 if (tb->rnum[0] > 0) {
546 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
547 n = B_NR_ITEMS(tbS0);
548 switch (flag) {
549
550 case M_INSERT: /* insert item */
551 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
552 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
416e2abd 553 loff_t old_key_comp, old_len, r_zeros_number;
bd4c625c
LT
554 const char *r_body;
555 int version;
556 loff_t offset;
557
416e2abd 558 leaf_shift_right(tb, tb->rnum[0] - 1, -1);
bd4c625c
LT
559
560 version = ih_version(ih);
561 /* Remember key component and item length */
562 old_key_comp = le_ih_k_offset(ih);
563 old_len = ih_item_len(ih);
564
565 /* Calculate key component and item length to insert into R[0] */
416e2abd 566 offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
bd4c625c
LT
567 set_le_ih_k_offset(ih, offset);
568 put_ih_item_len(ih, tb->rbytes);
569 /* Insert part of the item into R[0] */
fba4ebb5 570 buffer_info_init_right(tb, &bi);
bd4c625c
LT
571 if ((old_len - tb->rbytes) > zeros_num) {
572 r_zeros_number = 0;
416e2abd 573 r_body = body + (old_len - tb->rbytes) - zeros_num;
bd4c625c
LT
574 } else {
575 r_body = body;
416e2abd 576 r_zeros_number = zeros_num - (old_len - tb->rbytes);
bd4c625c
LT
577 zeros_num -= r_zeros_number;
578 }
579
580 leaf_insert_into_buf(&bi, 0, ih, r_body,
581 r_zeros_number);
582
583 /* Replace right delimiting key by first key in R[0] */
584 replace_key(tb, tb->CFR[0], tb->rkey[0],
585 tb->R[0], 0);
586
587 /* Calculate key component and item length to insert into S[0] */
588 set_le_ih_k_offset(ih, old_key_comp);
416e2abd 589 put_ih_item_len(ih, old_len - tb->rbytes);
bd4c625c
LT
590
591 tb->insert_size[0] -= tb->rbytes;
592
593 } else { /* whole new item falls into R[0] */
594
595 /* Shift rnum[0]-1 items to R[0] */
416e2abd 596 ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
bd4c625c 597 /* Insert new item into R[0] */
fba4ebb5 598 buffer_info_init_right(tb, &bi);
416e2abd
DJ
599 leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
600 ih, body, zeros_num);
bd4c625c
LT
601
602 if (item_pos - n + tb->rnum[0] - 1 == 0) {
603 replace_key(tb, tb->CFR[0],
604 tb->rkey[0],
605 tb->R[0], 0);
606
607 }
608 zeros_num = tb->insert_size[0] = 0;
609 }
610 } else { /* new item or part of it doesn't fall into R[0] */
1da177e4 611
bd4c625c 612 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
1da177e4 613 }
bd4c625c
LT
614 break;
615
616 case M_PASTE: /* append item */
617
618 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
619 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
620 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */
621 int entry_count;
622
623 RFALSE(zeros_num,
624 "PAP-12145: invalid parameter in case of a directory");
416e2abd
DJ
625 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD
626 (tbS0, item_pos));
bd4c625c
LT
627 if (entry_count - tb->rbytes <
628 pos_in_item)
629 /* new directory entry falls into R[0] */
630 {
631 int paste_entry_position;
632
416e2abd 633 RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
bd4c625c 634 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
416e2abd 635 tb->rbytes, entry_count);
bd4c625c 636 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
416e2abd 637 leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
bd4c625c 638 /* Paste given directory entry to directory item */
416e2abd 639 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
fba4ebb5 640 buffer_info_init_right(tb, &bi);
416e2abd 641 leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
bd4c625c 642 /* paste entry */
416e2abd
DJ
643 leaf_paste_entries(&bi, 0, paste_entry_position, 1,
644 (struct reiserfs_de_head *) body,
645 body + DEH_SIZE, tb->insert_size[0]);
646
647 if (paste_entry_position == 0) {
bd4c625c 648 /* change delimiting keys */
416e2abd 649 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
bd4c625c
LT
650 }
651
652 tb->insert_size[0] = 0;
653 pos_in_item++;
654 } else { /* new directory entry doesn't fall into R[0] */
655
416e2abd 656 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
bd4c625c
LT
657 }
658 } else { /* regular object */
659
416e2abd 660 int n_shift, n_rem, r_zeros_number;
bd4c625c
LT
661 const char *r_body;
662
663 /* Calculate number of bytes which must be shifted from appended item */
416e2abd 664 if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
bd4c625c
LT
665 n_shift = 0;
666
416e2abd
DJ
667 RFALSE(pos_in_item != ih_item_len
668 (B_N_PITEM_HEAD(tbS0, item_pos)),
bd4c625c 669 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
416e2abd
DJ
670 pos_in_item, ih_item_len
671 (B_N_PITEM_HEAD(tbS0, item_pos)));
672
673 leaf_shift_right(tb, tb->rnum[0], n_shift);
bd4c625c 674 /* Calculate number of bytes which must remain in body after appending to R[0] */
416e2abd 675 if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
bd4c625c
LT
676 n_rem = 0;
677
678 {
679 int version;
416e2abd
DJ
680 unsigned long temp_rem = n_rem;
681
682 version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0));
683 if (is_indirect_le_key(version, B_N_PKEY(tb->R[0], 0))) {
684 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
bd4c625c 685 }
416e2abd
DJ
686 set_le_key_k_offset(version, B_N_PKEY(tb->R[0], 0),
687 le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem);
688 set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]),
689 le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem);
bd4c625c 690 }
1da177e4
LT
691/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
692 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
416e2abd 693 do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
bd4c625c
LT
694
695 /* Append part of body into R[0] */
fba4ebb5 696 buffer_info_init_right(tb, &bi);
bd4c625c
LT
697 if (n_rem > zeros_num) {
698 r_zeros_number = 0;
416e2abd 699 r_body = body + n_rem - zeros_num;
bd4c625c
LT
700 } else {
701 r_body = body;
416e2abd
DJ
702 r_zeros_number = zeros_num - n_rem;
703 zeros_num -= r_zeros_number;
bd4c625c
LT
704 }
705
416e2abd
DJ
706 leaf_paste_in_buffer(&bi, 0, n_shift,
707 tb->insert_size[0] - n_rem,
708 r_body, r_zeros_number);
709
710 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) {
1da177e4 711#if 0
bd4c625c
LT
712 RFALSE(n_rem,
713 "PAP-12160: paste more than one unformatted node pointer");
1da177e4 714#endif
416e2abd 715 set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0);
bd4c625c
LT
716 }
717 tb->insert_size[0] = n_rem;
718 if (!n_rem)
719 pos_in_item++;
720 }
721 } else { /* pasted item in whole falls into R[0] */
722
723 struct item_head *pasted;
724
416e2abd 725 ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
bd4c625c
LT
726 /* append item in R[0] */
727 if (pos_in_item >= 0) {
fba4ebb5 728 buffer_info_init_right(tb, &bi);
416e2abd
DJ
729 leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
730 tb->insert_size[0], body, zeros_num);
bd4c625c
LT
731 }
732
733 /* paste new entry, if item is directory item */
416e2abd
DJ
734 pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
735 if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
736 leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
737 pos_in_item, 1,
738 (struct reiserfs_de_head *) body,
739 body + DEH_SIZE, tb->insert_size[0]);
bd4c625c
LT
740 if (!pos_in_item) {
741
416e2abd 742 RFALSE(item_pos - n + tb->rnum[0],
bd4c625c
LT
743 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
744
745 /* update delimiting keys */
416e2abd 746 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
bd4c625c
LT
747 }
748 }
749
750 if (is_indirect_le_ih(pasted))
751 set_ih_free_space(pasted, 0);
752 zeros_num = tb->insert_size[0] = 0;
753 }
754 } else { /* new item doesn't fall into R[0] */
1da177e4 755
bd4c625c 756 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
1da177e4 757 }
bd4c625c
LT
758 break;
759 default: /* cases d and t */
c3a9c210
JM
760 reiserfs_panic(tb->tb_sb, "PAP-12175",
761 "rnum > 0: unexpected mode: %s(%d)",
416e2abd 762 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1da177e4 763 }
1da177e4 764
bd4c625c 765 }
1da177e4 766
bd4c625c
LT
767 /* tb->rnum[0] > 0 */
768 RFALSE(tb->blknum[0] > 3,
416e2abd 769 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
bd4c625c 770 RFALSE(tb->blknum[0] < 0,
416e2abd 771 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
bd4c625c
LT
772
773 /* if while adding to a node we discover that it is possible to split
774 it in two, and merge the left part into the left neighbor and the
775 right part into the right neighbor, eliminating the node */
776 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
777
778 RFALSE(!tb->lnum[0] || !tb->rnum[0],
779 "PAP-12190: lnum and rnum must not be zero");
780 /* if insertion was done before 0-th position in R[0], right
781 delimiting key of the tb->L[0]'s and left delimiting key are
782 not set correctly */
783 if (tb->CFL[0]) {
784 if (!tb->CFR[0])
c3a9c210
JM
785 reiserfs_panic(tb->tb_sb, "vs-12195",
786 "CFR not initialized");
bd4c625c
LT
787 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
788 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
789 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
790 }
1da177e4 791
bd4c625c
LT
792 reiserfs_invalidate_buffer(tb, tbS0);
793 return 0;
794 }
1da177e4 795
bd4c625c
LT
796 /* Fill new nodes that appear in place of S[0] */
797
798 /* I am told that this copying is because we need an array to enable
799 the looping code. -Hans */
800 snum[0] = tb->s1num, snum[1] = tb->s2num;
801 sbytes[0] = tb->s1bytes;
802 sbytes[1] = tb->s2bytes;
803 for (i = tb->blknum[0] - 2; i >= 0; i--) {
804
805 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
806 snum[i]);
807
808 /* here we shift from S to S_new nodes */
809
810 S_new[i] = get_FEB(tb);
811
812 /* initialized block type and tree level */
813 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
814
815 n = B_NR_ITEMS(tbS0);
816
817 switch (flag) {
818 case M_INSERT: /* insert item */
819
820 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
821 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
416e2abd 822 int old_key_comp, old_len, r_zeros_number;
bd4c625c
LT
823 const char *r_body;
824 int version;
825
826 /* Move snum[i]-1 items from S[0] to S_new[i] */
827 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
828 snum[i] - 1, -1,
829 S_new[i]);
830 /* Remember key component and item length */
831 version = ih_version(ih);
832 old_key_comp = le_ih_k_offset(ih);
833 old_len = ih_item_len(ih);
834
835 /* Calculate key component and item length to insert into S_new[i] */
416e2abd
DJ
836 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
837 ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
bd4c625c
LT
838
839 put_ih_item_len(ih, sbytes[i]);
840
841 /* Insert part of the item into S_new[i] before 0-th item */
fba4ebb5 842 buffer_info_init_bh(tb, &bi, S_new[i]);
bd4c625c
LT
843
844 if ((old_len - sbytes[i]) > zeros_num) {
845 r_zeros_number = 0;
416e2abd 846 r_body = body + (old_len - sbytes[i]) - zeros_num;
bd4c625c
LT
847 } else {
848 r_body = body;
416e2abd 849 r_zeros_number = zeros_num - (old_len - sbytes[i]);
bd4c625c
LT
850 zeros_num -= r_zeros_number;
851 }
852
416e2abd 853 leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
bd4c625c
LT
854
855 /* Calculate key component and item length to insert into S[i] */
856 set_le_ih_k_offset(ih, old_key_comp);
416e2abd 857 put_ih_item_len(ih, old_len - sbytes[i]);
bd4c625c
LT
858 tb->insert_size[0] -= sbytes[i];
859 } else { /* whole new item falls into S_new[i] */
860
861 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
862 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
416e2abd 863 snum[i] - 1, sbytes[i], S_new[i]);
bd4c625c
LT
864
865 /* Insert new item into S_new[i] */
fba4ebb5 866 buffer_info_init_bh(tb, &bi, S_new[i]);
416e2abd
DJ
867 leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
868 ih, body, zeros_num);
bd4c625c
LT
869
870 zeros_num = tb->insert_size[0] = 0;
871 }
872 }
1da177e4 873
bd4c625c 874 else { /* new item or it part don't falls into S_new[i] */
1da177e4 875
bd4c625c
LT
876 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
877 snum[i], sbytes[i], S_new[i]);
1da177e4 878 }
bd4c625c
LT
879 break;
880
881 case M_PASTE: /* append item */
882
883 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
884 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
885 struct item_head *aux_ih;
886
887 RFALSE(ih, "PAP-12210: ih must be 0");
888
1d965fe0
JM
889 aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
890 if (is_direntry_le_ih(aux_ih)) {
bd4c625c
LT
891 /* we append to directory item */
892
893 int entry_count;
894
416e2abd 895 entry_count = ih_entry_count(aux_ih);
bd4c625c 896
416e2abd 897 if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
bd4c625c
LT
898 /* new directory entry falls into S_new[i] */
899
416e2abd
DJ
900 RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
901 RFALSE(sbytes[i] - 1 >= entry_count,
bd4c625c 902 "PAP-12220: there are no so much entries (%d), only %d",
416e2abd 903 sbytes[i] - 1, entry_count);
bd4c625c
LT
904
905 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
416e2abd 906 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
bd4c625c 907 /* Paste given directory entry to directory item */
fba4ebb5 908 buffer_info_init_bh(tb, &bi, S_new[i]);
416e2abd
DJ
909 leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
910 tb->insert_size[0], body, zeros_num);
bd4c625c 911 /* paste new directory entry */
416e2abd
DJ
912 leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
913 (struct reiserfs_de_head *) body,
914 body + DEH_SIZE, tb->insert_size[0]);
bd4c625c
LT
915 tb->insert_size[0] = 0;
916 pos_in_item++;
917 } else { /* new directory entry doesn't fall into S_new[i] */
416e2abd 918 leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
bd4c625c
LT
919 }
920 } else { /* regular object */
921
416e2abd 922 int n_shift, n_rem, r_zeros_number;
bd4c625c
LT
923 const char *r_body;
924
416e2abd 925 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)) || tb->insert_size[0] <= 0,
bd4c625c
LT
926 "PAP-12225: item too short or insert_size <= 0");
927
928 /* Calculate number of bytes which must be shifted from appended item */
416e2abd 929 n_shift = sbytes[i] - tb->insert_size[0];
bd4c625c
LT
930 if (n_shift < 0)
931 n_shift = 0;
416e2abd 932 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
bd4c625c
LT
933
934 /* Calculate number of bytes which must remain in body after append to S_new[i] */
416e2abd 935 n_rem = tb->insert_size[0] - sbytes[i];
bd4c625c
LT
936 if (n_rem < 0)
937 n_rem = 0;
938 /* Append part of body into S_new[0] */
fba4ebb5 939 buffer_info_init_bh(tb, &bi, S_new[i]);
bd4c625c
LT
940 if (n_rem > zeros_num) {
941 r_zeros_number = 0;
416e2abd 942 r_body = body + n_rem - zeros_num;
bd4c625c
LT
943 } else {
944 r_body = body;
416e2abd
DJ
945 r_zeros_number = zeros_num - n_rem;
946 zeros_num -= r_zeros_number;
bd4c625c
LT
947 }
948
416e2abd
DJ
949 leaf_paste_in_buffer(&bi, 0, n_shift,
950 tb->insert_size[0] - n_rem,
951 r_body, r_zeros_number);
bd4c625c
LT
952 {
953 struct item_head *tmp;
954
416e2abd 955 tmp = B_N_PITEM_HEAD(S_new[i], 0);
bd4c625c
LT
956 if (is_indirect_le_ih
957 (tmp)) {
416e2abd
DJ
958 set_ih_free_space(tmp, 0);
959 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
bd4c625c 960 } else {
416e2abd 961 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
bd4c625c
LT
962 }
963 }
964
965 tb->insert_size[0] = n_rem;
966 if (!n_rem)
967 pos_in_item++;
968 }
969 } else
970 /* item falls wholly into S_new[i] */
971 {
8acc570f 972 int leaf_mi;
bd4c625c 973 struct item_head *pasted;
1da177e4 974
bd4c625c 975#ifdef CONFIG_REISERFS_CHECK
416e2abd 976 struct item_head *ih_check = B_N_PITEM_HEAD(tbS0, item_pos);
bd4c625c 977
8acc570f
HH
978 if (!is_direntry_le_ih(ih_check)
979 && (pos_in_item != ih_item_len(ih_check)
bd4c625c
LT
980 || tb->insert_size[0] <= 0))
981 reiserfs_panic(tb->tb_sb,
c3a9c210
JM
982 "PAP-12235",
983 "pos_in_item "
984 "must be equal "
985 "to ih_item_len");
bd4c625c
LT
986#endif /* CONFIG_REISERFS_CHECK */
987
416e2abd 988 leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
bd4c625c
LT
989 tb, snum[i],
990 sbytes[i],
991 S_new[i]);
992
8acc570f 993 RFALSE(leaf_mi,
bd4c625c 994 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
8acc570f 995 leaf_mi);
bd4c625c
LT
996
997 /* paste into item */
fba4ebb5 998 buffer_info_init_bh(tb, &bi, S_new[i]);
bd4c625c 999 leaf_paste_in_buffer(&bi,
416e2abd 1000 item_pos - n + snum[i],
bd4c625c
LT
1001 pos_in_item,
1002 tb->insert_size[0],
1003 body, zeros_num);
1004
416e2abd 1005 pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
bd4c625c 1006 if (is_direntry_le_ih(pasted)) {
eba00305 1007 leaf_paste_entries(&bi,
416e2abd
DJ
1008 item_pos - n + snum[i],
1009 pos_in_item, 1,
1010 (struct reiserfs_de_head *)body,
1011 body + DEH_SIZE,
1012 tb->insert_size[0]
bd4c625c
LT
1013 );
1014 }
1015
1016 /* if we paste to indirect item update ih_free_space */
1017 if (is_indirect_le_ih(pasted))
1018 set_ih_free_space(pasted, 0);
1019 zeros_num = tb->insert_size[0] = 0;
1020 }
1da177e4
LT
1021 }
1022
bd4c625c 1023 else { /* pasted item doesn't fall into S_new[i] */
1da177e4 1024
bd4c625c
LT
1025 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1026 snum[i], sbytes[i], S_new[i]);
1027 }
1028 break;
1029 default: /* cases d and t */
c3a9c210
JM
1030 reiserfs_panic(tb->tb_sb, "PAP-12245",
1031 "blknum > 2: unexpected mode: %s(%d)",
416e2abd 1032 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1da177e4 1033 }
1da177e4 1034
bd4c625c
LT
1035 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1036 insert_ptr[i] = S_new[i];
1037
1038 RFALSE(!buffer_journaled(S_new[i])
1039 || buffer_journal_dirty(S_new[i])
1040 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1041 i, S_new[i]);
1da177e4
LT
1042 }
1043
bd4c625c
LT
1044 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1045 affected item which remains in S */
1046 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1047
1048 switch (flag) {
1049 case M_INSERT: /* insert item into S[0] */
fba4ebb5 1050 buffer_info_init_tbS0(tb, &bi);
bd4c625c
LT
1051 leaf_insert_into_buf(&bi, item_pos, ih, body,
1052 zeros_num);
1053
1054 /* If we insert the first key change the delimiting key */
1055 if (item_pos == 0) {
1056 if (tb->CFL[0]) /* can be 0 in reiserfsck */
416e2abd 1057 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1da177e4 1058 }
bd4c625c
LT
1059 break;
1060
1061 case M_PASTE:{ /* append item in S[0] */
1062 struct item_head *pasted;
1063
1064 pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1065 /* when directory, may be new entry already pasted */
1066 if (is_direntry_le_ih(pasted)) {
416e2abd 1067 if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
bd4c625c
LT
1068
1069 RFALSE(!tb->insert_size[0],
1070 "PAP-12260: insert_size is 0 already");
1071
1072 /* prepare space */
fba4ebb5 1073 buffer_info_init_tbS0(tb, &bi);
416e2abd
DJ
1074 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1075 tb->insert_size[0], body,
bd4c625c
LT
1076 zeros_num);
1077
1078 /* paste entry */
416e2abd
DJ
1079 leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
1080 (struct reiserfs_de_head *)body,
1081 body + DEH_SIZE,
1082 tb->insert_size[0]);
bd4c625c 1083 if (!item_pos && !pos_in_item) {
416e2abd 1084 RFALSE(!tb->CFL[0] || !tb->L[0],
bd4c625c 1085 "PAP-12270: CFL[0]/L[0] must be specified");
416e2abd
DJ
1086 if (tb->CFL[0])
1087 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
bd4c625c
LT
1088 }
1089 tb->insert_size[0] = 0;
1090 }
1091 } else { /* regular object */
1092 if (pos_in_item == ih_item_len(pasted)) {
1093
1094 RFALSE(tb->insert_size[0] <= 0,
1095 "PAP-12275: insert size must not be %d",
1096 tb->insert_size[0]);
fba4ebb5 1097 buffer_info_init_tbS0(tb, &bi);
416e2abd
DJ
1098 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1099 tb->insert_size[0], body, zeros_num);
bd4c625c
LT
1100
1101 if (is_indirect_le_ih(pasted)) {
1da177e4 1102#if 0
bd4c625c
LT
1103 RFALSE(tb->
1104 insert_size[0] !=
1105 UNFM_P_SIZE,
1106 "PAP-12280: insert_size for indirect item must be %d, not %d",
1107 UNFM_P_SIZE,
1108 tb->
1109 insert_size[0]);
1da177e4 1110#endif
416e2abd 1111 set_ih_free_space(pasted, 0);
bd4c625c
LT
1112 }
1113 tb->insert_size[0] = 0;
1114 }
1da177e4 1115#ifdef CONFIG_REISERFS_CHECK
bd4c625c
LT
1116 else {
1117 if (tb->insert_size[0]) {
1118 print_cur_tb("12285");
416e2abd 1119 reiserfs_panic(tb->tb_sb,
c3a9c210
JM
1120 "PAP-12285",
1121 "insert_size "
1122 "must be 0 "
1123 "(%d)",
1124 tb->insert_size[0]);
bd4c625c
LT
1125 }
1126 }
1127#endif /* CONFIG_REISERFS_CHECK */
1128
1129 }
1130 } /* case M_PASTE: */
1da177e4 1131 }
1da177e4 1132 }
1da177e4 1133#ifdef CONFIG_REISERFS_CHECK
bd4c625c
LT
1134 if (flag == M_PASTE && tb->insert_size[0]) {
1135 print_cur_tb("12290");
1136 reiserfs_panic(tb->tb_sb,
c3a9c210 1137 "PAP-12290", "insert_size is still not 0 (%d)",
bd4c625c
LT
1138 tb->insert_size[0]);
1139 }
1140#endif /* CONFIG_REISERFS_CHECK */
bd4c625c
LT
1141 return 0;
1142} /* Leaf level of the tree is balanced (end of balance_leaf) */
1da177e4
LT
1143
1144/* Make empty node */
bd4c625c 1145void make_empty_node(struct buffer_info *bi)
1da177e4 1146{
bd4c625c 1147 struct block_head *blkh;
1da177e4 1148
bd4c625c 1149 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1da177e4 1150
bd4c625c
LT
1151 blkh = B_BLK_HEAD(bi->bi_bh);
1152 set_blkh_nr_item(blkh, 0);
1153 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1da177e4 1154
bd4c625c
LT
1155 if (bi->bi_parent)
1156 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1da177e4
LT
1157}
1158
1da177e4 1159/* Get first empty buffer */
bd4c625c 1160struct buffer_head *get_FEB(struct tree_balance *tb)
1da177e4 1161{
bd4c625c 1162 int i;
bd4c625c 1163 struct buffer_info bi;
1da177e4 1164
bd4c625c 1165 for (i = 0; i < MAX_FEB_SIZE; i++)
9dce07f1 1166 if (tb->FEB[i] != NULL)
bd4c625c
LT
1167 break;
1168
1169 if (i == MAX_FEB_SIZE)
c3a9c210 1170 reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
bd4c625c 1171
fba4ebb5 1172 buffer_info_init_bh(tb, &bi, tb->FEB[i]);
bd4c625c 1173 make_empty_node(&bi);
fba4ebb5
JM
1174 set_buffer_uptodate(tb->FEB[i]);
1175 tb->used[i] = tb->FEB[i];
bd4c625c 1176 tb->FEB[i] = NULL;
bd4c625c 1177
fba4ebb5 1178 return tb->used[i];
bd4c625c 1179}
1da177e4
LT
1180
1181/* This is now used because reiserfs_free_block has to be able to
1182** schedule.
1183*/
bd4c625c 1184static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1da177e4 1185{
bd4c625c
LT
1186 int i;
1187
1188 if (buffer_dirty(bh))
45b03d5e
JM
1189 reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1190 "called with dirty buffer");
79a81aef 1191 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
bd4c625c
LT
1192 if (!tb->thrown[i]) {
1193 tb->thrown[i] = bh;
1194 get_bh(bh); /* free_thrown puts this */
1195 return;
1196 }
45b03d5e
JM
1197 reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1198 "too many thrown buffers");
1da177e4
LT
1199}
1200
bd4c625c
LT
1201static void free_thrown(struct tree_balance *tb)
1202{
1203 int i;
1204 b_blocknr_t blocknr;
79a81aef 1205 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
bd4c625c
LT
1206 if (tb->thrown[i]) {
1207 blocknr = tb->thrown[i]->b_blocknr;
1208 if (buffer_dirty(tb->thrown[i]))
45b03d5e
JM
1209 reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1210 "called with dirty buffer %d",
bd4c625c
LT
1211 blocknr);
1212 brelse(tb->thrown[i]); /* incremented in store_thrown */
1213 reiserfs_free_block(tb->transaction_handle, NULL,
1214 blocknr, 0);
1215 }
1da177e4 1216 }
1da177e4
LT
1217}
1218
bd4c625c 1219void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1da177e4 1220{
bd4c625c
LT
1221 struct block_head *blkh;
1222 blkh = B_BLK_HEAD(bh);
1223 set_blkh_level(blkh, FREE_LEVEL);
1224 set_blkh_nr_item(blkh, 0);
1225
1226 clear_buffer_dirty(bh);
1227 store_thrown(tb, bh);
1da177e4
LT
1228}
1229
1230/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
bd4c625c
LT
1231void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1232 struct buffer_head *src, int n_src)
1da177e4
LT
1233{
1234
bd4c625c
LT
1235 RFALSE(dest == NULL || src == NULL,
1236 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1237 src, dest);
1238 RFALSE(!B_IS_KEYS_LEVEL(dest),
1239 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1240 dest);
1241 RFALSE(n_dest < 0 || n_src < 0,
1242 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1243 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1244 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1245 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1246
1247 if (B_IS_ITEMS_LEVEL(src))
1248 /* source buffer contains leaf node */
1249 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1250 KEY_SIZE);
1251 else
1252 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1253 KEY_SIZE);
1254
1255 do_balance_mark_internal_dirty(tb, dest, 0);
1da177e4
LT
1256}
1257
bd4c625c 1258int get_left_neighbor_position(struct tree_balance *tb, int h)
1da177e4 1259{
bd4c625c 1260 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1da177e4 1261
9dce07f1 1262 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
bd4c625c
LT
1263 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1264 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1da177e4 1265
bd4c625c
LT
1266 if (Sh_position == 0)
1267 return B_NR_ITEMS(tb->FL[h]);
1268 else
1269 return Sh_position - 1;
1da177e4
LT
1270}
1271
bd4c625c 1272int get_right_neighbor_position(struct tree_balance *tb, int h)
1da177e4 1273{
bd4c625c 1274 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1da177e4 1275
9dce07f1 1276 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
bd4c625c
LT
1277 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1278 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1da177e4 1279
bd4c625c
LT
1280 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1281 return 0;
1282 else
1283 return Sh_position + 1;
1da177e4
LT
1284}
1285
1da177e4
LT
1286#ifdef CONFIG_REISERFS_CHECK
1287
bd4c625c
LT
1288int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1289static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1290 char *mes)
1da177e4 1291{
bd4c625c
LT
1292 struct disk_child *dc;
1293 int i;
1da177e4 1294
bd4c625c 1295 RFALSE(!bh, "PAP-12336: bh == 0");
1da177e4 1296
bd4c625c
LT
1297 if (!bh || !B_IS_IN_TREE(bh))
1298 return;
1da177e4 1299
bd4c625c
LT
1300 RFALSE(!buffer_dirty(bh) &&
1301 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1302 "PAP-12337: buffer (%b) must be dirty", bh);
1303 dc = B_N_CHILD(bh, 0);
1da177e4 1304
bd4c625c
LT
1305 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1306 if (!is_reusable(s, dc_block_number(dc), 1)) {
1307 print_cur_tb(mes);
c3a9c210
JM
1308 reiserfs_panic(s, "PAP-12338",
1309 "invalid child pointer %y in %b",
bd4c625c
LT
1310 dc, bh);
1311 }
1312 }
1da177e4
LT
1313}
1314
45b03d5e
JM
1315static int locked_or_not_in_tree(struct tree_balance *tb,
1316 struct buffer_head *bh, char *which)
bd4c625c
LT
1317{
1318 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1319 !B_IS_IN_TREE(bh)) {
45b03d5e 1320 reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
bd4c625c
LT
1321 return 1;
1322 }
1323 return 0;
1324}
1da177e4 1325
bd4c625c 1326static int check_before_balancing(struct tree_balance *tb)
1da177e4 1327{
bd4c625c
LT
1328 int retval = 0;
1329
08f14fc8 1330 if (REISERFS_SB(tb->tb_sb)->cur_tb) {
c3a9c210
JM
1331 reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1332 "occurred based on cur_tb not being null at "
1333 "this point in code. do_balance cannot properly "
08f14fc8
FW
1334 "handle concurrent tree accesses on a same "
1335 "mount point.");
1da177e4 1336 }
bd4c625c
LT
1337
1338 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1339 prepped all of these for us). */
1340 if (tb->lnum[0]) {
45b03d5e
JM
1341 retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
1342 retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
1343 retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
bd4c625c 1344 check_leaf(tb->L[0]);
1da177e4 1345 }
bd4c625c 1346 if (tb->rnum[0]) {
45b03d5e
JM
1347 retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
1348 retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
1349 retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
bd4c625c
LT
1350 check_leaf(tb->R[0]);
1351 }
45b03d5e
JM
1352 retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
1353 "S[0]");
bd4c625c 1354 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1da177e4 1355
bd4c625c
LT
1356 return retval;
1357}
1da177e4 1358
bd4c625c 1359static void check_after_balance_leaf(struct tree_balance *tb)
1da177e4 1360{
bd4c625c
LT
1361 if (tb->lnum[0]) {
1362 if (B_FREE_SPACE(tb->L[0]) !=
1363 MAX_CHILD_SIZE(tb->L[0]) -
1364 dc_size(B_N_CHILD
1365 (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1366 print_cur_tb("12221");
c3a9c210
JM
1367 reiserfs_panic(tb->tb_sb, "PAP-12355",
1368 "shift to left was incorrect");
bd4c625c
LT
1369 }
1370 }
1371 if (tb->rnum[0]) {
1372 if (B_FREE_SPACE(tb->R[0]) !=
1373 MAX_CHILD_SIZE(tb->R[0]) -
1374 dc_size(B_N_CHILD
1375 (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1376 print_cur_tb("12222");
c3a9c210
JM
1377 reiserfs_panic(tb->tb_sb, "PAP-12360",
1378 "shift to right was incorrect");
bd4c625c
LT
1379 }
1380 }
1381 if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1382 (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1383 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1384 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1385 PATH_H_POSITION(tb->tb_path, 1)))))) {
1386 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1387 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1388 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1389 PATH_H_POSITION(tb->tb_path,
1390 1))));
1391 print_cur_tb("12223");
45b03d5e 1392 reiserfs_warning(tb->tb_sb, "reiserfs-12363",
bd4c625c
LT
1393 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1394 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1395 left,
1396 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1397 PATH_H_PBUFFER(tb->tb_path, 1),
1398 PATH_H_POSITION(tb->tb_path, 1),
1399 dc_size(B_N_CHILD
1400 (PATH_H_PBUFFER(tb->tb_path, 1),
1401 PATH_H_POSITION(tb->tb_path, 1))),
1402 right);
c3a9c210 1403 reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
bd4c625c 1404 }
1da177e4
LT
1405}
1406
bd4c625c 1407static void check_leaf_level(struct tree_balance *tb)
1da177e4 1408{
bd4c625c
LT
1409 check_leaf(tb->L[0]);
1410 check_leaf(tb->R[0]);
1411 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1412}
1da177e4 1413
bd4c625c
LT
1414static void check_internal_levels(struct tree_balance *tb)
1415{
1416 int h;
1417
1418 /* check all internal nodes */
1419 for (h = 1; tb->insert_size[h]; h++) {
1420 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1421 "BAD BUFFER ON PATH");
1422 if (tb->lnum[h])
1423 check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1424 if (tb->rnum[h])
1425 check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1426 }
1da177e4
LT
1427
1428}
1429
1430#endif
1431
1da177e4
LT
1432/* Now we have all of the buffers that must be used in balancing of
1433 the tree. We rely on the assumption that schedule() will not occur
1434 while do_balance works. ( Only interrupt handlers are acceptable.)
1435 We balance the tree according to the analysis made before this,
1436 using buffers already obtained. For SMP support it will someday be
1437 necessary to add ordered locking of tb. */
1438
1439/* Some interesting rules of balancing:
1440
1441 we delete a maximum of two nodes per level per balancing: we never
1442 delete R, when we delete two of three nodes L, S, R then we move
1443 them into R.
1444
1445 we only delete L if we are deleting two nodes, if we delete only
1446 one node we delete S
1447
1448 if we shift leaves then we shift as much as we can: this is a
1449 deliberate policy of extremism in node packing which results in
1450 higher average utilization after repeated random balance operations
1451 at the cost of more memory copies and more balancing as a result of
1452 small insertions to full nodes.
1453
1454 if we shift internal nodes we try to evenly balance the node
1455 utilization, with consequent less balancing at the cost of lower
1456 utilization.
1457
1458 one could argue that the policy for directories in leaves should be
1459 that of internal nodes, but we will wait until another day to
1460 evaluate this.... It would be nice to someday measure and prove
1461 these assumptions as to what is optimal....
1462
1463*/
1464
bd4c625c 1465static inline void do_balance_starts(struct tree_balance *tb)
1da177e4 1466{
bd4c625c
LT
1467 /* use print_cur_tb() to see initial state of struct
1468 tree_balance */
1da177e4 1469
bd4c625c 1470 /* store_print_tb (tb); */
1da177e4 1471
bd4c625c 1472 /* do not delete, just comment it out */
0222e657 1473/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1da177e4 1474 "check");*/
bd4c625c 1475 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1da177e4 1476#ifdef CONFIG_REISERFS_CHECK
08f14fc8 1477 REISERFS_SB(tb->tb_sb)->cur_tb = tb;
1da177e4
LT
1478#endif
1479}
1480
bd4c625c 1481static inline void do_balance_completed(struct tree_balance *tb)
1da177e4 1482{
bd4c625c 1483
1da177e4 1484#ifdef CONFIG_REISERFS_CHECK
bd4c625c
LT
1485 check_leaf_level(tb);
1486 check_internal_levels(tb);
08f14fc8 1487 REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
1da177e4
LT
1488#endif
1489
bd4c625c
LT
1490 /* reiserfs_free_block is no longer schedule safe. So, we need to
1491 ** put the buffers we want freed on the thrown list during do_balance,
1492 ** and then free them now
1493 */
1da177e4 1494
bd4c625c 1495 REISERFS_SB(tb->tb_sb)->s_do_balance++;
1da177e4 1496
bd4c625c
LT
1497 /* release all nodes hold to perform the balancing */
1498 unfix_nodes(tb);
1da177e4 1499
bd4c625c 1500 free_thrown(tb);
1da177e4
LT
1501}
1502
bd4c625c
LT
1503void do_balance(struct tree_balance *tb, /* tree_balance structure */
1504 struct item_head *ih, /* item header of inserted item */
1505 const char *body, /* body of inserted item or bytes to paste */
1506 int flag)
1507{ /* i - insert, d - delete
1508 c - cut, p - paste
1509
1510 Cut means delete part of an item
1511 (includes removing an entry from a
1512 directory).
1513
1514 Delete means delete whole item.
1515
1516 Insert means add a new item into the
1517 tree.
1518
1519 Paste means to append to the end of an
1520 existing file or to insert a directory
1521 entry. */
1522 int child_pos, /* position of a child node in its parent */
1523 h; /* level of the tree being processed */
1524 struct item_head insert_key[2]; /* in our processing of one level
1525 we sometimes determine what
1526 must be inserted into the next
1527 higher level. This insertion
1528 consists of a key or two keys
1529 and their corresponding
1530 pointers */
1531 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
1532 level */
1533
1534 tb->tb_mode = flag;
1535 tb->need_balance_dirty = 0;
1536
1537 if (FILESYSTEM_CHANGED_TB(tb)) {
c3a9c210
JM
1538 reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
1539 "changed");
bd4c625c
LT
1540 }
1541 /* if we have no real work to do */
1542 if (!tb->insert_size[0]) {
45b03d5e
JM
1543 reiserfs_warning(tb->tb_sb, "PAP-12350",
1544 "insert_size == 0, mode == %c", flag);
bd4c625c
LT
1545 unfix_nodes(tb);
1546 return;
1547 }
1da177e4 1548
bd4c625c
LT
1549 atomic_inc(&(fs_generation(tb->tb_sb)));
1550 do_balance_starts(tb);
1da177e4 1551
1da177e4
LT
1552 /* balance leaf returns 0 except if combining L R and S into
1553 one node. see balance_internal() for explanation of this
bd4c625c
LT
1554 line of code. */
1555 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1556 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1da177e4
LT
1557
1558#ifdef CONFIG_REISERFS_CHECK
bd4c625c 1559 check_after_balance_leaf(tb);
1da177e4
LT
1560#endif
1561
bd4c625c
LT
1562 /* Balance internal level of the tree. */
1563 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1564 child_pos =
1565 balance_internal(tb, h, child_pos, insert_key, insert_ptr);
1da177e4 1566
bd4c625c 1567 do_balance_completed(tb);
1da177e4
LT
1568
1569}
This page took 0.755764 seconds and 5 git commands to generate.