reiserfs: cleanup, remove sb argument from journal_mark_dirty
[deliverable/linux.git] / fs / reiserfs / do_balan.c
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5 /*
6 * Now we have all buffers that must be used in balancing of the tree
7 * Further calculations can not cause schedule(), and thus the buffer
8 * tree will be stable until the balancing will be finished
9 * balance the tree according to the analysis made before,
10 * and using buffers obtained after all above.
11 */
12
13 #include <asm/uaccess.h>
14 #include <linux/time.h>
15 #include "reiserfs.h"
16 #include <linux/buffer_head.h>
17 #include <linux/kernel.h>
18
19 static inline void buffer_info_init_left(struct tree_balance *tb,
20 struct buffer_info *bi)
21 {
22 bi->tb = tb;
23 bi->bi_bh = tb->L[0];
24 bi->bi_parent = tb->FL[0];
25 bi->bi_position = get_left_neighbor_position(tb, 0);
26 }
27
28 static inline void buffer_info_init_right(struct tree_balance *tb,
29 struct buffer_info *bi)
30 {
31 bi->tb = tb;
32 bi->bi_bh = tb->R[0];
33 bi->bi_parent = tb->FR[0];
34 bi->bi_position = get_right_neighbor_position(tb, 0);
35 }
36
37 static inline void buffer_info_init_tbS0(struct tree_balance *tb,
38 struct buffer_info *bi)
39 {
40 bi->tb = tb;
41 bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
42 bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
43 bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
44 }
45
46 static inline void buffer_info_init_bh(struct tree_balance *tb,
47 struct buffer_info *bi,
48 struct buffer_head *bh)
49 {
50 bi->tb = tb;
51 bi->bi_bh = bh;
52 bi->bi_parent = NULL;
53 bi->bi_position = 0;
54 }
55
56 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
57 struct buffer_head *bh, int flag)
58 {
59 journal_mark_dirty(tb->transaction_handle, bh);
60 }
61
62 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
63 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
64
65 /*
66 * summary:
67 * if deleting something ( tb->insert_size[0] < 0 )
68 * return(balance_leaf_when_delete()); (flag d handled here)
69 * else
70 * if lnum is larger than 0 we put items into the left node
71 * if rnum is larger than 0 we put items into the right node
72 * if snum1 is larger than 0 we put items into the new node s1
73 * if snum2 is larger than 0 we put items into the new node s2
74 * Note that all *num* count new items being created.
75 *
76 * It would be easier to read balance_leaf() if each of these summary
77 * lines was a separate procedure rather than being inlined. I think
78 * that there are many passages here and in balance_leaf_when_delete() in
79 * which two calls to one procedure can replace two passages, and it
80 * might save cache space and improve software maintenance costs to do so.
81 *
82 * Vladimir made the perceptive comment that we should offload most of
83 * the decision making in this function into fix_nodes/check_balance, and
84 * then create some sort of structure in tb that says what actions should
85 * be performed by do_balance.
86 *
87 * -Hans
88 */
89
90 /*
91 * Balance leaf node in case of delete or cut: insert_size[0] < 0
92 *
93 * lnum, rnum can have values >= -1
94 * -1 means that the neighbor must be joined with S
95 * 0 means that nothing should be done with the neighbor
96 * >0 means to shift entirely or partly the specified number of items
97 * to the neighbor
98 */
/*
 * Balance the leaf S[0] after a delete (M_DELETE) or cut (M_CUT) has
 * reduced its contents: perform the item removal/truncation, then shift
 * or merge with the neighbors L[0]/R[0] as prescribed by the analysis
 * already stored in @tb (lnum/rnum/lbytes/rbytes).
 *
 * Returns 0 in every case except the one where all three buffers are
 * merged into R[0], which returns -1 (callers appear to use this to tell
 * which surviving node holds the contents — NOTE(review): confirm against
 * do_balance's caller).
 */
static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
{
	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
	int item_pos = PATH_LAST_POSITION(tb->tb_path);
	int pos_in_item = tb->tb_path->pos_in_item;
	struct buffer_info bi;
	int n;
	struct item_head *ih;

	RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
	       "vs- 12000: level: wrong FR %z", tb->FR[0]);
	RFALSE(tb->blknum[0] > 1,
	       "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
	RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
	       "PAP-12010: tree can not be empty");

	/* ih is the header of the item the path points at in S[0] */
	ih = item_head(tbS0, item_pos);
	buffer_info_init_tbS0(tb, &bi);

	/* Delete or truncate the item */

	switch (flag) {
	case M_DELETE:		/* delete item in S[0] */

		RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
		       "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
		       -tb->insert_size[0], ih);

		leaf_delete_items(&bi, 0, item_pos, 1, -1);

		/*
		 * Deleting the 0th item invalidates the left delimiting
		 * key stored in CFL[0]; refresh it from the new first key
		 * of S[0], or from the parent if S[0] became empty.
		 */
		if (!item_pos && tb->CFL[0]) {
			if (B_NR_ITEMS(tbS0)) {
				replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
					    0);
			} else {
				if (!PATH_H_POSITION(tb->tb_path, 1))
					replace_key(tb, tb->CFL[0], tb->lkey[0],
						    PATH_H_PPARENT(tb->tb_path,
								   0), 0);
			}
		}

		RFALSE(!item_pos && !tb->CFL[0],
		       "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
		       tb->L[0]);

		break;

	case M_CUT:{		/* cut item in S[0] */
			if (is_direntry_le_ih(ih)) {
				/*
				 * UFS unlink semantics are such that you
				 * can only delete one directory entry at
				 * a time.
				 */

				/*
				 * when we cut a directory tb->insert_size[0]
				 * means number of entries to be cut (always 1)
				 */
				tb->insert_size[0] = -1;
				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
						     -tb->insert_size[0]);

				RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
				       "PAP-12030: can not change delimiting key. CFL[0]=%p",
				       tb->CFL[0]);

				/*
				 * Cutting the first entry of the first item
				 * changes S[0]'s first key, so the left
				 * delimiting key must be refreshed too.
				 */
				if (!item_pos && !pos_in_item && tb->CFL[0]) {
					replace_key(tb, tb->CFL[0], tb->lkey[0],
						    tbS0, 0);
				}
			} else {
				/*
				 * Non-directory item: insert_size[0] is the
				 * (negative) number of bytes to cut.
				 */
				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
						     -tb->insert_size[0]);

				RFALSE(!ih_item_len(ih),
				       "PAP-12035: cut must leave non-zero dynamic length of item");
			}
			break;
		}

	default:
		print_cur_tb("12040");
		reiserfs_panic(tb->tb_sb, "PAP-12040",
			       "unexpected mode: %s(%d)",
			       (flag ==
				M_PASTE) ? "PASTE" : ((flag ==
						       M_INSERT) ? "INSERT" :
						      "UNKNOWN"), flag);
	}

	/*
	 * the rule is that no shifting occurs unless by shifting
	 * a node can be freed
	 */
	n = B_NR_ITEMS(tbS0);
	/* L[0] takes part in balancing */
	if (tb->lnum[0]) {
		/* L[0] must be joined with S[0] */
		if (tb->lnum[0] == -1) {
			/* R[0] must be also joined with S[0] */
			if (tb->rnum[0] == -1) {
				if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
					/*
					 * all contents of all the 3 buffers
					 * will be in L[0]
					 */
					if (PATH_H_POSITION(tb->tb_path, 1) == 0
					    && 1 < B_NR_ITEMS(tb->FR[0]))
						replace_key(tb, tb->CFL[0],
							    tb->lkey[0],
							    tb->FR[0], 1);

					leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
							-1, NULL);
					leaf_move_items(LEAF_FROM_R_TO_L, tb,
							B_NR_ITEMS(tb->R[0]),
							-1, NULL);

					/* S[0] and R[0] are now empty */
					reiserfs_invalidate_buffer(tb, tbS0);
					reiserfs_invalidate_buffer(tb,
								   tb->R[0]);

					return 0;
				}
				/*
				 * all contents of all the 3 buffers will
				 * be in R[0]
				 */
				leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
						NULL);
				leaf_move_items(LEAF_FROM_L_TO_R, tb,
						B_NR_ITEMS(tb->L[0]), -1, NULL);

				/* right_delimiting_key is correct in R[0] */
				replace_key(tb, tb->CFR[0], tb->rkey[0],
					    tb->R[0], 0);

				/* S[0] and L[0] are now empty */
				reiserfs_invalidate_buffer(tb, tbS0);
				reiserfs_invalidate_buffer(tb, tb->L[0]);

				return -1;
			}

			RFALSE(tb->rnum[0] != 0,
			       "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
			/* all contents of L[0] and S[0] will be in L[0] */
			leaf_shift_left(tb, n, -1);

			reiserfs_invalidate_buffer(tb, tbS0);

			return 0;
		}

		/*
		 * a part of contents of S[0] will be in L[0] and the
		 * rest part of S[0] will be in R[0]
		 */

		RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
		       (tb->lnum[0] + tb->rnum[0] > n + 1),
		       "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
		       tb->rnum[0], tb->lnum[0], n);
		RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
		       (tb->lbytes != -1 || tb->rbytes != -1),
		       "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
		       tb->rbytes, tb->lbytes);
		RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
		       (tb->lbytes < 1 || tb->rbytes != -1),
		       "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
		       tb->rbytes, tb->lbytes);

		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);

		reiserfs_invalidate_buffer(tb, tbS0);

		return 0;
	}

	if (tb->rnum[0] == -1) {
		/* all contents of R[0] and S[0] will be in R[0] */
		leaf_shift_right(tb, n, -1);
		reiserfs_invalidate_buffer(tb, tbS0);
		return 0;
	}

	/* rnum[0] == 0: nothing to do with the right neighbor either */
	RFALSE(tb->rnum[0],
	       "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
	return 0;
}
292
293 static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
294 const char *body, /* body of inserted item or bytes to paste */
295 int flag, /* i - insert, d - delete, c - cut, p - paste
296 (see comment to do_balance) */
297 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
298 must be inserted into the next higher level. This insertion
299 consists of a key or two keys and their corresponding
300 pointers */
301 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
302 )
303 {
304 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
305 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
306 of the affected item */
307 struct buffer_info bi;
308 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
309 int snum[2]; /* number of items that will be placed
310 into S_new (includes partially shifted
311 items) */
312 int sbytes[2]; /* if an item is partially shifted into S_new then
313 if it is a directory item
314 it is the number of entries from the item that are shifted into S_new
315 else
316 it is the number of bytes from the item that are shifted into S_new
317 */
318 int n, i;
319 int ret_val;
320 int pos_in_item;
321 int zeros_num;
322
323 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
324
325 /* Make balance in case insert_size[0] < 0 */
326 if (tb->insert_size[0] < 0)
327 return balance_leaf_when_delete(tb, flag);
328
329 zeros_num = 0;
330 if (flag == M_INSERT && !body)
331 zeros_num = ih_item_len(ih);
332
333 pos_in_item = tb->tb_path->pos_in_item;
334 /* for indirect item pos_in_item is measured in unformatted node
335 pointers. Recalculate to bytes */
336 if (flag != M_INSERT
337 && is_indirect_le_ih(item_head(tbS0, item_pos)))
338 pos_in_item *= UNFM_P_SIZE;
339
340 if (tb->lnum[0] > 0) {
341 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
342 if (item_pos < tb->lnum[0]) {
343 /* new item or it part falls to L[0], shift it too */
344 n = B_NR_ITEMS(tb->L[0]);
345
346 switch (flag) {
347 case M_INSERT: /* insert item into L[0] */
348
349 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
350 /* part of new item falls into L[0] */
351 int new_item_len;
352 int version;
353
354 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
355
356 /* Calculate item length to insert to S[0] */
357 new_item_len = ih_item_len(ih) - tb->lbytes;
358 /* Calculate and check item length to insert to L[0] */
359 put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
360
361 RFALSE(ih_item_len(ih) <= 0,
362 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
363 ih_item_len(ih));
364
365 /* Insert new item into L[0] */
366 buffer_info_init_left(tb, &bi);
367 leaf_insert_into_buf(&bi,
368 n + item_pos - ret_val, ih, body,
369 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
370
371 version = ih_version(ih);
372
373 /* Calculate key component, item length and body to insert into S[0] */
374 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
375 (tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
376
377 put_ih_item_len(ih, new_item_len);
378 if (tb->lbytes > zeros_num) {
379 body += (tb->lbytes - zeros_num);
380 zeros_num = 0;
381 } else
382 zeros_num -= tb->lbytes;
383
384 RFALSE(ih_item_len(ih) <= 0,
385 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
386 ih_item_len(ih));
387 } else {
388 /* new item in whole falls into L[0] */
389 /* Shift lnum[0]-1 items to L[0] */
390 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
391 /* Insert new item into L[0] */
392 buffer_info_init_left(tb, &bi);
393 leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
394 tb->insert_size[0] = 0;
395 zeros_num = 0;
396 }
397 break;
398
399 case M_PASTE: /* append item in L[0] */
400
401 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
402 /* we must shift the part of the appended item */
403 if (is_direntry_le_ih(item_head(tbS0, item_pos))) {
404
405 RFALSE(zeros_num,
406 "PAP-12090: invalid parameter in case of a directory");
407 /* directory item */
408 if (tb->lbytes > pos_in_item) {
409 /* new directory entry falls into L[0] */
410 struct item_head *pasted;
411 int l_pos_in_item = pos_in_item;
412
413 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
414 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
415 if (ret_val && !item_pos) {
416 pasted = item_head(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
417 l_pos_in_item += ih_entry_count(pasted) - (tb->lbytes -1);
418 }
419
420 /* Append given directory entry to directory item */
421 buffer_info_init_left(tb, &bi);
422 leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
423
424 /* previous string prepared space for pasting new entry, following string pastes this entry */
425
426 /* when we have merge directory item, pos_in_item has been changed too */
427
428 /* paste new directory entry. 1 is entry number */
429 leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
430 1, (struct reiserfs_de_head *) body,
431 body + DEH_SIZE, tb->insert_size[0]);
432 tb->insert_size[0] = 0;
433 } else {
434 /* new directory item doesn't fall into L[0] */
435 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
436 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
437 }
438 /* Calculate new position to append in item body */
439 pos_in_item -= tb->lbytes;
440 } else {
441 /* regular object */
442 RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
443 RFALSE(pos_in_item != ih_item_len(item_head(tbS0, item_pos)),
444 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
445 ih_item_len(item_head(tbS0, item_pos)),pos_in_item);
446
447 if (tb->lbytes >= pos_in_item) {
448 /* appended item will be in L[0] in whole */
449 int l_n;
450
451 /* this bytes number must be appended to the last item of L[h] */
452 l_n = tb->lbytes - pos_in_item;
453
454 /* Calculate new insert_size[0] */
455 tb->insert_size[0] -= l_n;
456
457 RFALSE(tb->insert_size[0] <= 0,
458 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
459 tb->insert_size[0]);
460 ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
461 (item_head(tbS0, item_pos)));
462 /* Append to body of item in L[0] */
463 buffer_info_init_left(tb, &bi);
464 leaf_paste_in_buffer
465 (&bi, n + item_pos - ret_val, ih_item_len
466 (item_head(tb->L[0], n + item_pos - ret_val)),
467 l_n, body,
468 zeros_num > l_n ? l_n : zeros_num);
469 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
470 {
471 int version;
472 int temp_l = l_n;
473
474 RFALSE(ih_item_len(item_head(tbS0, 0)),
475 "PAP-12106: item length must be 0");
476 RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key
477 (tb->L[0], n + item_pos - ret_val)),
478 "PAP-12107: items must be of the same file");
479 if (is_indirect_le_ih(item_head(tb->L[0], n + item_pos - ret_val))) {
480 temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
481 }
482 /* update key of first item in S0 */
483 version = ih_version(item_head(tbS0, 0));
484 set_le_key_k_offset(version, leaf_key(tbS0, 0),
485 le_key_k_offset(version,leaf_key(tbS0, 0)) + temp_l);
486 /* update left delimiting key */
487 set_le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0]),
488 le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0])) + temp_l);
489 }
490
491 /* Calculate new body, position in item and insert_size[0] */
492 if (l_n > zeros_num) {
493 body += (l_n - zeros_num);
494 zeros_num = 0;
495 } else
496 zeros_num -= l_n;
497 pos_in_item = 0;
498
499 RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
500 || !op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)
501 || !op_is_left_mergeable(internal_key(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
502 "PAP-12120: item must be merge-able with left neighboring item");
503 } else { /* only part of the appended item will be in L[0] */
504
505 /* Calculate position in item for append in S[0] */
506 pos_in_item -= tb->lbytes;
507
508 RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
509
510 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
511 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
512 }
513 }
514 } else { /* appended item will be in L[0] in whole */
515
516 struct item_head *pasted;
517
518 if (!item_pos && op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
519 /* then increment pos_in_item by the size of the last item in L[0] */
520 pasted = item_head(tb->L[0], n - 1);
521 if (is_direntry_le_ih(pasted))
522 pos_in_item += ih_entry_count(pasted);
523 else
524 pos_in_item += ih_item_len(pasted);
525 }
526
527 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
528 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
529 /* Append to body of item in L[0] */
530 buffer_info_init_left(tb, &bi);
531 leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
532 pos_in_item,
533 tb->insert_size[0],
534 body, zeros_num);
535
536 /* if appended item is directory, paste entry */
537 pasted = item_head(tb->L[0], n + item_pos - ret_val);
538 if (is_direntry_le_ih(pasted))
539 leaf_paste_entries(&bi, n + item_pos - ret_val,
540 pos_in_item, 1,
541 (struct reiserfs_de_head *) body,
542 body + DEH_SIZE,
543 tb->insert_size[0]);
544 /* if appended item is indirect item, put unformatted node into un list */
545 if (is_indirect_le_ih(pasted))
546 set_ih_free_space(pasted, 0);
547 tb->insert_size[0] = 0;
548 zeros_num = 0;
549 }
550 break;
551 default: /* cases d and t */
552 reiserfs_panic(tb->tb_sb, "PAP-12130",
553 "lnum > 0: unexpected mode: "
554 " %s(%d)",
555 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
556 }
557 } else {
558 /* new item doesn't fall into L[0] */
559 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
560 }
561 }
562
563 /* tb->lnum[0] > 0 */
564 /* Calculate new item position */
565 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
566
567 if (tb->rnum[0] > 0) {
568 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
569 n = B_NR_ITEMS(tbS0);
570 switch (flag) {
571
572 case M_INSERT: /* insert item */
573 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
574 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
575 loff_t old_key_comp, old_len, r_zeros_number;
576 const char *r_body;
577 int version;
578 loff_t offset;
579
580 leaf_shift_right(tb, tb->rnum[0] - 1, -1);
581
582 version = ih_version(ih);
583 /* Remember key component and item length */
584 old_key_comp = le_ih_k_offset(ih);
585 old_len = ih_item_len(ih);
586
587 /* Calculate key component and item length to insert into R[0] */
588 offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
589 set_le_ih_k_offset(ih, offset);
590 put_ih_item_len(ih, tb->rbytes);
591 /* Insert part of the item into R[0] */
592 buffer_info_init_right(tb, &bi);
593 if ((old_len - tb->rbytes) > zeros_num) {
594 r_zeros_number = 0;
595 r_body = body + (old_len - tb->rbytes) - zeros_num;
596 } else {
597 r_body = body;
598 r_zeros_number = zeros_num - (old_len - tb->rbytes);
599 zeros_num -= r_zeros_number;
600 }
601
602 leaf_insert_into_buf(&bi, 0, ih, r_body,
603 r_zeros_number);
604
605 /* Replace right delimiting key by first key in R[0] */
606 replace_key(tb, tb->CFR[0], tb->rkey[0],
607 tb->R[0], 0);
608
609 /* Calculate key component and item length to insert into S[0] */
610 set_le_ih_k_offset(ih, old_key_comp);
611 put_ih_item_len(ih, old_len - tb->rbytes);
612
613 tb->insert_size[0] -= tb->rbytes;
614
615 } else { /* whole new item falls into R[0] */
616
617 /* Shift rnum[0]-1 items to R[0] */
618 ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
619 /* Insert new item into R[0] */
620 buffer_info_init_right(tb, &bi);
621 leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
622 ih, body, zeros_num);
623
624 if (item_pos - n + tb->rnum[0] - 1 == 0) {
625 replace_key(tb, tb->CFR[0],
626 tb->rkey[0],
627 tb->R[0], 0);
628
629 }
630 zeros_num = tb->insert_size[0] = 0;
631 }
632 } else { /* new item or part of it doesn't fall into R[0] */
633
634 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
635 }
636 break;
637
638 case M_PASTE: /* append item */
639
640 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
641 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
642 if (is_direntry_le_ih(item_head(tbS0, item_pos))) { /* we append to directory item */
643 int entry_count;
644
645 RFALSE(zeros_num,
646 "PAP-12145: invalid parameter in case of a directory");
647 entry_count = ih_entry_count(item_head
648 (tbS0, item_pos));
649 if (entry_count - tb->rbytes <
650 pos_in_item)
651 /* new directory entry falls into R[0] */
652 {
653 int paste_entry_position;
654
655 RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
656 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
657 tb->rbytes, entry_count);
658 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
659 leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
660 /* Paste given directory entry to directory item */
661 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
662 buffer_info_init_right(tb, &bi);
663 leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
664 /* paste entry */
665 leaf_paste_entries(&bi, 0, paste_entry_position, 1,
666 (struct reiserfs_de_head *) body,
667 body + DEH_SIZE, tb->insert_size[0]);
668
669 if (paste_entry_position == 0) {
670 /* change delimiting keys */
671 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
672 }
673
674 tb->insert_size[0] = 0;
675 pos_in_item++;
676 } else { /* new directory entry doesn't fall into R[0] */
677
678 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
679 }
680 } else { /* regular object */
681
682 int n_shift, n_rem, r_zeros_number;
683 const char *r_body;
684
685 /* Calculate number of bytes which must be shifted from appended item */
686 if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
687 n_shift = 0;
688
689 RFALSE(pos_in_item != ih_item_len
690 (item_head(tbS0, item_pos)),
691 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
692 pos_in_item, ih_item_len
693 (item_head(tbS0, item_pos)));
694
695 leaf_shift_right(tb, tb->rnum[0], n_shift);
696 /* Calculate number of bytes which must remain in body after appending to R[0] */
697 if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
698 n_rem = 0;
699
700 {
701 int version;
702 unsigned long temp_rem = n_rem;
703
704 version = ih_version(item_head(tb->R[0], 0));
705 if (is_indirect_le_key(version, leaf_key(tb->R[0], 0))) {
706 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
707 }
708 set_le_key_k_offset(version, leaf_key(tb->R[0], 0),
709 le_key_k_offset(version, leaf_key(tb->R[0], 0)) + temp_rem);
710 set_le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0]),
711 le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0])) + temp_rem);
712 }
713 /* k_offset (leaf_key(tb->R[0],0)) += n_rem;
714 k_offset (internal_key(tb->CFR[0],tb->rkey[0])) += n_rem;*/
715 do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
716
717 /* Append part of body into R[0] */
718 buffer_info_init_right(tb, &bi);
719 if (n_rem > zeros_num) {
720 r_zeros_number = 0;
721 r_body = body + n_rem - zeros_num;
722 } else {
723 r_body = body;
724 r_zeros_number = zeros_num - n_rem;
725 zeros_num -= r_zeros_number;
726 }
727
728 leaf_paste_in_buffer(&bi, 0, n_shift,
729 tb->insert_size[0] - n_rem,
730 r_body, r_zeros_number);
731
732 if (is_indirect_le_ih(item_head(tb->R[0], 0))) {
733 #if 0
734 RFALSE(n_rem,
735 "PAP-12160: paste more than one unformatted node pointer");
736 #endif
737 set_ih_free_space(item_head(tb->R[0], 0), 0);
738 }
739 tb->insert_size[0] = n_rem;
740 if (!n_rem)
741 pos_in_item++;
742 }
743 } else { /* pasted item in whole falls into R[0] */
744
745 struct item_head *pasted;
746
747 ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
748 /* append item in R[0] */
749 if (pos_in_item >= 0) {
750 buffer_info_init_right(tb, &bi);
751 leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
752 tb->insert_size[0], body, zeros_num);
753 }
754
755 /* paste new entry, if item is directory item */
756 pasted = item_head(tb->R[0], item_pos - n + tb->rnum[0]);
757 if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
758 leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
759 pos_in_item, 1,
760 (struct reiserfs_de_head *) body,
761 body + DEH_SIZE, tb->insert_size[0]);
762 if (!pos_in_item) {
763
764 RFALSE(item_pos - n + tb->rnum[0],
765 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
766
767 /* update delimiting keys */
768 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
769 }
770 }
771
772 if (is_indirect_le_ih(pasted))
773 set_ih_free_space(pasted, 0);
774 zeros_num = tb->insert_size[0] = 0;
775 }
776 } else { /* new item doesn't fall into R[0] */
777
778 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
779 }
780 break;
781 default: /* cases d and t */
782 reiserfs_panic(tb->tb_sb, "PAP-12175",
783 "rnum > 0: unexpected mode: %s(%d)",
784 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
785 }
786
787 }
788
789 /* tb->rnum[0] > 0 */
790 RFALSE(tb->blknum[0] > 3,
791 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
792 RFALSE(tb->blknum[0] < 0,
793 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
794
795 /* if while adding to a node we discover that it is possible to split
796 it in two, and merge the left part into the left neighbor and the
797 right part into the right neighbor, eliminating the node */
798 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
799
800 RFALSE(!tb->lnum[0] || !tb->rnum[0],
801 "PAP-12190: lnum and rnum must not be zero");
802 /* if insertion was done before 0-th position in R[0], right
803 delimiting key of the tb->L[0]'s and left delimiting key are
804 not set correctly */
805 if (tb->CFL[0]) {
806 if (!tb->CFR[0])
807 reiserfs_panic(tb->tb_sb, "vs-12195",
808 "CFR not initialized");
809 copy_key(internal_key(tb->CFL[0], tb->lkey[0]),
810 internal_key(tb->CFR[0], tb->rkey[0]));
811 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
812 }
813
814 reiserfs_invalidate_buffer(tb, tbS0);
815 return 0;
816 }
817
818 /* Fill new nodes that appear in place of S[0] */
819
820 /* I am told that this copying is because we need an array to enable
821 the looping code. -Hans */
822 snum[0] = tb->s1num, snum[1] = tb->s2num;
823 sbytes[0] = tb->s1bytes;
824 sbytes[1] = tb->s2bytes;
825 for (i = tb->blknum[0] - 2; i >= 0; i--) {
826
827 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
828 snum[i]);
829
830 /* here we shift from S to S_new nodes */
831
832 S_new[i] = get_FEB(tb);
833
834 /* initialized block type and tree level */
835 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
836
837 n = B_NR_ITEMS(tbS0);
838
839 switch (flag) {
840 case M_INSERT: /* insert item */
841
842 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
843 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
844 int old_key_comp, old_len, r_zeros_number;
845 const char *r_body;
846 int version;
847
848 /* Move snum[i]-1 items from S[0] to S_new[i] */
849 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
850 snum[i] - 1, -1,
851 S_new[i]);
852 /* Remember key component and item length */
853 version = ih_version(ih);
854 old_key_comp = le_ih_k_offset(ih);
855 old_len = ih_item_len(ih);
856
857 /* Calculate key component and item length to insert into S_new[i] */
858 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
859 ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
860
861 put_ih_item_len(ih, sbytes[i]);
862
863 /* Insert part of the item into S_new[i] before 0-th item */
864 buffer_info_init_bh(tb, &bi, S_new[i]);
865
866 if ((old_len - sbytes[i]) > zeros_num) {
867 r_zeros_number = 0;
868 r_body = body + (old_len - sbytes[i]) - zeros_num;
869 } else {
870 r_body = body;
871 r_zeros_number = zeros_num - (old_len - sbytes[i]);
872 zeros_num -= r_zeros_number;
873 }
874
875 leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
876
877 /* Calculate key component and item length to insert into S[i] */
878 set_le_ih_k_offset(ih, old_key_comp);
879 put_ih_item_len(ih, old_len - sbytes[i]);
880 tb->insert_size[0] -= sbytes[i];
881 } else { /* whole new item falls into S_new[i] */
882
883 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
884 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
885 snum[i] - 1, sbytes[i], S_new[i]);
886
887 /* Insert new item into S_new[i] */
888 buffer_info_init_bh(tb, &bi, S_new[i]);
889 leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
890 ih, body, zeros_num);
891
892 zeros_num = tb->insert_size[0] = 0;
893 }
894 }
895
896 else { /* new item or it part don't falls into S_new[i] */
897
898 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
899 snum[i], sbytes[i], S_new[i]);
900 }
901 break;
902
903 case M_PASTE: /* append item */
904
905 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
906 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
907 struct item_head *aux_ih;
908
909 RFALSE(ih, "PAP-12210: ih must be 0");
910
911 aux_ih = item_head(tbS0, item_pos);
912 if (is_direntry_le_ih(aux_ih)) {
913 /* we append to directory item */
914
915 int entry_count;
916
917 entry_count = ih_entry_count(aux_ih);
918
919 if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
920 /* new directory entry falls into S_new[i] */
921
922 RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
923 RFALSE(sbytes[i] - 1 >= entry_count,
924 "PAP-12220: there are no so much entries (%d), only %d",
925 sbytes[i] - 1, entry_count);
926
927 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
928 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
929 /* Paste given directory entry to directory item */
930 buffer_info_init_bh(tb, &bi, S_new[i]);
931 leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
932 tb->insert_size[0], body, zeros_num);
933 /* paste new directory entry */
934 leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
935 (struct reiserfs_de_head *) body,
936 body + DEH_SIZE, tb->insert_size[0]);
937 tb->insert_size[0] = 0;
938 pos_in_item++;
939 } else { /* new directory entry doesn't fall into S_new[i] */
940 leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
941 }
942 } else { /* regular object */
943
944 int n_shift, n_rem, r_zeros_number;
945 const char *r_body;
946
947 RFALSE(pos_in_item != ih_item_len(item_head(tbS0, item_pos)) || tb->insert_size[0] <= 0,
948 "PAP-12225: item too short or insert_size <= 0");
949
950 /* Calculate number of bytes which must be shifted from appended item */
951 n_shift = sbytes[i] - tb->insert_size[0];
952 if (n_shift < 0)
953 n_shift = 0;
954 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
955
956 /* Calculate number of bytes which must remain in body after append to S_new[i] */
957 n_rem = tb->insert_size[0] - sbytes[i];
958 if (n_rem < 0)
959 n_rem = 0;
960 /* Append part of body into S_new[0] */
961 buffer_info_init_bh(tb, &bi, S_new[i]);
962 if (n_rem > zeros_num) {
963 r_zeros_number = 0;
964 r_body = body + n_rem - zeros_num;
965 } else {
966 r_body = body;
967 r_zeros_number = zeros_num - n_rem;
968 zeros_num -= r_zeros_number;
969 }
970
971 leaf_paste_in_buffer(&bi, 0, n_shift,
972 tb->insert_size[0] - n_rem,
973 r_body, r_zeros_number);
974 {
975 struct item_head *tmp;
976
977 tmp = item_head(S_new[i], 0);
978 if (is_indirect_le_ih
979 (tmp)) {
980 set_ih_free_space(tmp, 0);
981 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
982 } else {
983 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
984 }
985 }
986
987 tb->insert_size[0] = n_rem;
988 if (!n_rem)
989 pos_in_item++;
990 }
991 } else
992 /* item falls wholly into S_new[i] */
993 {
994 int leaf_mi;
995 struct item_head *pasted;
996
997 #ifdef CONFIG_REISERFS_CHECK
998 struct item_head *ih_check = item_head(tbS0, item_pos);
999
1000 if (!is_direntry_le_ih(ih_check)
1001 && (pos_in_item != ih_item_len(ih_check)
1002 || tb->insert_size[0] <= 0))
1003 reiserfs_panic(tb->tb_sb,
1004 "PAP-12235",
1005 "pos_in_item "
1006 "must be equal "
1007 "to ih_item_len");
1008 #endif /* CONFIG_REISERFS_CHECK */
1009
1010 leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
1011 tb, snum[i],
1012 sbytes[i],
1013 S_new[i]);
1014
1015 RFALSE(leaf_mi,
1016 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1017 leaf_mi);
1018
1019 /* paste into item */
1020 buffer_info_init_bh(tb, &bi, S_new[i]);
1021 leaf_paste_in_buffer(&bi,
1022 item_pos - n + snum[i],
1023 pos_in_item,
1024 tb->insert_size[0],
1025 body, zeros_num);
1026
1027 pasted = item_head(S_new[i], item_pos - n + snum[i]);
1028 if (is_direntry_le_ih(pasted)) {
1029 leaf_paste_entries(&bi,
1030 item_pos - n + snum[i],
1031 pos_in_item, 1,
1032 (struct reiserfs_de_head *)body,
1033 body + DEH_SIZE,
1034 tb->insert_size[0]
1035 );
1036 }
1037
1038 /* if we paste to indirect item update ih_free_space */
1039 if (is_indirect_le_ih(pasted))
1040 set_ih_free_space(pasted, 0);
1041 zeros_num = tb->insert_size[0] = 0;
1042 }
1043 }
1044
1045 else { /* pasted item doesn't fall into S_new[i] */
1046
1047 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1048 snum[i], sbytes[i], S_new[i]);
1049 }
1050 break;
1051 default: /* cases d and t */
1052 reiserfs_panic(tb->tb_sb, "PAP-12245",
1053 "blknum > 2: unexpected mode: %s(%d)",
1054 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1055 }
1056
1057 memcpy(insert_key + i, leaf_key(S_new[i], 0), KEY_SIZE);
1058 insert_ptr[i] = S_new[i];
1059
1060 RFALSE(!buffer_journaled(S_new[i])
1061 || buffer_journal_dirty(S_new[i])
1062 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1063 i, S_new[i]);
1064 }
1065
1066 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1067 affected item which remains in S */
1068 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1069
1070 switch (flag) {
1071 case M_INSERT: /* insert item into S[0] */
1072 buffer_info_init_tbS0(tb, &bi);
1073 leaf_insert_into_buf(&bi, item_pos, ih, body,
1074 zeros_num);
1075
1076 /* If we insert the first key change the delimiting key */
1077 if (item_pos == 0) {
1078 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1079 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1080 }
1081 break;
1082
1083 case M_PASTE:{ /* append item in S[0] */
1084 struct item_head *pasted;
1085
1086 pasted = item_head(tbS0, item_pos);
1087 /* when directory, may be new entry already pasted */
1088 if (is_direntry_le_ih(pasted)) {
1089 if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
1090
1091 RFALSE(!tb->insert_size[0],
1092 "PAP-12260: insert_size is 0 already");
1093
1094 /* prepare space */
1095 buffer_info_init_tbS0(tb, &bi);
1096 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1097 tb->insert_size[0], body,
1098 zeros_num);
1099
1100 /* paste entry */
1101 leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
1102 (struct reiserfs_de_head *)body,
1103 body + DEH_SIZE,
1104 tb->insert_size[0]);
1105 if (!item_pos && !pos_in_item) {
1106 RFALSE(!tb->CFL[0] || !tb->L[0],
1107 "PAP-12270: CFL[0]/L[0] must be specified");
1108 if (tb->CFL[0])
1109 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1110 }
1111 tb->insert_size[0] = 0;
1112 }
1113 } else { /* regular object */
1114 if (pos_in_item == ih_item_len(pasted)) {
1115
1116 RFALSE(tb->insert_size[0] <= 0,
1117 "PAP-12275: insert size must not be %d",
1118 tb->insert_size[0]);
1119 buffer_info_init_tbS0(tb, &bi);
1120 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1121 tb->insert_size[0], body, zeros_num);
1122
1123 if (is_indirect_le_ih(pasted)) {
1124 #if 0
1125 RFALSE(tb->
1126 insert_size[0] !=
1127 UNFM_P_SIZE,
1128 "PAP-12280: insert_size for indirect item must be %d, not %d",
1129 UNFM_P_SIZE,
1130 tb->
1131 insert_size[0]);
1132 #endif
1133 set_ih_free_space(pasted, 0);
1134 }
1135 tb->insert_size[0] = 0;
1136 }
1137 #ifdef CONFIG_REISERFS_CHECK
1138 else {
1139 if (tb->insert_size[0]) {
1140 print_cur_tb("12285");
1141 reiserfs_panic(tb->tb_sb,
1142 "PAP-12285",
1143 "insert_size "
1144 "must be 0 "
1145 "(%d)",
1146 tb->insert_size[0]);
1147 }
1148 }
1149 #endif /* CONFIG_REISERFS_CHECK */
1150
1151 }
1152 } /* case M_PASTE: */
1153 }
1154 }
1155 #ifdef CONFIG_REISERFS_CHECK
1156 if (flag == M_PASTE && tb->insert_size[0]) {
1157 print_cur_tb("12290");
1158 reiserfs_panic(tb->tb_sb,
1159 "PAP-12290", "insert_size is still not 0 (%d)",
1160 tb->insert_size[0]);
1161 }
1162 #endif /* CONFIG_REISERFS_CHECK */
1163 return 0;
1164 } /* Leaf level of the tree is balanced (end of balance_leaf) */
1165
1166 /* Make empty node */
1167 void make_empty_node(struct buffer_info *bi)
1168 {
1169 struct block_head *blkh;
1170
1171 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1172
1173 blkh = B_BLK_HEAD(bi->bi_bh);
1174 set_blkh_nr_item(blkh, 0);
1175 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1176
1177 if (bi->bi_parent)
1178 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1179 }
1180
1181 /* Get first empty buffer */
1182 struct buffer_head *get_FEB(struct tree_balance *tb)
1183 {
1184 int i;
1185 struct buffer_info bi;
1186
1187 for (i = 0; i < MAX_FEB_SIZE; i++)
1188 if (tb->FEB[i] != NULL)
1189 break;
1190
1191 if (i == MAX_FEB_SIZE)
1192 reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
1193
1194 buffer_info_init_bh(tb, &bi, tb->FEB[i]);
1195 make_empty_node(&bi);
1196 set_buffer_uptodate(tb->FEB[i]);
1197 tb->used[i] = tb->FEB[i];
1198 tb->FEB[i] = NULL;
1199
1200 return tb->used[i];
1201 }
1202
1203 /* This is now used because reiserfs_free_block has to be able to schedule. */
1204 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1205 {
1206 int i;
1207
1208 if (buffer_dirty(bh))
1209 reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1210 "called with dirty buffer");
1211 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1212 if (!tb->thrown[i]) {
1213 tb->thrown[i] = bh;
1214 get_bh(bh); /* free_thrown puts this */
1215 return;
1216 }
1217 reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1218 "too many thrown buffers");
1219 }
1220
1221 static void free_thrown(struct tree_balance *tb)
1222 {
1223 int i;
1224 b_blocknr_t blocknr;
1225 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1226 if (tb->thrown[i]) {
1227 blocknr = tb->thrown[i]->b_blocknr;
1228 if (buffer_dirty(tb->thrown[i]))
1229 reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1230 "called with dirty buffer %d",
1231 blocknr);
1232 brelse(tb->thrown[i]); /* incremented in store_thrown */
1233 reiserfs_free_block(tb->transaction_handle, NULL,
1234 blocknr, 0);
1235 }
1236 }
1237 }
1238
1239 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1240 {
1241 struct block_head *blkh;
1242 blkh = B_BLK_HEAD(bh);
1243 set_blkh_level(blkh, FREE_LEVEL);
1244 set_blkh_nr_item(blkh, 0);
1245
1246 clear_buffer_dirty(bh);
1247 store_thrown(tb, bh);
1248 }
1249
1250 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1251 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1252 struct buffer_head *src, int n_src)
1253 {
1254
1255 RFALSE(dest == NULL || src == NULL,
1256 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1257 src, dest);
1258 RFALSE(!B_IS_KEYS_LEVEL(dest),
1259 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1260 dest);
1261 RFALSE(n_dest < 0 || n_src < 0,
1262 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1263 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1264 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1265 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1266
1267 if (B_IS_ITEMS_LEVEL(src))
1268 /* source buffer contains leaf node */
1269 memcpy(internal_key(dest, n_dest), item_head(src, n_src),
1270 KEY_SIZE);
1271 else
1272 memcpy(internal_key(dest, n_dest), internal_key(src, n_src),
1273 KEY_SIZE);
1274
1275 do_balance_mark_internal_dirty(tb, dest, 0);
1276 }
1277
1278 int get_left_neighbor_position(struct tree_balance *tb, int h)
1279 {
1280 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1281
1282 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1283 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1284 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1285
1286 if (Sh_position == 0)
1287 return B_NR_ITEMS(tb->FL[h]);
1288 else
1289 return Sh_position - 1;
1290 }
1291
1292 int get_right_neighbor_position(struct tree_balance *tb, int h)
1293 {
1294 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1295
1296 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1297 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1298 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1299
1300 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1301 return 0;
1302 else
1303 return Sh_position + 1;
1304 }
1305
1306 #ifdef CONFIG_REISERFS_CHECK
1307
1308 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1309 static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1310 char *mes)
1311 {
1312 struct disk_child *dc;
1313 int i;
1314
1315 RFALSE(!bh, "PAP-12336: bh == 0");
1316
1317 if (!bh || !B_IS_IN_TREE(bh))
1318 return;
1319
1320 RFALSE(!buffer_dirty(bh) &&
1321 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1322 "PAP-12337: buffer (%b) must be dirty", bh);
1323 dc = B_N_CHILD(bh, 0);
1324
1325 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1326 if (!is_reusable(s, dc_block_number(dc), 1)) {
1327 print_cur_tb(mes);
1328 reiserfs_panic(s, "PAP-12338",
1329 "invalid child pointer %y in %b",
1330 dc, bh);
1331 }
1332 }
1333 }
1334
1335 static int locked_or_not_in_tree(struct tree_balance *tb,
1336 struct buffer_head *bh, char *which)
1337 {
1338 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1339 !B_IS_IN_TREE(bh)) {
1340 reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
1341 return 1;
1342 }
1343 return 0;
1344 }
1345
/*
 * Pre-balance sanity check (CONFIG_REISERFS_CHECK builds): panic if
 * another balance appears to be in flight on this mount, then verify
 * that every buffer we are about to modify is journal-prepared,
 * unlocked and still attached to the tree.
 * Returns non-zero if any buffer fails the check.
 */
static int check_before_balancing(struct tree_balance *tb)
{
	int retval = 0;

	/* cur_tb is set by do_balance_starts and cleared when done */
	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
		reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
			       "occurred based on cur_tb not being null at "
			       "this point in code. do_balance cannot properly "
			       "handle concurrent tree accesses on a same "
			       "mount point.");
	}

	/*
	 * double check that buffers that we will modify are unlocked.
	 * (fix_nodes should already have prepped all of these for us).
	 */
	/* left neighbor and its parents, only if we shift left */
	if (tb->lnum[0]) {
		retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
		retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
		retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
		check_leaf(tb->L[0]);
	}
	/* right neighbor and its parents, only if we shift right */
	if (tb->rnum[0]) {
		retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
		retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
		retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
		check_leaf(tb->R[0]);
	}
	/* S[0], the leaf the path points at, is always modified */
	retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
					"S[0]");
	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));

	return retval;
}
1380
/*
 * Post-balance leaf-level consistency check (CONFIG_REISERFS_CHECK):
 * for every leaf touched by balancing, the free space recorded in the
 * leaf itself must agree with the dc_size its parent stores for it.
 * Panics on any mismatch.
 */
static void check_after_balance_leaf(struct tree_balance *tb)
{
	/* left neighbor L[0] vs. its disk_child entry in FL[0] */
	if (tb->lnum[0]) {
		if (B_FREE_SPACE(tb->L[0]) !=
		    MAX_CHILD_SIZE(tb->L[0]) -
		    dc_size(B_N_CHILD
			    (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
			print_cur_tb("12221");
			reiserfs_panic(tb->tb_sb, "PAP-12355",
				       "shift to left was incorrect");
		}
	}
	/* right neighbor R[0] vs. its disk_child entry in FR[0] */
	if (tb->rnum[0]) {
		if (B_FREE_SPACE(tb->R[0]) !=
		    MAX_CHILD_SIZE(tb->R[0]) -
		    dc_size(B_N_CHILD
			    (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
			print_cur_tb("12222");
			reiserfs_panic(tb->tb_sb, "PAP-12360",
				       "shift to right was incorrect");
		}
	}
	/* S[0] itself vs. its parent on the path (when one exists) */
	if (PATH_H_PBUFFER(tb->tb_path, 1) &&
	    (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
	     (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
	      dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
				PATH_H_POSITION(tb->tb_path, 1)))))) {
		int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
		int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
			     dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
					       PATH_H_POSITION(tb->tb_path,
							       1))));
		print_cur_tb("12223");
		reiserfs_warning(tb->tb_sb, "reiserfs-12363",
				 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
				 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
				 left,
				 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
				 PATH_H_PBUFFER(tb->tb_path, 1),
				 PATH_H_POSITION(tb->tb_path, 1),
				 dc_size(B_N_CHILD
					 (PATH_H_PBUFFER(tb->tb_path, 1),
					  PATH_H_POSITION(tb->tb_path, 1))),
				 right);
		reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
	}
}
1428
/* Verify every leaf buffer touched by balancing (debug builds only). */
static void check_leaf_level(struct tree_balance *tb)
{
	check_leaf(tb->L[0]);	/* left neighbor */
	check_leaf(tb->R[0]);	/* right neighbor */
	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));	/* S[0] itself */
}
1435
1436 static void check_internal_levels(struct tree_balance *tb)
1437 {
1438 int h;
1439
1440 /* check all internal nodes */
1441 for (h = 1; tb->insert_size[h]; h++) {
1442 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1443 "BAD BUFFER ON PATH");
1444 if (tb->lnum[h])
1445 check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1446 if (tb->rnum[h])
1447 check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1448 }
1449
1450 }
1451
1452 #endif
1453
1454 /*
1455 * Now we have all of the buffers that must be used in balancing of
1456 * the tree. We rely on the assumption that schedule() will not occur
1457 * while do_balance works. ( Only interrupt handlers are acceptable.)
1458 * We balance the tree according to the analysis made before this,
1459 * using buffers already obtained. For SMP support it will someday be
1460 * necessary to add ordered locking of tb.
1461 */
1462
1463 /*
1464 * Some interesting rules of balancing:
1465 * we delete a maximum of two nodes per level per balancing: we never
1466 * delete R, when we delete two of three nodes L, S, R then we move
1467 * them into R.
1468 *
1469 * we only delete L if we are deleting two nodes, if we delete only
1470 * one node we delete S
1471 *
1472 * if we shift leaves then we shift as much as we can: this is a
1473 * deliberate policy of extremism in node packing which results in
1474 * higher average utilization after repeated random balance operations
1475 * at the cost of more memory copies and more balancing as a result of
1476 * small insertions to full nodes.
1477 *
1478 * if we shift internal nodes we try to evenly balance the node
1479 * utilization, with consequent less balancing at the cost of lower
1480 * utilization.
1481 *
1482 * one could argue that the policy for directories in leaves should be
1483 * that of internal nodes, but we will wait until another day to
1484 * evaluate this.... It would be nice to someday measure and prove
1485 * these assumptions as to what is optimal....
1486 */
1487
/*
 * Entry hook for do_balance: run the pre-balance sanity checks and,
 * in debug builds, record this tb as the one balance currently in
 * flight so check_before_balancing can detect concurrent balances.
 */
static inline void do_balance_starts(struct tree_balance *tb)
{
	/* use print_cur_tb() to see initial state of struct tree_balance */

	/* store_print_tb (tb); */

	/* do not delete, just comment it out */
	/*
	   print_tb(flag, PATH_LAST_POSITION(tb->tb_path),
	   tb->tb_path->pos_in_item, tb, "check");
	 */
	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
	/* cleared again in do_balance_completed() */
	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
#endif
}
1504
/*
 * Exit hook for do_balance: run the post-balance consistency checks,
 * bump statistics, release every node fixed for the balance and free
 * the blocks queued on the thrown list.
 */
static inline void do_balance_completed(struct tree_balance *tb)
{

#ifdef CONFIG_REISERFS_CHECK
	check_leaf_level(tb);
	check_internal_levels(tb);
	/* balance finished: allow the next one to start */
	REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
#endif

	/*
	 * reiserfs_free_block is no longer schedule safe. So, we need to
	 * put the buffers we want freed on the thrown list during do_balance,
	 * and then free them now
	 */

	REISERFS_SB(tb->tb_sb)->s_do_balance++;

	/* release all nodes held to perform the balancing */
	unfix_nodes(tb);

	free_thrown(tb);
}
1527
1528 /*
1529 * do_balance - balance the tree
1530 *
1531 * @tb: tree_balance structure
1532 * @ih: item header of inserted item
1533 * @body: body of inserted item or bytes to paste
1534 * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' paste
1535 *
1536 * Cut means delete part of an item (includes removing an entry from a
1537 * directory).
1538 *
1539 * Delete means delete whole item.
1540 *
1541 * Insert means add a new item into the tree.
1542 *
1543 * Paste means to append to the end of an existing file or to
1544 * insert a directory entry.
1545 */
1546 void do_balance(struct tree_balance *tb, struct item_head *ih,
1547 const char *body, int flag)
1548 {
1549 int child_pos; /* position of a child node in its parent */
1550 int h; /* level of the tree being processed */
1551
1552 /*
1553 * in our processing of one level we sometimes determine what
1554 * must be inserted into the next higher level. This insertion
1555 * consists of a key or two keys and their corresponding
1556 * pointers
1557 */
1558 struct item_head insert_key[2];
1559
1560 /* inserted node-ptrs for the next level */
1561 struct buffer_head *insert_ptr[2];
1562
1563 tb->tb_mode = flag;
1564 tb->need_balance_dirty = 0;
1565
1566 if (FILESYSTEM_CHANGED_TB(tb)) {
1567 reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
1568 "changed");
1569 }
1570 /* if we have no real work to do */
1571 if (!tb->insert_size[0]) {
1572 reiserfs_warning(tb->tb_sb, "PAP-12350",
1573 "insert_size == 0, mode == %c", flag);
1574 unfix_nodes(tb);
1575 return;
1576 }
1577
1578 atomic_inc(&(fs_generation(tb->tb_sb)));
1579 do_balance_starts(tb);
1580
1581 /*
1582 * balance_leaf returns 0 except if combining L R and S into
1583 * one node. see balance_internal() for explanation of this
1584 * line of code.
1585 */
1586 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1587 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1588
1589 #ifdef CONFIG_REISERFS_CHECK
1590 check_after_balance_leaf(tb);
1591 #endif
1592
1593 /* Balance internal level of the tree. */
1594 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1595 child_pos =
1596 balance_internal(tb, h, child_pos, insert_key, insert_ptr);
1597
1598 do_balance_completed(tb);
1599
1600 }
This page took 0.093264 seconds and 5 git commands to generate.