2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
21 #include "transaction.h"
23 static int split_node(struct btrfs_trans_handle
*trans
, struct btrfs_root
24 *root
, struct btrfs_path
*path
, int level
);
25 static int split_leaf(struct btrfs_trans_handle
*trans
, struct btrfs_root
26 *root
, struct btrfs_key
*ins_key
,
27 struct btrfs_path
*path
, int data_size
);
28 static int push_node_left(struct btrfs_trans_handle
*trans
, struct btrfs_root
29 *root
, struct buffer_head
*dst
, struct buffer_head
31 static int balance_node_right(struct btrfs_trans_handle
*trans
, struct
32 btrfs_root
*root
, struct buffer_head
*dst_buf
,
33 struct buffer_head
*src_buf
);
34 static int del_ptr(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
35 struct btrfs_path
*path
, int level
, int slot
);
37 inline void btrfs_init_path(struct btrfs_path
*p
)
39 memset(p
, 0, sizeof(*p
));
42 struct btrfs_path
*btrfs_alloc_path(void)
44 struct btrfs_path
*path
;
45 path
= kmem_cache_alloc(btrfs_path_cachep
, GFP_NOFS
);
47 btrfs_init_path(path
);
51 void btrfs_free_path(struct btrfs_path
*p
)
53 btrfs_release_path(NULL
, p
);
54 kmem_cache_free(btrfs_path_cachep
, p
);
57 void btrfs_release_path(struct btrfs_root
*root
, struct btrfs_path
*p
)
60 for (i
= 0; i
< BTRFS_MAX_LEVEL
; i
++) {
63 btrfs_block_release(root
, p
->nodes
[i
]);
65 memset(p
, 0, sizeof(*p
));
68 static int __btrfs_cow_block(struct btrfs_trans_handle
*trans
, struct btrfs_root
69 *root
, struct buffer_head
*buf
, struct buffer_head
70 *parent
, int parent_slot
, struct buffer_head
71 **cow_ret
, u64 search_start
, u64 empty_size
)
73 struct buffer_head
*cow
;
74 struct btrfs_node
*cow_node
;
76 int different_trans
= 0;
78 WARN_ON(root
->ref_cows
&& trans
->transid
!= root
->last_trans
);
79 WARN_ON(!buffer_uptodate(buf
));
80 cow
= btrfs_alloc_free_block(trans
, root
, search_start
, empty_size
);
84 cow_node
= btrfs_buffer_node(cow
);
85 if (buf
->b_size
!= root
->blocksize
|| cow
->b_size
!= root
->blocksize
)
88 memcpy(cow_node
, btrfs_buffer_node(buf
), root
->blocksize
);
89 btrfs_set_header_blocknr(&cow_node
->header
, bh_blocknr(cow
));
90 btrfs_set_header_generation(&cow_node
->header
, trans
->transid
);
91 btrfs_set_header_owner(&cow_node
->header
, root
->root_key
.objectid
);
93 WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf
)) >
95 if (btrfs_header_generation(btrfs_buffer_header(buf
)) !=
98 ret
= btrfs_inc_ref(trans
, root
, buf
);
102 clean_tree_block(trans
, root
, buf
);
105 if (buf
== root
->node
) {
108 if (buf
!= root
->commit_root
) {
109 btrfs_free_extent(trans
, root
, bh_blocknr(buf
), 1, 1);
111 btrfs_block_release(root
, buf
);
113 btrfs_set_node_blockptr(btrfs_buffer_node(parent
), parent_slot
,
115 btrfs_mark_buffer_dirty(parent
);
116 WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent
)) !=
118 btrfs_free_extent(trans
, root
, bh_blocknr(buf
), 1, 1);
120 btrfs_block_release(root
, buf
);
121 btrfs_mark_buffer_dirty(cow
);
126 int btrfs_cow_block(struct btrfs_trans_handle
*trans
, struct btrfs_root
127 *root
, struct buffer_head
*buf
, struct buffer_head
128 *parent
, int parent_slot
, struct buffer_head
132 if (trans
->transaction
!= root
->fs_info
->running_transaction
) {
133 printk(KERN_CRIT
"trans %Lu running %Lu\n", trans
->transid
,
134 root
->fs_info
->running_transaction
->transid
);
137 if (trans
->transid
!= root
->fs_info
->generation
) {
138 printk(KERN_CRIT
"trans %Lu running %Lu\n", trans
->transid
,
139 root
->fs_info
->generation
);
142 if (btrfs_header_generation(btrfs_buffer_header(buf
)) ==
148 search_start
= bh_blocknr(buf
) & ~((u64
)65535);
149 return __btrfs_cow_block(trans
, root
, buf
, parent
,
150 parent_slot
, cow_ret
, search_start
, 0);
153 static int close_blocks(u64 blocknr
, u64 other
)
155 if (blocknr
< other
&& other
- blocknr
< 8)
157 if (blocknr
> other
&& blocknr
- other
< 8)
162 int btrfs_realloc_node(struct btrfs_trans_handle
*trans
,
163 struct btrfs_root
*root
, struct buffer_head
*parent
,
164 int cache_only
, u64
*last_ret
)
166 struct btrfs_node
*parent_node
;
167 struct buffer_head
*cur_bh
;
168 struct buffer_head
*tmp_bh
;
170 u64 search_start
= *last_ret
;
180 if (trans
->transaction
!= root
->fs_info
->running_transaction
) {
181 printk(KERN_CRIT
"trans %Lu running %Lu\n", trans
->transid
,
182 root
->fs_info
->running_transaction
->transid
);
185 if (trans
->transid
!= root
->fs_info
->generation
) {
186 printk(KERN_CRIT
"trans %Lu running %Lu\n", trans
->transid
,
187 root
->fs_info
->generation
);
190 parent_node
= btrfs_buffer_node(parent
);
191 parent_nritems
= btrfs_header_nritems(&parent_node
->header
);
192 parent_level
= btrfs_header_level(&parent_node
->header
);
195 end_slot
= parent_nritems
;
197 if (parent_nritems
== 1)
200 for (i
= start_slot
; i
< end_slot
; i
++) {
202 blocknr
= btrfs_node_blockptr(parent_node
, i
);
204 last_block
= blocknr
;
206 other
= btrfs_node_blockptr(parent_node
, i
- 1);
207 close
= close_blocks(blocknr
, other
);
209 if (close
&& i
< end_slot
- 1) {
210 other
= btrfs_node_blockptr(parent_node
, i
+ 1);
211 close
= close_blocks(blocknr
, other
);
214 last_block
= blocknr
;
218 cur_bh
= btrfs_find_tree_block(root
, blocknr
);
219 if (!cur_bh
|| !buffer_uptodate(cur_bh
) ||
220 buffer_locked(cur_bh
) || !buffer_defrag(cur_bh
)) {
225 if (!cur_bh
|| !buffer_uptodate(cur_bh
) ||
226 buffer_locked(cur_bh
)) {
228 cur_bh
= read_tree_block(root
, blocknr
);
231 if (search_start
== 0)
232 search_start
= last_block
& ~((u64
)65535);
234 err
= __btrfs_cow_block(trans
, root
, cur_bh
, parent
, i
,
235 &tmp_bh
, search_start
,
236 min(8, end_slot
- i
));
239 search_start
= bh_blocknr(tmp_bh
);
240 *last_ret
= search_start
;
241 if (parent_level
== 1)
242 clear_buffer_defrag(tmp_bh
);
249 * The leaf data grows from end-to-front in the node.
250 * this returns the address of the start of the last item,
251 * which is the stop of the leaf data stack
253 static inline unsigned int leaf_data_end(struct btrfs_root
*root
,
254 struct btrfs_leaf
*leaf
)
256 u32 nr
= btrfs_header_nritems(&leaf
->header
);
258 return BTRFS_LEAF_DATA_SIZE(root
);
259 return btrfs_item_offset(leaf
->items
+ nr
- 1);
263 * compare two keys in a memcmp fashion
265 static int comp_keys(struct btrfs_disk_key
*disk
, struct btrfs_key
*k2
)
269 btrfs_disk_key_to_cpu(&k1
, disk
);
271 if (k1
.objectid
> k2
->objectid
)
273 if (k1
.objectid
< k2
->objectid
)
275 if (k1
.flags
> k2
->flags
)
277 if (k1
.flags
< k2
->flags
)
279 if (k1
.offset
> k2
->offset
)
281 if (k1
.offset
< k2
->offset
)
286 static int check_node(struct btrfs_root
*root
, struct btrfs_path
*path
,
289 struct btrfs_node
*parent
= NULL
;
290 struct btrfs_node
*node
= btrfs_buffer_node(path
->nodes
[level
]);
293 struct btrfs_key cpukey
;
294 u32 nritems
= btrfs_header_nritems(&node
->header
);
296 if (path
->nodes
[level
+ 1])
297 parent
= btrfs_buffer_node(path
->nodes
[level
+ 1]);
299 slot
= path
->slots
[level
];
300 BUG_ON(nritems
== 0);
302 struct btrfs_disk_key
*parent_key
;
304 parent_slot
= path
->slots
[level
+ 1];
305 parent_key
= &parent
->ptrs
[parent_slot
].key
;
306 BUG_ON(memcmp(parent_key
, &node
->ptrs
[0].key
,
307 sizeof(struct btrfs_disk_key
)));
308 BUG_ON(btrfs_node_blockptr(parent
, parent_slot
) !=
309 btrfs_header_blocknr(&node
->header
));
311 BUG_ON(nritems
> BTRFS_NODEPTRS_PER_BLOCK(root
));
313 btrfs_disk_key_to_cpu(&cpukey
, &node
->ptrs
[slot
- 1].key
);
314 BUG_ON(comp_keys(&node
->ptrs
[slot
].key
, &cpukey
) <= 0);
316 if (slot
< nritems
- 1) {
317 btrfs_disk_key_to_cpu(&cpukey
, &node
->ptrs
[slot
+ 1].key
);
318 BUG_ON(comp_keys(&node
->ptrs
[slot
].key
, &cpukey
) >= 0);
323 static int check_leaf(struct btrfs_root
*root
, struct btrfs_path
*path
,
326 struct btrfs_leaf
*leaf
= btrfs_buffer_leaf(path
->nodes
[level
]);
327 struct btrfs_node
*parent
= NULL
;
329 int slot
= path
->slots
[0];
330 struct btrfs_key cpukey
;
332 u32 nritems
= btrfs_header_nritems(&leaf
->header
);
334 if (path
->nodes
[level
+ 1])
335 parent
= btrfs_buffer_node(path
->nodes
[level
+ 1]);
337 BUG_ON(btrfs_leaf_free_space(root
, leaf
) < 0);
343 struct btrfs_disk_key
*parent_key
;
345 parent_slot
= path
->slots
[level
+ 1];
346 parent_key
= &parent
->ptrs
[parent_slot
].key
;
348 BUG_ON(memcmp(parent_key
, &leaf
->items
[0].key
,
349 sizeof(struct btrfs_disk_key
)));
350 BUG_ON(btrfs_node_blockptr(parent
, parent_slot
) !=
351 btrfs_header_blocknr(&leaf
->header
));
354 btrfs_disk_key_to_cpu(&cpukey
, &leaf
->items
[slot
- 1].key
);
355 BUG_ON(comp_keys(&leaf
->items
[slot
].key
, &cpukey
) <= 0);
356 BUG_ON(btrfs_item_offset(leaf
->items
+ slot
- 1) !=
357 btrfs_item_end(leaf
->items
+ slot
));
359 if (slot
< nritems
- 1) {
360 btrfs_disk_key_to_cpu(&cpukey
, &leaf
->items
[slot
+ 1].key
);
361 BUG_ON(comp_keys(&leaf
->items
[slot
].key
, &cpukey
) >= 0);
362 BUG_ON(btrfs_item_offset(leaf
->items
+ slot
) !=
363 btrfs_item_end(leaf
->items
+ slot
+ 1));
365 BUG_ON(btrfs_item_offset(leaf
->items
) +
366 btrfs_item_size(leaf
->items
) != BTRFS_LEAF_DATA_SIZE(root
));
370 static int check_block(struct btrfs_root
*root
, struct btrfs_path
*path
,
373 struct btrfs_node
*node
= btrfs_buffer_node(path
->nodes
[level
]);
374 if (memcmp(node
->header
.fsid
, root
->fs_info
->disk_super
->fsid
,
375 sizeof(node
->header
.fsid
)))
378 return check_leaf(root
, path
, level
);
379 return check_node(root
, path
, level
);
383 * search for key in the array p. items p are item_size apart
384 * and there are 'max' items in p
385 * the slot in the array is returned via slot, and it points to
386 * the place where you would insert key if it is not found in
389 * slot may point to max if the key is bigger than all of the keys
391 static int generic_bin_search(char *p
, int item_size
, struct btrfs_key
*key
,
398 struct btrfs_disk_key
*tmp
;
401 mid
= (low
+ high
) / 2;
402 tmp
= (struct btrfs_disk_key
*)(p
+ mid
* item_size
);
403 ret
= comp_keys(tmp
, key
);
419 * simple bin_search frontend that does the right thing for
422 static int bin_search(struct btrfs_node
*c
, struct btrfs_key
*key
, int *slot
)
424 if (btrfs_is_leaf(c
)) {
425 struct btrfs_leaf
*l
= (struct btrfs_leaf
*)c
;
426 return generic_bin_search((void *)l
->items
,
427 sizeof(struct btrfs_item
),
428 key
, btrfs_header_nritems(&c
->header
),
431 return generic_bin_search((void *)c
->ptrs
,
432 sizeof(struct btrfs_key_ptr
),
433 key
, btrfs_header_nritems(&c
->header
),
439 static struct buffer_head
*read_node_slot(struct btrfs_root
*root
,
440 struct buffer_head
*parent_buf
,
443 struct btrfs_node
*node
= btrfs_buffer_node(parent_buf
);
446 if (slot
>= btrfs_header_nritems(&node
->header
))
448 return read_tree_block(root
, btrfs_node_blockptr(node
, slot
));
451 static int balance_level(struct btrfs_trans_handle
*trans
, struct btrfs_root
452 *root
, struct btrfs_path
*path
, int level
)
454 struct buffer_head
*right_buf
;
455 struct buffer_head
*mid_buf
;
456 struct buffer_head
*left_buf
;
457 struct buffer_head
*parent_buf
= NULL
;
458 struct btrfs_node
*right
= NULL
;
459 struct btrfs_node
*mid
;
460 struct btrfs_node
*left
= NULL
;
461 struct btrfs_node
*parent
= NULL
;
465 int orig_slot
= path
->slots
[level
];
466 int err_on_enospc
= 0;
472 mid_buf
= path
->nodes
[level
];
473 mid
= btrfs_buffer_node(mid_buf
);
474 orig_ptr
= btrfs_node_blockptr(mid
, orig_slot
);
476 if (level
< BTRFS_MAX_LEVEL
- 1)
477 parent_buf
= path
->nodes
[level
+ 1];
478 pslot
= path
->slots
[level
+ 1];
481 * deal with the case where there is only one pointer in the root
482 * by promoting the node below to a root
485 struct buffer_head
*child
;
486 u64 blocknr
= bh_blocknr(mid_buf
);
488 if (btrfs_header_nritems(&mid
->header
) != 1)
491 /* promote the child to a root */
492 child
= read_node_slot(root
, mid_buf
, 0);
495 path
->nodes
[level
] = NULL
;
496 clean_tree_block(trans
, root
, mid_buf
);
497 wait_on_buffer(mid_buf
);
498 /* once for the path */
499 btrfs_block_release(root
, mid_buf
);
500 /* once for the root ptr */
501 btrfs_block_release(root
, mid_buf
);
502 return btrfs_free_extent(trans
, root
, blocknr
, 1, 1);
504 parent
= btrfs_buffer_node(parent_buf
);
506 if (btrfs_header_nritems(&mid
->header
) >
507 BTRFS_NODEPTRS_PER_BLOCK(root
) / 4)
510 if (btrfs_header_nritems(&mid
->header
) < 2)
513 left_buf
= read_node_slot(root
, parent_buf
, pslot
- 1);
514 right_buf
= read_node_slot(root
, parent_buf
, pslot
+ 1);
516 /* first, try to make some room in the middle buffer */
518 wret
= btrfs_cow_block(trans
, root
, left_buf
,
519 parent_buf
, pslot
- 1, &left_buf
);
524 left
= btrfs_buffer_node(left_buf
);
525 orig_slot
+= btrfs_header_nritems(&left
->header
);
526 wret
= push_node_left(trans
, root
, left_buf
, mid_buf
);
529 if (btrfs_header_nritems(&mid
->header
) < 2)
534 * then try to empty the right most buffer into the middle
537 wret
= btrfs_cow_block(trans
, root
, right_buf
,
538 parent_buf
, pslot
+ 1, &right_buf
);
544 right
= btrfs_buffer_node(right_buf
);
545 wret
= push_node_left(trans
, root
, mid_buf
, right_buf
);
546 if (wret
< 0 && wret
!= -ENOSPC
)
548 if (btrfs_header_nritems(&right
->header
) == 0) {
549 u64 blocknr
= bh_blocknr(right_buf
);
550 clean_tree_block(trans
, root
, right_buf
);
551 wait_on_buffer(right_buf
);
552 btrfs_block_release(root
, right_buf
);
555 wret
= del_ptr(trans
, root
, path
, level
+ 1, pslot
+
559 wret
= btrfs_free_extent(trans
, root
, blocknr
, 1, 1);
563 btrfs_memcpy(root
, parent
,
564 &parent
->ptrs
[pslot
+ 1].key
,
566 sizeof(struct btrfs_disk_key
));
567 btrfs_mark_buffer_dirty(parent_buf
);
570 if (btrfs_header_nritems(&mid
->header
) == 1) {
572 * we're not allowed to leave a node with one item in the
573 * tree during a delete. A deletion from lower in the tree
574 * could try to delete the only pointer in this node.
575 * So, pull some keys from the left.
576 * There has to be a left pointer at this point because
577 * otherwise we would have pulled some pointers from the
581 wret
= balance_node_right(trans
, root
, mid_buf
, left_buf
);
588 if (btrfs_header_nritems(&mid
->header
) == 0) {
589 /* we've managed to empty the middle node, drop it */
590 u64 blocknr
= bh_blocknr(mid_buf
);
591 clean_tree_block(trans
, root
, mid_buf
);
592 wait_on_buffer(mid_buf
);
593 btrfs_block_release(root
, mid_buf
);
596 wret
= del_ptr(trans
, root
, path
, level
+ 1, pslot
);
599 wret
= btrfs_free_extent(trans
, root
, blocknr
, 1, 1);
603 /* update the parent key to reflect our changes */
604 btrfs_memcpy(root
, parent
,
605 &parent
->ptrs
[pslot
].key
, &mid
->ptrs
[0].key
,
606 sizeof(struct btrfs_disk_key
));
607 btrfs_mark_buffer_dirty(parent_buf
);
610 /* update the path */
612 if (btrfs_header_nritems(&left
->header
) > orig_slot
) {
614 path
->nodes
[level
] = left_buf
;
615 path
->slots
[level
+ 1] -= 1;
616 path
->slots
[level
] = orig_slot
;
618 btrfs_block_release(root
, mid_buf
);
620 orig_slot
-= btrfs_header_nritems(&left
->header
);
621 path
->slots
[level
] = orig_slot
;
624 /* double check we haven't messed things up */
625 check_block(root
, path
, level
);
627 btrfs_node_blockptr(btrfs_buffer_node(path
->nodes
[level
]),
632 btrfs_block_release(root
, right_buf
);
634 btrfs_block_release(root
, left_buf
);
638 /* returns zero if the push worked, non-zero otherwise */
639 static int push_nodes_for_insert(struct btrfs_trans_handle
*trans
,
640 struct btrfs_root
*root
,
641 struct btrfs_path
*path
, int level
)
643 struct buffer_head
*right_buf
;
644 struct buffer_head
*mid_buf
;
645 struct buffer_head
*left_buf
;
646 struct buffer_head
*parent_buf
= NULL
;
647 struct btrfs_node
*right
= NULL
;
648 struct btrfs_node
*mid
;
649 struct btrfs_node
*left
= NULL
;
650 struct btrfs_node
*parent
= NULL
;
654 int orig_slot
= path
->slots
[level
];
660 mid_buf
= path
->nodes
[level
];
661 mid
= btrfs_buffer_node(mid_buf
);
662 orig_ptr
= btrfs_node_blockptr(mid
, orig_slot
);
664 if (level
< BTRFS_MAX_LEVEL
- 1)
665 parent_buf
= path
->nodes
[level
+ 1];
666 pslot
= path
->slots
[level
+ 1];
670 parent
= btrfs_buffer_node(parent_buf
);
672 left_buf
= read_node_slot(root
, parent_buf
, pslot
- 1);
674 /* first, try to make some room in the middle buffer */
677 left
= btrfs_buffer_node(left_buf
);
678 left_nr
= btrfs_header_nritems(&left
->header
);
679 if (left_nr
>= BTRFS_NODEPTRS_PER_BLOCK(root
) - 1) {
682 ret
= btrfs_cow_block(trans
, root
, left_buf
, parent_buf
,
683 pslot
- 1, &left_buf
);
687 left
= btrfs_buffer_node(left_buf
);
688 wret
= push_node_left(trans
, root
,
695 orig_slot
+= left_nr
;
696 btrfs_memcpy(root
, parent
,
697 &parent
->ptrs
[pslot
].key
,
699 sizeof(struct btrfs_disk_key
));
700 btrfs_mark_buffer_dirty(parent_buf
);
701 if (btrfs_header_nritems(&left
->header
) > orig_slot
) {
702 path
->nodes
[level
] = left_buf
;
703 path
->slots
[level
+ 1] -= 1;
704 path
->slots
[level
] = orig_slot
;
705 btrfs_block_release(root
, mid_buf
);
708 btrfs_header_nritems(&left
->header
);
709 path
->slots
[level
] = orig_slot
;
710 btrfs_block_release(root
, left_buf
);
712 check_node(root
, path
, level
);
715 btrfs_block_release(root
, left_buf
);
717 right_buf
= read_node_slot(root
, parent_buf
, pslot
+ 1);
720 * then try to empty the right most buffer into the middle
724 right
= btrfs_buffer_node(right_buf
);
725 right_nr
= btrfs_header_nritems(&right
->header
);
726 if (right_nr
>= BTRFS_NODEPTRS_PER_BLOCK(root
) - 1) {
729 ret
= btrfs_cow_block(trans
, root
, right_buf
,
730 parent_buf
, pslot
+ 1,
735 right
= btrfs_buffer_node(right_buf
);
736 wret
= balance_node_right(trans
, root
,
743 btrfs_memcpy(root
, parent
,
744 &parent
->ptrs
[pslot
+ 1].key
,
746 sizeof(struct btrfs_disk_key
));
747 btrfs_mark_buffer_dirty(parent_buf
);
748 if (btrfs_header_nritems(&mid
->header
) <= orig_slot
) {
749 path
->nodes
[level
] = right_buf
;
750 path
->slots
[level
+ 1] += 1;
751 path
->slots
[level
] = orig_slot
-
752 btrfs_header_nritems(&mid
->header
);
753 btrfs_block_release(root
, mid_buf
);
755 btrfs_block_release(root
, right_buf
);
757 check_node(root
, path
, level
);
760 btrfs_block_release(root
, right_buf
);
762 check_node(root
, path
, level
);
767 * readahead one full node of leaves
769 static void reada_for_search(struct btrfs_root
*root
, struct btrfs_path
*path
,
772 struct btrfs_node
*node
;
781 int direction
= path
->reada
;
782 struct radix_tree_root found
;
783 unsigned long gang
[8];
784 struct buffer_head
*bh
;
789 if (!path
->nodes
[level
])
792 node
= btrfs_buffer_node(path
->nodes
[level
]);
793 search
= btrfs_node_blockptr(node
, slot
);
794 bh
= btrfs_find_tree_block(root
, search
);
800 init_bit_radix(&found
);
801 nritems
= btrfs_header_nritems(&node
->header
);
802 for (i
= slot
; i
< nritems
; i
++) {
803 item_objectid
= btrfs_disk_key_objectid(&node
->ptrs
[i
].key
);
804 blocknr
= btrfs_node_blockptr(node
, i
);
805 set_radix_bit(&found
, blocknr
);
808 cluster_start
= search
- 4;
809 if (cluster_start
> search
)
812 cluster_start
= search
+ 4;
814 ret
= find_first_radix_bit(&found
, gang
, 0, ARRAY_SIZE(gang
));
817 for (i
= 0; i
< ret
; i
++) {
819 clear_radix_bit(&found
, blocknr
);
822 if (close_blocks(cluster_start
, blocknr
)) {
823 readahead_tree_block(root
, blocknr
);
825 cluster_start
= blocknr
;
831 * look for key in the tree. path is filled in with nodes along the way
832 * if key is found, we return zero and you can find the item in the leaf
833 * level of the path (level 0)
835 * If the key isn't found, the path points to the slot where it should
836 * be inserted, and 1 is returned. If there are other errors during the
837 * search a negative error number is returned.
839 * if ins_len > 0, nodes and leaves will be split as we walk down the
840 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
843 int btrfs_search_slot(struct btrfs_trans_handle
*trans
, struct btrfs_root
844 *root
, struct btrfs_key
*key
, struct btrfs_path
*p
, int
847 struct buffer_head
*b
;
848 struct buffer_head
*cow_buf
;
849 struct btrfs_node
*c
;
854 int should_reada
= p
->reada
;
857 lowest_level
= p
->lowest_level
;
858 WARN_ON(lowest_level
&& ins_len
);
859 WARN_ON(p
->nodes
[0] != NULL
);
860 WARN_ON(!mutex_is_locked(&root
->fs_info
->fs_mutex
));
865 c
= btrfs_buffer_node(b
);
866 level
= btrfs_header_level(&c
->header
);
869 wret
= btrfs_cow_block(trans
, root
, b
,
874 btrfs_block_release(root
, cow_buf
);
878 c
= btrfs_buffer_node(b
);
880 BUG_ON(!cow
&& ins_len
);
881 if (level
!= btrfs_header_level(&c
->header
))
883 level
= btrfs_header_level(&c
->header
);
885 ret
= check_block(root
, p
, level
);
888 ret
= bin_search(c
, key
, &slot
);
889 if (!btrfs_is_leaf(c
)) {
892 p
->slots
[level
] = slot
;
893 if (ins_len
> 0 && btrfs_header_nritems(&c
->header
) >=
894 BTRFS_NODEPTRS_PER_BLOCK(root
) - 1) {
895 int sret
= split_node(trans
, root
, p
, level
);
900 c
= btrfs_buffer_node(b
);
901 slot
= p
->slots
[level
];
902 } else if (ins_len
< 0) {
903 int sret
= balance_level(trans
, root
, p
,
910 c
= btrfs_buffer_node(b
);
911 slot
= p
->slots
[level
];
912 BUG_ON(btrfs_header_nritems(&c
->header
) == 1);
914 /* this is only true while dropping a snapshot */
915 if (level
== lowest_level
)
917 blocknr
= btrfs_node_blockptr(c
, slot
);
919 reada_for_search(root
, p
, level
, slot
);
920 b
= read_tree_block(root
, btrfs_node_blockptr(c
, slot
));
923 struct btrfs_leaf
*l
= (struct btrfs_leaf
*)c
;
924 p
->slots
[level
] = slot
;
925 if (ins_len
> 0 && btrfs_leaf_free_space(root
, l
) <
926 sizeof(struct btrfs_item
) + ins_len
) {
927 int sret
= split_leaf(trans
, root
, key
,
940 * adjust the pointers going up the tree, starting at level
941 * making sure the right key of each node is points to 'key'.
942 * This is used after shifting pointers to the left, so it stops
943 * fixing up pointers when a given leaf/node is not in slot 0 of the
946 * If this fails to write a tree block, it returns -1, but continues
947 * fixing up the blocks in ram so the tree is consistent.
949 static int fixup_low_keys(struct btrfs_trans_handle
*trans
, struct btrfs_root
950 *root
, struct btrfs_path
*path
, struct btrfs_disk_key
955 for (i
= level
; i
< BTRFS_MAX_LEVEL
; i
++) {
956 struct btrfs_node
*t
;
957 int tslot
= path
->slots
[i
];
960 t
= btrfs_buffer_node(path
->nodes
[i
]);
961 btrfs_memcpy(root
, t
, &t
->ptrs
[tslot
].key
, key
, sizeof(*key
));
962 btrfs_mark_buffer_dirty(path
->nodes
[i
]);
970 * try to push data from one node into the next node left in the
973 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
974 * error, and > 0 if there was no room in the left hand block.
976 static int push_node_left(struct btrfs_trans_handle
*trans
, struct btrfs_root
977 *root
, struct buffer_head
*dst_buf
, struct
978 buffer_head
*src_buf
)
980 struct btrfs_node
*src
= btrfs_buffer_node(src_buf
);
981 struct btrfs_node
*dst
= btrfs_buffer_node(dst_buf
);
987 src_nritems
= btrfs_header_nritems(&src
->header
);
988 dst_nritems
= btrfs_header_nritems(&dst
->header
);
989 push_items
= BTRFS_NODEPTRS_PER_BLOCK(root
) - dst_nritems
;
991 if (push_items
<= 0) {
995 if (src_nritems
< push_items
)
996 push_items
= src_nritems
;
998 btrfs_memcpy(root
, dst
, dst
->ptrs
+ dst_nritems
, src
->ptrs
,
999 push_items
* sizeof(struct btrfs_key_ptr
));
1000 if (push_items
< src_nritems
) {
1001 btrfs_memmove(root
, src
, src
->ptrs
, src
->ptrs
+ push_items
,
1002 (src_nritems
- push_items
) *
1003 sizeof(struct btrfs_key_ptr
));
1005 btrfs_set_header_nritems(&src
->header
, src_nritems
- push_items
);
1006 btrfs_set_header_nritems(&dst
->header
, dst_nritems
+ push_items
);
1007 btrfs_mark_buffer_dirty(src_buf
);
1008 btrfs_mark_buffer_dirty(dst_buf
);
1013 * try to push data from one node into the next node right in the
1016 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1017 * error, and > 0 if there was no room in the right hand block.
1019 * this will only push up to 1/2 the contents of the left node over
1021 static int balance_node_right(struct btrfs_trans_handle
*trans
, struct
1022 btrfs_root
*root
, struct buffer_head
*dst_buf
,
1023 struct buffer_head
*src_buf
)
1025 struct btrfs_node
*src
= btrfs_buffer_node(src_buf
);
1026 struct btrfs_node
*dst
= btrfs_buffer_node(dst_buf
);
1033 src_nritems
= btrfs_header_nritems(&src
->header
);
1034 dst_nritems
= btrfs_header_nritems(&dst
->header
);
1035 push_items
= BTRFS_NODEPTRS_PER_BLOCK(root
) - dst_nritems
;
1036 if (push_items
<= 0) {
1040 max_push
= src_nritems
/ 2 + 1;
1041 /* don't try to empty the node */
1042 if (max_push
> src_nritems
)
1044 if (max_push
< push_items
)
1045 push_items
= max_push
;
1047 btrfs_memmove(root
, dst
, dst
->ptrs
+ push_items
, dst
->ptrs
,
1048 dst_nritems
* sizeof(struct btrfs_key_ptr
));
1050 btrfs_memcpy(root
, dst
, dst
->ptrs
,
1051 src
->ptrs
+ src_nritems
- push_items
,
1052 push_items
* sizeof(struct btrfs_key_ptr
));
1054 btrfs_set_header_nritems(&src
->header
, src_nritems
- push_items
);
1055 btrfs_set_header_nritems(&dst
->header
, dst_nritems
+ push_items
);
1057 btrfs_mark_buffer_dirty(src_buf
);
1058 btrfs_mark_buffer_dirty(dst_buf
);
1063 * helper function to insert a new root level in the tree.
1064 * A new node is allocated, and a single item is inserted to
1065 * point to the existing root
1067 * returns zero on success or < 0 on failure.
1069 static int insert_new_root(struct btrfs_trans_handle
*trans
, struct btrfs_root
1070 *root
, struct btrfs_path
*path
, int level
)
1072 struct buffer_head
*t
;
1073 struct btrfs_node
*lower
;
1074 struct btrfs_node
*c
;
1075 struct btrfs_disk_key
*lower_key
;
1077 BUG_ON(path
->nodes
[level
]);
1078 BUG_ON(path
->nodes
[level
-1] != root
->node
);
1080 t
= btrfs_alloc_free_block(trans
, root
, root
->node
->b_blocknr
, 0);
1083 c
= btrfs_buffer_node(t
);
1084 memset(c
, 0, root
->blocksize
);
1085 btrfs_set_header_nritems(&c
->header
, 1);
1086 btrfs_set_header_level(&c
->header
, level
);
1087 btrfs_set_header_blocknr(&c
->header
, bh_blocknr(t
));
1088 btrfs_set_header_generation(&c
->header
, trans
->transid
);
1089 btrfs_set_header_owner(&c
->header
, root
->root_key
.objectid
);
1090 lower
= btrfs_buffer_node(path
->nodes
[level
-1]);
1091 memcpy(c
->header
.fsid
, root
->fs_info
->disk_super
->fsid
,
1092 sizeof(c
->header
.fsid
));
1093 if (btrfs_is_leaf(lower
))
1094 lower_key
= &((struct btrfs_leaf
*)lower
)->items
[0].key
;
1096 lower_key
= &lower
->ptrs
[0].key
;
1097 btrfs_memcpy(root
, c
, &c
->ptrs
[0].key
, lower_key
,
1098 sizeof(struct btrfs_disk_key
));
1099 btrfs_set_node_blockptr(c
, 0, bh_blocknr(path
->nodes
[level
- 1]));
1101 btrfs_mark_buffer_dirty(t
);
1103 /* the super has an extra ref to root->node */
1104 btrfs_block_release(root
, root
->node
);
1107 path
->nodes
[level
] = t
;
1108 path
->slots
[level
] = 0;
1113 * worker function to insert a single pointer in a node.
1114 * the node should have enough room for the pointer already
1116 * slot and level indicate where you want the key to go, and
1117 * blocknr is the block the key points to.
1119 * returns zero on success and < 0 on any error
1121 static int insert_ptr(struct btrfs_trans_handle
*trans
, struct btrfs_root
1122 *root
, struct btrfs_path
*path
, struct btrfs_disk_key
1123 *key
, u64 blocknr
, int slot
, int level
)
1125 struct btrfs_node
*lower
;
1128 BUG_ON(!path
->nodes
[level
]);
1129 lower
= btrfs_buffer_node(path
->nodes
[level
]);
1130 nritems
= btrfs_header_nritems(&lower
->header
);
1133 if (nritems
== BTRFS_NODEPTRS_PER_BLOCK(root
))
1135 if (slot
!= nritems
) {
1136 btrfs_memmove(root
, lower
, lower
->ptrs
+ slot
+ 1,
1138 (nritems
- slot
) * sizeof(struct btrfs_key_ptr
));
1140 btrfs_memcpy(root
, lower
, &lower
->ptrs
[slot
].key
,
1141 key
, sizeof(struct btrfs_disk_key
));
1142 btrfs_set_node_blockptr(lower
, slot
, blocknr
);
1143 btrfs_set_header_nritems(&lower
->header
, nritems
+ 1);
1144 btrfs_mark_buffer_dirty(path
->nodes
[level
]);
1145 check_node(root
, path
, level
);
1150 * split the node at the specified level in path in two.
1151 * The path is corrected to point to the appropriate node after the split
1153 * Before splitting this tries to make some room in the node by pushing
1154 * left and right, if either one works, it returns right away.
1156 * returns 0 on success and < 0 on failure
1158 static int split_node(struct btrfs_trans_handle
*trans
, struct btrfs_root
1159 *root
, struct btrfs_path
*path
, int level
)
1161 struct buffer_head
*t
;
1162 struct btrfs_node
*c
;
1163 struct buffer_head
*split_buffer
;
1164 struct btrfs_node
*split
;
1170 t
= path
->nodes
[level
];
1171 c
= btrfs_buffer_node(t
);
1172 if (t
== root
->node
) {
1173 /* trying to split the root, lets make a new one */
1174 ret
= insert_new_root(trans
, root
, path
, level
+ 1);
1178 ret
= push_nodes_for_insert(trans
, root
, path
, level
);
1179 t
= path
->nodes
[level
];
1180 c
= btrfs_buffer_node(t
);
1182 btrfs_header_nritems(&c
->header
) <
1183 BTRFS_NODEPTRS_PER_BLOCK(root
) - 1)
1189 c_nritems
= btrfs_header_nritems(&c
->header
);
1190 split_buffer
= btrfs_alloc_free_block(trans
, root
, t
->b_blocknr
, 0);
1191 if (IS_ERR(split_buffer
))
1192 return PTR_ERR(split_buffer
);
1194 split
= btrfs_buffer_node(split_buffer
);
1195 btrfs_set_header_flags(&split
->header
, btrfs_header_flags(&c
->header
));
1196 btrfs_set_header_level(&split
->header
, btrfs_header_level(&c
->header
));
1197 btrfs_set_header_blocknr(&split
->header
, bh_blocknr(split_buffer
));
1198 btrfs_set_header_generation(&split
->header
, trans
->transid
);
1199 btrfs_set_header_owner(&split
->header
, root
->root_key
.objectid
);
1200 memcpy(split
->header
.fsid
, root
->fs_info
->disk_super
->fsid
,
1201 sizeof(split
->header
.fsid
));
1202 mid
= (c_nritems
+ 1) / 2;
1203 btrfs_memcpy(root
, split
, split
->ptrs
, c
->ptrs
+ mid
,
1204 (c_nritems
- mid
) * sizeof(struct btrfs_key_ptr
));
1205 btrfs_set_header_nritems(&split
->header
, c_nritems
- mid
);
1206 btrfs_set_header_nritems(&c
->header
, mid
);
1209 btrfs_mark_buffer_dirty(t
);
1210 btrfs_mark_buffer_dirty(split_buffer
);
1211 wret
= insert_ptr(trans
, root
, path
, &split
->ptrs
[0].key
,
1212 bh_blocknr(split_buffer
), path
->slots
[level
+ 1] + 1,
1217 if (path
->slots
[level
] >= mid
) {
1218 path
->slots
[level
] -= mid
;
1219 btrfs_block_release(root
, t
);
1220 path
->nodes
[level
] = split_buffer
;
1221 path
->slots
[level
+ 1] += 1;
1223 btrfs_block_release(root
, split_buffer
);
1229 * how many bytes are required to store the items in a leaf. start
1230 * and nr indicate which items in the leaf to check. This totals up the
1231 * space used both by the item structs and the item data
1233 static int leaf_space_used(struct btrfs_leaf
*l
, int start
, int nr
)
1236 int nritems
= btrfs_header_nritems(&l
->header
);
1237 int end
= min(nritems
, start
+ nr
) - 1;
1241 data_len
= btrfs_item_end(l
->items
+ start
);
1242 data_len
= data_len
- btrfs_item_offset(l
->items
+ end
);
1243 data_len
+= sizeof(struct btrfs_item
) * nr
;
1244 WARN_ON(data_len
< 0);
1249 * The space between the end of the leaf items and
1250 * the start of the leaf data. IOW, how much room
1251 * the leaf has left for both items and data
1253 int btrfs_leaf_free_space(struct btrfs_root
*root
, struct btrfs_leaf
*leaf
)
1255 int nritems
= btrfs_header_nritems(&leaf
->header
);
1256 return BTRFS_LEAF_DATA_SIZE(root
) - leaf_space_used(leaf
, 0, nritems
);
1260 * push some data in the path leaf to the right, trying to free up at
1261 * least data_size bytes. returns zero if the push worked, nonzero otherwise
1263 * returns 1 if the push failed because the other node didn't have enough
1264 * room, 0 if everything worked out and < 0 if there were major errors.
1266 static int push_leaf_right(struct btrfs_trans_handle
*trans
, struct btrfs_root
1267 *root
, struct btrfs_path
*path
, int data_size
)
1269 struct buffer_head
*left_buf
= path
->nodes
[0];
1270 struct btrfs_leaf
*left
= btrfs_buffer_leaf(left_buf
);
1271 struct btrfs_leaf
*right
;
1272 struct buffer_head
*right_buf
;
1273 struct buffer_head
*upper
;
1274 struct btrfs_node
*upper_node
;
1280 struct btrfs_item
*item
;
1285 slot
= path
->slots
[1];
1286 if (!path
->nodes
[1]) {
1289 upper
= path
->nodes
[1];
1290 upper_node
= btrfs_buffer_node(upper
);
1291 if (slot
>= btrfs_header_nritems(&upper_node
->header
) - 1) {
1294 right_buf
= read_tree_block(root
,
1295 btrfs_node_blockptr(btrfs_buffer_node(upper
), slot
+ 1));
1296 right
= btrfs_buffer_leaf(right_buf
);
1297 free_space
= btrfs_leaf_free_space(root
, right
);
1298 if (free_space
< data_size
+ sizeof(struct btrfs_item
)) {
1299 btrfs_block_release(root
, right_buf
);
1302 /* cow and double check */
1303 ret
= btrfs_cow_block(trans
, root
, right_buf
, upper
,
1304 slot
+ 1, &right_buf
);
1306 btrfs_block_release(root
, right_buf
);
1309 right
= btrfs_buffer_leaf(right_buf
);
1310 free_space
= btrfs_leaf_free_space(root
, right
);
1311 if (free_space
< data_size
+ sizeof(struct btrfs_item
)) {
1312 btrfs_block_release(root
, right_buf
);
1316 left_nritems
= btrfs_header_nritems(&left
->header
);
1317 if (left_nritems
== 0) {
1318 btrfs_block_release(root
, right_buf
);
1321 for (i
= left_nritems
- 1; i
>= 1; i
--) {
1322 item
= left
->items
+ i
;
1323 if (path
->slots
[0] == i
)
1324 push_space
+= data_size
+ sizeof(*item
);
1325 if (btrfs_item_size(item
) + sizeof(*item
) + push_space
>
1329 push_space
+= btrfs_item_size(item
) + sizeof(*item
);
1331 if (push_items
== 0) {
1332 btrfs_block_release(root
, right_buf
);
1335 if (push_items
== left_nritems
)
1337 right_nritems
= btrfs_header_nritems(&right
->header
);
1338 /* push left to right */
1339 push_space
= btrfs_item_end(left
->items
+ left_nritems
- push_items
);
1340 push_space
-= leaf_data_end(root
, left
);
1341 /* make room in the right data area */
1342 btrfs_memmove(root
, right
, btrfs_leaf_data(right
) +
1343 leaf_data_end(root
, right
) - push_space
,
1344 btrfs_leaf_data(right
) +
1345 leaf_data_end(root
, right
), BTRFS_LEAF_DATA_SIZE(root
) -
1346 leaf_data_end(root
, right
));
1347 /* copy from the left data area */
1348 btrfs_memcpy(root
, right
, btrfs_leaf_data(right
) +
1349 BTRFS_LEAF_DATA_SIZE(root
) - push_space
,
1350 btrfs_leaf_data(left
) + leaf_data_end(root
, left
),
1352 btrfs_memmove(root
, right
, right
->items
+ push_items
, right
->items
,
1353 right_nritems
* sizeof(struct btrfs_item
));
1354 /* copy the items from left to right */
1355 btrfs_memcpy(root
, right
, right
->items
, left
->items
+
1356 left_nritems
- push_items
,
1357 push_items
* sizeof(struct btrfs_item
));
1359 /* update the item pointers */
1360 right_nritems
+= push_items
;
1361 btrfs_set_header_nritems(&right
->header
, right_nritems
);
1362 push_space
= BTRFS_LEAF_DATA_SIZE(root
);
1363 for (i
= 0; i
< right_nritems
; i
++) {
1364 btrfs_set_item_offset(right
->items
+ i
, push_space
-
1365 btrfs_item_size(right
->items
+ i
));
1366 push_space
= btrfs_item_offset(right
->items
+ i
);
1368 left_nritems
-= push_items
;
1369 btrfs_set_header_nritems(&left
->header
, left_nritems
);
1371 btrfs_mark_buffer_dirty(left_buf
);
1372 btrfs_mark_buffer_dirty(right_buf
);
1374 btrfs_memcpy(root
, upper_node
, &upper_node
->ptrs
[slot
+ 1].key
,
1375 &right
->items
[0].key
, sizeof(struct btrfs_disk_key
));
1376 btrfs_mark_buffer_dirty(upper
);
1378 /* then fixup the leaf pointer in the path */
1379 if (path
->slots
[0] >= left_nritems
) {
1380 path
->slots
[0] -= left_nritems
;
1381 btrfs_block_release(root
, path
->nodes
[0]);
1382 path
->nodes
[0] = right_buf
;
1383 path
->slots
[1] += 1;
1385 btrfs_block_release(root
, right_buf
);
1388 check_node(root
, path
, 1);
1392 * push some data in the path leaf to the left, trying to free up at
1393 * least data_size bytes. returns zero if the push worked, nonzero otherwise
1395 static int push_leaf_left(struct btrfs_trans_handle
*trans
, struct btrfs_root
1396 *root
, struct btrfs_path
*path
, int data_size
)
1398 struct buffer_head
*right_buf
= path
->nodes
[0];
1399 struct btrfs_leaf
*right
= btrfs_buffer_leaf(right_buf
);
1400 struct buffer_head
*t
;
1401 struct btrfs_leaf
*left
;
1407 struct btrfs_item
*item
;
1408 u32 old_left_nritems
;
1412 slot
= path
->slots
[1];
1416 if (!path
->nodes
[1]) {
1419 t
= read_tree_block(root
,
1420 btrfs_node_blockptr(btrfs_buffer_node(path
->nodes
[1]), slot
- 1));
1421 left
= btrfs_buffer_leaf(t
);
1422 free_space
= btrfs_leaf_free_space(root
, left
);
1423 if (free_space
< data_size
+ sizeof(struct btrfs_item
)) {
1424 btrfs_block_release(root
, t
);
1428 /* cow and double check */
1429 ret
= btrfs_cow_block(trans
, root
, t
, path
->nodes
[1], slot
- 1, &t
);
1431 /* we hit -ENOSPC, but it isn't fatal here */
1434 left
= btrfs_buffer_leaf(t
);
1435 free_space
= btrfs_leaf_free_space(root
, left
);
1436 if (free_space
< data_size
+ sizeof(struct btrfs_item
)) {
1437 btrfs_block_release(root
, t
);
1441 if (btrfs_header_nritems(&right
->header
) == 0) {
1442 btrfs_block_release(root
, t
);
1446 for (i
= 0; i
< btrfs_header_nritems(&right
->header
) - 1; i
++) {
1447 item
= right
->items
+ i
;
1448 if (path
->slots
[0] == i
)
1449 push_space
+= data_size
+ sizeof(*item
);
1450 if (btrfs_item_size(item
) + sizeof(*item
) + push_space
>
1454 push_space
+= btrfs_item_size(item
) + sizeof(*item
);
1456 if (push_items
== 0) {
1457 btrfs_block_release(root
, t
);
1460 if (push_items
== btrfs_header_nritems(&right
->header
))
1462 /* push data from right to left */
1463 btrfs_memcpy(root
, left
, left
->items
+
1464 btrfs_header_nritems(&left
->header
),
1465 right
->items
, push_items
* sizeof(struct btrfs_item
));
1466 push_space
= BTRFS_LEAF_DATA_SIZE(root
) -
1467 btrfs_item_offset(right
->items
+ push_items
-1);
1468 btrfs_memcpy(root
, left
, btrfs_leaf_data(left
) +
1469 leaf_data_end(root
, left
) - push_space
,
1470 btrfs_leaf_data(right
) +
1471 btrfs_item_offset(right
->items
+ push_items
- 1),
1473 old_left_nritems
= btrfs_header_nritems(&left
->header
);
1474 BUG_ON(old_left_nritems
< 0);
1476 for (i
= old_left_nritems
; i
< old_left_nritems
+ push_items
; i
++) {
1477 u32 ioff
= btrfs_item_offset(left
->items
+ i
);
1478 btrfs_set_item_offset(left
->items
+ i
, ioff
-
1479 (BTRFS_LEAF_DATA_SIZE(root
) -
1480 btrfs_item_offset(left
->items
+
1481 old_left_nritems
- 1)));
1483 btrfs_set_header_nritems(&left
->header
, old_left_nritems
+ push_items
);
1485 /* fixup right node */
1486 push_space
= btrfs_item_offset(right
->items
+ push_items
- 1) -
1487 leaf_data_end(root
, right
);
1488 btrfs_memmove(root
, right
, btrfs_leaf_data(right
) +
1489 BTRFS_LEAF_DATA_SIZE(root
) - push_space
,
1490 btrfs_leaf_data(right
) +
1491 leaf_data_end(root
, right
), push_space
);
1492 btrfs_memmove(root
, right
, right
->items
, right
->items
+ push_items
,
1493 (btrfs_header_nritems(&right
->header
) - push_items
) *
1494 sizeof(struct btrfs_item
));
1495 btrfs_set_header_nritems(&right
->header
,
1496 btrfs_header_nritems(&right
->header
) -
1498 push_space
= BTRFS_LEAF_DATA_SIZE(root
);
1500 for (i
= 0; i
< btrfs_header_nritems(&right
->header
); i
++) {
1501 btrfs_set_item_offset(right
->items
+ i
, push_space
-
1502 btrfs_item_size(right
->items
+ i
));
1503 push_space
= btrfs_item_offset(right
->items
+ i
);
1506 btrfs_mark_buffer_dirty(t
);
1507 btrfs_mark_buffer_dirty(right_buf
);
1509 wret
= fixup_low_keys(trans
, root
, path
, &right
->items
[0].key
, 1);
1513 /* then fixup the leaf pointer in the path */
1514 if (path
->slots
[0] < push_items
) {
1515 path
->slots
[0] += old_left_nritems
;
1516 btrfs_block_release(root
, path
->nodes
[0]);
1518 path
->slots
[1] -= 1;
1520 btrfs_block_release(root
, t
);
1521 path
->slots
[0] -= push_items
;
1523 BUG_ON(path
->slots
[0] < 0);
1525 check_node(root
, path
, 1);
1530 * split the path's leaf in two, making sure there is at least data_size
1531 * available for the resulting leaf level of the path.
1533 * returns 0 if all went well and < 0 on failure.
1535 static int split_leaf(struct btrfs_trans_handle
*trans
, struct btrfs_root
1536 *root
, struct btrfs_key
*ins_key
,
1537 struct btrfs_path
*path
, int data_size
)
1539 struct buffer_head
*l_buf
;
1540 struct btrfs_leaf
*l
;
1544 struct btrfs_leaf
*right
;
1545 struct buffer_head
*right_buffer
;
1546 int space_needed
= data_size
+ sizeof(struct btrfs_item
);
1552 int double_split
= 0;
1553 struct btrfs_disk_key disk_key
;
1555 /* first try to make some room by pushing left and right */
1556 wret
= push_leaf_left(trans
, root
, path
, data_size
);
1560 wret
= push_leaf_right(trans
, root
, path
, data_size
);
1564 l_buf
= path
->nodes
[0];
1565 l
= btrfs_buffer_leaf(l_buf
);
1567 /* did the pushes work? */
1568 if (btrfs_leaf_free_space(root
, l
) >=
1569 sizeof(struct btrfs_item
) + data_size
)
1572 if (!path
->nodes
[1]) {
1573 ret
= insert_new_root(trans
, root
, path
, 1);
1577 slot
= path
->slots
[0];
1578 nritems
= btrfs_header_nritems(&l
->header
);
1579 mid
= (nritems
+ 1)/ 2;
1581 right_buffer
= btrfs_alloc_free_block(trans
, root
, l_buf
->b_blocknr
, 0);
1582 if (IS_ERR(right_buffer
))
1583 return PTR_ERR(right_buffer
);
1585 right
= btrfs_buffer_leaf(right_buffer
);
1586 memset(&right
->header
, 0, sizeof(right
->header
));
1587 btrfs_set_header_blocknr(&right
->header
, bh_blocknr(right_buffer
));
1588 btrfs_set_header_generation(&right
->header
, trans
->transid
);
1589 btrfs_set_header_owner(&right
->header
, root
->root_key
.objectid
);
1590 btrfs_set_header_level(&right
->header
, 0);
1591 memcpy(right
->header
.fsid
, root
->fs_info
->disk_super
->fsid
,
1592 sizeof(right
->header
.fsid
));
1595 leaf_space_used(l
, mid
, nritems
- mid
) + space_needed
>
1596 BTRFS_LEAF_DATA_SIZE(root
)) {
1597 if (slot
>= nritems
) {
1598 btrfs_cpu_key_to_disk(&disk_key
, ins_key
);
1599 btrfs_set_header_nritems(&right
->header
, 0);
1600 wret
= insert_ptr(trans
, root
, path
,
1602 bh_blocknr(right_buffer
),
1603 path
->slots
[1] + 1, 1);
1606 btrfs_block_release(root
, path
->nodes
[0]);
1607 path
->nodes
[0] = right_buffer
;
1609 path
->slots
[1] += 1;
1616 if (leaf_space_used(l
, 0, mid
+ 1) + space_needed
>
1617 BTRFS_LEAF_DATA_SIZE(root
)) {
1619 btrfs_cpu_key_to_disk(&disk_key
, ins_key
);
1620 btrfs_set_header_nritems(&right
->header
, 0);
1621 wret
= insert_ptr(trans
, root
, path
,
1623 bh_blocknr(right_buffer
),
1627 btrfs_block_release(root
, path
->nodes
[0]);
1628 path
->nodes
[0] = right_buffer
;
1630 if (path
->slots
[1] == 0) {
1631 wret
= fixup_low_keys(trans
, root
,
1632 path
, &disk_key
, 1);
1642 btrfs_set_header_nritems(&right
->header
, nritems
- mid
);
1643 data_copy_size
= btrfs_item_end(l
->items
+ mid
) -
1644 leaf_data_end(root
, l
);
1645 btrfs_memcpy(root
, right
, right
->items
, l
->items
+ mid
,
1646 (nritems
- mid
) * sizeof(struct btrfs_item
));
1647 btrfs_memcpy(root
, right
,
1648 btrfs_leaf_data(right
) + BTRFS_LEAF_DATA_SIZE(root
) -
1649 data_copy_size
, btrfs_leaf_data(l
) +
1650 leaf_data_end(root
, l
), data_copy_size
);
1651 rt_data_off
= BTRFS_LEAF_DATA_SIZE(root
) -
1652 btrfs_item_end(l
->items
+ mid
);
1654 for (i
= 0; i
< btrfs_header_nritems(&right
->header
); i
++) {
1655 u32 ioff
= btrfs_item_offset(right
->items
+ i
);
1656 btrfs_set_item_offset(right
->items
+ i
, ioff
+ rt_data_off
);
1659 btrfs_set_header_nritems(&l
->header
, mid
);
1661 wret
= insert_ptr(trans
, root
, path
, &right
->items
[0].key
,
1662 bh_blocknr(right_buffer
), path
->slots
[1] + 1, 1);
1665 btrfs_mark_buffer_dirty(right_buffer
);
1666 btrfs_mark_buffer_dirty(l_buf
);
1667 BUG_ON(path
->slots
[0] != slot
);
1669 btrfs_block_release(root
, path
->nodes
[0]);
1670 path
->nodes
[0] = right_buffer
;
1671 path
->slots
[0] -= mid
;
1672 path
->slots
[1] += 1;
1674 btrfs_block_release(root
, right_buffer
);
1675 BUG_ON(path
->slots
[0] < 0);
1676 check_node(root
, path
, 1);
1680 right_buffer
= btrfs_alloc_free_block(trans
, root
, l_buf
->b_blocknr
, 0);
1681 if (IS_ERR(right_buffer
))
1682 return PTR_ERR(right_buffer
);
1684 right
= btrfs_buffer_leaf(right_buffer
);
1685 memset(&right
->header
, 0, sizeof(right
->header
));
1686 btrfs_set_header_blocknr(&right
->header
, bh_blocknr(right_buffer
));
1687 btrfs_set_header_generation(&right
->header
, trans
->transid
);
1688 btrfs_set_header_owner(&right
->header
, root
->root_key
.objectid
);
1689 btrfs_set_header_level(&right
->header
, 0);
1690 memcpy(right
->header
.fsid
, root
->fs_info
->disk_super
->fsid
,
1691 sizeof(right
->header
.fsid
));
1692 btrfs_cpu_key_to_disk(&disk_key
, ins_key
);
1693 btrfs_set_header_nritems(&right
->header
, 0);
1694 wret
= insert_ptr(trans
, root
, path
,
1696 bh_blocknr(right_buffer
),
1700 if (path
->slots
[1] == 0) {
1701 wret
= fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
1705 btrfs_block_release(root
, path
->nodes
[0]);
1706 path
->nodes
[0] = right_buffer
;
1708 check_node(root
, path
, 1);
1709 check_leaf(root
, path
, 0);
1713 int btrfs_truncate_item(struct btrfs_trans_handle
*trans
,
1714 struct btrfs_root
*root
,
1715 struct btrfs_path
*path
,
1721 struct btrfs_leaf
*leaf
;
1722 struct buffer_head
*leaf_buf
;
1724 unsigned int data_end
;
1725 unsigned int old_data_start
;
1726 unsigned int old_size
;
1727 unsigned int size_diff
;
1730 slot_orig
= path
->slots
[0];
1731 leaf_buf
= path
->nodes
[0];
1732 leaf
= btrfs_buffer_leaf(leaf_buf
);
1734 nritems
= btrfs_header_nritems(&leaf
->header
);
1735 data_end
= leaf_data_end(root
, leaf
);
1737 slot
= path
->slots
[0];
1738 old_data_start
= btrfs_item_offset(leaf
->items
+ slot
);
1739 old_size
= btrfs_item_size(leaf
->items
+ slot
);
1740 BUG_ON(old_size
<= new_size
);
1741 size_diff
= old_size
- new_size
;
1744 BUG_ON(slot
>= nritems
);
1747 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1749 /* first correct the data pointers */
1750 for (i
= slot
; i
< nritems
; i
++) {
1751 u32 ioff
= btrfs_item_offset(leaf
->items
+ i
);
1752 btrfs_set_item_offset(leaf
->items
+ i
,
1755 /* shift the data */
1756 btrfs_memmove(root
, leaf
, btrfs_leaf_data(leaf
) +
1757 data_end
+ size_diff
, btrfs_leaf_data(leaf
) +
1758 data_end
, old_data_start
+ new_size
- data_end
);
1759 btrfs_set_item_size(leaf
->items
+ slot
, new_size
);
1760 btrfs_mark_buffer_dirty(leaf_buf
);
1763 if (btrfs_leaf_free_space(root
, leaf
) < 0)
1765 check_leaf(root
, path
, 0);
1769 int btrfs_extend_item(struct btrfs_trans_handle
*trans
, struct btrfs_root
1770 *root
, struct btrfs_path
*path
, u32 data_size
)
1775 struct btrfs_leaf
*leaf
;
1776 struct buffer_head
*leaf_buf
;
1778 unsigned int data_end
;
1779 unsigned int old_data
;
1780 unsigned int old_size
;
1783 slot_orig
= path
->slots
[0];
1784 leaf_buf
= path
->nodes
[0];
1785 leaf
= btrfs_buffer_leaf(leaf_buf
);
1787 nritems
= btrfs_header_nritems(&leaf
->header
);
1788 data_end
= leaf_data_end(root
, leaf
);
1790 if (btrfs_leaf_free_space(root
, leaf
) < data_size
)
1792 slot
= path
->slots
[0];
1793 old_data
= btrfs_item_end(leaf
->items
+ slot
);
1796 BUG_ON(slot
>= nritems
);
1799 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1801 /* first correct the data pointers */
1802 for (i
= slot
; i
< nritems
; i
++) {
1803 u32 ioff
= btrfs_item_offset(leaf
->items
+ i
);
1804 btrfs_set_item_offset(leaf
->items
+ i
,
1807 /* shift the data */
1808 btrfs_memmove(root
, leaf
, btrfs_leaf_data(leaf
) +
1809 data_end
- data_size
, btrfs_leaf_data(leaf
) +
1810 data_end
, old_data
- data_end
);
1811 data_end
= old_data
;
1812 old_size
= btrfs_item_size(leaf
->items
+ slot
);
1813 btrfs_set_item_size(leaf
->items
+ slot
, old_size
+ data_size
);
1814 btrfs_mark_buffer_dirty(leaf_buf
);
1817 if (btrfs_leaf_free_space(root
, leaf
) < 0)
1819 check_leaf(root
, path
, 0);
1824 * Given a key and some data, insert an item into the tree.
1825 * This does all the path init required, making room in the tree if needed.
1827 int btrfs_insert_empty_item(struct btrfs_trans_handle
*trans
, struct btrfs_root
1828 *root
, struct btrfs_path
*path
, struct btrfs_key
1829 *cpu_key
, u32 data_size
)
1834 struct btrfs_leaf
*leaf
;
1835 struct buffer_head
*leaf_buf
;
1837 unsigned int data_end
;
1838 struct btrfs_disk_key disk_key
;
1840 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
);
1842 /* create a root if there isn't one */
1845 ret
= btrfs_search_slot(trans
, root
, cpu_key
, path
, data_size
, 1);
1852 slot_orig
= path
->slots
[0];
1853 leaf_buf
= path
->nodes
[0];
1854 leaf
= btrfs_buffer_leaf(leaf_buf
);
1856 nritems
= btrfs_header_nritems(&leaf
->header
);
1857 data_end
= leaf_data_end(root
, leaf
);
1859 if (btrfs_leaf_free_space(root
, leaf
) <
1860 sizeof(struct btrfs_item
) + data_size
) {
1863 slot
= path
->slots
[0];
1865 if (slot
!= nritems
) {
1867 unsigned int old_data
= btrfs_item_end(leaf
->items
+ slot
);
1870 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1872 /* first correct the data pointers */
1873 for (i
= slot
; i
< nritems
; i
++) {
1874 u32 ioff
= btrfs_item_offset(leaf
->items
+ i
);
1875 btrfs_set_item_offset(leaf
->items
+ i
,
1879 /* shift the items */
1880 btrfs_memmove(root
, leaf
, leaf
->items
+ slot
+ 1,
1882 (nritems
- slot
) * sizeof(struct btrfs_item
));
1884 /* shift the data */
1885 btrfs_memmove(root
, leaf
, btrfs_leaf_data(leaf
) +
1886 data_end
- data_size
, btrfs_leaf_data(leaf
) +
1887 data_end
, old_data
- data_end
);
1888 data_end
= old_data
;
1890 /* setup the item for the new data */
1891 btrfs_memcpy(root
, leaf
, &leaf
->items
[slot
].key
, &disk_key
,
1892 sizeof(struct btrfs_disk_key
));
1893 btrfs_set_item_offset(leaf
->items
+ slot
, data_end
- data_size
);
1894 btrfs_set_item_size(leaf
->items
+ slot
, data_size
);
1895 btrfs_set_header_nritems(&leaf
->header
, nritems
+ 1);
1896 btrfs_mark_buffer_dirty(leaf_buf
);
1900 ret
= fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
1902 if (btrfs_leaf_free_space(root
, leaf
) < 0)
1904 check_leaf(root
, path
, 0);
1910 * Given a key and some data, insert an item into the tree.
1911 * This does all the path init required, making room in the tree if needed.
1913 int btrfs_insert_item(struct btrfs_trans_handle
*trans
, struct btrfs_root
1914 *root
, struct btrfs_key
*cpu_key
, void *data
, u32
1918 struct btrfs_path
*path
;
1921 path
= btrfs_alloc_path();
1923 ret
= btrfs_insert_empty_item(trans
, root
, path
, cpu_key
, data_size
);
1925 ptr
= btrfs_item_ptr(btrfs_buffer_leaf(path
->nodes
[0]),
1926 path
->slots
[0], u8
);
1927 btrfs_memcpy(root
, path
->nodes
[0]->b_data
,
1928 ptr
, data
, data_size
);
1929 btrfs_mark_buffer_dirty(path
->nodes
[0]);
1931 btrfs_free_path(path
);
1936 * delete the pointer from a given node.
1938 * If the delete empties a node, the node is removed from the tree,
1939 * continuing all the way the root if required. The root is converted into
1940 * a leaf if all the nodes are emptied.
1942 static int del_ptr(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
1943 struct btrfs_path
*path
, int level
, int slot
)
1945 struct btrfs_node
*node
;
1946 struct buffer_head
*parent
= path
->nodes
[level
];
1951 node
= btrfs_buffer_node(parent
);
1952 nritems
= btrfs_header_nritems(&node
->header
);
1953 if (slot
!= nritems
-1) {
1954 btrfs_memmove(root
, node
, node
->ptrs
+ slot
,
1955 node
->ptrs
+ slot
+ 1,
1956 sizeof(struct btrfs_key_ptr
) *
1957 (nritems
- slot
- 1));
1960 btrfs_set_header_nritems(&node
->header
, nritems
);
1961 if (nritems
== 0 && parent
== root
->node
) {
1962 struct btrfs_header
*header
= btrfs_buffer_header(root
->node
);
1963 BUG_ON(btrfs_header_level(header
) != 1);
1964 /* just turn the root into a leaf and break */
1965 btrfs_set_header_level(header
, 0);
1966 } else if (slot
== 0) {
1967 wret
= fixup_low_keys(trans
, root
, path
, &node
->ptrs
[0].key
,
1972 btrfs_mark_buffer_dirty(parent
);
1977 * delete the item at the leaf level in path. If that empties
1978 * the leaf, remove it from the tree
1980 int btrfs_del_item(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
1981 struct btrfs_path
*path
)
1984 struct btrfs_leaf
*leaf
;
1985 struct buffer_head
*leaf_buf
;
1992 leaf_buf
= path
->nodes
[0];
1993 leaf
= btrfs_buffer_leaf(leaf_buf
);
1994 slot
= path
->slots
[0];
1995 doff
= btrfs_item_offset(leaf
->items
+ slot
);
1996 dsize
= btrfs_item_size(leaf
->items
+ slot
);
1997 nritems
= btrfs_header_nritems(&leaf
->header
);
1999 if (slot
!= nritems
- 1) {
2001 int data_end
= leaf_data_end(root
, leaf
);
2002 btrfs_memmove(root
, leaf
, btrfs_leaf_data(leaf
) +
2004 btrfs_leaf_data(leaf
) + data_end
,
2006 for (i
= slot
+ 1; i
< nritems
; i
++) {
2007 u32 ioff
= btrfs_item_offset(leaf
->items
+ i
);
2008 btrfs_set_item_offset(leaf
->items
+ i
, ioff
+ dsize
);
2010 btrfs_memmove(root
, leaf
, leaf
->items
+ slot
,
2011 leaf
->items
+ slot
+ 1,
2012 sizeof(struct btrfs_item
) *
2013 (nritems
- slot
- 1));
2015 btrfs_set_header_nritems(&leaf
->header
, nritems
- 1);
2017 /* delete the leaf if we've emptied it */
2019 if (leaf_buf
== root
->node
) {
2020 btrfs_set_header_level(&leaf
->header
, 0);
2022 clean_tree_block(trans
, root
, leaf_buf
);
2023 wait_on_buffer(leaf_buf
);
2024 wret
= del_ptr(trans
, root
, path
, 1, path
->slots
[1]);
2027 wret
= btrfs_free_extent(trans
, root
,
2028 bh_blocknr(leaf_buf
), 1, 1);
2033 int used
= leaf_space_used(leaf
, 0, nritems
);
2035 wret
= fixup_low_keys(trans
, root
, path
,
2036 &leaf
->items
[0].key
, 1);
2041 /* delete the leaf if it is mostly empty */
2042 if (used
< BTRFS_LEAF_DATA_SIZE(root
) / 3) {
2043 /* push_leaf_left fixes the path.
2044 * make sure the path still points to our leaf
2045 * for possible call to del_ptr below
2047 slot
= path
->slots
[1];
2049 wret
= push_leaf_left(trans
, root
, path
, 1);
2050 if (wret
< 0 && wret
!= -ENOSPC
)
2052 if (path
->nodes
[0] == leaf_buf
&&
2053 btrfs_header_nritems(&leaf
->header
)) {
2054 wret
= push_leaf_right(trans
, root
, path
, 1);
2055 if (wret
< 0 && wret
!= -ENOSPC
)
2058 if (btrfs_header_nritems(&leaf
->header
) == 0) {
2059 u64 blocknr
= bh_blocknr(leaf_buf
);
2060 clean_tree_block(trans
, root
, leaf_buf
);
2061 wait_on_buffer(leaf_buf
);
2062 wret
= del_ptr(trans
, root
, path
, 1, slot
);
2065 btrfs_block_release(root
, leaf_buf
);
2066 wret
= btrfs_free_extent(trans
, root
, blocknr
,
2071 btrfs_mark_buffer_dirty(leaf_buf
);
2072 btrfs_block_release(root
, leaf_buf
);
2075 btrfs_mark_buffer_dirty(leaf_buf
);
2082 * walk up the tree as far as required to find the next leaf.
2083 * returns 0 if it found something or 1 if there are no greater leaves.
2084 * returns < 0 on io errors.
2086 int btrfs_next_leaf(struct btrfs_root
*root
, struct btrfs_path
*path
)
2091 struct buffer_head
*c
;
2092 struct btrfs_node
*c_node
;
2093 struct buffer_head
*next
= NULL
;
2095 while(level
< BTRFS_MAX_LEVEL
) {
2096 if (!path
->nodes
[level
])
2098 slot
= path
->slots
[level
] + 1;
2099 c
= path
->nodes
[level
];
2100 c_node
= btrfs_buffer_node(c
);
2101 if (slot
>= btrfs_header_nritems(&c_node
->header
)) {
2105 blocknr
= btrfs_node_blockptr(c_node
, slot
);
2107 btrfs_block_release(root
, next
);
2109 reada_for_search(root
, path
, level
, slot
);
2110 next
= read_tree_block(root
, blocknr
);
2113 path
->slots
[level
] = slot
;
2116 c
= path
->nodes
[level
];
2117 btrfs_block_release(root
, c
);
2118 path
->nodes
[level
] = next
;
2119 path
->slots
[level
] = 0;
2123 reada_for_search(root
, path
, level
, slot
);
2124 next
= read_tree_block(root
,
2125 btrfs_node_blockptr(btrfs_buffer_node(next
), 0));
This page took 0.075099 seconds and 6 git commands to generate.