Btrfs: Add an extent buffer LRU to reduce radix tree hits
[deliverable/linux.git] / fs / btrfs / ctree.c
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19 #include <linux/highmem.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24
/*
 * Prototypes for file-local helpers that are referenced before their
 * definitions.
 */
static int split_node(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_key *ins_key,
		      struct btrfs_path *path, int data_size);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  struct extent_buffer *dst,
			  struct extent_buffer *src);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans,
		   struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);
39
40 inline void btrfs_init_path(struct btrfs_path *p)
41 {
42 memset(p, 0, sizeof(*p));
43 }
44
45 struct btrfs_path *btrfs_alloc_path(void)
46 {
47 struct btrfs_path *path;
48 path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49 if (path) {
50 btrfs_init_path(path);
51 path->reada = 1;
52 }
53 return path;
54 }
55
56 void btrfs_free_path(struct btrfs_path *p)
57 {
58 btrfs_release_path(NULL, p);
59 kmem_cache_free(btrfs_path_cachep, p);
60 }
61
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 {
64 int i;
65 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66 if (!p->nodes[i])
67 break;
68 free_extent_buffer(p->nodes[i]);
69 }
70 memset(p, 0, sizeof(*p));
71 }
72
73 static int __btrfs_cow_block(struct btrfs_trans_handle *trans,
74 struct btrfs_root *root,
75 struct extent_buffer *buf,
76 struct extent_buffer *parent, int parent_slot,
77 struct extent_buffer **cow_ret,
78 u64 search_start, u64 empty_size)
79 {
80 struct extent_buffer *cow;
81 int ret = 0;
82 int different_trans = 0;
83
84 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
85
86 cow = btrfs_alloc_free_block(trans, root, buf->len,
87 search_start, empty_size);
88 if (IS_ERR(cow))
89 return PTR_ERR(cow);
90
91 copy_extent_buffer(cow, buf, 0, 0, cow->len);
92 btrfs_set_header_bytenr(cow, cow->start);
93 btrfs_set_header_generation(cow, trans->transid);
94 btrfs_set_header_owner(cow, root->root_key.objectid);
95
96 WARN_ON(btrfs_header_generation(buf) > trans->transid);
97 if (btrfs_header_generation(buf) != trans->transid) {
98 different_trans = 1;
99 ret = btrfs_inc_ref(trans, root, buf);
100 if (ret)
101 return ret;
102 } else {
103 clean_tree_block(trans, root, buf);
104 }
105
106 if (buf == root->node) {
107 root->node = cow;
108 extent_buffer_get(cow);
109 if (buf != root->commit_root) {
110 btrfs_free_extent(trans, root, buf->start,
111 buf->len, 1);
112 }
113 free_extent_buffer(buf);
114 } else {
115 btrfs_set_node_blockptr(parent, parent_slot,
116 cow->start);
117 btrfs_mark_buffer_dirty(parent);
118 WARN_ON(btrfs_header_generation(parent) != trans->transid);
119 btrfs_free_extent(trans, root, buf->start, buf->len, 1);
120 }
121 free_extent_buffer(buf);
122 btrfs_mark_buffer_dirty(cow);
123 *cow_ret = cow;
124 return 0;
125 }
126
127 int btrfs_cow_block(struct btrfs_trans_handle *trans,
128 struct btrfs_root *root, struct extent_buffer *buf,
129 struct extent_buffer *parent, int parent_slot,
130 struct extent_buffer **cow_ret)
131 {
132 u64 search_start;
133 int ret;
134 if (trans->transaction != root->fs_info->running_transaction) {
135 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
136 root->fs_info->running_transaction->transid);
137 WARN_ON(1);
138 }
139 if (trans->transid != root->fs_info->generation) {
140 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
141 root->fs_info->generation);
142 WARN_ON(1);
143 }
144 if (btrfs_header_generation(buf) == trans->transid) {
145 *cow_ret = buf;
146 return 0;
147 }
148
149 search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
150 ret = __btrfs_cow_block(trans, root, buf, parent,
151 parent_slot, cow_ret, search_start, 0);
152 return ret;
153 }
154
155 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
156 {
157 if (blocknr < other && other - (blocknr + blocksize) < 32768)
158 return 1;
159 if (blocknr > other && blocknr - (other + blocksize) < 32768)
160 return 1;
161 return 0;
162 }
163
164 static int should_defrag_leaf(struct extent_buffer *leaf)
165 {
166 struct btrfs_key key;
167 u32 nritems;
168
169 if (btrfs_buffer_defrag(leaf))
170 return 1;
171
172 nritems = btrfs_header_nritems(leaf);
173 if (nritems == 0)
174 return 0;
175
176 btrfs_item_key_to_cpu(leaf, &key, 0);
177 if (key.type == BTRFS_DIR_ITEM_KEY)
178 return 1;
179
180
181 btrfs_item_key_to_cpu(leaf, &key, nritems - 1);
182 if (key.type == BTRFS_DIR_ITEM_KEY)
183 return 1;
184 if (nritems > 4) {
185 btrfs_item_key_to_cpu(leaf, &key, nritems / 2);
186 if (key.type == BTRFS_DIR_ITEM_KEY)
187 return 1;
188 }
189 return 0;
190 }
191
192 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
193 struct btrfs_root *root, struct extent_buffer *parent,
194 int cache_only, u64 *last_ret)
195 {
196 struct extent_buffer *cur;
197 struct extent_buffer *tmp;
198 u64 blocknr;
199 u64 search_start = *last_ret;
200 u64 last_block = 0;
201 u64 other;
202 u32 parent_nritems;
203 int start_slot;
204 int end_slot;
205 int i;
206 int err = 0;
207 int parent_level;
208 int uptodate;
209 u32 blocksize;
210
211 if (trans->transaction != root->fs_info->running_transaction) {
212 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
213 root->fs_info->running_transaction->transid);
214 WARN_ON(1);
215 }
216 if (trans->transid != root->fs_info->generation) {
217 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
218 root->fs_info->generation);
219 WARN_ON(1);
220 }
221 if (btrfs_buffer_defrag_done(parent))
222 return 0;
223
224 parent_nritems = btrfs_header_nritems(parent);
225 parent_level = btrfs_header_level(parent);
226 blocksize = btrfs_level_size(root, parent_level - 1);
227
228 start_slot = 0;
229 end_slot = parent_nritems;
230
231 if (parent_nritems == 1)
232 return 0;
233
234 for (i = start_slot; i < end_slot; i++) {
235 int close = 1;
236 blocknr = btrfs_node_blockptr(parent, i);
237 if (last_block == 0)
238 last_block = blocknr;
239 if (i > 0) {
240 other = btrfs_node_blockptr(parent, i - 1);
241 close = close_blocks(blocknr, other, blocksize);
242 }
243 if (close && i < end_slot - 1) {
244 other = btrfs_node_blockptr(parent, i + 1);
245 close = close_blocks(blocknr, other, blocksize);
246 }
247 if (close) {
248 last_block = blocknr;
249 continue;
250 }
251
252 cur = btrfs_find_tree_block(root, blocknr, blocksize);
253 if (cur)
254 uptodate = btrfs_buffer_uptodate(cur);
255 else
256 uptodate = 0;
257 if (!cur || !uptodate ||
258 (parent_level != 1 && !btrfs_buffer_defrag(cur)) ||
259 (parent_level == 1 && !should_defrag_leaf(cur))) {
260 if (cache_only) {
261 free_extent_buffer(cur);
262 continue;
263 }
264 if (!cur) {
265 cur = read_tree_block(root, blocknr,
266 blocksize);
267 } else if (!uptodate) {
268 btrfs_read_buffer(cur);
269 }
270 }
271 if (search_start == 0)
272 search_start = last_block;
273
274 err = __btrfs_cow_block(trans, root, cur, parent, i,
275 &tmp, search_start,
276 min(16 * blocksize,
277 (end_slot - i) * blocksize));
278 if (err) {
279 free_extent_buffer(cur);
280 break;
281 }
282 search_start = tmp->start;
283 *last_ret = search_start;
284 if (parent_level == 1)
285 btrfs_clear_buffer_defrag(tmp);
286 btrfs_set_buffer_defrag_done(tmp);
287 free_extent_buffer(tmp);
288 }
289 return err;
290 }
291
292 /*
293 * The leaf data grows from end-to-front in the node.
294 * this returns the address of the start of the last item,
295 * which is the stop of the leaf data stack
296 */
297 static inline unsigned int leaf_data_end(struct btrfs_root *root,
298 struct extent_buffer *leaf)
299 {
300 u32 nr = btrfs_header_nritems(leaf);
301 if (nr == 0)
302 return BTRFS_LEAF_DATA_SIZE(root);
303 return btrfs_item_offset_nr(leaf, nr - 1);
304 }
305
306 /*
307 * compare two keys in a memcmp fashion
308 */
309 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
310 {
311 struct btrfs_key k1;
312
313 btrfs_disk_key_to_cpu(&k1, disk);
314
315 if (k1.objectid > k2->objectid)
316 return 1;
317 if (k1.objectid < k2->objectid)
318 return -1;
319 if (k1.type > k2->type)
320 return 1;
321 if (k1.type < k2->type)
322 return -1;
323 if (k1.offset > k2->offset)
324 return 1;
325 if (k1.offset < k2->offset)
326 return -1;
327 return 0;
328 }
329
330 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
331 int level)
332 {
333 struct extent_buffer *parent = NULL;
334 struct extent_buffer *node = path->nodes[level];
335 struct btrfs_disk_key parent_key;
336 struct btrfs_disk_key node_key;
337 int parent_slot;
338 int slot;
339 struct btrfs_key cpukey;
340 u32 nritems = btrfs_header_nritems(node);
341
342 if (path->nodes[level + 1])
343 parent = path->nodes[level + 1];
344
345 slot = path->slots[level];
346 BUG_ON(nritems == 0);
347 if (parent) {
348 parent_slot = path->slots[level + 1];
349 btrfs_node_key(parent, &parent_key, parent_slot);
350 btrfs_node_key(node, &node_key, 0);
351 BUG_ON(memcmp(&parent_key, &node_key,
352 sizeof(struct btrfs_disk_key)));
353 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
354 btrfs_header_bytenr(node));
355 }
356 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
357 if (slot != 0) {
358 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
359 btrfs_node_key(node, &node_key, slot);
360 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
361 }
362 if (slot < nritems - 1) {
363 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
364 btrfs_node_key(node, &node_key, slot);
365 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
366 }
367 return 0;
368 }
369
370 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
371 int level)
372 {
373 struct extent_buffer *leaf = path->nodes[level];
374 struct extent_buffer *parent = NULL;
375 int parent_slot;
376 struct btrfs_key cpukey;
377 struct btrfs_disk_key parent_key;
378 struct btrfs_disk_key leaf_key;
379 int slot = path->slots[0];
380
381 u32 nritems = btrfs_header_nritems(leaf);
382
383 if (path->nodes[level + 1])
384 parent = path->nodes[level + 1];
385
386 if (nritems == 0)
387 return 0;
388
389 if (parent) {
390 parent_slot = path->slots[level + 1];
391 btrfs_node_key(parent, &parent_key, parent_slot);
392 btrfs_item_key(leaf, &leaf_key, 0);
393
394 BUG_ON(memcmp(&parent_key, &leaf_key,
395 sizeof(struct btrfs_disk_key)));
396 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
397 btrfs_header_bytenr(leaf));
398 }
399 #if 0
400 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
401 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
402 btrfs_item_key(leaf, &leaf_key, i);
403 if (comp_keys(&leaf_key, &cpukey) >= 0) {
404 btrfs_print_leaf(root, leaf);
405 printk("slot %d offset bad key\n", i);
406 BUG_ON(1);
407 }
408 if (btrfs_item_offset_nr(leaf, i) !=
409 btrfs_item_end_nr(leaf, i + 1)) {
410 btrfs_print_leaf(root, leaf);
411 printk("slot %d offset bad\n", i);
412 BUG_ON(1);
413 }
414 if (i == 0) {
415 if (btrfs_item_offset_nr(leaf, i) +
416 btrfs_item_size_nr(leaf, i) !=
417 BTRFS_LEAF_DATA_SIZE(root)) {
418 btrfs_print_leaf(root, leaf);
419 printk("slot %d first offset bad\n", i);
420 BUG_ON(1);
421 }
422 }
423 }
424 if (nritems > 0) {
425 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
426 btrfs_print_leaf(root, leaf);
427 printk("slot %d bad size \n", nritems - 1);
428 BUG_ON(1);
429 }
430 }
431 #endif
432 if (slot != 0 && slot < nritems - 1) {
433 btrfs_item_key(leaf, &leaf_key, slot);
434 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
435 if (comp_keys(&leaf_key, &cpukey) <= 0) {
436 btrfs_print_leaf(root, leaf);
437 printk("slot %d offset bad key\n", slot);
438 BUG_ON(1);
439 }
440 if (btrfs_item_offset_nr(leaf, slot - 1) !=
441 btrfs_item_end_nr(leaf, slot)) {
442 btrfs_print_leaf(root, leaf);
443 printk("slot %d offset bad\n", slot);
444 BUG_ON(1);
445 }
446 }
447 if (slot < nritems - 1) {
448 btrfs_item_key(leaf, &leaf_key, slot);
449 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
450 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
451 if (btrfs_item_offset_nr(leaf, slot) !=
452 btrfs_item_end_nr(leaf, slot + 1)) {
453 btrfs_print_leaf(root, leaf);
454 printk("slot %d offset bad\n", slot);
455 BUG_ON(1);
456 }
457 }
458 BUG_ON(btrfs_item_offset_nr(leaf, 0) +
459 btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
460 return 0;
461 }
462
/*
 * Run the sanity checks for the block at path->nodes[level]:
 * check_leaf() for level 0, check_node() for every other level.
 * The fsid comparison is compiled out.
 *
 * Returns whatever the selected checker returns.
 */
static int check_block(struct btrfs_root *root, struct btrfs_path *path,
		       int level)
{
#if 0
	struct extent_buffer *buf = path->nodes[level];

	if (memcmp_extent_buffer(buf, root->fs_info->fsid,
				 (unsigned long)btrfs_header_fsid(buf),
				 BTRFS_FSID_SIZE)) {
		printk("warning bad block %Lu\n", buf->start);
		return 1;
	}
#endif
	if (level != 0)
		return check_node(root, path, level);
	return check_leaf(root, path, level);
}
480
481 /*
482 * search for key in the extent_buffer. The items start at offset p,
483 * and they are item_size apart. There are 'max' items in p.
484 *
485 * the slot in the array is returned via slot, and it points to
486 * the place where you would insert key if it is not found in
487 * the array.
488 *
489 * slot may point to max if the key is bigger than all of the keys
490 */
491 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
492 int item_size, struct btrfs_key *key,
493 int max, int *slot)
494 {
495 int low = 0;
496 int high = max;
497 int mid;
498 int ret;
499 struct btrfs_disk_key *tmp = NULL;
500 struct btrfs_disk_key unaligned;
501 unsigned long offset;
502 char *map_token = NULL;
503 char *kaddr = NULL;
504 unsigned long map_start = 0;
505 unsigned long map_len = 0;
506 int err;
507
508 while(low < high) {
509 mid = (low + high) / 2;
510 offset = p + mid * item_size;
511
512 if (!map_token || offset < map_start ||
513 (offset + sizeof(struct btrfs_disk_key)) >
514 map_start + map_len) {
515 if (map_token) {
516 unmap_extent_buffer(eb, map_token, KM_USER0);
517 map_token = NULL;
518 }
519 err = map_extent_buffer(eb, offset,
520 sizeof(struct btrfs_disk_key),
521 &map_token, &kaddr,
522 &map_start, &map_len, KM_USER0);
523
524 if (!err) {
525 tmp = (struct btrfs_disk_key *)(kaddr + offset -
526 map_start);
527 } else {
528 read_extent_buffer(eb, &unaligned,
529 offset, sizeof(unaligned));
530 tmp = &unaligned;
531 }
532
533 } else {
534 tmp = (struct btrfs_disk_key *)(kaddr + offset -
535 map_start);
536 }
537 ret = comp_keys(tmp, key);
538
539 if (ret < 0)
540 low = mid + 1;
541 else if (ret > 0)
542 high = mid;
543 else {
544 *slot = mid;
545 if (map_token)
546 unmap_extent_buffer(eb, map_token, KM_USER0);
547 return 0;
548 }
549 }
550 *slot = low;
551 if (map_token)
552 unmap_extent_buffer(eb, map_token, KM_USER0);
553 return 1;
554 }
555
556 /*
557 * simple bin_search frontend that does the right thing for
558 * leaves vs nodes
559 */
560 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
561 int level, int *slot)
562 {
563 if (level == 0) {
564 return generic_bin_search(eb,
565 offsetof(struct btrfs_leaf, items),
566 sizeof(struct btrfs_item),
567 key, btrfs_header_nritems(eb),
568 slot);
569 } else {
570 return generic_bin_search(eb,
571 offsetof(struct btrfs_node, ptrs),
572 sizeof(struct btrfs_key_ptr),
573 key, btrfs_header_nritems(eb),
574 slot);
575 }
576 return -1;
577 }
578
/*
 * Read the child that @parent points to at @slot.
 *
 * Returns NULL when @slot is negative or not below the parent's item
 * count; otherwise returns the result of read_tree_block() for the
 * child's byte number, sized for one level below @parent.
 */
static struct extent_buffer *read_node_slot(struct btrfs_root *root,
				   struct extent_buffer *parent, int slot)
{
	int child_level;

	if (slot < 0 || slot >= btrfs_header_nritems(parent))
		return NULL;

	child_level = btrfs_header_level(parent) - 1;
	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
			       btrfs_level_size(root, child_level));
}
589
590 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
591 *root, struct btrfs_path *path, int level)
592 {
593 struct extent_buffer *right = NULL;
594 struct extent_buffer *mid;
595 struct extent_buffer *left = NULL;
596 struct extent_buffer *parent = NULL;
597 int ret = 0;
598 int wret;
599 int pslot;
600 int orig_slot = path->slots[level];
601 int err_on_enospc = 0;
602 u64 orig_ptr;
603
604 if (level == 0)
605 return 0;
606
607 mid = path->nodes[level];
608 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
609
610 if (level < BTRFS_MAX_LEVEL - 1)
611 parent = path->nodes[level + 1];
612 pslot = path->slots[level + 1];
613
614 /*
615 * deal with the case where there is only one pointer in the root
616 * by promoting the node below to a root
617 */
618 if (!parent) {
619 struct extent_buffer *child;
620
621 if (btrfs_header_nritems(mid) != 1)
622 return 0;
623
624 /* promote the child to a root */
625 child = read_node_slot(root, mid, 0);
626 BUG_ON(!child);
627 root->node = child;
628 path->nodes[level] = NULL;
629 clean_tree_block(trans, root, mid);
630 wait_on_tree_block_writeback(root, mid);
631 /* once for the path */
632 free_extent_buffer(mid);
633 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1);
634 /* once for the root ptr */
635 free_extent_buffer(mid);
636 return ret;
637 }
638 if (btrfs_header_nritems(mid) >
639 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
640 return 0;
641
642 if (btrfs_header_nritems(mid) < 2)
643 err_on_enospc = 1;
644
645 left = read_node_slot(root, parent, pslot - 1);
646 if (left) {
647 wret = btrfs_cow_block(trans, root, left,
648 parent, pslot - 1, &left);
649 if (wret) {
650 ret = wret;
651 goto enospc;
652 }
653 }
654 right = read_node_slot(root, parent, pslot + 1);
655 if (right) {
656 wret = btrfs_cow_block(trans, root, right,
657 parent, pslot + 1, &right);
658 if (wret) {
659 ret = wret;
660 goto enospc;
661 }
662 }
663
664 /* first, try to make some room in the middle buffer */
665 if (left) {
666 orig_slot += btrfs_header_nritems(left);
667 wret = push_node_left(trans, root, left, mid);
668 if (wret < 0)
669 ret = wret;
670 if (btrfs_header_nritems(mid) < 2)
671 err_on_enospc = 1;
672 }
673
674 /*
675 * then try to empty the right most buffer into the middle
676 */
677 if (right) {
678 wret = push_node_left(trans, root, mid, right);
679 if (wret < 0 && wret != -ENOSPC)
680 ret = wret;
681 if (btrfs_header_nritems(right) == 0) {
682 u64 bytenr = right->start;
683 u32 blocksize = right->len;
684
685 clean_tree_block(trans, root, right);
686 wait_on_tree_block_writeback(root, right);
687 free_extent_buffer(right);
688 right = NULL;
689 wret = del_ptr(trans, root, path, level + 1, pslot +
690 1);
691 if (wret)
692 ret = wret;
693 wret = btrfs_free_extent(trans, root, bytenr,
694 blocksize, 1);
695 if (wret)
696 ret = wret;
697 } else {
698 struct btrfs_disk_key right_key;
699 btrfs_node_key(right, &right_key, 0);
700 btrfs_set_node_key(parent, &right_key, pslot + 1);
701 btrfs_mark_buffer_dirty(parent);
702 }
703 }
704 if (btrfs_header_nritems(mid) == 1) {
705 /*
706 * we're not allowed to leave a node with one item in the
707 * tree during a delete. A deletion from lower in the tree
708 * could try to delete the only pointer in this node.
709 * So, pull some keys from the left.
710 * There has to be a left pointer at this point because
711 * otherwise we would have pulled some pointers from the
712 * right
713 */
714 BUG_ON(!left);
715 wret = balance_node_right(trans, root, mid, left);
716 if (wret < 0) {
717 ret = wret;
718 goto enospc;
719 }
720 BUG_ON(wret == 1);
721 }
722 if (btrfs_header_nritems(mid) == 0) {
723 /* we've managed to empty the middle node, drop it */
724 u64 bytenr = mid->start;
725 u32 blocksize = mid->len;
726 clean_tree_block(trans, root, mid);
727 wait_on_tree_block_writeback(root, mid);
728 free_extent_buffer(mid);
729 mid = NULL;
730 wret = del_ptr(trans, root, path, level + 1, pslot);
731 if (wret)
732 ret = wret;
733 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1);
734 if (wret)
735 ret = wret;
736 } else {
737 /* update the parent key to reflect our changes */
738 struct btrfs_disk_key mid_key;
739 btrfs_node_key(mid, &mid_key, 0);
740 btrfs_set_node_key(parent, &mid_key, pslot);
741 btrfs_mark_buffer_dirty(parent);
742 }
743
744 /* update the path */
745 if (left) {
746 if (btrfs_header_nritems(left) > orig_slot) {
747 extent_buffer_get(left);
748 path->nodes[level] = left;
749 path->slots[level + 1] -= 1;
750 path->slots[level] = orig_slot;
751 if (mid)
752 free_extent_buffer(mid);
753 } else {
754 orig_slot -= btrfs_header_nritems(left);
755 path->slots[level] = orig_slot;
756 }
757 }
758 /* double check we haven't messed things up */
759 check_block(root, path, level);
760 if (orig_ptr !=
761 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
762 BUG();
763 enospc:
764 if (right)
765 free_extent_buffer(right);
766 if (left)
767 free_extent_buffer(left);
768 return ret;
769 }
770
771 /* returns zero if the push worked, non-zero otherwise */
772 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
773 struct btrfs_root *root,
774 struct btrfs_path *path, int level)
775 {
776 struct extent_buffer *right = NULL;
777 struct extent_buffer *mid;
778 struct extent_buffer *left = NULL;
779 struct extent_buffer *parent = NULL;
780 int ret = 0;
781 int wret;
782 int pslot;
783 int orig_slot = path->slots[level];
784 u64 orig_ptr;
785
786 if (level == 0)
787 return 1;
788
789 mid = path->nodes[level];
790 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
791
792 if (level < BTRFS_MAX_LEVEL - 1)
793 parent = path->nodes[level + 1];
794 pslot = path->slots[level + 1];
795
796 if (!parent)
797 return 1;
798
799 left = read_node_slot(root, parent, pslot - 1);
800
801 /* first, try to make some room in the middle buffer */
802 if (left) {
803 u32 left_nr;
804 left_nr = btrfs_header_nritems(left);
805 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
806 wret = 1;
807 } else {
808 ret = btrfs_cow_block(trans, root, left, parent,
809 pslot - 1, &left);
810 if (ret)
811 wret = 1;
812 else {
813 wret = push_node_left(trans, root,
814 left, mid);
815 }
816 }
817 if (wret < 0)
818 ret = wret;
819 if (wret == 0) {
820 struct btrfs_disk_key disk_key;
821 orig_slot += left_nr;
822 btrfs_node_key(mid, &disk_key, 0);
823 btrfs_set_node_key(parent, &disk_key, pslot);
824 btrfs_mark_buffer_dirty(parent);
825 if (btrfs_header_nritems(left) > orig_slot) {
826 path->nodes[level] = left;
827 path->slots[level + 1] -= 1;
828 path->slots[level] = orig_slot;
829 free_extent_buffer(mid);
830 } else {
831 orig_slot -=
832 btrfs_header_nritems(left);
833 path->slots[level] = orig_slot;
834 free_extent_buffer(left);
835 }
836 return 0;
837 }
838 free_extent_buffer(left);
839 }
840 right= read_node_slot(root, parent, pslot + 1);
841
842 /*
843 * then try to empty the right most buffer into the middle
844 */
845 if (right) {
846 u32 right_nr;
847 right_nr = btrfs_header_nritems(right);
848 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
849 wret = 1;
850 } else {
851 ret = btrfs_cow_block(trans, root, right,
852 parent, pslot + 1,
853 &right);
854 if (ret)
855 wret = 1;
856 else {
857 wret = balance_node_right(trans, root,
858 right, mid);
859 }
860 }
861 if (wret < 0)
862 ret = wret;
863 if (wret == 0) {
864 struct btrfs_disk_key disk_key;
865
866 btrfs_node_key(right, &disk_key, 0);
867 btrfs_set_node_key(parent, &disk_key, pslot + 1);
868 btrfs_mark_buffer_dirty(parent);
869
870 if (btrfs_header_nritems(mid) <= orig_slot) {
871 path->nodes[level] = right;
872 path->slots[level + 1] += 1;
873 path->slots[level] = orig_slot -
874 btrfs_header_nritems(mid);
875 free_extent_buffer(mid);
876 } else {
877 free_extent_buffer(right);
878 }
879 return 0;
880 }
881 free_extent_buffer(right);
882 }
883 return 1;
884 }
885
886 /*
887 * readahead one full node of leaves
888 */
889 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
890 int level, int slot)
891 {
892 struct extent_buffer *node;
893 u32 nritems;
894 u64 search;
895 u64 lowest_read;
896 u64 highest_read;
897 u64 nread = 0;
898 int direction = path->reada;
899 struct extent_buffer *eb;
900 u32 nr;
901 u32 blocksize;
902 u32 nscan = 0;
903
904 if (level == 0)
905 return;
906
907 if (!path->nodes[level])
908 return;
909
910 node = path->nodes[level];
911 search = btrfs_node_blockptr(node, slot);
912 blocksize = btrfs_level_size(root, level - 1);
913 eb = btrfs_find_tree_block(root, search, blocksize);
914 if (eb) {
915 free_extent_buffer(eb);
916 return;
917 }
918
919 highest_read = search;
920 lowest_read = search;
921
922 nritems = btrfs_header_nritems(node);
923 nr = slot;
924 while(1) {
925 if (direction < 0) {
926 if (nr == 0)
927 break;
928 nr--;
929 } else if (direction > 0) {
930 nr++;
931 if (nr >= nritems)
932 break;
933 }
934 search = btrfs_node_blockptr(node, nr);
935 if ((search >= lowest_read && search <= highest_read) ||
936 (search < lowest_read && lowest_read - search <= 32768) ||
937 (search > highest_read && search - highest_read <= 32768)) {
938 readahead_tree_block(root, search, blocksize);
939 nread += blocksize;
940 }
941 nscan++;
942 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
943 break;
944 if(nread > (1024 * 1024) || nscan > 128)
945 break;
946
947 if (search < lowest_read)
948 lowest_read = search;
949 if (search > highest_read)
950 highest_read = search;
951 }
952 }
953 /*
954 * look for key in the tree. path is filled in with nodes along the way
955 * if key is found, we return zero and you can find the item in the leaf
956 * level of the path (level 0)
957 *
958 * If the key isn't found, the path points to the slot where it should
959 * be inserted, and 1 is returned. If there are other errors during the
960 * search a negative error number is returned.
961 *
962 * if ins_len > 0, nodes and leaves will be split as we walk down the
963 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
964 * possible)
965 */
966 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
967 *root, struct btrfs_key *key, struct btrfs_path *p, int
968 ins_len, int cow)
969 {
970 struct extent_buffer *b;
971 u64 bytenr;
972 int slot;
973 int ret;
974 int level;
975 int should_reada = p->reada;
976 u8 lowest_level = 0;
977
978 lowest_level = p->lowest_level;
979 WARN_ON(lowest_level && ins_len);
980 WARN_ON(p->nodes[0] != NULL);
981 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
982 again:
983 b = root->node;
984 extent_buffer_get(b);
985 while (b) {
986 level = btrfs_header_level(b);
987 if (cow) {
988 int wret;
989 wret = btrfs_cow_block(trans, root, b,
990 p->nodes[level + 1],
991 p->slots[level + 1],
992 &b);
993 if (wret) {
994 free_extent_buffer(b);
995 return wret;
996 }
997 }
998 BUG_ON(!cow && ins_len);
999 if (level != btrfs_header_level(b))
1000 WARN_ON(1);
1001 level = btrfs_header_level(b);
1002 p->nodes[level] = b;
1003 ret = check_block(root, p, level);
1004 if (ret)
1005 return -1;
1006 ret = bin_search(b, key, level, &slot);
1007 if (level != 0) {
1008 if (ret && slot > 0)
1009 slot -= 1;
1010 p->slots[level] = slot;
1011 if (ins_len > 0 && btrfs_header_nritems(b) >=
1012 BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1013 int sret = split_node(trans, root, p, level);
1014 BUG_ON(sret > 0);
1015 if (sret)
1016 return sret;
1017 b = p->nodes[level];
1018 slot = p->slots[level];
1019 } else if (ins_len < 0) {
1020 int sret = balance_level(trans, root, p,
1021 level);
1022 if (sret)
1023 return sret;
1024 b = p->nodes[level];
1025 if (!b) {
1026 btrfs_release_path(NULL, p);
1027 goto again;
1028 }
1029 slot = p->slots[level];
1030 BUG_ON(btrfs_header_nritems(b) == 1);
1031 }
1032 /* this is only true while dropping a snapshot */
1033 if (level == lowest_level)
1034 break;
1035 bytenr = btrfs_node_blockptr(b, slot);
1036 if (should_reada)
1037 reada_for_search(root, p, level, slot);
1038 b = read_tree_block(root, bytenr,
1039 btrfs_level_size(root, level - 1));
1040 } else {
1041 p->slots[level] = slot;
1042 if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1043 sizeof(struct btrfs_item) + ins_len) {
1044 int sret = split_leaf(trans, root, key,
1045 p, ins_len);
1046 BUG_ON(sret > 0);
1047 if (sret)
1048 return sret;
1049 }
1050 return ret;
1051 }
1052 }
1053 return 1;
1054 }
1055
1056 /*
1057 * adjust the pointers going up the tree, starting at level
1058 * making sure the right key of each node is points to 'key'.
1059 * This is used after shifting pointers to the left, so it stops
1060 * fixing up pointers when a given leaf/node is not in slot 0 of the
1061 * higher levels
1062 *
1063 * If this fails to write a tree block, it returns -1, but continues
1064 * fixing up the blocks in ram so the tree is consistent.
1065 */
1066 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1067 struct btrfs_root *root, struct btrfs_path *path,
1068 struct btrfs_disk_key *key, int level)
1069 {
1070 int i;
1071 int ret = 0;
1072 struct extent_buffer *t;
1073
1074 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1075 int tslot = path->slots[i];
1076 if (!path->nodes[i])
1077 break;
1078 t = path->nodes[i];
1079 btrfs_set_node_key(t, key, tslot);
1080 btrfs_mark_buffer_dirty(path->nodes[i]);
1081 if (tslot != 0)
1082 break;
1083 }
1084 return ret;
1085 }
1086
1087 /*
1088 * try to push data from one node into the next node left in the
1089 * tree.
1090 *
1091 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1092 * error, and > 0 if there was no room in the left hand block.
1093 */
1094 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
1095 *root, struct extent_buffer *dst,
1096 struct extent_buffer *src)
1097 {
1098 int push_items = 0;
1099 int src_nritems;
1100 int dst_nritems;
1101 int ret = 0;
1102
1103 src_nritems = btrfs_header_nritems(src);
1104 dst_nritems = btrfs_header_nritems(dst);
1105 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1106
1107 if (push_items <= 0) {
1108 return 1;
1109 }
1110
1111 if (src_nritems < push_items)
1112 push_items = src_nritems;
1113
1114 copy_extent_buffer(dst, src,
1115 btrfs_node_key_ptr_offset(dst_nritems),
1116 btrfs_node_key_ptr_offset(0),
1117 push_items * sizeof(struct btrfs_key_ptr));
1118
1119 if (push_items < src_nritems) {
1120 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1121 btrfs_node_key_ptr_offset(push_items),
1122 (src_nritems - push_items) *
1123 sizeof(struct btrfs_key_ptr));
1124 }
1125 btrfs_set_header_nritems(src, src_nritems - push_items);
1126 btrfs_set_header_nritems(dst, dst_nritems + push_items);
1127 btrfs_mark_buffer_dirty(src);
1128 btrfs_mark_buffer_dirty(dst);
1129 return ret;
1130 }
1131
1132 /*
1133 * try to push data from one node into the next node right in the
1134 * tree.
1135 *
1136 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1137 * error, and > 0 if there was no room in the right hand block.
1138 *
1139 * this will only push up to 1/2 the contents of the left node over
1140 */
1141 static int balance_node_right(struct btrfs_trans_handle *trans,
1142 struct btrfs_root *root,
1143 struct extent_buffer *dst,
1144 struct extent_buffer *src)
1145 {
1146 int push_items = 0;
1147 int max_push;
1148 int src_nritems;
1149 int dst_nritems;
1150 int ret = 0;
1151
1152 src_nritems = btrfs_header_nritems(src);
1153 dst_nritems = btrfs_header_nritems(dst);
1154 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1155 if (push_items <= 0)
1156 return 1;
1157
1158 max_push = src_nritems / 2 + 1;
1159 /* don't try to empty the node */
1160 if (max_push >= src_nritems)
1161 return 1;
1162
1163 if (max_push < push_items)
1164 push_items = max_push;
1165
1166 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1167 btrfs_node_key_ptr_offset(0),
1168 (dst_nritems) *
1169 sizeof(struct btrfs_key_ptr));
1170
1171 copy_extent_buffer(dst, src,
1172 btrfs_node_key_ptr_offset(0),
1173 btrfs_node_key_ptr_offset(src_nritems - push_items),
1174 push_items * sizeof(struct btrfs_key_ptr));
1175
1176 btrfs_set_header_nritems(src, src_nritems - push_items);
1177 btrfs_set_header_nritems(dst, dst_nritems + push_items);
1178
1179 btrfs_mark_buffer_dirty(src);
1180 btrfs_mark_buffer_dirty(dst);
1181 return ret;
1182 }
1183
1184 /*
1185 * helper function to insert a new root level in the tree.
1186 * A new node is allocated, and a single item is inserted to
1187 * point to the existing root
1188 *
1189 * returns zero on success or < 0 on failure.
1190 */
1191 static int insert_new_root(struct btrfs_trans_handle *trans,
1192 struct btrfs_root *root,
1193 struct btrfs_path *path, int level)
1194 {
1195 struct extent_buffer *lower;
1196 struct extent_buffer *c;
1197 struct btrfs_disk_key lower_key;
1198
1199 BUG_ON(path->nodes[level]);
1200 BUG_ON(path->nodes[level-1] != root->node);
1201
1202 c = btrfs_alloc_free_block(trans, root, root->nodesize,
1203 root->node->start, 0);
1204 if (IS_ERR(c))
1205 return PTR_ERR(c);
1206 memset_extent_buffer(c, 0, 0, root->nodesize);
1207 btrfs_set_header_nritems(c, 1);
1208 btrfs_set_header_level(c, level);
1209 btrfs_set_header_bytenr(c, c->start);
1210 btrfs_set_header_generation(c, trans->transid);
1211 btrfs_set_header_owner(c, root->root_key.objectid);
1212 lower = path->nodes[level-1];
1213
1214 write_extent_buffer(c, root->fs_info->fsid,
1215 (unsigned long)btrfs_header_fsid(c),
1216 BTRFS_FSID_SIZE);
1217 if (level == 1)
1218 btrfs_item_key(lower, &lower_key, 0);
1219 else
1220 btrfs_node_key(lower, &lower_key, 0);
1221 btrfs_set_node_key(c, &lower_key, 0);
1222 btrfs_set_node_blockptr(c, 0, lower->start);
1223
1224 btrfs_mark_buffer_dirty(c);
1225
1226 /* the super has an extra ref to root->node */
1227 free_extent_buffer(root->node);
1228 root->node = c;
1229 extent_buffer_get(c);
1230 path->nodes[level] = c;
1231 path->slots[level] = 0;
1232 return 0;
1233 }
1234
1235 /*
1236 * worker function to insert a single pointer in a node.
1237 * the node should have enough room for the pointer already
1238 *
1239 * slot and level indicate where you want the key to go, and
1240 * blocknr is the block the key points to.
1241 *
1242 * returns zero on success and < 0 on any error
1243 */
1244 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1245 *root, struct btrfs_path *path, struct btrfs_disk_key
1246 *key, u64 bytenr, int slot, int level)
1247 {
1248 struct extent_buffer *lower;
1249 int nritems;
1250
1251 BUG_ON(!path->nodes[level]);
1252 lower = path->nodes[level];
1253 nritems = btrfs_header_nritems(lower);
1254 if (slot > nritems)
1255 BUG();
1256 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1257 BUG();
1258 if (slot != nritems) {
1259 memmove_extent_buffer(lower,
1260 btrfs_node_key_ptr_offset(slot + 1),
1261 btrfs_node_key_ptr_offset(slot),
1262 (nritems - slot) * sizeof(struct btrfs_key_ptr));
1263 }
1264 btrfs_set_node_key(lower, key, slot);
1265 btrfs_set_node_blockptr(lower, slot, bytenr);
1266 btrfs_set_header_nritems(lower, nritems + 1);
1267 btrfs_mark_buffer_dirty(lower);
1268 return 0;
1269 }
1270
1271 /*
1272 * split the node at the specified level in path in two.
1273 * The path is corrected to point to the appropriate node after the split
1274 *
1275 * Before splitting this tries to make some room in the node by pushing
1276 * left and right, if either one works, it returns right away.
1277 *
1278 * returns 0 on success and < 0 on failure
1279 */
1280 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1281 *root, struct btrfs_path *path, int level)
1282 {
1283 struct extent_buffer *c;
1284 struct extent_buffer *split;
1285 struct btrfs_disk_key disk_key;
1286 int mid;
1287 int ret;
1288 int wret;
1289 u32 c_nritems;
1290
1291 c = path->nodes[level];
1292 if (c == root->node) {
1293 /* trying to split the root, lets make a new one */
1294 ret = insert_new_root(trans, root, path, level + 1);
1295 if (ret)
1296 return ret;
1297 } else {
1298 ret = push_nodes_for_insert(trans, root, path, level);
1299 c = path->nodes[level];
1300 if (!ret && btrfs_header_nritems(c) <
1301 BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1302 return 0;
1303 if (ret < 0)
1304 return ret;
1305 }
1306
1307 c_nritems = btrfs_header_nritems(c);
1308 split = btrfs_alloc_free_block(trans, root, root->nodesize,
1309 c->start, 0);
1310 if (IS_ERR(split))
1311 return PTR_ERR(split);
1312
1313 btrfs_set_header_flags(split, btrfs_header_flags(c));
1314 btrfs_set_header_level(split, btrfs_header_level(c));
1315 btrfs_set_header_bytenr(split, split->start);
1316 btrfs_set_header_generation(split, trans->transid);
1317 btrfs_set_header_owner(split, root->root_key.objectid);
1318 write_extent_buffer(split, root->fs_info->fsid,
1319 (unsigned long)btrfs_header_fsid(split),
1320 BTRFS_FSID_SIZE);
1321
1322 mid = (c_nritems + 1) / 2;
1323
1324 copy_extent_buffer(split, c,
1325 btrfs_node_key_ptr_offset(0),
1326 btrfs_node_key_ptr_offset(mid),
1327 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1328 btrfs_set_header_nritems(split, c_nritems - mid);
1329 btrfs_set_header_nritems(c, mid);
1330 ret = 0;
1331
1332 btrfs_mark_buffer_dirty(c);
1333 btrfs_mark_buffer_dirty(split);
1334
1335 btrfs_node_key(split, &disk_key, 0);
1336 wret = insert_ptr(trans, root, path, &disk_key, split->start,
1337 path->slots[level + 1] + 1,
1338 level + 1);
1339 if (wret)
1340 ret = wret;
1341
1342 if (path->slots[level] >= mid) {
1343 path->slots[level] -= mid;
1344 free_extent_buffer(c);
1345 path->nodes[level] = split;
1346 path->slots[level + 1] += 1;
1347 } else {
1348 free_extent_buffer(split);
1349 }
1350 return ret;
1351 }
1352
1353 /*
1354 * how many bytes are required to store the items in a leaf. start
1355 * and nr indicate which items in the leaf to check. This totals up the
1356 * space used both by the item structs and the item data
1357 */
1358 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1359 {
1360 int data_len;
1361 int nritems = btrfs_header_nritems(l);
1362 int end = min(nritems, start + nr) - 1;
1363
1364 if (!nr)
1365 return 0;
1366 data_len = btrfs_item_end_nr(l, start);
1367 data_len = data_len - btrfs_item_offset_nr(l, end);
1368 data_len += sizeof(struct btrfs_item) * nr;
1369 WARN_ON(data_len < 0);
1370 return data_len;
1371 }
1372
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data.
 *
 * The result is BTRFS_LEAF_DATA_SIZE(root) minus the space used by all
 * items in the leaf.  A negative result is reported with printk and
 * still returned to the caller, so callers must be prepared for a value
 * below zero.
 */
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
{
	int nritems = btrfs_header_nritems(leaf);
	/* computed once so the report below shows the value actually used */
	int used = leaf_space_used(leaf, 0, nritems);
	int ret = BTRFS_LEAF_DATA_SIZE(root) - used;

	if (ret < 0) {
		/* explicit cast keeps the argument in step with %lu */
		printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
		       ret, (unsigned long)BTRFS_LEAF_DATA_SIZE(root),
		       used, nritems);
	}
	return ret;
}
1390
1391 /*
1392 * push some data in the path leaf to the right, trying to free up at
1393 * least data_size bytes. returns zero if the push worked, nonzero otherwise
1394 *
1395 * returns 1 if the push failed because the other node didn't have enough
1396 * room, 0 if everything worked out and < 0 if there were major errors.
1397 */
1398 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1399 *root, struct btrfs_path *path, int data_size)
1400 {
1401 struct extent_buffer *left = path->nodes[0];
1402 struct extent_buffer *right;
1403 struct extent_buffer *upper;
1404 struct btrfs_disk_key disk_key;
1405 int slot;
1406 int i;
1407 int free_space;
1408 int push_space = 0;
1409 int push_items = 0;
1410 struct btrfs_item *item;
1411 u32 left_nritems;
1412 u32 right_nritems;
1413 u32 data_end;
1414 u32 this_item_size;
1415 int ret;
1416
1417 slot = path->slots[1];
1418 if (!path->nodes[1]) {
1419 return 1;
1420 }
1421 upper = path->nodes[1];
1422 if (slot >= btrfs_header_nritems(upper) - 1)
1423 return 1;
1424
1425 right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1426 root->leafsize);
1427 free_space = btrfs_leaf_free_space(root, right);
1428 if (free_space < data_size + sizeof(struct btrfs_item)) {
1429 free_extent_buffer(right);
1430 return 1;
1431 }
1432
1433 /* cow and double check */
1434 ret = btrfs_cow_block(trans, root, right, upper,
1435 slot + 1, &right);
1436 if (ret) {
1437 free_extent_buffer(right);
1438 return 1;
1439 }
1440 free_space = btrfs_leaf_free_space(root, right);
1441 if (free_space < data_size + sizeof(struct btrfs_item)) {
1442 free_extent_buffer(right);
1443 return 1;
1444 }
1445
1446 left_nritems = btrfs_header_nritems(left);
1447 if (left_nritems == 0) {
1448 free_extent_buffer(right);
1449 return 1;
1450 }
1451
1452 for (i = left_nritems - 1; i >= 1; i--) {
1453 item = btrfs_item_nr(left, i);
1454
1455 if (path->slots[0] == i)
1456 push_space += data_size + sizeof(*item);
1457
1458 if (!left->map_token) {
1459 map_extent_buffer(left, (unsigned long)item,
1460 sizeof(struct btrfs_item),
1461 &left->map_token, &left->kaddr,
1462 &left->map_start, &left->map_len,
1463 KM_USER1);
1464 }
1465
1466 this_item_size = btrfs_item_size(left, item);
1467 if (this_item_size + sizeof(*item) + push_space > free_space)
1468 break;
1469 push_items++;
1470 push_space += this_item_size + sizeof(*item);
1471 }
1472 if (left->map_token) {
1473 unmap_extent_buffer(left, left->map_token, KM_USER1);
1474 left->map_token = NULL;
1475 }
1476
1477 if (push_items == 0) {
1478 free_extent_buffer(right);
1479 return 1;
1480 }
1481
1482 if (push_items == left_nritems)
1483 WARN_ON(1);
1484
1485 /* push left to right */
1486 right_nritems = btrfs_header_nritems(right);
1487 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1488 push_space -= leaf_data_end(root, left);
1489
1490 /* make room in the right data area */
1491 data_end = leaf_data_end(root, right);
1492 memmove_extent_buffer(right,
1493 btrfs_leaf_data(right) + data_end - push_space,
1494 btrfs_leaf_data(right) + data_end,
1495 BTRFS_LEAF_DATA_SIZE(root) - data_end);
1496
1497 /* copy from the left data area */
1498 copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1499 BTRFS_LEAF_DATA_SIZE(root) - push_space,
1500 btrfs_leaf_data(left) + leaf_data_end(root, left),
1501 push_space);
1502
1503 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1504 btrfs_item_nr_offset(0),
1505 right_nritems * sizeof(struct btrfs_item));
1506
1507 /* copy the items from left to right */
1508 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1509 btrfs_item_nr_offset(left_nritems - push_items),
1510 push_items * sizeof(struct btrfs_item));
1511
1512 /* update the item pointers */
1513 right_nritems += push_items;
1514 btrfs_set_header_nritems(right, right_nritems);
1515 push_space = BTRFS_LEAF_DATA_SIZE(root);
1516
1517 for (i = 0; i < right_nritems; i++) {
1518 item = btrfs_item_nr(right, i);
1519 if (!right->map_token) {
1520 map_extent_buffer(right, (unsigned long)item,
1521 sizeof(struct btrfs_item),
1522 &right->map_token, &right->kaddr,
1523 &right->map_start, &right->map_len,
1524 KM_USER1);
1525 }
1526 push_space -= btrfs_item_size(right, item);
1527 btrfs_set_item_offset(right, item, push_space);
1528 }
1529
1530 if (right->map_token) {
1531 unmap_extent_buffer(right, right->map_token, KM_USER1);
1532 right->map_token = NULL;
1533 }
1534 left_nritems -= push_items;
1535 btrfs_set_header_nritems(left, left_nritems);
1536
1537 btrfs_mark_buffer_dirty(left);
1538 btrfs_mark_buffer_dirty(right);
1539
1540 btrfs_item_key(right, &disk_key, 0);
1541 btrfs_set_node_key(upper, &disk_key, slot + 1);
1542 btrfs_mark_buffer_dirty(upper);
1543
1544 /* then fixup the leaf pointer in the path */
1545 if (path->slots[0] >= left_nritems) {
1546 path->slots[0] -= left_nritems;
1547 free_extent_buffer(path->nodes[0]);
1548 path->nodes[0] = right;
1549 path->slots[1] += 1;
1550 } else {
1551 free_extent_buffer(right);
1552 }
1553 return 0;
1554 }
1555 /*
1556 * push some data in the path leaf to the left, trying to free up at
1557 * least data_size bytes. returns zero if the push worked, nonzero otherwise
1558 */
1559 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1560 *root, struct btrfs_path *path, int data_size)
1561 {
1562 struct btrfs_disk_key disk_key;
1563 struct extent_buffer *right = path->nodes[0];
1564 struct extent_buffer *left;
1565 int slot;
1566 int i;
1567 int free_space;
1568 int push_space = 0;
1569 int push_items = 0;
1570 struct btrfs_item *item;
1571 u32 old_left_nritems;
1572 u32 right_nritems;
1573 int ret = 0;
1574 int wret;
1575 u32 this_item_size;
1576 u32 old_left_item_size;
1577
1578 slot = path->slots[1];
1579 if (slot == 0)
1580 return 1;
1581 if (!path->nodes[1])
1582 return 1;
1583
1584 left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1585 slot - 1), root->leafsize);
1586 free_space = btrfs_leaf_free_space(root, left);
1587 if (free_space < data_size + sizeof(struct btrfs_item)) {
1588 free_extent_buffer(left);
1589 return 1;
1590 }
1591
1592 /* cow and double check */
1593 ret = btrfs_cow_block(trans, root, left,
1594 path->nodes[1], slot - 1, &left);
1595 if (ret) {
1596 /* we hit -ENOSPC, but it isn't fatal here */
1597 free_extent_buffer(left);
1598 return 1;
1599 }
1600 free_space = btrfs_leaf_free_space(root, left);
1601 if (free_space < data_size + sizeof(struct btrfs_item)) {
1602 free_extent_buffer(left);
1603 return 1;
1604 }
1605
1606 right_nritems = btrfs_header_nritems(right);
1607 if (right_nritems == 0) {
1608 free_extent_buffer(left);
1609 return 1;
1610 }
1611
1612 for (i = 0; i < right_nritems - 1; i++) {
1613 item = btrfs_item_nr(right, i);
1614 if (!right->map_token) {
1615 map_extent_buffer(right, (unsigned long)item,
1616 sizeof(struct btrfs_item),
1617 &right->map_token, &right->kaddr,
1618 &right->map_start, &right->map_len,
1619 KM_USER1);
1620 }
1621
1622 if (path->slots[0] == i)
1623 push_space += data_size + sizeof(*item);
1624
1625 this_item_size = btrfs_item_size(right, item);
1626 if (this_item_size + sizeof(*item) + push_space > free_space)
1627 break;
1628
1629 push_items++;
1630 push_space += this_item_size + sizeof(*item);
1631 }
1632
1633 if (right->map_token) {
1634 unmap_extent_buffer(right, right->map_token, KM_USER1);
1635 right->map_token = NULL;
1636 }
1637
1638 if (push_items == 0) {
1639 free_extent_buffer(left);
1640 return 1;
1641 }
1642 if (push_items == btrfs_header_nritems(right))
1643 WARN_ON(1);
1644
1645 /* push data from right to left */
1646 copy_extent_buffer(left, right,
1647 btrfs_item_nr_offset(btrfs_header_nritems(left)),
1648 btrfs_item_nr_offset(0),
1649 push_items * sizeof(struct btrfs_item));
1650
1651 push_space = BTRFS_LEAF_DATA_SIZE(root) -
1652 btrfs_item_offset_nr(right, push_items -1);
1653
1654 copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1655 leaf_data_end(root, left) - push_space,
1656 btrfs_leaf_data(right) +
1657 btrfs_item_offset_nr(right, push_items - 1),
1658 push_space);
1659 old_left_nritems = btrfs_header_nritems(left);
1660 BUG_ON(old_left_nritems < 0);
1661
1662 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1663 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1664 u32 ioff;
1665
1666 item = btrfs_item_nr(left, i);
1667 if (!left->map_token) {
1668 map_extent_buffer(left, (unsigned long)item,
1669 sizeof(struct btrfs_item),
1670 &left->map_token, &left->kaddr,
1671 &left->map_start, &left->map_len,
1672 KM_USER1);
1673 }
1674
1675 ioff = btrfs_item_offset(left, item);
1676 btrfs_set_item_offset(left, item,
1677 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1678 }
1679 btrfs_set_header_nritems(left, old_left_nritems + push_items);
1680 if (left->map_token) {
1681 unmap_extent_buffer(left, left->map_token, KM_USER1);
1682 left->map_token = NULL;
1683 }
1684
1685 /* fixup right node */
1686 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1687 leaf_data_end(root, right);
1688 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1689 BTRFS_LEAF_DATA_SIZE(root) - push_space,
1690 btrfs_leaf_data(right) +
1691 leaf_data_end(root, right), push_space);
1692
1693 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1694 btrfs_item_nr_offset(push_items),
1695 (btrfs_header_nritems(right) - push_items) *
1696 sizeof(struct btrfs_item));
1697
1698 right_nritems = btrfs_header_nritems(right) - push_items;
1699 btrfs_set_header_nritems(right, right_nritems);
1700 push_space = BTRFS_LEAF_DATA_SIZE(root);
1701
1702 for (i = 0; i < right_nritems; i++) {
1703 item = btrfs_item_nr(right, i);
1704
1705 if (!right->map_token) {
1706 map_extent_buffer(right, (unsigned long)item,
1707 sizeof(struct btrfs_item),
1708 &right->map_token, &right->kaddr,
1709 &right->map_start, &right->map_len,
1710 KM_USER1);
1711 }
1712
1713 push_space = push_space - btrfs_item_size(right, item);
1714 btrfs_set_item_offset(right, item, push_space);
1715 }
1716 if (right->map_token) {
1717 unmap_extent_buffer(right, right->map_token, KM_USER1);
1718 right->map_token = NULL;
1719 }
1720
1721 btrfs_mark_buffer_dirty(left);
1722 btrfs_mark_buffer_dirty(right);
1723
1724 btrfs_item_key(right, &disk_key, 0);
1725 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1726 if (wret)
1727 ret = wret;
1728
1729 /* then fixup the leaf pointer in the path */
1730 if (path->slots[0] < push_items) {
1731 path->slots[0] += old_left_nritems;
1732 free_extent_buffer(path->nodes[0]);
1733 path->nodes[0] = left;
1734 path->slots[1] -= 1;
1735 } else {
1736 free_extent_buffer(left);
1737 path->slots[0] -= push_items;
1738 }
1739 BUG_ON(path->slots[0] < 0);
1740 return ret;
1741 }
1742
1743 /*
1744 * split the path's leaf in two, making sure there is at least data_size
1745 * available for the resulting leaf level of the path.
1746 *
1747 * returns 0 if all went well and < 0 on failure.
1748 */
1749 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1750 *root, struct btrfs_key *ins_key,
1751 struct btrfs_path *path, int data_size)
1752 {
1753 struct extent_buffer *l;
1754 u32 nritems;
1755 int mid;
1756 int slot;
1757 struct extent_buffer *right;
1758 int space_needed = data_size + sizeof(struct btrfs_item);
1759 int data_copy_size;
1760 int rt_data_off;
1761 int i;
1762 int ret = 0;
1763 int wret;
1764 int double_split = 0;
1765 struct btrfs_disk_key disk_key;
1766
1767 /* first try to make some room by pushing left and right */
1768 wret = push_leaf_left(trans, root, path, data_size);
1769 if (wret < 0)
1770 return wret;
1771 if (wret) {
1772 wret = push_leaf_right(trans, root, path, data_size);
1773 if (wret < 0)
1774 return wret;
1775 }
1776 l = path->nodes[0];
1777
1778 /* did the pushes work? */
1779 if (btrfs_leaf_free_space(root, l) >=
1780 sizeof(struct btrfs_item) + data_size)
1781 return 0;
1782
1783 if (!path->nodes[1]) {
1784 ret = insert_new_root(trans, root, path, 1);
1785 if (ret)
1786 return ret;
1787 }
1788 slot = path->slots[0];
1789 nritems = btrfs_header_nritems(l);
1790 mid = (nritems + 1)/ 2;
1791
1792 right = btrfs_alloc_free_block(trans, root, root->leafsize,
1793 l->start, 0);
1794 if (IS_ERR(right))
1795 return PTR_ERR(right);
1796
1797 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1798 btrfs_set_header_bytenr(right, right->start);
1799 btrfs_set_header_generation(right, trans->transid);
1800 btrfs_set_header_owner(right, root->root_key.objectid);
1801 btrfs_set_header_level(right, 0);
1802 write_extent_buffer(right, root->fs_info->fsid,
1803 (unsigned long)btrfs_header_fsid(right),
1804 BTRFS_FSID_SIZE);
1805
1806 if (mid <= slot) {
1807 if (nritems == 1 ||
1808 leaf_space_used(l, mid, nritems - mid) + space_needed >
1809 BTRFS_LEAF_DATA_SIZE(root)) {
1810 if (slot >= nritems) {
1811 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1812 btrfs_set_header_nritems(right, 0);
1813 wret = insert_ptr(trans, root, path,
1814 &disk_key, right->start,
1815 path->slots[1] + 1, 1);
1816 if (wret)
1817 ret = wret;
1818 free_extent_buffer(path->nodes[0]);
1819 path->nodes[0] = right;
1820 path->slots[0] = 0;
1821 path->slots[1] += 1;
1822 return ret;
1823 }
1824 mid = slot;
1825 double_split = 1;
1826 }
1827 } else {
1828 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1829 BTRFS_LEAF_DATA_SIZE(root)) {
1830 if (slot == 0) {
1831 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1832 btrfs_set_header_nritems(right, 0);
1833 wret = insert_ptr(trans, root, path,
1834 &disk_key,
1835 right->start,
1836 path->slots[1], 1);
1837 if (wret)
1838 ret = wret;
1839 free_extent_buffer(path->nodes[0]);
1840 path->nodes[0] = right;
1841 path->slots[0] = 0;
1842 if (path->slots[1] == 0) {
1843 wret = fixup_low_keys(trans, root,
1844 path, &disk_key, 1);
1845 if (wret)
1846 ret = wret;
1847 }
1848 return ret;
1849 }
1850 mid = slot;
1851 double_split = 1;
1852 }
1853 }
1854 nritems = nritems - mid;
1855 btrfs_set_header_nritems(right, nritems);
1856 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
1857
1858 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
1859 btrfs_item_nr_offset(mid),
1860 nritems * sizeof(struct btrfs_item));
1861
1862 copy_extent_buffer(right, l,
1863 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1864 data_copy_size, btrfs_leaf_data(l) +
1865 leaf_data_end(root, l), data_copy_size);
1866
1867 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1868 btrfs_item_end_nr(l, mid);
1869
1870 for (i = 0; i < nritems; i++) {
1871 struct btrfs_item *item = btrfs_item_nr(right, i);
1872 u32 ioff;
1873
1874 if (!right->map_token) {
1875 map_extent_buffer(right, (unsigned long)item,
1876 sizeof(struct btrfs_item),
1877 &right->map_token, &right->kaddr,
1878 &right->map_start, &right->map_len,
1879 KM_USER1);
1880 }
1881
1882 ioff = btrfs_item_offset(right, item);
1883 btrfs_set_item_offset(right, item, ioff + rt_data_off);
1884 }
1885
1886 if (right->map_token) {
1887 unmap_extent_buffer(right, right->map_token, KM_USER1);
1888 right->map_token = NULL;
1889 }
1890
1891 btrfs_set_header_nritems(l, mid);
1892 ret = 0;
1893 btrfs_item_key(right, &disk_key, 0);
1894 wret = insert_ptr(trans, root, path, &disk_key, right->start,
1895 path->slots[1] + 1, 1);
1896 if (wret)
1897 ret = wret;
1898
1899 btrfs_mark_buffer_dirty(right);
1900 btrfs_mark_buffer_dirty(l);
1901 BUG_ON(path->slots[0] != slot);
1902
1903 if (mid <= slot) {
1904 free_extent_buffer(path->nodes[0]);
1905 path->nodes[0] = right;
1906 path->slots[0] -= mid;
1907 path->slots[1] += 1;
1908 } else
1909 free_extent_buffer(right);
1910
1911 BUG_ON(path->slots[0] < 0);
1912
1913 if (!double_split)
1914 return ret;
1915
1916 right = btrfs_alloc_free_block(trans, root, root->leafsize,
1917 l->start, 0);
1918 if (IS_ERR(right))
1919 return PTR_ERR(right);
1920
1921 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1922 btrfs_set_header_bytenr(right, right->start);
1923 btrfs_set_header_generation(right, trans->transid);
1924 btrfs_set_header_owner(right, root->root_key.objectid);
1925 btrfs_set_header_level(right, 0);
1926 write_extent_buffer(right, root->fs_info->fsid,
1927 (unsigned long)btrfs_header_fsid(right),
1928 BTRFS_FSID_SIZE);
1929
1930 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1931 btrfs_set_header_nritems(right, 0);
1932 wret = insert_ptr(trans, root, path,
1933 &disk_key, right->start,
1934 path->slots[1], 1);
1935 if (wret)
1936 ret = wret;
1937 if (path->slots[1] == 0) {
1938 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1939 if (wret)
1940 ret = wret;
1941 }
1942 free_extent_buffer(path->nodes[0]);
1943 path->nodes[0] = right;
1944 path->slots[0] = 0;
1945 return ret;
1946 }
1947
1948 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1949 struct btrfs_root *root,
1950 struct btrfs_path *path,
1951 u32 new_size)
1952 {
1953 int ret = 0;
1954 int slot;
1955 int slot_orig;
1956 struct extent_buffer *leaf;
1957 struct btrfs_item *item;
1958 u32 nritems;
1959 unsigned int data_end;
1960 unsigned int old_data_start;
1961 unsigned int old_size;
1962 unsigned int size_diff;
1963 int i;
1964
1965 slot_orig = path->slots[0];
1966 leaf = path->nodes[0];
1967
1968 nritems = btrfs_header_nritems(leaf);
1969 data_end = leaf_data_end(root, leaf);
1970
1971 slot = path->slots[0];
1972 old_data_start = btrfs_item_offset_nr(leaf, slot);
1973 old_size = btrfs_item_size_nr(leaf, slot);
1974 BUG_ON(old_size <= new_size);
1975 size_diff = old_size - new_size;
1976
1977 BUG_ON(slot < 0);
1978 BUG_ON(slot >= nritems);
1979
1980 /*
1981 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1982 */
1983 /* first correct the data pointers */
1984 for (i = slot; i < nritems; i++) {
1985 u32 ioff;
1986 item = btrfs_item_nr(leaf, i);
1987
1988 if (!leaf->map_token) {
1989 map_extent_buffer(leaf, (unsigned long)item,
1990 sizeof(struct btrfs_item),
1991 &leaf->map_token, &leaf->kaddr,
1992 &leaf->map_start, &leaf->map_len,
1993 KM_USER1);
1994 }
1995
1996 ioff = btrfs_item_offset(leaf, item);
1997 btrfs_set_item_offset(leaf, item, ioff + size_diff);
1998 }
1999
2000 if (leaf->map_token) {
2001 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2002 leaf->map_token = NULL;
2003 }
2004
2005 /* shift the data */
2006 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2007 data_end + size_diff, btrfs_leaf_data(leaf) +
2008 data_end, old_data_start + new_size - data_end);
2009
2010 item = btrfs_item_nr(leaf, slot);
2011 btrfs_set_item_size(leaf, item, new_size);
2012 btrfs_mark_buffer_dirty(leaf);
2013
2014 ret = 0;
2015 if (btrfs_leaf_free_space(root, leaf) < 0) {
2016 btrfs_print_leaf(root, leaf);
2017 BUG();
2018 }
2019 return ret;
2020 }
2021
2022 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2023 struct btrfs_root *root, struct btrfs_path *path,
2024 u32 data_size)
2025 {
2026 int ret = 0;
2027 int slot;
2028 int slot_orig;
2029 struct extent_buffer *leaf;
2030 struct btrfs_item *item;
2031 u32 nritems;
2032 unsigned int data_end;
2033 unsigned int old_data;
2034 unsigned int old_size;
2035 int i;
2036
2037 slot_orig = path->slots[0];
2038 leaf = path->nodes[0];
2039
2040 nritems = btrfs_header_nritems(leaf);
2041 data_end = leaf_data_end(root, leaf);
2042
2043 if (btrfs_leaf_free_space(root, leaf) < data_size) {
2044 btrfs_print_leaf(root, leaf);
2045 BUG();
2046 }
2047 slot = path->slots[0];
2048 old_data = btrfs_item_end_nr(leaf, slot);
2049
2050 BUG_ON(slot < 0);
2051 BUG_ON(slot >= nritems);
2052
2053 /*
2054 * item0..itemN ... dataN.offset..dataN.size .. data0.size
2055 */
2056 /* first correct the data pointers */
2057 for (i = slot; i < nritems; i++) {
2058 u32 ioff;
2059 item = btrfs_item_nr(leaf, i);
2060
2061 if (!leaf->map_token) {
2062 map_extent_buffer(leaf, (unsigned long)item,
2063 sizeof(struct btrfs_item),
2064 &leaf->map_token, &leaf->kaddr,
2065 &leaf->map_start, &leaf->map_len,
2066 KM_USER1);
2067 }
2068 ioff = btrfs_item_offset(leaf, item);
2069 btrfs_set_item_offset(leaf, item, ioff - data_size);
2070 }
2071
2072 if (leaf->map_token) {
2073 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2074 leaf->map_token = NULL;
2075 }
2076
2077 /* shift the data */
2078 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2079 data_end - data_size, btrfs_leaf_data(leaf) +
2080 data_end, old_data - data_end);
2081
2082 data_end = old_data;
2083 old_size = btrfs_item_size_nr(leaf, slot);
2084 item = btrfs_item_nr(leaf, slot);
2085 btrfs_set_item_size(leaf, item, old_size + data_size);
2086 btrfs_mark_buffer_dirty(leaf);
2087
2088 ret = 0;
2089 if (btrfs_leaf_free_space(root, leaf) < 0) {
2090 btrfs_print_leaf(root, leaf);
2091 BUG();
2092 }
2093 return ret;
2094 }
2095
2096 /*
2097 * Given a key and some data, insert an item into the tree.
2098 * This does all the path init required, making room in the tree if needed.
2099 */
2100 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2101 struct btrfs_root *root,
2102 struct btrfs_path *path,
2103 struct btrfs_key *cpu_key, u32 data_size)
2104 {
2105 struct extent_buffer *leaf;
2106 struct btrfs_item *item;
2107 int ret = 0;
2108 int slot;
2109 int slot_orig;
2110 u32 nritems;
2111 unsigned int data_end;
2112 struct btrfs_disk_key disk_key;
2113
2114 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2115
2116 /* create a root if there isn't one */
2117 if (!root->node)
2118 BUG();
2119
2120 ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2121 if (ret == 0) {
2122 return -EEXIST;
2123 }
2124 if (ret < 0)
2125 goto out;
2126
2127 slot_orig = path->slots[0];
2128 leaf = path->nodes[0];
2129
2130 nritems = btrfs_header_nritems(leaf);
2131 data_end = leaf_data_end(root, leaf);
2132
2133 if (btrfs_leaf_free_space(root, leaf) <
2134 sizeof(struct btrfs_item) + data_size) {
2135 BUG();
2136 }
2137
2138 slot = path->slots[0];
2139 BUG_ON(slot < 0);
2140
2141 if (slot != nritems) {
2142 int i;
2143 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2144
2145 if (old_data < data_end) {
2146 btrfs_print_leaf(root, leaf);
2147 printk("slot %d old_data %d data_end %d\n",
2148 slot, old_data, data_end);
2149 BUG_ON(1);
2150 }
2151 /*
2152 * item0..itemN ... dataN.offset..dataN.size .. data0.size
2153 */
2154 /* first correct the data pointers */
2155 WARN_ON(leaf->map_token);
2156 for (i = slot; i < nritems; i++) {
2157 u32 ioff;
2158
2159 item = btrfs_item_nr(leaf, i);
2160 if (!leaf->map_token) {
2161 map_extent_buffer(leaf, (unsigned long)item,
2162 sizeof(struct btrfs_item),
2163 &leaf->map_token, &leaf->kaddr,
2164 &leaf->map_start, &leaf->map_len,
2165 KM_USER1);
2166 }
2167
2168 ioff = btrfs_item_offset(leaf, item);
2169 btrfs_set_item_offset(leaf, item, ioff - data_size);
2170 }
2171 if (leaf->map_token) {
2172 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2173 leaf->map_token = NULL;
2174 }
2175
2176 /* shift the items */
2177 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2178 btrfs_item_nr_offset(slot),
2179 (nritems - slot) * sizeof(struct btrfs_item));
2180
2181 /* shift the data */
2182 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2183 data_end - data_size, btrfs_leaf_data(leaf) +
2184 data_end, old_data - data_end);
2185 data_end = old_data;
2186 }
2187
2188 /* setup the item for the new data */
2189 btrfs_set_item_key(leaf, &disk_key, slot);
2190 item = btrfs_item_nr(leaf, slot);
2191 btrfs_set_item_offset(leaf, item, data_end - data_size);
2192 btrfs_set_item_size(leaf, item, data_size);
2193 btrfs_set_header_nritems(leaf, nritems + 1);
2194 btrfs_mark_buffer_dirty(leaf);
2195
2196 ret = 0;
2197 if (slot == 0)
2198 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2199
2200 if (btrfs_leaf_free_space(root, leaf) < 0) {
2201 btrfs_print_leaf(root, leaf);
2202 BUG();
2203 }
2204 out:
2205 return ret;
2206 }
2207
2208 /*
2209 * Given a key and some data, insert an item into the tree.
2210 * This does all the path init required, making room in the tree if needed.
2211 */
2212 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2213 *root, struct btrfs_key *cpu_key, void *data, u32
2214 data_size)
2215 {
2216 int ret = 0;
2217 struct btrfs_path *path;
2218 struct extent_buffer *leaf;
2219 unsigned long ptr;
2220
2221 path = btrfs_alloc_path();
2222 BUG_ON(!path);
2223 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2224 if (!ret) {
2225 leaf = path->nodes[0];
2226 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2227 write_extent_buffer(leaf, data, ptr, data_size);
2228 btrfs_mark_buffer_dirty(leaf);
2229 }
2230 btrfs_free_path(path);
2231 return ret;
2232 }
2233
2234 /*
2235 * delete the pointer from a given node.
2236 *
2237 * If the delete empties a node, the node is removed from the tree,
2238 * continuing all the way the root if required. The root is converted into
2239 * a leaf if all the nodes are emptied.
2240 */
2241 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2242 struct btrfs_path *path, int level, int slot)
2243 {
2244 struct extent_buffer *parent = path->nodes[level];
2245 u32 nritems;
2246 int ret = 0;
2247 int wret;
2248
2249 nritems = btrfs_header_nritems(parent);
2250 if (slot != nritems -1) {
2251 memmove_extent_buffer(parent,
2252 btrfs_node_key_ptr_offset(slot),
2253 btrfs_node_key_ptr_offset(slot + 1),
2254 sizeof(struct btrfs_key_ptr) *
2255 (nritems - slot - 1));
2256 }
2257 nritems--;
2258 btrfs_set_header_nritems(parent, nritems);
2259 if (nritems == 0 && parent == root->node) {
2260 BUG_ON(btrfs_header_level(root->node) != 1);
2261 /* just turn the root into a leaf and break */
2262 btrfs_set_header_level(root->node, 0);
2263 } else if (slot == 0) {
2264 struct btrfs_disk_key disk_key;
2265
2266 btrfs_node_key(parent, &disk_key, 0);
2267 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2268 if (wret)
2269 ret = wret;
2270 }
2271 btrfs_mark_buffer_dirty(parent);
2272 return ret;
2273 }
2274
2275 /*
2276 * delete the item at the leaf level in path. If that empties
2277 * the leaf, remove it from the tree
 *
 * The item removed is the one at path->slots[0] of path->nodes[0].
 * Its data bytes are squeezed out of the leaf's data area, the offsets of
 * the items after it are raised by the deleted size, and the item headers
 * are shifted down one slot.
 *
 * Afterwards one of three things happens:
 *  - the leaf is empty: a root leaf just has its header level set to 0,
 *    any other leaf is unhooked from its parent with del_ptr() and its
 *    extent is passed to btrfs_free_extent();
 *  - the leaf uses less than a third of BTRFS_LEAF_DATA_SIZE(root): its
 *    items are offered to the neighbours via push_leaf_left() and
 *    push_leaf_right(), and the leaf is deleted if that drains it;
 *  - otherwise the leaf is simply marked dirty.
 *
 * Returns 0, or the most recent non-zero result recorded from a helper.
 * -ENOSPC and positive results from the push helpers are not recorded.
2278 */
2279 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2280 struct btrfs_path *path)
2281 {
2282 int slot;
2283 struct extent_buffer *leaf;
2284 struct btrfs_item *item;
2285 int doff;
2286 int dsize;
2287 int ret = 0;
2288 int wret;
2289 u32 nritems;
2290
2291 leaf = path->nodes[0];
2292 slot = path->slots[0];
/* offset and size of the data belonging to the item being deleted */
2293 doff = btrfs_item_offset_nr(leaf, slot);
2294 dsize = btrfs_item_size_nr(leaf, slot);
2295 nritems = btrfs_header_nritems(leaf);
2296
/* nothing has to move when the deleted item is the last one in the leaf */
2297 if (slot != nritems - 1) {
2298 int i;
2299 int data_end = leaf_data_end(root, leaf);
2300
/* slide the bytes in [data_end, doff) up by dsize, over the deleted data */
2301 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2302 data_end + dsize,
2303 btrfs_leaf_data(leaf) + data_end,
2304 doff - data_end);
2305
/* the data of every item after slot now sits dsize bytes higher */
2306 for (i = slot + 1; i < nritems; i++) {
2307 u32 ioff;
2308
2309 item = btrfs_item_nr(leaf, i);
/*
 * map only while no mapping token is stored on the leaf; presumably the
 * item accessors below reuse the stored mapping -- confirm in the
 * extent buffer code
 */
2310 if (!leaf->map_token) {
2311 map_extent_buffer(leaf, (unsigned long)item,
2312 sizeof(struct btrfs_item),
2313 &leaf->map_token, &leaf->kaddr,
2314 &leaf->map_start, &leaf->map_len,
2315 KM_USER1);
2316 }
2317 ioff = btrfs_item_offset(leaf, item);
2318 btrfs_set_item_offset(leaf, item, ioff + dsize);
2319 }
2320
/* drop the mapping taken in the loop above and forget its token */
2321 if (leaf->map_token) {
2322 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2323 leaf->map_token = NULL;
2324 }
2325
/* close the hole in the item header array left by the deleted slot */
2326 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2327 btrfs_item_nr_offset(slot + 1),
2328 sizeof(struct btrfs_item) *
2329 (nritems - slot - 1));
2330 }
2331 btrfs_set_header_nritems(leaf, nritems - 1);
2332 nritems--;
2333
2334 /* delete the leaf if we've emptied it */
2335 if (nritems == 0) {
2336 if (leaf == root->node) {
/*
 * NOTE(review): the empty root leaf is kept; this branch does not call
 * btrfs_mark_buffer_dirty() itself -- confirm that is intended
 */
2337 btrfs_set_header_level(leaf, 0);
2338 } else {
2339 clean_tree_block(trans, root, leaf);
2340 wait_on_tree_block_writeback(root, leaf);
/* remove the parent's pointer to this leaf (level 1, current parent slot) */
2341 wret = del_ptr(trans, root, path, 1, path->slots[1]);
2342 if (wret)
2343 ret = wret;
2344 wret = btrfs_free_extent(trans, root,
2345 leaf->start, leaf->len, 1);
2346 if (wret)
2347 ret = wret;
2348 }
2349 } else {
2350 int used = leaf_space_used(leaf, 0, nritems);
2351 if (slot == 0) {
2352 struct btrfs_disk_key disk_key;
2353
/* slot 0 holds a different item now; hand its key to fixup_low_keys() */
2354 btrfs_item_key(leaf, &disk_key, 0);
2355 wret = fixup_low_keys(trans, root, path,
2356 &disk_key, 1);
2357 if (wret)
2358 ret = wret;
2359 }
2360
2361 /* delete the leaf if it is mostly empty */
2362 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
2363 /* push_leaf_left fixes the path.
2364 * make sure the path still points to our leaf
2365 * for possible call to del_ptr below
2366 */
2367 slot = path->slots[1];
/*
 * extra reference on leaf, released by free_extent_buffer() in both
 * branches below; presumably keeps leaf usable if the push helpers
 * move the path off it -- confirm
 */
2368 extent_buffer_get(leaf);
2369
2370 wret = push_leaf_left(trans, root, path, 1);
2371 if (wret < 0 && wret != -ENOSPC)
2372 ret = wret;
2373
/* only try the right neighbour if the path is still on a non-empty leaf */
2374 if (path->nodes[0] == leaf &&
2375 btrfs_header_nritems(leaf)) {
2376 wret = push_leaf_right(trans, root, path, 1);
2377 if (wret < 0 && wret != -ENOSPC)
2378 ret = wret;
2379 }
2380
2381 if (btrfs_header_nritems(leaf) == 0) {
/* copied out now so nothing reads leaf after free_extent_buffer(leaf) */
2382 u64 bytenr = leaf->start;
2383 u32 blocksize = leaf->len;
2384
2385 clean_tree_block(trans, root, leaf);
2386 wait_on_tree_block_writeback(root, leaf);
2387
/* slot is the parent slot saved before the push helpers ran */
2388 wret = del_ptr(trans, root, path, 1, slot);
2389 if (wret)
2390 ret = wret;
2391
2392 free_extent_buffer(leaf);
2393 wret = btrfs_free_extent(trans, root, bytenr,
2394 blocksize, 1);
2395 if (wret)
2396 ret = wret;
2397 } else {
2398 btrfs_mark_buffer_dirty(leaf);
2399 free_extent_buffer(leaf);
2400 }
2401 } else {
2402 btrfs_mark_buffer_dirty(leaf);
2403 }
2404 }
2405 return ret;
2406 }
2407
2408 /*
2409 * walk up the tree as far as required to find the next leaf.
2410 * returns 0 if it found something or 1 if there are no greater leaves.
2411 * returns < 0 on io errors.
2412 */
2413 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2414 {
2415 int slot;
2416 int level = 1;
2417 u64 bytenr;
2418 struct extent_buffer *c;
2419 struct extent_buffer *next = NULL;
2420
2421 while(level < BTRFS_MAX_LEVEL) {
2422 if (!path->nodes[level])
2423 return 1;
2424
2425 slot = path->slots[level] + 1;
2426 c = path->nodes[level];
2427 if (slot >= btrfs_header_nritems(c)) {
2428 level++;
2429 continue;
2430 }
2431
2432 bytenr = btrfs_node_blockptr(c, slot);
2433 if (next)
2434 free_extent_buffer(next);
2435
2436 if (path->reada)
2437 reada_for_search(root, path, level, slot);
2438
2439 next = read_tree_block(root, bytenr,
2440 btrfs_level_size(root, level -1));
2441 break;
2442 }
2443 path->slots[level] = slot;
2444 while(1) {
2445 level--;
2446 c = path->nodes[level];
2447 free_extent_buffer(c);
2448 path->nodes[level] = next;
2449 path->slots[level] = 0;
2450 if (!level)
2451 break;
2452 if (path->reada)
2453 reada_for_search(root, path, level, 0);
2454 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2455 btrfs_level_size(root, level - 1));
2456 }
2457 return 0;
2458 }
This page took 0.083217 seconds and 6 git commands to generate.