Btrfs: patch queue: page_mkwrite
[deliverable/linux.git] / fs / btrfs / file.c
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19 #include <linux/module.h>
20 #include <linux/buffer_head.h>
21 #include <linux/fs.h>
22 #include <linux/pagemap.h>
23 #include <linux/highmem.h>
24 #include <linux/time.h>
25 #include <linux/init.h>
26 #include <linux/string.h>
27 #include <linux/smp_lock.h>
28 #include <linux/backing-dev.h>
29 #include <linux/mpage.h>
30 #include <linux/swap.h>
31 #include <linux/writeback.h>
32 #include <linux/statfs.h>
33 #include <linux/compat.h>
34 #include "ctree.h"
35 #include "disk-io.h"
36 #include "transaction.h"
37 #include "btrfs_inode.h"
38 #include "ioctl.h"
39 #include "print-tree.h"
40
41
42 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
43 struct page **prepared_pages,
44 const char __user * buf)
45 {
46 long page_fault = 0;
47 int i;
48 int offset = pos & (PAGE_CACHE_SIZE - 1);
49
50 for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
51 size_t count = min_t(size_t,
52 PAGE_CACHE_SIZE - offset, write_bytes);
53 struct page *page = prepared_pages[i];
54 fault_in_pages_readable(buf, count);
55
56 /* Copy data from userspace to the current page */
57 kmap(page);
58 page_fault = __copy_from_user(page_address(page) + offset,
59 buf, count);
60 /* Flush processor's dcache for this page */
61 flush_dcache_page(page);
62 kunmap(page);
63 buf += count;
64 write_bytes -= count;
65
66 if (page_fault)
67 break;
68 }
69 return page_fault ? -EFAULT : 0;
70 }
71
72 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
73 {
74 size_t i;
75 for (i = 0; i < num_pages; i++) {
76 if (!pages[i])
77 break;
78 unlock_page(pages[i]);
79 mark_page_accessed(pages[i]);
80 page_cache_release(pages[i]);
81 }
82 }
83
84 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
85 struct btrfs_root *root,
86 struct file *file,
87 struct page **pages,
88 size_t num_pages,
89 loff_t pos,
90 size_t write_bytes)
91 {
92 int i;
93 int offset;
94 int err = 0;
95 int ret;
96 int this_write;
97 struct inode *inode = file->f_path.dentry->d_inode;
98 struct buffer_head *bh;
99 struct btrfs_file_extent_item *ei;
100
101 for (i = 0; i < num_pages; i++) {
102 offset = pos & (PAGE_CACHE_SIZE -1);
103 this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
104 /* FIXME, one block at a time */
105
106 bh = page_buffers(pages[i]);
107
108 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
109 struct btrfs_key key;
110 struct btrfs_path *path;
111 char *ptr, *kaddr;
112 u32 datasize;
113
114 mutex_lock(&root->fs_info->fs_mutex);
115 trans = btrfs_start_transaction(root, 1);
116 btrfs_set_trans_block_group(trans, inode);
117
118 /* create an inline extent, and copy the data in */
119 path = btrfs_alloc_path();
120 BUG_ON(!path);
121 key.objectid = inode->i_ino;
122 key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
123 key.flags = 0;
124 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
125 BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
126 datasize = offset +
127 btrfs_file_extent_calc_inline_size(write_bytes);
128
129 ret = btrfs_insert_empty_item(trans, root, path, &key,
130 datasize);
131 BUG_ON(ret);
132 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
133 path->slots[0], struct btrfs_file_extent_item);
134 btrfs_set_file_extent_generation(ei, trans->transid);
135 btrfs_set_file_extent_type(ei,
136 BTRFS_FILE_EXTENT_INLINE);
137 ptr = btrfs_file_extent_inline_start(ei);
138
139 kaddr = kmap_atomic(bh->b_page, KM_USER0);
140 btrfs_memcpy(root, path->nodes[0]->b_data,
141 ptr, kaddr + bh_offset(bh),
142 offset + write_bytes);
143 kunmap_atomic(kaddr, KM_USER0);
144
145 mark_buffer_dirty(path->nodes[0]);
146 btrfs_free_path(path);
147 ret = btrfs_end_transaction(trans, root);
148 BUG_ON(ret);
149 mutex_unlock(&root->fs_info->fs_mutex);
150 }
151
152 ret = btrfs_commit_write(file, pages[i], offset,
153 offset + this_write);
154 pos += this_write;
155 if (ret) {
156 err = ret;
157 goto failed;
158 }
159 WARN_ON(this_write > write_bytes);
160 write_bytes -= this_write;
161 }
162 failed:
163 return err;
164 }
165
166 /*
167 * this is very complex, but the basic idea is to drop all extents
168 * in the range start - end. hint_block is filled in with a block number
169 * that would be a good hint to the block allocator for this file.
170 *
171 * If an extent intersects the range but is not entirely inside the range
172 * it is either truncated or split. Anything entirely inside the range
173 * is deleted from the tree.
174 */
175 int btrfs_drop_extents(struct btrfs_trans_handle *trans,
176 struct btrfs_root *root, struct inode *inode,
177 u64 start, u64 end, u64 *hint_block)
178 {
179 int ret;
180 struct btrfs_key key;
181 struct btrfs_leaf *leaf;
182 int slot;
183 struct btrfs_file_extent_item *extent;
184 u64 extent_end = 0;
185 int keep;
186 struct btrfs_file_extent_item old;
187 struct btrfs_path *path;
188 u64 search_start = start;
189 int bookend;
190 int found_type;
191 int found_extent;
192 int found_inline;
193
194 path = btrfs_alloc_path();
195 if (!path)
196 return -ENOMEM;
197 while(1) {
198 btrfs_release_path(root, path);
199 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
200 search_start, -1);
201 if (ret < 0)
202 goto out;
203 if (ret > 0) {
204 if (path->slots[0] == 0) {
205 ret = 0;
206 goto out;
207 }
208 path->slots[0]--;
209 }
210 keep = 0;
211 bookend = 0;
212 found_extent = 0;
213 found_inline = 0;
214 extent = NULL;
215 leaf = btrfs_buffer_leaf(path->nodes[0]);
216 slot = path->slots[0];
217 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
218 if (key.offset >= end || key.objectid != inode->i_ino) {
219 ret = 0;
220 goto out;
221 }
222 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
223 ret = 0;
224 goto out;
225 }
226 extent = btrfs_item_ptr(leaf, slot,
227 struct btrfs_file_extent_item);
228 found_type = btrfs_file_extent_type(extent);
229 if (found_type == BTRFS_FILE_EXTENT_REG) {
230 extent_end = key.offset +
231 (btrfs_file_extent_num_blocks(extent) <<
232 inode->i_blkbits);
233 found_extent = 1;
234 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
235 found_inline = 1;
236 extent_end = key.offset +
237 btrfs_file_extent_inline_len(leaf->items + slot);
238 }
239
240 /* we found nothing we can drop */
241 if (!found_extent && !found_inline) {
242 ret = 0;
243 goto out;
244 }
245
246 /* we found nothing inside the range */
247 if (search_start >= extent_end) {
248 ret = 0;
249 goto out;
250 }
251
252 /* FIXME, there's only one inline extent allowed right now */
253 if (found_inline) {
254 u64 mask = root->blocksize - 1;
255 search_start = (extent_end + mask) & ~mask;
256 } else
257 search_start = extent_end;
258
259 if (end < extent_end && end >= key.offset) {
260 if (found_extent) {
261 u64 disk_blocknr =
262 btrfs_file_extent_disk_blocknr(extent);
263 u64 disk_num_blocks =
264 btrfs_file_extent_disk_num_blocks(extent);
265 memcpy(&old, extent, sizeof(old));
266 if (disk_blocknr != 0) {
267 ret = btrfs_inc_extent_ref(trans, root,
268 disk_blocknr, disk_num_blocks);
269 BUG_ON(ret);
270 }
271 }
272 WARN_ON(found_inline);
273 bookend = 1;
274 }
275
276 /* truncate existing extent */
277 if (start > key.offset) {
278 u64 new_num;
279 u64 old_num;
280 keep = 1;
281 WARN_ON(start & (root->blocksize - 1));
282 if (found_extent) {
283 new_num = (start - key.offset) >>
284 inode->i_blkbits;
285 old_num = btrfs_file_extent_num_blocks(extent);
286 *hint_block =
287 btrfs_file_extent_disk_blocknr(extent);
288 if (btrfs_file_extent_disk_blocknr(extent)) {
289 inode->i_blocks -=
290 (old_num - new_num) << 3;
291 }
292 btrfs_set_file_extent_num_blocks(extent,
293 new_num);
294 mark_buffer_dirty(path->nodes[0]);
295 } else {
296 WARN_ON(1);
297 }
298 }
299 /* delete the entire extent */
300 if (!keep) {
301 u64 disk_blocknr = 0;
302 u64 disk_num_blocks = 0;
303 u64 extent_num_blocks = 0;
304 if (found_extent) {
305 disk_blocknr =
306 btrfs_file_extent_disk_blocknr(extent);
307 disk_num_blocks =
308 btrfs_file_extent_disk_num_blocks(extent);
309 extent_num_blocks =
310 btrfs_file_extent_num_blocks(extent);
311 *hint_block =
312 btrfs_file_extent_disk_blocknr(extent);
313 }
314 ret = btrfs_del_item(trans, root, path);
315 BUG_ON(ret);
316 btrfs_release_path(root, path);
317 extent = NULL;
318 if (found_extent && disk_blocknr != 0) {
319 inode->i_blocks -= extent_num_blocks << 3;
320 ret = btrfs_free_extent(trans, root,
321 disk_blocknr,
322 disk_num_blocks, 0);
323 }
324
325 BUG_ON(ret);
326 if (!bookend && search_start >= end) {
327 ret = 0;
328 goto out;
329 }
330 if (!bookend)
331 continue;
332 }
333 /* create bookend, splitting the extent in two */
334 if (bookend && found_extent) {
335 struct btrfs_key ins;
336 ins.objectid = inode->i_ino;
337 ins.offset = end;
338 ins.flags = 0;
339 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
340
341 btrfs_release_path(root, path);
342 ret = btrfs_insert_empty_item(trans, root, path, &ins,
343 sizeof(*extent));
344 BUG_ON(ret);
345 extent = btrfs_item_ptr(
346 btrfs_buffer_leaf(path->nodes[0]),
347 path->slots[0],
348 struct btrfs_file_extent_item);
349 btrfs_set_file_extent_disk_blocknr(extent,
350 btrfs_file_extent_disk_blocknr(&old));
351 btrfs_set_file_extent_disk_num_blocks(extent,
352 btrfs_file_extent_disk_num_blocks(&old));
353
354 btrfs_set_file_extent_offset(extent,
355 btrfs_file_extent_offset(&old) +
356 ((end - key.offset) >> inode->i_blkbits));
357 WARN_ON(btrfs_file_extent_num_blocks(&old) <
358 (extent_end - end) >> inode->i_blkbits);
359 btrfs_set_file_extent_num_blocks(extent,
360 (extent_end - end) >> inode->i_blkbits);
361
362 btrfs_set_file_extent_type(extent,
363 BTRFS_FILE_EXTENT_REG);
364 btrfs_set_file_extent_generation(extent,
365 btrfs_file_extent_generation(&old));
366 btrfs_mark_buffer_dirty(path->nodes[0]);
367 if (btrfs_file_extent_disk_blocknr(&old) != 0) {
368 inode->i_blocks +=
369 btrfs_file_extent_num_blocks(extent) << 3;
370 }
371 ret = 0;
372 goto out;
373 }
374 }
375 out:
376 btrfs_free_path(path);
377 return ret;
378 }
379
380 /*
381 * this gets pages into the page cache and locks them down
382 */
383 static int prepare_pages(struct btrfs_root *root,
384 struct file *file,
385 struct page **pages,
386 size_t num_pages,
387 loff_t pos,
388 unsigned long first_index,
389 unsigned long last_index,
390 size_t write_bytes,
391 u64 alloc_extent_start)
392 {
393 int i;
394 unsigned long index = pos >> PAGE_CACHE_SHIFT;
395 struct inode *inode = file->f_path.dentry->d_inode;
396 int offset;
397 int err = 0;
398 int this_write;
399 struct buffer_head *bh;
400 struct buffer_head *head;
401 loff_t isize = i_size_read(inode);
402
403 memset(pages, 0, num_pages * sizeof(struct page *));
404
405 for (i = 0; i < num_pages; i++) {
406 pages[i] = grab_cache_page(inode->i_mapping, index + i);
407 if (!pages[i]) {
408 err = -ENOMEM;
409 goto failed_release;
410 }
411 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
412 wait_on_page_writeback(pages[i]);
413 offset = pos & (PAGE_CACHE_SIZE -1);
414 this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
415 if (!page_has_buffers(pages[i])) {
416 create_empty_buffers(pages[i],
417 root->fs_info->sb->s_blocksize,
418 (1 << BH_Uptodate));
419 }
420 head = page_buffers(pages[i]);
421 bh = head;
422 do {
423 err = btrfs_map_bh_to_logical(root, bh,
424 alloc_extent_start);
425 BUG_ON(err);
426 if (err)
427 goto failed_truncate;
428 bh = bh->b_this_page;
429 if (alloc_extent_start)
430 alloc_extent_start++;
431 } while (bh != head);
432 pos += this_write;
433 WARN_ON(this_write > write_bytes);
434 write_bytes -= this_write;
435 }
436 return 0;
437
438 failed_release:
439 btrfs_drop_pages(pages, num_pages);
440 return err;
441
442 failed_truncate:
443 btrfs_drop_pages(pages, num_pages);
444 if (pos > isize)
445 vmtruncate(inode, isize);
446 return err;
447 }
448
449 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
450 size_t count, loff_t *ppos)
451 {
452 loff_t pos;
453 size_t num_written = 0;
454 int err = 0;
455 int ret = 0;
456 struct inode *inode = file->f_path.dentry->d_inode;
457 struct btrfs_root *root = BTRFS_I(inode)->root;
458 struct page *pages[8];
459 struct page *pinned[2];
460 unsigned long first_index;
461 unsigned long last_index;
462 u64 start_pos;
463 u64 num_blocks;
464 u64 alloc_extent_start;
465 u64 hint_block;
466 struct btrfs_trans_handle *trans;
467 struct btrfs_key ins;
468 pinned[0] = NULL;
469 pinned[1] = NULL;
470 if (file->f_flags & O_DIRECT)
471 return -EINVAL;
472 pos = *ppos;
473 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
474 current->backing_dev_info = inode->i_mapping->backing_dev_info;
475 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
476 if (err)
477 goto out;
478 if (count == 0)
479 goto out;
480 err = remove_suid(file->f_path.dentry);
481 if (err)
482 goto out;
483 file_update_time(file);
484
485 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
486 num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
487 inode->i_blkbits;
488
489 mutex_lock(&inode->i_mutex);
490 first_index = pos >> PAGE_CACHE_SHIFT;
491 last_index = (pos + count) >> PAGE_CACHE_SHIFT;
492
493 /*
494 * there are lots of better ways to do this, but this code
495 * makes sure the first and last page in the file range are
496 * up to date and ready for cow
497 */
498 if ((pos & (PAGE_CACHE_SIZE - 1))) {
499 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
500 if (!PageUptodate(pinned[0])) {
501 ret = btrfs_readpage(NULL, pinned[0]);
502 BUG_ON(ret);
503 wait_on_page_locked(pinned[0]);
504 } else {
505 unlock_page(pinned[0]);
506 }
507 }
508 if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
509 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
510 if (!PageUptodate(pinned[1])) {
511 ret = btrfs_readpage(NULL, pinned[1]);
512 BUG_ON(ret);
513 wait_on_page_locked(pinned[1]);
514 } else {
515 unlock_page(pinned[1]);
516 }
517 }
518
519 mutex_lock(&root->fs_info->fs_mutex);
520 trans = btrfs_start_transaction(root, 1);
521 if (!trans) {
522 err = -ENOMEM;
523 mutex_unlock(&root->fs_info->fs_mutex);
524 goto out_unlock;
525 }
526 btrfs_set_trans_block_group(trans, inode);
527 /* FIXME blocksize != 4096 */
528 inode->i_blocks += num_blocks << 3;
529 hint_block = 0;
530
531 /* FIXME...EIEIO, ENOSPC and more */
532
533 /* step one, delete the existing extents in this range */
534 if (start_pos < inode->i_size) {
535 /* FIXME blocksize != pagesize */
536 ret = btrfs_drop_extents(trans, root, inode,
537 start_pos,
538 (pos + count + root->blocksize -1) &
539 ~((u64)root->blocksize - 1),
540 &hint_block);
541 BUG_ON(ret);
542 }
543
544 /* insert any holes we need to create */
545 if (inode->i_size < start_pos) {
546 u64 last_pos_in_file;
547 u64 hole_size;
548 u64 mask = root->blocksize - 1;
549 last_pos_in_file = (inode->i_size + mask) & ~mask;
550 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
551 hole_size >>= inode->i_blkbits;
552 if (last_pos_in_file < start_pos) {
553 ret = btrfs_insert_file_extent(trans, root,
554 inode->i_ino,
555 last_pos_in_file,
556 0, 0, hole_size);
557 }
558 BUG_ON(ret);
559 }
560
561 /*
562 * either allocate an extent for the new bytes or setup the key
563 * to show we are doing inline data in the extent
564 */
565 if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
566 pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
567 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
568 num_blocks, hint_block, (u64)-1,
569 &ins, 1);
570 BUG_ON(ret);
571 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
572 start_pos, ins.objectid, ins.offset,
573 ins.offset);
574 BUG_ON(ret);
575 } else {
576 ins.offset = 0;
577 ins.objectid = 0;
578 }
579 BUG_ON(ret);
580 alloc_extent_start = ins.objectid;
581 ret = btrfs_end_transaction(trans, root);
582 mutex_unlock(&root->fs_info->fs_mutex);
583
584 while(count > 0) {
585 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
586 size_t write_bytes = min(count,
587 (size_t)PAGE_CACHE_SIZE - offset);
588 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
589 PAGE_CACHE_SHIFT;
590
591 memset(pages, 0, sizeof(pages));
592 ret = prepare_pages(root, file, pages, num_pages,
593 pos, first_index, last_index,
594 write_bytes, alloc_extent_start);
595 BUG_ON(ret);
596
597 /* FIXME blocks != pagesize */
598 if (alloc_extent_start)
599 alloc_extent_start += num_pages;
600 ret = btrfs_copy_from_user(pos, num_pages,
601 write_bytes, pages, buf);
602 BUG_ON(ret);
603
604 ret = dirty_and_release_pages(NULL, root, file, pages,
605 num_pages, pos, write_bytes);
606 BUG_ON(ret);
607 btrfs_drop_pages(pages, num_pages);
608
609 buf += write_bytes;
610 count -= write_bytes;
611 pos += write_bytes;
612 num_written += write_bytes;
613
614 balance_dirty_pages_ratelimited(inode->i_mapping);
615 btrfs_btree_balance_dirty(root);
616 cond_resched();
617 }
618 out_unlock:
619 mutex_unlock(&inode->i_mutex);
620 out:
621 if (pinned[0])
622 page_cache_release(pinned[0]);
623 if (pinned[1])
624 page_cache_release(pinned[1]);
625 *ppos = pos;
626 current->backing_dev_info = NULL;
627 mark_inode_dirty(inode);
628 return num_written ? num_written : err;
629 }
630
631 static int btrfs_sync_file(struct file *file,
632 struct dentry *dentry, int datasync)
633 {
634 struct inode *inode = dentry->d_inode;
635 struct btrfs_root *root = BTRFS_I(inode)->root;
636 int ret;
637 struct btrfs_trans_handle *trans;
638
639 /*
640 * FIXME, use inode generation number to check if we can skip the
641 * commit
642 */
643 mutex_lock(&root->fs_info->fs_mutex);
644 trans = btrfs_start_transaction(root, 1);
645 if (!trans) {
646 ret = -ENOMEM;
647 goto out;
648 }
649 ret = btrfs_commit_transaction(trans, root);
650 mutex_unlock(&root->fs_info->fs_mutex);
651 out:
652 return ret > 0 ? EIO : ret;
653 }
654
655 static struct vm_operations_struct btrfs_file_vm_ops = {
656 .nopage = filemap_nopage,
657 .populate = filemap_populate,
658 .page_mkwrite = btrfs_page_mkwrite,
659 };
660
661 static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
662 {
663 vma->vm_ops = &btrfs_file_vm_ops;
664 file_accessed(filp);
665 return 0;
666 }
667
668 struct file_operations btrfs_file_operations = {
669 .llseek = generic_file_llseek,
670 .read = do_sync_read,
671 .aio_read = generic_file_aio_read,
672 .write = btrfs_file_write,
673 .mmap = btrfs_file_mmap,
674 .open = generic_file_open,
675 .ioctl = btrfs_ioctl,
676 .fsync = btrfs_sync_file,
677 #ifdef CONFIG_COMPAT
678 .compat_ioctl = btrfs_compat_ioctl,
679 #endif
680 };
681
This page took 0.045805 seconds and 6 git commands to generate.