}
/*
- * Default synchronous end-of-IO handler.. Just mark it up-to-date and
- * unlock the buffer. This is what ll_rw_block uses too.
+ * End-of-IO handler helper function which does not touch the bh after
+ * unlocking it.
+ * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
+ * a race there is benign: unlock_buffer() only use the bh's address for
+ * hashing after unlocking the buffer, so it doesn't actually touch the bh
+ * itself.
*/
-void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
+static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
if (uptodate) {
set_buffer_uptodate(bh);
clear_buffer_uptodate(bh);
}
unlock_buffer(bh);
+}
+
+/*
+ * Default synchronous end-of-IO handler.. Just mark it up-to-date and
+ * unlock the buffer. This is what ll_rw_block uses too.
+ */
+void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
+{
+ __end_buffer_read_notouch(bh, uptodate);
put_bh(bh);
}
for_each_online_pgdat(pgdat) {
zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
if (*zones)
- try_to_free_pages(zones, GFP_NOFS);
+ try_to_free_pages(zones, 0, GFP_NOFS);
}
}
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);
+/*
+ * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
+ * dirty.
+ *
+ * If warn is true, then emit a warning if the page is not uptodate and has
+ * not been truncated.
+ */
+static int __set_page_dirty(struct page *page,
+ struct address_space *mapping, int warn)
+{
+ if (unlikely(!mapping))
+ return !TestSetPageDirty(page);
+
+ if (TestSetPageDirty(page))
+ return 0;
+
+ write_lock_irq(&mapping->tree_lock);
+ if (page->mapping) { /* Race with truncate? */
+ WARN_ON_ONCE(warn && !PageUptodate(page));
+
+ if (mapping_cap_account_dirty(mapping)) {
+ __inc_zone_page_state(page, NR_FILE_DIRTY);
+ task_io_account_write(PAGE_CACHE_SIZE);
+ }
+ radix_tree_tag_set(&mapping->page_tree,
+ page_index(page), PAGECACHE_TAG_DIRTY);
+ }
+ write_unlock_irq(&mapping->tree_lock);
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+ return 1;
+}
+
/*
* Add a page to the dirty page list.
*
*/
int __set_page_dirty_buffers(struct page *page)
{
- struct address_space * const mapping = page_mapping(page);
+ struct address_space *mapping = page_mapping(page);
if (unlikely(!mapping))
return !TestSetPageDirty(page);
}
spin_unlock(&mapping->private_lock);
- if (TestSetPageDirty(page))
- return 0;
-
- write_lock_irq(&mapping->tree_lock);
- if (page->mapping) { /* Race with truncate? */
- if (mapping_cap_account_dirty(mapping)) {
- __inc_zone_page_state(page, NR_FILE_DIRTY);
- task_io_account_write(PAGE_CACHE_SIZE);
- }
- radix_tree_tag_set(&mapping->page_tree,
- page_index(page), PAGECACHE_TAG_DIRTY);
- }
- write_unlock_irq(&mapping->tree_lock);
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- return 1;
+ return __set_page_dirty(page, mapping, 1);
}
EXPORT_SYMBOL(__set_page_dirty_buffers);
struct buffer_head *bh;
page = find_or_create_page(inode->i_mapping, index,
- mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+ (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
if (!page)
return NULL;
/*
* Create buffers for the specified block device block's page. If
* that page was dirty, the buffers are set dirty also.
- *
- * Except that's a bug. Attaching dirty buffers to a dirty
- * blockdev's page can result in filesystem corruption, because
- * some of those buffers may be aliases of filesystem data.
- * grow_dev_page() will go BUG() if this happens.
*/
static int
grow_buffers(struct block_device *bdev, sector_t block, int size)
*/
void fastcall mark_buffer_dirty(struct buffer_head *bh)
{
+ WARN_ON_ONCE(!buffer_uptodate(bh));
if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
- __set_page_dirty_nobuffers(bh->b_page);
+ __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
}
/*
unmap_underlying_metadata(bh->b_bdev,
bh->b_blocknr);
if (PageUptodate(page)) {
+ clear_buffer_new(bh);
set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
continue;
}
if (block_end > to || block_start < from) {
return 0;
}
+/*
+ * block_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF. Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int
+block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+ get_block_t get_block)
+{
+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ unsigned long end;
+ loff_t size;
+ int ret = -EINVAL;
+
+ lock_page(page);
+ size = i_size_read(inode);
+ if ((page->mapping != inode->i_mapping) ||
+ (page_offset(page) > size)) {
+ /* page got truncated out from underneath us */
+ goto out_unlock;
+ }
+
+ /* page is wholly or partially inside EOF */
+ if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
+ end = size & ~PAGE_CACHE_MASK;
+ else
+ end = PAGE_CACHE_SIZE;
+
+ ret = block_prepare_write(page, 0, end, get_block);
+ if (!ret)
+ ret = block_commit_write(page, 0, end);
+
+out_unlock:
+ unlock_page(page);
+ return ret;
+}
/*
* nobh_prepare_write()'s prereads are special: the buffer_heads are freed
* immediately, while under the page lock. So it needs a special end_io
* handler which does not touch the bh after unlocking it.
- *
- * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
- * a race there is benign: unlock_buffer() only use the bh's address for
- * hashing after unlocking the buffer, so it doesn't actually touch the bh
- * itself.
*/
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
- if (uptodate) {
- set_buffer_uptodate(bh);
- } else {
- /* This happens, due to failed READA attempts. */
- clear_buffer_uptodate(bh);
- }
- unlock_buffer(bh);
+ __end_buffer_read_notouch(bh, uptodate);
}
/*
struct inode *inode = page->mapping->host;
const unsigned blkbits = inode->i_blkbits;
const unsigned blocksize = 1 << blkbits;
- struct buffer_head map_bh;
- struct buffer_head *read_bh[MAX_BUF_PER_PAGE];
+ struct buffer_head *head, *bh;
unsigned block_in_page;
- unsigned block_start;
+ unsigned block_start, block_end;
sector_t block_in_file;
char *kaddr;
int nr_reads = 0;
- int i;
int ret = 0;
int is_mapped_to_disk = 1;
+ if (page_has_buffers(page))
+ return block_prepare_write(page, from, to, get_block);
+
if (PageMappedToDisk(page))
return 0;
+ /*
+ * Allocate buffers so that we can keep track of state, and potentially
+ * attach them to the page if an error occurs. In the common case of
+ * no error, they will just be freed again without ever being attached
+ * to the page (which is all OK, because we're under the page lock).
+ *
+ * Be careful: the buffer linked list is a NULL terminated one, rather
+ * than the circular one we're used to.
+ */
+ head = alloc_page_buffers(page, blocksize, 0);
+ if (!head)
+ return -ENOMEM;
+
block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
- map_bh.b_page = page;
/*
* We loop across all blocks in the page, whether or not they are
* part of the affected region. This is so we can discover if the
* page is fully mapped-to-disk.
*/
- for (block_start = 0, block_in_page = 0;
+ for (block_start = 0, block_in_page = 0, bh = head;
block_start < PAGE_CACHE_SIZE;
- block_in_page++, block_start += blocksize) {
- unsigned block_end = block_start + blocksize;
+ block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
int create;
- map_bh.b_state = 0;
+ block_end = block_start + blocksize;
+ bh->b_state = 0;
create = 1;
if (block_start >= to)
create = 0;
- map_bh.b_size = blocksize;
ret = get_block(inode, block_in_file + block_in_page,
- &map_bh, create);
+ bh, create);
if (ret)
goto failed;
- if (!buffer_mapped(&map_bh))
+ if (!buffer_mapped(bh))
is_mapped_to_disk = 0;
- if (buffer_new(&map_bh))
- unmap_underlying_metadata(map_bh.b_bdev,
- map_bh.b_blocknr);
- if (PageUptodate(page))
+ if (buffer_new(bh))
+ unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+ if (PageUptodate(page)) {
+ set_buffer_uptodate(bh);
continue;
- if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) {
+ }
+ if (buffer_new(bh) || !buffer_mapped(bh)) {
kaddr = kmap_atomic(page, KM_USER0);
if (block_start < from)
memset(kaddr+block_start, 0, from-block_start);
kunmap_atomic(kaddr, KM_USER0);
continue;
}
- if (buffer_uptodate(&map_bh))
+ if (buffer_uptodate(bh))
continue; /* reiserfs does this */
if (block_start < from || block_end > to) {
- struct buffer_head *bh = alloc_buffer_head(GFP_NOFS);
-
- if (!bh) {
- ret = -ENOMEM;
- goto failed;
- }
- bh->b_state = map_bh.b_state;
- atomic_set(&bh->b_count, 0);
- bh->b_this_page = NULL;
- bh->b_page = page;
- bh->b_blocknr = map_bh.b_blocknr;
- bh->b_size = blocksize;
- bh->b_data = (char *)(long)block_start;
- bh->b_bdev = map_bh.b_bdev;
- bh->b_private = NULL;
- read_bh[nr_reads++] = bh;
+ lock_buffer(bh);
+ bh->b_end_io = end_buffer_read_nobh;
+ submit_bh(READ, bh);
+ nr_reads++;
}
}
if (nr_reads) {
- struct buffer_head *bh;
-
/*
* The page is locked, so these buffers are protected from
* any VM or truncate activity. Hence we don't need to care
* for the buffer_head refcounts.
*/
- for (i = 0; i < nr_reads; i++) {
- bh = read_bh[i];
- lock_buffer(bh);
- bh->b_end_io = end_buffer_read_nobh;
- submit_bh(READ, bh);
- }
- for (i = 0; i < nr_reads; i++) {
- bh = read_bh[i];
+ for (bh = head; bh; bh = bh->b_this_page) {
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
ret = -EIO;
- free_buffer_head(bh);
- read_bh[i] = NULL;
}
if (ret)
goto failed;
if (is_mapped_to_disk)
SetPageMappedToDisk(page);
+ do {
+ bh = head;
+ head = head->b_this_page;
+ free_buffer_head(bh);
+ } while (head);
+
return 0;
failed:
- for (i = 0; i < nr_reads; i++) {
- if (read_bh[i])
- free_buffer_head(read_bh[i]);
- }
-
/*
- * Error recovery is pretty slack. Clear the page and mark it dirty
- * so we'll later zero out any blocks which _were_ allocated.
+ * Error recovery is a bit difficult. We need to zero out blocks that
+ * were newly allocated, and dirty them to ensure they get written out.
+ * Buffers need to be attached to the page at this point, otherwise
+ * the handling of potential IO errors during writeout would be hard
+ * (could try doing synchronous writeout, but what if that fails too?)
*/
- zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
- SetPageUptodate(page);
- set_page_dirty(page);
+ spin_lock(&page->mapping->private_lock);
+ bh = head;
+ block_start = 0;
+ do {
+ if (PageUptodate(page))
+ set_buffer_uptodate(bh);
+ if (PageDirty(page))
+ set_buffer_dirty(bh);
+
+ block_end = block_start+blocksize;
+ if (block_end <= from)
+ goto next;
+ if (block_start >= to)
+ goto next;
+
+ if (buffer_new(bh)) {
+ clear_buffer_new(bh);
+ if (!buffer_uptodate(bh)) {
+ zero_user_page(page, block_start, bh->b_size, KM_USER0);
+ set_buffer_uptodate(bh);
+ }
+ mark_buffer_dirty(bh);
+ }
+next:
+ block_start = block_end;
+ if (!bh->b_this_page)
+ bh->b_this_page = head;
+ bh = bh->b_this_page;
+ } while (bh != head);
+ attach_page_buffers(page, head);
+ spin_unlock(&page->mapping->private_lock);
+
return ret;
}
EXPORT_SYMBOL(nobh_prepare_write);
struct inode *inode = page->mapping->host;
loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+ if (page_has_buffers(page))
+ return generic_commit_write(file, page, from, to);
+
SetPageUptodate(page);
set_page_dirty(page);
if (pos > inode->i_size) {
return tmp.b_blocknr;
}
-static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
+static void end_bio_bh_io_sync(struct bio *bio, int err)
{
struct buffer_head *bh = bio->bi_private;
- if (bio->bi_size)
- return 1;
-
if (err == -EOPNOTSUPP) {
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
set_bit(BH_Eopnotsupp, &bh->b_state);
bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
bio_put(bio);
- return 0;
}
int submit_bh(int rw, struct buffer_head * bh)
EXPORT_SYMBOL(__wait_on_buffer);
EXPORT_SYMBOL(block_commit_write);
EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(block_page_mkwrite);
EXPORT_SYMBOL(block_read_full_page);
EXPORT_SYMBOL(block_sync_page);
EXPORT_SYMBOL(block_truncate_page);