xfs: global error sign conversion
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 75df77d09f757d4786c889679c35c5cac2ca00b3..11e9b4caa54f168f7e429f5d7e5a01302d026e1c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -240,7 +240,7 @@ xfs_end_io(
 
 done:
        if (error)
-               ioend->io_error = -error;
+               ioend->io_error = error;
        xfs_destroy_ioend(ioend);
 }
 
@@ -308,14 +308,14 @@ xfs_map_blocks(
        int                     nimaps = 1;
 
        if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
+               return -EIO;
 
        if (type == XFS_IO_UNWRITTEN)
                bmapi_flags |= XFS_BMAPI_IGSTATE;
 
        if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
                if (nonblocking)
-                       return -XFS_ERROR(EAGAIN);
+                       return -EAGAIN;
                xfs_ilock(ip, XFS_ILOCK_SHARED);
        }
 
@@ -332,14 +332,14 @@ xfs_map_blocks(
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
        if (error)
-               return -XFS_ERROR(error);
+               return error;
 
        if (type == XFS_IO_DELALLOC &&
            (!nimaps || isnullstartblock(imap->br_startblock))) {
                error = xfs_iomap_write_allocate(ip, offset, imap);
                if (!error)
                        trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
-               return -XFS_ERROR(error);
+               return error;
        }
 
 #ifdef DEBUG
@@ -502,7 +502,7 @@ xfs_submit_ioend(
                 * time.
                 */
                if (fail) {
-                       ioend->io_error = -fail;
+                       ioend->io_error = fail;
                        xfs_finish_ioend(ioend);
                        continue;
                }
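
As background for the sign flips in these hunks: XFS core functions historically returned positive error values that callers had to negate at every boundary to the VFS and block layers; this series converts the core to negative errno values throughout. A minimal sketch of the two conventions (function names here are illustrative, not from the patch):

        /* Old convention: positive error internally ... */
        static int old_core_op(void)
        {
                return EIO;
        }

        /* ... negated at each boundary to the VFS/block layer. */
        static int old_boundary(void)
        {
                int error = old_core_op();

                return error ? -error : 0;
        }

        /* New convention: negative errno everywhere, no conversion. */
        static int new_core_op(void)
        {
                return -EIO;
        }

Dropping the sign juggling removes the class of bugs where an error was negated twice or not at all, which is exactly what the ioend->io_error assignments above were prone to.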
@@ -975,14 +975,39 @@ xfs_vm_writepage(
         * Given that we do not allow direct reclaim to call us, we should
         * never be called while in a filesystem transaction.
         */
-       if (WARN_ON(current->flags & PF_FSTRANS))
+       if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
                goto redirty;
 
        /* Is this page beyond the end of the file? */
        offset = i_size_read(inode);
        end_index = offset >> PAGE_CACHE_SHIFT;
        last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-       if (page->index >= end_index) {
+
+       /*
+        * If the page index is less than end_index, adjust the end_offset
+        * to the highest offset that this page should represent.
+        * -----------------------------------------------------
+        * |                    file mapping           | <EOF> |
+        * -----------------------------------------------------
+        * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
+        * ^--------------------------------^----------|--------
+        * |     desired writeback range    |      see else    |
+        * ---------------------------------^------------------|
+        */
+       if (page->index < end_index)
+               end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
+       else {
+               /*
+                * Check whether the page to write out is beyond or straddles
+                * i_size or not.
+                * -------------------------------------------------------
+                * |            file mapping                    | <EOF>  |
+                * -------------------------------------------------------
+                * | Page ... | Page N-2 | Page N-1 |  Page N   | Beyond |
+                * ^--------------------------------^-----------|---------
+                * |                                |      Straddles     |
+                * ---------------------------------^-----------|--------|
+                */
                unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
 
                /*
@@ -990,24 +1015,36 @@ xfs_vm_writepage(
                 * truncate operation that is in progress. We must redirty the
                 * page so that reclaim stops reclaiming it. Otherwise
                 * xfs_vm_releasepage() is called on it and gets confused.
+                *
+                * Note that end_index is an unsigned long.  If the given
+                * offset is greater than 16TB on a 32-bit system, then
+                * checking whether a page is fully outside i_size via
+                * "if (page->index >= end_index + 1)" overflows, because
+                * "end_index + 1" evaluates to 0.  The page would then be
+                * redirtied and written out repeatedly, resulting in an
+                * infinite loop that hangs the user program performing
+                * the operation.  Instead, check whether the page to
+                * write is totally beyond i_size, or whether its offset
+                * is exactly at EOF.
                 */
-               if (page->index >= end_index + 1 || offset_into_page == 0)
+               if (page->index > end_index ||
+                   (page->index == end_index && offset_into_page == 0))
                        goto redirty;
 
                /*
                 * The page straddles i_size.  It must be zeroed out on each
                 * and every writepage invocation because it may be mmapped.
                 * "A file is mapped in multiples of the page size.  For a file
-                * that is not a multiple of the  page size, the remaining
+                * that is not a multiple of the page size, the remaining
                 * memory is zeroed when mapped, and writes to that region are
                 * not written out to the file."
                 */
                zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
+
+               /* Adjust the end_offset to the end of file */
+               end_offset = offset;
        }
 
-       end_offset = min_t(unsigned long long,
-                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-                       offset);
        len = 1 << inode->i_blkbits;
 
        bh = head = page_buffers(page);
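
The overflow described in the new comment is easy to demonstrate in userspace. A hedged model, not kernel code: pgoff_t on a 32-bit kernel is emulated with uint32_t, and PAGE_CACHE_SHIFT is assumed to be 12 (4k pages):

        #include <stdio.h>
        #include <stdint.h>

        int main(void)
        {
                /* A file size just under 16TB, held in a 64-bit loff_t. */
                uint64_t offset    = (1ULL << 44) - 1;
                /* page->index and end_index are 32-bit on a 32-bit kernel. */
                uint32_t end_index = offset >> 12;

                printf("end_index     = 0x%08x\n", end_index);      /* 0xffffffff */
                printf("end_index + 1 = 0x%08x\n", end_index + 1U); /* wraps to 0 */

                /*
                 * "page->index >= end_index + 1" is thus true for every page,
                 * so each page is redirtied forever.  The reworked test
                 * "page->index > end_index" needs no addition and cannot
                 * overflow.
                 */
                return 0;
        }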
@@ -1188,9 +1225,9 @@ xfs_vm_releasepage(
 
        xfs_count_page_state(page, &delalloc, &unwritten);
 
-       if (WARN_ON(delalloc))
+       if (WARN_ON_ONCE(delalloc))
                return 0;
-       if (WARN_ON(unwritten))
+       if (WARN_ON_ONCE(unwritten))
                return 0;
 
        return try_to_free_buffers(page);
@@ -1216,7 +1253,7 @@ __xfs_get_blocks(
        int                     new = 0;
 
        if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
+               return -EIO;
 
        offset = (xfs_off_t)iblock << inode->i_blkbits;
        ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
@@ -1265,7 +1302,7 @@ __xfs_get_blocks(
                        error = xfs_iomap_write_direct(ip, offset, size,
                                                       &imap, nimaps);
                        if (error)
-                               return -error;
+                               return error;
                        new = 1;
                } else {
                        /*
@@ -1344,6 +1381,14 @@ __xfs_get_blocks(
        /*
         * If this is O_DIRECT or the mpage code calling, tell them how large
         * the mapping is so that we can avoid repeated get_blocks calls.
+        *
+        * If the mapping spans EOF, then we have to break the mapping up as the
+        * mapping for blocks beyond EOF must be marked new so that sub block
+        * regions can be correctly zeroed. We can't do this for mappings within
+        * EOF unless the mapping was just allocated or is unwritten, otherwise
+        * the callers would overwrite existing data with zeros. Hence we have
+        * to split the mapping into a range up to and including EOF, and a
+        * second mapping for beyond EOF.
         */
        if (direct || size > (1 << inode->i_blkbits)) {
                xfs_off_t               mapping_size;
@@ -1354,6 +1399,12 @@ __xfs_get_blocks(
                ASSERT(mapping_size > 0);
                if (mapping_size > size)
                        mapping_size = size;
+               if (offset < i_size_read(inode) &&
+                   offset + mapping_size >= i_size_read(inode)) {
+                       /* limit mapping to block that spans EOF */
+                       mapping_size = roundup_64(i_size_read(inode) - offset,
+                                                 1 << inode->i_blkbits);
+               }
                if (mapping_size > LONG_MAX)
                        mapping_size = LONG_MAX;
 
@@ -1364,7 +1415,7 @@ __xfs_get_blocks(
 
 out_unlock:
        xfs_iunlock(ip, lockmode);
-       return -error;
+       return error;
 }
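
To make the EOF split concrete, a worked example with made-up numbers: 4k blocks (inode->i_blkbits == 12), an i_size of 10000 bytes, and a request to map 32768 bytes starting at offset 8192. The helper below is a userspace stand-in for the kernel's roundup_64():

        #include <stdio.h>

        /* Round x up to the next multiple of y, like roundup_64(). */
        static unsigned long long roundup64(unsigned long long x,
                                            unsigned long long y)
        {
                return ((x + y - 1) / y) * y;
        }

        int main(void)
        {
                unsigned long long isize        = 10000;  /* i_size_read() */
                unsigned long long offset       = 8192;   /* mapping start */
                unsigned long long mapping_size = 32768;  /* before clamping */

                if (offset < isize && offset + mapping_size >= isize)
                        mapping_size = roundup64(isize - offset, 4096);

                /*
                 * Prints 4096: the first mapping ends with the block that
                 * spans EOF, and a second get_blocks call maps the range
                 * beyond EOF so it can be marked new and safely zeroed.
                 */
                printf("mapping_size = %llu\n", mapping_size);
                return 0;
        }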
 
 int
@@ -1435,9 +1486,8 @@ STATIC ssize_t
 xfs_vm_direct_IO(
        int                     rw,
        struct kiocb            *iocb,
-       const struct iovec      *iov,
-       loff_t                  offset,
-       unsigned long           nr_segs)
+       struct iov_iter         *iter,
+       loff_t                  offset)
 {
        struct inode            *inode = iocb->ki_filp->f_mapping->host;
        struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
@@ -1445,7 +1495,7 @@ xfs_vm_direct_IO(
        ssize_t                 ret;
 
        if (rw & WRITE) {
-               size_t size = iov_length(iov, nr_segs);
+               size_t size = iov_iter_count(iter);
 
                /*
                 * We cannot preallocate a size update transaction here as we
@@ -1457,17 +1507,15 @@ xfs_vm_direct_IO(
                if (offset + size > XFS_I(inode)->i_d.di_size)
                        ioend->io_isdirect = 1;
 
-               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-                                           offset, nr_segs,
-                                           xfs_get_blocks_direct,
+               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+                                           offset, xfs_get_blocks_direct,
                                            xfs_end_io_direct_write, NULL,
                                            DIO_ASYNC_EXTEND);
                if (ret != -EIOCBQUEUED && iocb->private)
                        goto out_destroy_ioend;
        } else {
-               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-                                           offset, nr_segs,
-                                           xfs_get_blocks_direct,
+               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+                                           offset, xfs_get_blocks_direct,
                                            NULL, NULL, 0);
        }
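
For context on the interface change in this hunk, a hedged sketch of the caller side, approximating the 3.16-era kernel API: the (iov, nr_segs) pair is packaged into a struct iov_iter up front, and iov_iter_count() replaces iov_length():

        struct iov_iter iter;

        /* Bundle the iovec array and segment count into one iterator. */
        iov_iter_init(&iter, WRITE, iov, nr_segs,
                      iov_length(iov, nr_segs));

        /* The iterator carries the total byte count with it ... */
        size_t size = iov_iter_count(&iter);

        /* ... and is passed to ->direct_IO in place of (iov, nr_segs). */
        ret = mapping->a_ops->direct_IO(WRITE, iocb, &iter, offset);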
 
@@ -1566,6 +1614,16 @@ xfs_vm_write_failed(
 
                xfs_vm_kill_delalloc_range(inode, block_offset,
                                           block_offset + bh->b_size);
+
+               /*
+                * This buffer does not contain data anymore. Make sure anyone
+                * who finds it knows that for certain.
+                */
+               clear_buffer_delay(bh);
+               clear_buffer_uptodate(bh);
+               clear_buffer_mapped(bh);
+               clear_buffer_new(bh);
+               clear_buffer_dirty(bh);
        }
 
 }
@@ -1599,12 +1657,21 @@ xfs_vm_write_begin(
        status = __block_write_begin(page, pos, len, xfs_get_blocks);
        if (unlikely(status)) {
                struct inode    *inode = mapping->host;
+               size_t          isize = i_size_read(inode);
 
                xfs_vm_write_failed(inode, page, pos, len);
                unlock_page(page);
 
-               if (pos + len > i_size_read(inode))
-                       truncate_pagecache(inode, i_size_read(inode));
+               /*
+                * If the write is beyond EOF, we only want to kill blocks
+                * allocated in this write, not blocks that were previously
+                * written successfully.
+                */
+               if (pos + len > isize) {
+                       ssize_t start = max_t(ssize_t, pos, isize);
+
+                       truncate_pagecache_range(inode, start, pos + len);
+               }
 
                page_cache_release(page);
                page = NULL;
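
The effect of the ranged truncate can be shown with made-up numbers. A hedged userspace model: i_size is 4096, an earlier extending write already populated the page cache out to 16384, and this write at pos == 16384 for 8192 bytes fails:

        #include <stdio.h>

        int main(void)
        {
                long long isize = 4096;   /* i_size_read(inode) */
                long long pos   = 16384;  /* failing write's start */
                long long len   = 8192;

                if (pos + len > isize) {
                        /* Punch out only what this write instantiated. */
                        long long start = pos > isize ? pos : isize;

                        /*
                         * Models truncate_pagecache_range(inode, 16384,
                         * 24576).  The old truncate_pagecache(inode, 4096)
                         * would also have discarded the earlier write's
                         * still-valid pages between 4096 and 16384.
                         */
                        printf("truncate range [%lld, %lld)\n",
                               start, pos + len);
                }
                return 0;
        }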
@@ -1615,9 +1682,12 @@ xfs_vm_write_begin(
 }
 
 /*
- * On failure, we only need to kill delalloc blocks beyond EOF because they
- * will never be written. For blocks within EOF, generic_write_end() zeros them
- * so they are safe to leave alone and be written with all the other valid data.
+ * On failure, we only need to kill the delalloc blocks beyond EOF that belong
+ * to this specific write, because they will never be written. Previous writes
+ * beyond EOF where block allocation succeeded do not need to be trashed; only
+ * the new blocks from this write should be. For blocks within EOF,
+ * generic_write_end() zeros them, so they are safe to leave alone and be
+ * written with all the other valid data.
  */
 STATIC int
 xfs_vm_write_end(
@@ -1640,8 +1710,11 @@ xfs_vm_write_end(
                loff_t          to = pos + len;
 
                if (to > isize) {
-                       truncate_pagecache(inode, isize);
+                       /* only kill blocks in this write beyond EOF */
+                       if (pos > isize)
+                               isize = pos;
                        xfs_vm_kill_delalloc_range(inode, isize, to);
+                       truncate_pagecache_range(inode, isize, to);
                }
        }
        return ret;