/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like. Any license provided herein, whether implied or
- * otherwise, applies only to this software file. Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA 94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- * The xfs_buf.c code provides an abstract buffer cache model on top
- * of the Linux page cache. Cached metadata blocks for a file system
- * are hashed to the inode for the block device. xfs_buf.c assembles
- * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O.
- *
- * Written by Steve Lord, Jim Mostek, Russell Cattelan
- * and Rajagopal Ananthanarayanan ("ananth") at SGI.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
*
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
-
#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hash.h>
#include <linux/kthread.h>
-
#include "xfs_linux.h"
-/*
- * File wide globals
- */
-
STATIC kmem_cache_t *pagebuf_zone;
STATIC kmem_shaker_t pagebuf_shake;
+STATIC int xfsbufd(void *);
STATIC int xfsbufd_wakeup(int, gfp_t);
STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
STATIC struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
-/*
- * Pagebuf debugging
- */
-
#ifdef PAGEBUF_TRACE
void
pagebuf_trace(
# define PB_GET_OWNER(pb) do { } while (0)
#endif
-/*
- * Pagebuf allocation / freeing.
- */
-
#define pb_to_gfp(flags) \
((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
#define pb_to_km(flags) \
(((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
-
#define pagebuf_allocate(flags) \
kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))
#define pagebuf_deallocate(pb) \
* most cases but may be reset (e.g. XFS recovery).
*/
pb->pb_buffer_length = pb->pb_count_desired = range_length;
- pb->pb_flags = flags | PBF_NONE;
+ pb->pb_flags = flags;
pb->pb_bn = XFS_BUF_DADDR_NULL;
atomic_set(&pb->pb_pin_count, 0);
init_waitqueue_head(&pb->pb_waiters);
unlock_page(bp->pb_pages[i]);
}
- bp->pb_flags &= ~PBF_NONE;
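+	/* only mark the buffer done if every page was found uptodate */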
+ if (page_count == bp->pb_page_count)
+ bp->pb_flags |= PBF_DONE;
PB_TRACE(bp, "lookup_pages", (long)page_count);
return error;
PB_TRACE(pb, "rele", pb->pb_relse);
- /*
- * pagebuf_lookup buffers are not hashed, not delayed write,
- * and don't have their own release routines. Special case.
- */
- if (unlikely(!hash)) {
- ASSERT(!pb->pb_relse);
- if (atomic_dec_and_test(&pb->pb_hold))
- xfs_buf_free(pb);
- return;
- }
-
if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
- int do_free = 1;
-
if (pb->pb_relse) {
atomic_inc(&pb->pb_hold);
spin_unlock(&hash->bh_lock);
(*(pb->pb_relse)) (pb);
- spin_lock(&hash->bh_lock);
- do_free = 0;
- }
-
- if (pb->pb_flags & PBF_FS_MANAGED) {
- do_free = 0;
- }
-
- if (do_free) {
- ASSERT((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == 0);
- list_del_init(&pb->pb_hash_list);
+ } else if (pb->pb_flags & PBF_FS_MANAGED) {
spin_unlock(&hash->bh_lock);
- pagebuf_free(pb);
} else {
+ ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)));
+ list_del_init(&pb->pb_hash_list);
spin_unlock(&hash->bh_lock);
+ pagebuf_free(pb);
}
} else {
/*
{
pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
if (pb->pb_error == 0)
- pb->pb_flags &= ~PBF_NONE;
+ pb->pb_flags |= PBF_DONE;
PB_TRACE(pb, "iodone", pb->pb_iodone);
rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
}
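+	/* ordered buffers are issued as barrier writes and are never reads */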
+ if (pb->pb_flags & PBF_ORDERED) {
+ ASSERT(!(pb->pb_flags & PBF_READ));
+ rw = WRITE_BARRIER;
+ }
+
/* Special code path for reading a sub page size pagebuf in --
* we populate up the whole page, and hence the other metadata
* in the same page. This optimization is only valid when the
btp->bt_hash = NULL;
}
+/*
+ * buftarg list for delwrite queue processing: all registered buftargs,
+ * protected by xfs_buftarg_lock, so that xfsbufd_wakeup() can find and
+ * wake each target's flush daemon.
+ */
+STATIC LIST_HEAD(xfs_buftarg_list);
+STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);
+
+STATIC void
+xfs_register_buftarg(
+ xfs_buftarg_t *btp)
+{
+ spin_lock(&xfs_buftarg_lock);
+ list_add(&btp->bt_list, &xfs_buftarg_list);
+ spin_unlock(&xfs_buftarg_lock);
+}
+
+STATIC void
+xfs_unregister_buftarg(
+ xfs_buftarg_t *btp)
+{
+ spin_lock(&xfs_buftarg_lock);
+ list_del(&btp->bt_list);
+ spin_unlock(&xfs_buftarg_lock);
+}
+
void
xfs_free_buftarg(
xfs_buftarg_t *btp,
xfs_blkdev_put(btp->pbr_bdev);
xfs_free_bufhash(btp);
iput(btp->pbr_mapping->host);
+
+ /* unregister the buftarg first so that we don't get a
+ * wakeup finding a non-existent task */
+ xfs_unregister_buftarg(btp);
+ kthread_stop(btp->bt_task);
+
kmem_free(btp, sizeof(*btp));
}
return 0;
}
+STATIC int
+xfs_alloc_delwrite_queue(
+ xfs_buftarg_t *btp)
+{
+ int error = 0;
+
+ INIT_LIST_HEAD(&btp->bt_list);
+ INIT_LIST_HEAD(&btp->bt_delwrite_queue);
+ spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
+ btp->bt_flags = 0;
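+	/* each buftarg gets its own delwri flush daemon */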
+ btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
+ if (IS_ERR(btp->bt_task)) {
+ error = PTR_ERR(btp->bt_task);
+ goto out_error;
+ }
+ xfs_register_buftarg(btp);
+out_error:
+ return error;
+}
+
xfs_buftarg_t *
xfs_alloc_buftarg(
struct block_device *bdev,
goto error;
if (xfs_mapping_buftarg(btp, bdev))
goto error;
+ if (xfs_alloc_delwrite_queue(btp))
+ goto error;
xfs_alloc_bufhash(btp, external);
return btp;
/*
* Pagebuf delayed write buffer handling
*/
-
-STATIC LIST_HEAD(pbd_delwrite_queue);
-STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
-
STATIC void
pagebuf_delwri_queue(
xfs_buf_t *pb,
int unlock)
{
+ struct list_head *dwq = &pb->pb_target->bt_delwrite_queue;
+ spinlock_t *dwlk = &pb->pb_target->bt_delwrite_lock;
+
PB_TRACE(pb, "delwri_q", (long)unlock);
ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==
(PBF_DELWRI|PBF_ASYNC));
- spin_lock(&pbd_delwrite_lock);
+ spin_lock(dwlk);
/* If already in the queue, dequeue and place at tail */
if (!list_empty(&pb->pb_list)) {
ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
}
pb->pb_flags |= _PBF_DELWRI_Q;
- list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
+ list_add_tail(&pb->pb_list, dwq);
pb->pb_queuetime = jiffies;
- spin_unlock(&pbd_delwrite_lock);
+ spin_unlock(dwlk);
if (unlock)
pagebuf_unlock(pb);
pagebuf_delwri_dequeue(
xfs_buf_t *pb)
{
+ spinlock_t *dwlk = &pb->pb_target->bt_delwrite_lock;
int dequeued = 0;
- spin_lock(&pbd_delwrite_lock);
+ spin_lock(dwlk);
if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
list_del_init(&pb->pb_list);
dequeued = 1;
}
pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
- spin_unlock(&pbd_delwrite_lock);
+ spin_unlock(dwlk);
if (dequeued)
pagebuf_rele(pb);
flush_workqueue(queue);
}
-/* Defines for pagebuf daemon */
-STATIC struct task_struct *xfsbufd_task;
-STATIC int xfsbufd_force_flush;
-STATIC int xfsbufd_force_sleep;
-
STATIC int
xfsbufd_wakeup(
- int priority,
- gfp_t mask)
+ int priority,
+ gfp_t mask)
{
- if (xfsbufd_force_sleep)
- return 0;
- xfsbufd_force_flush = 1;
- barrier();
- wake_up_process(xfsbufd_task);
+ xfs_buftarg_t *btp, *n;
+
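+	/* wake every registered buftarg's flush daemon, skipping any frozen in the refrigerator */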
+ spin_lock(&xfs_buftarg_lock);
+ list_for_each_entry_safe(btp, n, &xfs_buftarg_list, bt_list) {
+ if (test_bit(BT_FORCE_SLEEP, &btp->bt_flags))
+ continue;
+ set_bit(BT_FORCE_FLUSH, &btp->bt_flags);
+ barrier();
+ wake_up_process(btp->bt_task);
+ }
+ spin_unlock(&xfs_buftarg_lock);
return 0;
}
{
struct list_head tmp;
unsigned long age;
- xfs_buftarg_t *target;
+ xfs_buftarg_t *target = (xfs_buftarg_t *)data;
xfs_buf_t *pb, *n;
+ struct list_head *dwq = &target->bt_delwrite_queue;
+ spinlock_t *dwlk = &target->bt_delwrite_lock;
current->flags |= PF_MEMALLOC;
INIT_LIST_HEAD(&tmp);
do {
if (unlikely(freezing(current))) {
- xfsbufd_force_sleep = 1;
+ set_bit(BT_FORCE_SLEEP, &target->bt_flags);
refrigerator();
} else {
- xfsbufd_force_sleep = 0;
+ clear_bit(BT_FORCE_SLEEP, &target->bt_flags);
}
- schedule_timeout_interruptible
- (xfs_buf_timer_centisecs * msecs_to_jiffies(10));
+ schedule_timeout_interruptible(
+ xfs_buf_timer_centisecs * msecs_to_jiffies(10));
age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
- spin_lock(&pbd_delwrite_lock);
- list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
+ spin_lock(dwlk);
+ list_for_each_entry_safe(pb, n, dwq, pb_list) {
PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
ASSERT(pb->pb_flags & PBF_DELWRI);
if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
- if (!xfsbufd_force_flush &&
+ if (!test_bit(BT_FORCE_FLUSH,
+ &target->bt_flags) &&
time_before(jiffies,
pb->pb_queuetime + age)) {
pagebuf_unlock(pb);
list_move(&pb->pb_list, &tmp);
}
}
- spin_unlock(&pbd_delwrite_lock);
+ spin_unlock(dwlk);
while (!list_empty(&tmp)) {
pb = list_entry(tmp.next, xfs_buf_t, pb_list);
- target = pb->pb_target;
+ ASSERT(target == pb->pb_target);
list_del_init(&pb->pb_list);
pagebuf_iostrategy(pb);
if (as_list_len > 0)
purge_addresses();
- xfsbufd_force_flush = 0;
+ clear_bit(BT_FORCE_FLUSH, &target->bt_flags);
} while (!kthread_should_stop());
return 0;
struct list_head tmp;
xfs_buf_t *pb, *n;
int pincount = 0;
+ struct list_head *dwq = &target->bt_delwrite_queue;
+ spinlock_t *dwlk = &target->bt_delwrite_lock;
pagebuf_runall_queues(xfsdatad_workqueue);
pagebuf_runall_queues(xfslogd_workqueue);
INIT_LIST_HEAD(&tmp);
- spin_lock(&pbd_delwrite_lock);
- list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
-
- if (pb->pb_target != target)
- continue;
+ spin_lock(dwlk);
+ list_for_each_entry_safe(pb, n, dwq, pb_list) {
+ ASSERT(pb->pb_target == target);
ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
if (pagebuf_ispin(pb)) {
list_move(&pb->pb_list, &tmp);
}
- spin_unlock(&pbd_delwrite_lock);
+ spin_unlock(dwlk);
/*
* Dropped the delayed write list lock, now walk the temporary list
if (!xfsdatad_workqueue)
goto out_destroy_xfslogd_workqueue;
- xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");
- if (IS_ERR(xfsbufd_task)) {
- error = PTR_ERR(xfsbufd_task);
- goto out_destroy_xfsdatad_workqueue;
- }
-
pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
if (!pagebuf_shake)
- goto out_stop_xfsbufd;
+ goto out_destroy_xfsdatad_workqueue;
return 0;
- out_stop_xfsbufd:
- kthread_stop(xfsbufd_task);
out_destroy_xfsdatad_workqueue:
destroy_workqueue(xfsdatad_workqueue);
out_destroy_xfslogd_workqueue:
pagebuf_terminate(void)
{
kmem_shake_deregister(pagebuf_shake);
- kthread_stop(xfsbufd_task);
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
kmem_zone_destroy(pagebuf_zone);