Merge branch 'slab/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 25 Mar 2011 00:51:12 +0000 (17:51 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 25 Mar 2011 00:51:12 +0000 (17:51 -0700)
* 'slab/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
  SLUB: Write to per cpu data when allocating it
  slub: Fix debugobjects with lockless fastpath

241 files changed:
Documentation/block/biodoc.txt
Documentation/cgroups/blkio-controller.txt
Documentation/iostats.txt
arch/arm/mm/init.c
arch/ia64/mm/contig.c
arch/ia64/mm/discontig.c
arch/mn10300/Kconfig
arch/mn10300/Kconfig.debug
arch/mn10300/include/asm/debugger.h [new file with mode: 0644]
arch/mn10300/include/asm/div64.h
arch/mn10300/include/asm/fpu.h
arch/mn10300/include/asm/irqflags.h
arch/mn10300/include/asm/kgdb.h [new file with mode: 0644]
arch/mn10300/include/asm/smp.h
arch/mn10300/include/asm/thread_info.h
arch/mn10300/kernel/Makefile
arch/mn10300/kernel/entry.S
arch/mn10300/kernel/fpu.c
arch/mn10300/kernel/gdb-cache.S [deleted file]
arch/mn10300/kernel/gdb-io-ttysm.c
arch/mn10300/kernel/gdb-stub.c
arch/mn10300/kernel/internal.h
arch/mn10300/kernel/irq.c
arch/mn10300/kernel/kgdb.c [new file with mode: 0644]
arch/mn10300/kernel/mn10300-serial.c
arch/mn10300/kernel/process.c
arch/mn10300/kernel/smp.c
arch/mn10300/kernel/switch_to.S
arch/mn10300/kernel/traps.c
arch/mn10300/mm/Kconfig.cache
arch/mn10300/mm/Makefile
arch/mn10300/mm/cache-dbg-flush-by-reg.S [new file with mode: 0644]
arch/mn10300/mm/cache-dbg-flush-by-tag.S [new file with mode: 0644]
arch/mn10300/mm/cache-dbg-inv-by-reg.S [new file with mode: 0644]
arch/mn10300/mm/cache-dbg-inv-by-tag.S [new file with mode: 0644]
arch/mn10300/mm/cache-dbg-inv.S [new file with mode: 0644]
arch/mn10300/mm/cache-flush-by-tag.S
arch/mn10300/mm/cache-inv-by-reg.S
arch/mn10300/mm/cache-inv-by-tag.S
arch/mn10300/mm/cache.inc [new file with mode: 0644]
arch/mn10300/mm/fault.c
arch/mn10300/proc-mn103e010/include/proc/cache.h
arch/mn10300/proc-mn2ws0050/include/proc/cache.h
arch/parisc/mm/init.c
arch/powerpc/xmon/xmon.c
arch/sparc/mm/init_32.c
arch/tile/mm/pgtable.c
arch/unicore32/mm/init.c
block/blk-cgroup.c
block/blk-cgroup.h
block/blk-core.c
block/blk-exec.c
block/blk-flush.c
block/blk-lib.c
block/blk-merge.c
block/blk-settings.c
block/blk-sysfs.c
block/blk-throttle.c
block/blk.h
block/cfq-iosched.c
block/cfq.h
block/deadline-iosched.c
block/elevator.c
block/genhd.c
block/noop-iosched.c
drivers/block/DAC960.c
drivers/block/amiflop.c
drivers/block/ataflop.c
drivers/block/cciss.c
drivers/block/cpqarray.c
drivers/block/drbd/drbd_actlog.c
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_worker.c
drivers/block/drbd/drbd_wrappers.h
drivers/block/floppy.c
drivers/block/loop.c
drivers/block/paride/pcd.c
drivers/block/paride/pd.c
drivers/block/paride/pf.c
drivers/block/pktcdvd.c
drivers/block/swim.c
drivers/block/swim3.c
drivers/block/ub.c
drivers/block/umem.c
drivers/block/xsysace.c
drivers/cdrom/gdrom.c
drivers/cdrom/viocd.c
drivers/gpu/drm/drm_crtc.c
drivers/gpu/drm/drm_gem.c
drivers/gpu/drm/drm_ioctl.c
drivers/gpu/drm/drm_irq.c
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/radeon_combios.c
drivers/gpu/drm/radeon/radeon_connectors.c
drivers/gpu/drm/radeon/radeon_mode.h
drivers/gpu/drm/radeon/radeon_pm.c
drivers/ide/ide-atapi.c
drivers/ide/ide-cd.c
drivers/ide/ide-cd.h
drivers/ide/ide-cd_ioctl.c
drivers/ide/ide-gd.c
drivers/ide/ide-io.c
drivers/ide/ide-park.c
drivers/md/bitmap.c
drivers/md/dm-crypt.c
drivers/md/dm-io.c
drivers/md/dm-kcopyd.c
drivers/md/dm-raid.c
drivers/md/dm-raid1.c
drivers/md/dm-table.c
drivers/md/dm.c
drivers/md/dm.h
drivers/md/linear.c
drivers/md/md.c
drivers/md/multipath.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/message/i2o/i2o_block.c
drivers/mmc/card/queue.c
drivers/s390/block/dasd.c
drivers/s390/char/tape_block.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_transport_fc.c
drivers/scsi/scsi_transport_sas.c
drivers/staging/hv/blkvsc_drv.c
drivers/staging/westbridge/astoria/block/cyasblkdev_block.c
drivers/target/target_core_iblock.c
drivers/tty/sysrq.c
drivers/tty/vt/keyboard.c
fs/adfs/inode.c
fs/affs/file.c
fs/aio.c
fs/befs/linuxvfs.c
fs/bfs/file.c
fs/bio-integrity.c
fs/bio.c
fs/block_dev.c
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/volumes.c
fs/buffer.c
fs/cifs/file.c
fs/direct-io.c
fs/efs/inode.c
fs/exofs/inode.c
fs/ext2/inode.c
fs/ext3/inode.c
fs/ext4/inode.c
fs/ext4/page-io.c
fs/fat/inode.c
fs/freevxfs/vxfs_subr.c
fs/fuse/inode.c
fs/gfs2/aops.c
fs/gfs2/log.c
fs/gfs2/lops.c
fs/gfs2/meta_io.c
fs/hfs/inode.c
fs/hfsplus/inode.c
fs/hpfs/file.c
fs/isofs/inode.c
fs/jbd/commit.c
fs/jbd2/commit.c
fs/jfs/inode.c
fs/jfs/jfs_metapage.c
fs/logfs/dev_bdev.c
fs/minix/inode.c
fs/mpage.c
fs/nilfs2/btnode.c
fs/nilfs2/gcinode.c
fs/nilfs2/inode.c
fs/nilfs2/mdt.c
fs/nilfs2/page.c
fs/nilfs2/page.h
fs/nilfs2/segbuf.c
fs/ntfs/aops.c
fs/ntfs/compress.c
fs/ocfs2/aops.c
fs/ocfs2/cluster/heartbeat.c
fs/omfs/file.c
fs/partitions/check.c
fs/qnx4/inode.c
fs/reiserfs/inode.c
fs/super.c
fs/sync.c
fs/sysv/itree.c
fs/ubifs/super.c
fs/udf/file.c
fs/udf/inode.c
fs/ufs/inode.c
fs/ufs/truncate.c
fs/xfs/linux-2.6/xfs_aops.c
fs/xfs/linux-2.6/xfs_buf.c
include/drm/drm.h
include/linux/backing-dev.h
include/linux/bio.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/buffer_head.h
include/linux/device-mapper.h
include/linux/elevator.h
include/linux/fs.h
include/linux/genhd.h
include/linux/kgdb.h
include/linux/mm.h
include/linux/pagemap.h
include/linux/sched.h
include/linux/swap.h
kernel/debug/gdbstub.c
kernel/exit.c
kernel/fork.c
kernel/power/block_io.c
kernel/sched.c
kernel/trace/blktrace.c
lib/show_mem.c
mm/backing-dev.c
mm/filemap.c
mm/memory-failure.c
mm/nommu.c
mm/oom_kill.c
mm/page-writeback.c
mm/page_alloc.c
mm/page_io.c
mm/readahead.c
mm/shmem.c
mm/swap_state.c
mm/swapfile.c
mm/vmscan.c

index b9a83dd24732486965f41225d8184405e4766c4a..2a7b38c832c72e705bd9136ace4d9ece77c16628 100644 (file)
@@ -963,11 +963,6 @@ elevator_dispatch_fn*              fills the dispatch queue with ready requests.
 
 elevator_add_req_fn*           called to add a new request into the scheduler
 
-elevator_queue_empty_fn                returns true if the merge queue is empty.
-                               Drivers shouldn't use this, but rather check
-                               if elv_next_request is NULL (without losing the
-                               request if one exists!)
-
 elevator_former_req_fn
 elevator_latter_req_fn         These return the request before or after the
                                one specified in disk sort order. Used by the
index 4ed7b5ceeed212c1b7a734451456896f4759ee9d..465351d4cf853e8a308c9c84abef789b3dcfa42c 100644 (file)
@@ -140,7 +140,7 @@ Proportional weight policy files
        - Specifies per cgroup weight. This is default weight of the group
          on all the devices until and unless overridden by per device rule.
          (See blkio.weight_device).
-         Currently allowed range of weights is from 100 to 1000.
+         Currently allowed range of weights is from 10 to 1000.
 
 - blkio.weight_device
        - One can specify per cgroup per device rules using this interface.
@@ -343,34 +343,6 @@ Common files among various policies
 
 CFQ sysfs tunable
 =================
-/sys/block/<disk>/queue/iosched/group_isolation
------------------------------------------------
-
-If group_isolation=1, it provides stronger isolation between groups at the
-expense of throughput. By default group_isolation is 0. In general that
-means that if group_isolation=0, expect fairness for sequential workload
-only. Set group_isolation=1 to see fairness for random IO workload also.
-
-Generally CFQ will put random seeky workload in sync-noidle category. CFQ
-will disable idling on these queues and it does a collective idling on group
-of such queues. Generally these are slow moving queues and if there is a
-sync-noidle service tree in each group, that group gets exclusive access to
-disk for certain period. That means it will bring the throughput down if
-group does not have enough IO to drive deeper queue depths and utilize disk
-capacity to the fullest in the slice allocated to it. But the flip side is
-that even a random reader should get better latencies and overall throughput
-if there are lots of sequential readers/sync-idle workload running in the
-system.
-
-If group_isolation=0, then CFQ automatically moves all the random seeky queues
-in the root group. That means there will be no service differentiation for
-that kind of workload. This leads to better throughput as we do collective
-idling on root sync-noidle tree.
-
-By default one should run with group_isolation=0. If that is not sufficient
-and one wants stronger isolation between groups, then set group_isolation=1
-but this will come at cost of reduced throughput.
-
 /sys/block/<disk>/queue/iosched/slice_idle
 ------------------------------------------
 On a faster hardware CFQ can be slow, especially with sequential workload.
index f6dece5b701436a9581a1d55d30b169a23d9f8a3..c76c21d87e8582a2bd624ac126ba45290e851d70 100644 (file)
@@ -1,8 +1,6 @@
 I/O statistics fields
 ---------------
 
-Last modified Sep 30, 2003
-
 Since 2.4.20 (and some versions before, with patches), and 2.5.45,
 more extensive disk statistics have been introduced to help measure disk
 activity. Tools such as sar and iostat typically interpret these and do
@@ -46,11 +44,12 @@ the above example, the first field of statistics would be 446216.
 By contrast, in 2.6 if you look at /sys/block/hda/stat, you'll
 find just the eleven fields, beginning with 446216.  If you look at
 /proc/diskstats, the eleven fields will be preceded by the major and
-minor device numbers, and device name.  Each of these formats provide
+minor device numbers, and device name.  Each of these formats provides
 eleven fields of statistics, each meaning exactly the same things.
 All fields except field 9 are cumulative since boot.  Field 9 should
-go to zero as I/Os complete; all others only increase.  Yes, these are
-32 bit unsigned numbers, and on a very busy or long-lived system they
+go to zero as I/Os complete; all others only increase (unless they
+overflow and wrap).  Yes, these are (32-bit or 64-bit) unsigned long
+(native word size) numbers, and on a very busy or long-lived system they
 may wrap. Applications should be prepared to deal with that; unless
 your observations are measured in large numbers of minutes or hours,
 they should not wrap twice before you notice them.
@@ -96,11 +95,11 @@ introduced when changes collide, so (for instance) adding up all the
 read I/Os issued per partition should equal those made to the disks ...
 but due to the lack of locking it may only be very close.
 
-In 2.6, there are counters for each cpu, which made the lack of locking
-almost a non-issue.  When the statistics are read, the per-cpu counters
-are summed (possibly overflowing the unsigned 32-bit variable they are
+In 2.6, there are counters for each CPU, which make the lack of locking
+almost a non-issue.  When the statistics are read, the per-CPU counters
+are summed (possibly overflowing the unsigned long variable they are
 summed to) and the result given to the user.  There is no convenient
-user interface for accessing the per-cpu counters themselves.
+user interface for accessing the per-CPU counters themselves.
 
 Disks vs Partitions
 -------------------
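
[Editorial aside, not part of the patch: the iostats.txt hunks above describe the /proc/diskstats layout — major and minor device numbers and the device name, followed by eleven statistics fields, of which only field 9 (I/Os in flight) is non-cumulative. A minimal user-space sketch that parses those fields as described, assuming the 2.6-era eleven-field format, might look like this:]

    /* Illustrative sketch only: read /proc/diskstats and print a few of the
     * eleven per-device fields documented above.  Counters are native
     * word-sized and may wrap, so they are read as unsigned long long. */
    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/diskstats", "r");
            unsigned int major, minor;
            char name[32];
            unsigned long long st[11];

            if (!f) {
                    perror("/proc/diskstats");
                    return 1;
            }

            /* major minor name + 11 statistics fields = 14 conversions */
            while (fscanf(f,
                          "%u %u %31s %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
                          &major, &minor, name,
                          &st[0], &st[1], &st[2], &st[3], &st[4], &st[5],
                          &st[6], &st[7], &st[8], &st[9], &st[10]) == 14) {
                    /* st[0] = reads completed, st[4] = writes completed,
                     * st[8] = field 9, the in-flight (non-cumulative) count */
                    printf("%s: %llu reads, %llu writes, %llu in flight\n",
                           name, st[0], st[4], st[8]);
            }

            fclose(f);
            return 0;
    }
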
index b3b0f0f5053dfe791eaaa1fc66d9a014350c4d9b..e5f6fc42834892e7622664b496d3aa0288eaa2ff 100644 (file)
@@ -78,7 +78,7 @@ __tagtable(ATAG_INITRD2, parse_tag_initrd2);
  */
 struct meminfo meminfo;
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
        int free = 0, total = 0, reserved = 0;
        int shared = 0, cached = 0, slab = 0, i;
index 54bf540598118339a04919e1f08c505a5ad57cd8..9a018cde5d84d5c3718c3fef41d6557aa392d14b 100644 (file)
@@ -36,7 +36,7 @@ static unsigned long max_gap;
  * Shows a simple page count of reserved and used pages in the system.
  * For discontig machines, it does this on a per-pgdat basis.
  */
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
        int i, total_reserved = 0;
        int total_shared = 0, total_cached = 0;
index 61620323bb60adadf942288b53446604d8323b69..82ab1bc6afb1236460c0e380a2a5fbf61a5b8e1b 100644 (file)
@@ -614,7 +614,7 @@ void __cpuinit *per_cpu_init(void)
  * Shows a simple page count of reserved and used pages in the system.
  * For discontig machines, it does this on a per-pgdat basis.
  */
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
        int i, total_reserved = 0;
        int total_shared = 0, total_cached = 0;
index 10971be4306156a5eddc22746246c7a80dd83ea7..d8ab97a73db288db6b601fd13fd2baaa278f6809 100644 (file)
@@ -3,6 +3,8 @@ config MN10300
        select HAVE_OPROFILE
        select HAVE_GENERIC_HARDIRQS
        select GENERIC_HARDIRQS_NO_DEPRECATED
+       select HAVE_ARCH_TRACEHOOK
+       select HAVE_ARCH_KGDB
 
 config AM33_2
        def_bool n
@@ -401,9 +403,9 @@ comment "[!] NOTE: A lower number/level indicates a higher priority (0 is highes
 comment "____Non-maskable interrupt levels____"
 comment "The following must be set to a higher priority than local_irq_disable() and on-chip serial"
 
-config GDBSTUB_IRQ_LEVEL
-       int "GDBSTUB interrupt priority"
-       depends on GDBSTUB
+config DEBUGGER_IRQ_LEVEL
+       int "DEBUGGER interrupt priority"
+       depends on KERNEL_DEBUGGER
        range 0 1 if LINUX_CLI_LEVEL = 2
        range 0 2 if LINUX_CLI_LEVEL = 3
        range 0 3 if LINUX_CLI_LEVEL = 4
@@ -437,7 +439,7 @@ config LINUX_CLI_LEVEL
          EPSW.IM from 7.  Any interrupt is permitted for which the level is
          lower than EPSW.IM.
 
-         Certain interrupts, such as GDBSTUB and virtual MN10300 on-chip
+         Certain interrupts, such as DEBUGGER and virtual MN10300 on-chip
          serial DMA interrupts are allowed to interrupt normal disabled
          sections.
 
index ce83c74b3fd714abf68fca4a4d2beef024545c83..bdbfd444a9ff9cf5569baa685a906e5f8800801d 100644 (file)
@@ -36,7 +36,7 @@ config KPROBES
 
 config GDBSTUB
        bool "Remote GDB kernel debugging"
-       depends on DEBUG_KERNEL
+       depends on DEBUG_KERNEL && DEPRECATED
        select DEBUG_INFO
        select FRAME_POINTER
        help
@@ -46,6 +46,9 @@ config GDBSTUB
          RAM to avoid excessive linking time. This is only useful for kernel
          hackers. If unsure, say N.
 
+         This is deprecated in favour of KGDB and will be removed in a later
+         version.
+
 config GDBSTUB_IMMEDIATE
        bool "Break into GDB stub immediately"
        depends on GDBSTUB
@@ -54,6 +57,14 @@ config GDBSTUB_IMMEDIATE
          possible, leaving the program counter at the beginning of
          start_kernel() in init/main.c.
 
+config GDBSTUB_ALLOW_SINGLE_STEP
+       bool "Allow software single-stepping in GDB stub"
+       depends on GDBSTUB && !SMP && !PREEMPT
+       help
+         Allow GDB stub to perform software single-stepping through the
+         kernel.  This doesn't work very well on SMP or preemptible kernels as
+         it uses temporary breakpoints to emulate single-stepping.
+
 config GDB_CONSOLE
        bool "Console output to GDB"
        depends on GDBSTUB
@@ -142,3 +153,7 @@ config GDBSTUB_ON_TTYSx
        default y
 
 endmenu
+
+config KERNEL_DEBUGGER
+       def_bool y
+       depends on GDBSTUB || KGDB
diff --git a/arch/mn10300/include/asm/debugger.h b/arch/mn10300/include/asm/debugger.h
new file mode 100644 (file)
index 0000000..e1d3b08
--- /dev/null
@@ -0,0 +1,43 @@
+/* Kernel debugger for MN10300
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_DEBUGGER_H
+#define _ASM_DEBUGGER_H
+
+#if defined(CONFIG_KERNEL_DEBUGGER)
+
+extern int debugger_intercept(enum exception_code, int, int, struct pt_regs *);
+extern int at_debugger_breakpoint(struct pt_regs *);
+
+#ifndef CONFIG_MN10300_DEBUGGER_CACHE_NO_FLUSH
+extern void debugger_local_cache_flushinv(void);
+extern void debugger_local_cache_flushinv_one(u8 *);
+#else
+static inline void debugger_local_cache_flushinv(void) {}
+static inline void debugger_local_cache_flushinv_one(u8 *addr) {}
+#endif
+
+#else /* CONFIG_KERNEL_DEBUGGER */
+
+static inline int debugger_intercept(enum exception_code excep,
+                                    int signo, int si_code,
+                                    struct pt_regs *regs)
+{
+       return 0;
+}
+
+static inline int at_debugger_breakpoint(struct pt_regs *regs)
+{
+       return 0;
+}
+
+#endif /* CONFIG_KERNEL_DEBUGGER */
+#endif /* _ASM_DEBUGGER_H */
index 34dcb8e68309e0331b12d5ce2051a05da04e4c3f..503efab2a516988bbb024666227cb6c279c0b9f7 100644 (file)
 
 extern void ____unhandled_size_in_do_div___(void);
 
+/*
+ * Beginning with gcc 4.6, the MDR register is represented explicitly.  We
+ * must, therefore, at least explicitly clobber the register when we make
+ * changes to it.  The following assembly fragments *could* be rearranged in
+ * order to leave the moves to/from the MDR register to the compiler, but the
+ * gains would be minimal at best.
+ */
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+# define CLOBBER_MDR_CC                "mdr", "cc"
+#else
+# define CLOBBER_MDR_CC                "cc"
+#endif
+
 /*
  * divide n by base, leaving the result in n and returning the remainder
  * - we can do this quite efficiently on the MN10300 by cascading the divides
@@ -29,7 +42,7 @@ extern void ____unhandled_size_in_do_div___(void);
                    "mov        mdr,%1  \n"                             \
                    : "+r"(n), "=d"(__rem)                              \
                    : "r"(base), "1"(__rem)                             \
-                   : "cc"                                              \
+                   : CLOBBER_MDR_CC                                    \
                    );                                                  \
        } else if (sizeof(n) <= 8) {                                    \
                union {                                                 \
@@ -48,7 +61,7 @@ extern void ____unhandled_size_in_do_div___(void);
                    : "=d"(__rem), "=r"(__quot.w[1]), "=r"(__quot.w[0]) \
                    : "r"(base), "0"(__rem), "1"(__quot.w[1]),          \
                      "2"(__quot.w[0])                                  \
-                   : "cc"                                              \
+                   : CLOBBER_MDR_CC                                    \
                    );                                                  \
                n = __quot.l;                                           \
        } else {                                                        \
@@ -72,7 +85,7 @@ unsigned __muldiv64u(unsigned val, unsigned mult, unsigned div)
                                         * MDR = MDR:val%div */
            : "=r"(result)
            : "0"(val), "ir"(mult), "r"(div)
-           : "cc"
+           : CLOBBER_MDR_CC
            );
 
        return result;
@@ -93,7 +106,7 @@ signed __muldiv64s(signed val, signed mult, signed div)
                                         * MDR = MDR:val%div */
            : "=r"(result)
            : "0"(val), "ir"(mult), "r"(div)
-           : "cc"
+           : CLOBBER_MDR_CC
            );
 
        return result;
index b7625de8eade6754edf14f5de0340d9a0aa76e57..738ff72659d52307cdbc6195f768db79843c4c19 100644 (file)
@@ -55,7 +55,6 @@ static inline void clear_using_fpu(struct task_struct *tsk)
 
 extern asmlinkage void fpu_kill_state(struct task_struct *);
 extern asmlinkage void fpu_exception(struct pt_regs *, enum exception_code);
-extern asmlinkage void fpu_invalid_op(struct pt_regs *, enum exception_code);
 extern asmlinkage void fpu_init_state(void);
 extern asmlinkage void fpu_save(struct fpu_state_struct *);
 extern int fpu_setup_sigcontext(struct fpucontext *buf);
@@ -113,7 +112,6 @@ static inline void flush_fpu(void)
 
 extern asmlinkage
 void unexpected_fpu_exception(struct pt_regs *, enum exception_code);
-#define fpu_invalid_op unexpected_fpu_exception
 #define fpu_exception unexpected_fpu_exception
 
 struct task_struct;
index 7a7ae12c7119e42f9c83943dec0ad14ab9dfa7a7..678f68d5f37bb7ef819ab900302ff9e14c61705d 100644 (file)
@@ -20,7 +20,7 @@
 /*
  * interrupt control
  * - "disabled": run in IM1/2
- *   - level 0 - GDB stub
+ *   - level 0 - kernel debugger
  *   - level 1 - virtual serial DMA (if present)
  *   - level 5 - normal interrupt priority
  *   - level 6 - timer interrupt
diff --git a/arch/mn10300/include/asm/kgdb.h b/arch/mn10300/include/asm/kgdb.h
new file mode 100644 (file)
index 0000000..eb245f1
--- /dev/null
@@ -0,0 +1,81 @@
+/* Kernel debugger for MN10300
+ *
+ * Copyright (C) 2010 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_KGDB_H
+#define _ASM_KGDB_H
+
+/*
+ * BUFMAX defines the maximum number of characters in inbound/outbound
+ * buffers at least NUMREGBYTES*2 are needed for register packets
+ * Longer buffer is needed to list all threads
+ */
+#define BUFMAX                 1024
+
+/*
+ * Note that this register image is in a different order than the register
+ * image that Linux produces at interrupt time.
+ */
+enum regnames {
+       GDB_FR_D0               = 0,
+       GDB_FR_D1               = 1,
+       GDB_FR_D2               = 2,
+       GDB_FR_D3               = 3,
+       GDB_FR_A0               = 4,
+       GDB_FR_A1               = 5,
+       GDB_FR_A2               = 6,
+       GDB_FR_A3               = 7,
+
+       GDB_FR_SP               = 8,
+       GDB_FR_PC               = 9,
+       GDB_FR_MDR              = 10,
+       GDB_FR_EPSW             = 11,
+       GDB_FR_LIR              = 12,
+       GDB_FR_LAR              = 13,
+       GDB_FR_MDRQ             = 14,
+
+       GDB_FR_E0               = 15,
+       GDB_FR_E1               = 16,
+       GDB_FR_E2               = 17,
+       GDB_FR_E3               = 18,
+       GDB_FR_E4               = 19,
+       GDB_FR_E5               = 20,
+       GDB_FR_E6               = 21,
+       GDB_FR_E7               = 22,
+
+       GDB_FR_SSP              = 23,
+       GDB_FR_MSP              = 24,
+       GDB_FR_USP              = 25,
+       GDB_FR_MCRH             = 26,
+       GDB_FR_MCRL             = 27,
+       GDB_FR_MCVF             = 28,
+
+       GDB_FR_FPCR             = 29,
+       GDB_FR_DUMMY0           = 30,
+       GDB_FR_DUMMY1           = 31,
+
+       GDB_FR_FS0              = 32,
+
+       GDB_FR_SIZE             = 64,
+};
+
+#define GDB_ORIG_D0            41
+#define NUMREGBYTES            (GDB_FR_SIZE*4)
+
+static inline void arch_kgdb_breakpoint(void)
+{
+       asm(".globl __arch_kgdb_breakpoint; __arch_kgdb_breakpoint: break");
+}
+extern u8 __arch_kgdb_breakpoint;
+
+#define BREAK_INSTR_SIZE       1
+#define CACHE_FLUSH_IS_SAFE    1
+
+#endif /* _ASM_KGDB_H */
index a3930e43a958d1e4077c028afd94b122963032cb..6745dbe649441d906cc76de00df7c496961ba1d7 100644 (file)
@@ -34,7 +34,7 @@
 #define LOCAL_TIMER_IPI                193
 #define FLUSH_CACHE_IPI                194
 #define CALL_FUNCTION_NMI_IPI  195
-#define GDB_NMI_IPI            196
+#define DEBUGGER_NMI_IPI       196
 
 #define SMP_BOOT_IRQ           195
 
@@ -43,6 +43,7 @@
 #define LOCAL_TIMER_GxICR_LV   GxICR_LEVEL_4
 #define FLUSH_CACHE_GxICR_LV   GxICR_LEVEL_0
 #define SMP_BOOT_GxICR_LV      GxICR_LEVEL_0
+#define DEBUGGER_GxICR_LV      CONFIG_DEBUGGER_IRQ_LEVEL
 
 #define TIME_OUT_COUNT_BOOT_IPI        100
 #define DELAY_TIME_BOOT_IPI    75000
@@ -61,8 +62,9 @@
  * An alternate way of dealing with this could be to use the EPSW.S bits to
  * cache this information for systems with up to four CPUs.
  */
+#define arch_smp_processor_id()        (CPUID)
 #if 0
-#define raw_smp_processor_id() (CPUID)
+#define raw_smp_processor_id() (arch_smp_processor_id())
 #else
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 #endif
index 8d53f09c878d6592e9b7eeeffc3430a457783293..87c213002d4c9b30834b88aff215262160193541 100644 (file)
@@ -131,7 +131,11 @@ static inline unsigned long current_stack_pointer(void)
                kmalloc_node(THREAD_SIZE, GFP_KERNEL, node)
 #endif
 
+#ifndef CONFIG_KGDB
 #define free_thread_info(ti)   kfree((ti))
+#else
+extern void free_thread_info(struct thread_info *);
+#endif
 #define get_thread_info(ti)    get_task_struct((ti)->task)
 #define put_thread_info(ti)    put_task_struct((ti)->task)
 
index a06a2e10051d52b7a53e1227826325427cff9c0d..47ed30fe8178c45c5d99f3ec490feb41770e7c5c 100644 (file)
@@ -21,11 +21,8 @@ obj-$(CONFIG_GDBSTUB) += gdb-stub.o gdb-low.o
 obj-$(CONFIG_GDBSTUB_ON_TTYSx) += gdb-io-serial.o gdb-io-serial-low.o
 obj-$(CONFIG_GDBSTUB_ON_TTYSMx) += gdb-io-ttysm.o gdb-io-ttysm-low.o
 
-ifeq ($(CONFIG_MN10300_CACHE_ENABLED),y)
-obj-$(CONFIG_GDBSTUB) += gdb-cache.o
-endif
-
 obj-$(CONFIG_MN10300_RTC) += rtc.o
 obj-$(CONFIG_PROFILE) += profile.o profile-low.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_KGDB) += kgdb.o
index f00b9bafcd3ebb7aa8d53e0e8db61aedf15652e2..fb93ad720b82665cb5abbf1add396fdfd2e53ec4 100644 (file)
@@ -266,7 +266,11 @@ ENTRY(raw_bus_error)
 
 ###############################################################################
 #
-# Miscellaneous exception entry points
+# NMI exception entry points
+#
+# This is used by ordinary interrupt channels that have the GxICR_NMI bit set
+# in addition to the main NMI and Watchdog channels.  SMP NMI IPIs use this
+# facility.
 #
 ###############################################################################
 ENTRY(nmi_handler)
@@ -281,7 +285,7 @@ ENTRY(nmi_handler)
        and     NMIAGR_GN,d0
        lsr     0x2,d0
        cmp     CALL_FUNCTION_NMI_IPI,d0
-       bne     5f                      # if not call function, jump
+       bne     nmi_not_smp_callfunc    # if not call function, jump
 
        # function call nmi ipi
        add     4,sp                    # no need to store TBR
@@ -295,59 +299,38 @@ ENTRY(nmi_handler)
        call    smp_nmi_call_function_interrupt[],0
        RESTORE_ALL
 
-5:
-#ifdef CONFIG_GDBSTUB
-       cmp     GDB_NMI_IPI,d0
-       bne     3f                      # if not gdb nmi ipi, jump
+nmi_not_smp_callfunc:
+#ifdef CONFIG_KERNEL_DEBUGGER
+       cmp     DEBUGGER_NMI_IPI,d0
+       bne     nmi_not_debugger        # if not kernel debugger NMI IPI, jump
 
-       # gdb nmi ipi
+       # kernel debugger NMI IPI
        add     4,sp                    # no need to store TBR
        mov     GxICR_DETECT,d0         # clear NMI
-       movbu   d0,(GxICR(GDB_NMI_IPI))
-       movhu   (GxICR(GDB_NMI_IPI)),d0
+       movbu   d0,(GxICR(DEBUGGER_NMI_IPI))
+       movhu   (GxICR(DEBUGGER_NMI_IPI)),d0
        and     ~EPSW_NMID,epsw         # enable NMI
-#ifdef CONFIG_MN10300_CACHE_ENABLED
-       mov     (gdbstub_nmi_opr_type),d0
-       cmp     GDBSTUB_NMI_CACHE_PURGE,d0
-       bne     4f                      # if not gdb cache purge, jump
-
-       # gdb cache purge nmi ipi
-       add     -20,sp
-       mov     d1,(4,sp)
-       mov     a0,(8,sp)
-       mov     a1,(12,sp)
-       mov     mdr,d0
-       mov     d0,(16,sp)
-       call    gdbstub_local_purge_cache[],0
-       mov     0x1,d0
-       mov     (CPUID),d1
-       asl     d1,d0
-       mov     gdbstub_nmi_cpumask,a0
-       bclr    d0,(a0)
-       mov     (4,sp),d1
-       mov     (8,sp),a0
-       mov     (12,sp),a1
-       mov     (16,sp),d0
-       mov     d0,mdr
-       add     20,sp
-       mov     (sp),d0
-       add     4,sp
-       rti
-4:
-#endif /* CONFIG_MN10300_CACHE_ENABLED */
-       # gdb wait nmi ipi
+
        mov     (sp),d0
        SAVE_ALL
-       call    gdbstub_nmi_wait[],0
+       mov     fp,d0                   # arg 0: stacked register file
+       mov     a2,d1                   # arg 1: exception number
+       call    debugger_nmi_interrupt[],0
        RESTORE_ALL
-3:
-#endif /* CONFIG_GDBSTUB */
+
+nmi_not_debugger:
+#endif /* CONFIG_KERNEL_DEBUGGER */
        mov     (sp),d0                 # restore TBR to d0
        add     4,sp
 #endif /* CONFIG_SMP */
 
        bra     __common_exception_nonmi
 
+###############################################################################
+#
+# General exception entry point
+#
+###############################################################################
 ENTRY(__common_exception)
        add     -4,sp
        mov     d0,(sp)
index 5f9c3fa19a85fb4459987789229387352cf1a809..bb5fa7df6c4425f5ea856fb5625dd8d785f78493 100644 (file)
@@ -69,24 +69,6 @@ asmlinkage void fpu_exception(struct pt_regs *regs, enum exception_code code)
        force_sig_info(SIGFPE, &info, tsk);
 }
 
-/*
- * handle an FPU invalid_op exception
- * - Derived from DO_EINFO() macro in arch/mn10300/kernel/traps.c
- */
-asmlinkage void fpu_invalid_op(struct pt_regs *regs, enum exception_code code)
-{
-       siginfo_t info;
-
-       if (!user_mode(regs))
-               die_if_no_fixup("FPU invalid opcode", regs, code);
-
-       info.si_signo = SIGILL;
-       info.si_errno = 0;
-       info.si_code = ILL_COPROC;
-       info.si_addr = (void *) regs->pc;
-       force_sig_info(info.si_signo, &info, current);
-}
-
 /*
  * save the FPU state to a signal context
  */
diff --git a/arch/mn10300/kernel/gdb-cache.S b/arch/mn10300/kernel/gdb-cache.S
deleted file mode 100644 (file)
index 1108bad..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-###############################################################################
-#
-# MN10300 Low-level cache purging routines for gdbstub
-#
-# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
-# Written by David Howells (dhowells@redhat.com)
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public Licence
-# as published by the Free Software Foundation; either version
-# 2 of the Licence, or (at your option) any later version.
-#
-###############################################################################
-#include <linux/sys.h>
-#include <linux/linkage.h>
-#include <asm/smp.h>
-#include <asm/cache.h>
-#include <asm/cpu-regs.h>
-#include <asm/exceptions.h>
-#include <asm/frame.inc>
-#include <asm/serial-regs.h>
-
-       .text
-
-###############################################################################
-#
-# GDB stub cache purge
-#
-###############################################################################
-       .type   gdbstub_purge_cache,@function
-ENTRY(gdbstub_purge_cache)
-       #######################################################################
-       # read the addresses tagged in the cache's tag RAM and attempt to flush
-       # those addresses specifically
-       # - we rely on the hardware to filter out invalid tag entry addresses
-       mov     DCACHE_TAG(0,0),a0              # dcache tag RAM access address
-       mov     DCACHE_PURGE(0,0),a1            # dcache purge request address
-       mov     L1_CACHE_NWAYS*L1_CACHE_NENTRIES,d1  # total number of entries
-
-mn10300_dcache_flush_loop:
-       mov     (a0),d0
-       and     L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0
-       or      L1_CACHE_TAG_VALID,d0           # retain valid entries in the
-                                               # cache
-       mov     d0,(a1)                         # conditional purge
-
-mn10300_dcache_flush_skip:
-       add     L1_CACHE_BYTES,a0
-       add     L1_CACHE_BYTES,a1
-       add     -1,d1
-       bne     mn10300_dcache_flush_loop
-
-;;     # unconditionally flush and invalidate the dcache
-;;     mov     DCACHE_PURGE(0,0),a1            # dcache purge request address
-;;     mov     L1_CACHE_NWAYS*L1_CACHE_NENTRIES,d1     # total number of
-;;                                                     # entries
-;;
-;; gdbstub_purge_cache__dcache_loop:
-;;     mov     (a1),d0                         # unconditional purge
-;;
-;;     add     L1_CACHE_BYTES,a1
-;;     add     -1,d1
-;;     bne     gdbstub_purge_cache__dcache_loop
-
-       #######################################################################
-       # now invalidate the icache
-       mov     CHCTR,a0
-       movhu   (a0),a1
-
-       mov     epsw,d1
-       and     ~EPSW_IE,epsw
-       nop
-       nop
-
-       # disable the icache
-       and     ~CHCTR_ICEN,d0
-       movhu   d0,(a0)
-
-       # and wait for it to calm down
-       setlb
-       movhu   (a0),d0
-       btst    CHCTR_ICBUSY,d0
-       lne
-
-       # invalidate
-       or      CHCTR_ICINV,d0
-       movhu   d0,(a0)
-
-       # wait for the cache to finish
-       mov     CHCTR,a0
-       setlb
-       movhu   (a0),d0
-       btst    CHCTR_ICBUSY,d0
-       lne
-
-       # and reenable it
-       movhu   a1,(a0)
-       movhu   (a0),d0                 # read back to flush
-                                       # (SIGILLs all over without this)
-
-       mov     d1,epsw
-
-       ret     [],0
-
-       .size   gdbstub_purge_cache,.-gdbstub_purge_cache
index abdeea153c89643184df8f7b1c13805a504b0b7a..c859cacbb9c3bfc94008f251430b038c3744ffac 100644 (file)
@@ -59,10 +59,10 @@ void __init gdbstub_io_init(void)
 
        /* we want to get serial receive interrupts */
        set_intr_level(gdbstub_port->rx_irq,
-               NUM2GxICR_LEVEL(CONFIG_GDBSTUB_IRQ_LEVEL));
+               NUM2GxICR_LEVEL(CONFIG_DEBUGGER_IRQ_LEVEL));
        set_intr_level(gdbstub_port->tx_irq,
-               NUM2GxICR_LEVEL(CONFIG_GDBSTUB_IRQ_LEVEL));
-       set_intr_stub(NUM2EXCEP_IRQ_LEVEL(CONFIG_GDBSTUB_IRQ_LEVEL),
+               NUM2GxICR_LEVEL(CONFIG_DEBUGGER_IRQ_LEVEL));
+       set_intr_stub(NUM2EXCEP_IRQ_LEVEL(CONFIG_DEBUGGER_IRQ_LEVEL),
                gdbstub_io_rx_handler);
 
        *gdbstub_port->rx_icr |= GxICR_ENABLE;
@@ -88,7 +88,7 @@ void __init gdbstub_io_init(void)
 
        /* permit level 0 IRQs only */
        arch_local_change_intr_mask_level(
-               NUM2EPSW_IM(CONFIG_GDBSTUB_IRQ_LEVEL + 1));
+               NUM2EPSW_IM(CONFIG_DEBUGGER_IRQ_LEVEL + 1));
 }
 
 /*
index b169d99d9f20e8fd4b5da5ea353b70f02819844b..538266b2c9bc6cd0d87e9886d4f2509e61847664 100644 (file)
 #include <asm/system.h>
 #include <asm/gdb-stub.h>
 #include <asm/exceptions.h>
-#include <asm/cacheflush.h>
+#include <asm/debugger.h>
 #include <asm/serial-regs.h>
 #include <asm/busctl-regs.h>
 #include <unit/leds.h>
@@ -405,6 +405,7 @@ static int hexToInt(char **ptr, int *intValue)
        return (numChars);
 }
 
+#ifdef CONFIG_GDBSTUB_ALLOW_SINGLE_STEP
 /*
  * We single-step by setting breakpoints. When an exception
  * is handled, we need to restore the instructions hoisted
@@ -729,6 +730,7 @@ static int gdbstub_single_step(struct pt_regs *regs)
        __gdbstub_restore_bp();
        return -EFAULT;
 }
+#endif /* CONFIG_GDBSTUB_ALLOW_SINGLE_STEP */
 
 #ifdef CONFIG_GDBSTUB_CONSOLE
 
@@ -1171,7 +1173,7 @@ int gdbstub_clear_breakpoint(u8 *addr, int len)
 
 /*
  * This function does all command processing for interfacing to gdb
- * - returns 1 if the exception should be skipped, 0 otherwise.
+ * - returns 0 if the exception should be skipped, -ERROR otherwise.
  */
 static int gdbstub(struct pt_regs *regs, enum exception_code excep)
 {
@@ -1186,7 +1188,7 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep)
        int loop;
 
        if (excep == EXCEP_FPU_DISABLED)
-               return 0;
+               return -ENOTSUPP;
 
        gdbstub_flush_caches = 0;
 
@@ -1195,7 +1197,7 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep)
        asm volatile("mov mdr,%0" : "=d"(mdr));
        local_save_flags(epsw);
        arch_local_change_intr_mask_level(
-               NUM2EPSW_IM(CONFIG_GDBSTUB_IRQ_LEVEL + 1));
+               NUM2EPSW_IM(CONFIG_DEBUGGER_IRQ_LEVEL + 1));
 
        gdbstub_store_fpu();
 
@@ -1208,11 +1210,13 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep)
        /* if we were single stepping, restore the opcodes hoisted for the
         * breakpoint[s] */
        broke = 0;
+#ifdef CONFIG_GDBSTUB_ALLOW_SINGLE_STEP
        if ((step_bp[0].addr && step_bp[0].addr == (u8 *) regs->pc) ||
            (step_bp[1].addr && step_bp[1].addr == (u8 *) regs->pc))
                broke = 1;
 
        __gdbstub_restore_bp();
+#endif
 
        if (gdbstub_rx_unget) {
                sigval = SIGINT;
@@ -1548,17 +1552,21 @@ packet_waiting:
                         * Step to next instruction
                         */
                case 's':
-                       /*
-                        * using the T flag doesn't seem to perform single
+                       /* Using the T flag doesn't seem to perform single
                         * stepping (it seems to wind up being caught by the
                         * JTAG unit), so we have to use breakpoints and
                         * continue instead.
                         */
+#ifdef CONFIG_GDBSTUB_ALLOW_SINGLE_STEP
                        if (gdbstub_single_step(regs) < 0)
                                /* ignore any fault error for now */
                                gdbstub_printk("unable to set single-step"
                                               " bp\n");
                        goto done;
+#else
+                       gdbstub_strcpy(output_buffer, "E01");
+                       break;
+#endif
 
                        /*
                         * Set baud rate (bBB)
@@ -1657,7 +1665,7 @@ done:
         * NB: We flush both caches, just to be sure...
         */
        if (gdbstub_flush_caches)
-               gdbstub_purge_cache();
+               debugger_local_cache_flushinv();
 
        gdbstub_load_fpu();
        mn10300_set_gdbleds(0);
@@ -1667,14 +1675,23 @@ done:
        touch_softlockup_watchdog();
 
        local_irq_restore(epsw);
-       return 1;
+       return 0;
+}
+
+/*
+ * Determine if we hit a debugger special breakpoint that needs skipping over
+ * automatically.
+ */
+int at_debugger_breakpoint(struct pt_regs *regs)
+{
+       return 0;
 }
 
 /*
  * handle event interception
  */
-asmlinkage int gdbstub_intercept(struct pt_regs *regs,
-                                enum exception_code excep)
+asmlinkage int debugger_intercept(enum exception_code excep,
+                                 int signo, int si_code, struct pt_regs *regs)
 {
        static u8 notfirst = 1;
        int ret;
@@ -1688,7 +1705,7 @@ asmlinkage int gdbstub_intercept(struct pt_regs *regs,
                asm("mov mdr,%0" : "=d"(mdr));
 
                gdbstub_entry(
-                       "--> gdbstub_intercept(%p,%04x) [MDR=%lx PC=%lx]\n",
+                       "--> debugger_intercept(%p,%04x) [MDR=%lx PC=%lx]\n",
                        regs, excep, mdr, regs->pc);
 
                gdbstub_entry(
@@ -1722,7 +1739,7 @@ asmlinkage int gdbstub_intercept(struct pt_regs *regs,
 
        ret = gdbstub(regs, excep);
 
-       gdbstub_entry("<-- gdbstub_intercept()\n");
+       gdbstub_entry("<-- debugger_intercept()\n");
        gdbstub_busy = 0;
        return ret;
 }
index ea946613f46d12fb8f57cea490428e740f03015c..a5ac755dd69f4acbc8c6d213c47c285b8af41098 100644 (file)
@@ -29,6 +29,13 @@ extern void ret_from_fork(struct task_struct *) __attribute__((noreturn));
 extern void mn10300_low_ipi_handler(void);
 #endif
 
+/*
+ * smp.c
+ */
+#ifdef CONFIG_SMP
+extern void smp_jump_to_debugger(void);
+#endif
+
 /*
  * time.c
  */
index f09fed5e6afc21a1f1349cafef258e6cdebed57c..5f7fc3eb45e60807c58a4888fce1bc5ca24e83ce 100644 (file)
@@ -153,7 +153,7 @@ mn10300_cpupic_setaffinity(struct irq_data *d, const struct cpumask *mask,
        case LOCAL_TIMER_IPI:
        case FLUSH_CACHE_IPI:
        case CALL_FUNCTION_NMI_IPI:
-       case GDB_NMI_IPI:
+       case DEBUGGER_NMI_IPI:
 #ifdef CONFIG_MN10300_TTYSM0
        case SC0RXIRQ:
        case SC0TXIRQ:
diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c
new file mode 100644 (file)
index 0000000..f6c981d
--- /dev/null
@@ -0,0 +1,502 @@
+/* kgdb support for MN10300
+ *
+ * Copyright (C) 2010 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/ptrace.h>
+#include <linux/kgdb.h>
+#include <linux/uaccess.h>
+#include <unit/leds.h>
+#include <unit/serial.h>
+#include <asm/debugger.h>
+#include <asm/serial-regs.h>
+#include "internal.h"
+
+/*
+ * Software single-stepping breakpoint save (used by __switch_to())
+ */
+static struct thread_info *kgdb_sstep_thread;
+u8 *kgdb_sstep_bp_addr[2];
+u8 kgdb_sstep_bp[2];
+
+/*
+ * Copy kernel exception frame registers to the GDB register file
+ */
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+       unsigned long ssp = (unsigned long) (regs + 1);
+
+       gdb_regs[GDB_FR_D0]     = regs->d0;
+       gdb_regs[GDB_FR_D1]     = regs->d1;
+       gdb_regs[GDB_FR_D2]     = regs->d2;
+       gdb_regs[GDB_FR_D3]     = regs->d3;
+       gdb_regs[GDB_FR_A0]     = regs->a0;
+       gdb_regs[GDB_FR_A1]     = regs->a1;
+       gdb_regs[GDB_FR_A2]     = regs->a2;
+       gdb_regs[GDB_FR_A3]     = regs->a3;
+       gdb_regs[GDB_FR_SP]     = (regs->epsw & EPSW_nSL) ? regs->sp : ssp;
+       gdb_regs[GDB_FR_PC]     = regs->pc;
+       gdb_regs[GDB_FR_MDR]    = regs->mdr;
+       gdb_regs[GDB_FR_EPSW]   = regs->epsw;
+       gdb_regs[GDB_FR_LIR]    = regs->lir;
+       gdb_regs[GDB_FR_LAR]    = regs->lar;
+       gdb_regs[GDB_FR_MDRQ]   = regs->mdrq;
+       gdb_regs[GDB_FR_E0]     = regs->e0;
+       gdb_regs[GDB_FR_E1]     = regs->e1;
+       gdb_regs[GDB_FR_E2]     = regs->e2;
+       gdb_regs[GDB_FR_E3]     = regs->e3;
+       gdb_regs[GDB_FR_E4]     = regs->e4;
+       gdb_regs[GDB_FR_E5]     = regs->e5;
+       gdb_regs[GDB_FR_E6]     = regs->e6;
+       gdb_regs[GDB_FR_E7]     = regs->e7;
+       gdb_regs[GDB_FR_SSP]    = ssp;
+       gdb_regs[GDB_FR_MSP]    = 0;
+       gdb_regs[GDB_FR_USP]    = regs->sp;
+       gdb_regs[GDB_FR_MCRH]   = regs->mcrh;
+       gdb_regs[GDB_FR_MCRL]   = regs->mcrl;
+       gdb_regs[GDB_FR_MCVF]   = regs->mcvf;
+       gdb_regs[GDB_FR_DUMMY0] = 0;
+       gdb_regs[GDB_FR_DUMMY1] = 0;
+       gdb_regs[GDB_FR_FS0]    = 0;
+}
+
+/*
+ * Extracts kernel SP/PC values understandable by gdb from the values
+ * saved by switch_to().
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+       gdb_regs[GDB_FR_SSP]    = p->thread.sp;
+       gdb_regs[GDB_FR_PC]     = p->thread.pc;
+       gdb_regs[GDB_FR_A3]     = p->thread.a3;
+       gdb_regs[GDB_FR_USP]    = p->thread.usp;
+       gdb_regs[GDB_FR_FPCR]   = p->thread.fpu_state.fpcr;
+}
+
+/*
+ * Fill kernel exception frame registers from the GDB register file
+ */
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+       regs->d0        = gdb_regs[GDB_FR_D0];
+       regs->d1        = gdb_regs[GDB_FR_D1];
+       regs->d2        = gdb_regs[GDB_FR_D2];
+       regs->d3        = gdb_regs[GDB_FR_D3];
+       regs->a0        = gdb_regs[GDB_FR_A0];
+       regs->a1        = gdb_regs[GDB_FR_A1];
+       regs->a2        = gdb_regs[GDB_FR_A2];
+       regs->a3        = gdb_regs[GDB_FR_A3];
+       regs->sp        = gdb_regs[GDB_FR_SP];
+       regs->pc        = gdb_regs[GDB_FR_PC];
+       regs->mdr       = gdb_regs[GDB_FR_MDR];
+       regs->epsw      = gdb_regs[GDB_FR_EPSW];
+       regs->lir       = gdb_regs[GDB_FR_LIR];
+       regs->lar       = gdb_regs[GDB_FR_LAR];
+       regs->mdrq      = gdb_regs[GDB_FR_MDRQ];
+       regs->e0        = gdb_regs[GDB_FR_E0];
+       regs->e1        = gdb_regs[GDB_FR_E1];
+       regs->e2        = gdb_regs[GDB_FR_E2];
+       regs->e3        = gdb_regs[GDB_FR_E3];
+       regs->e4        = gdb_regs[GDB_FR_E4];
+       regs->e5        = gdb_regs[GDB_FR_E5];
+       regs->e6        = gdb_regs[GDB_FR_E6];
+       regs->e7        = gdb_regs[GDB_FR_E7];
+       regs->sp        = gdb_regs[GDB_FR_SSP];
+       /* gdb_regs[GDB_FR_MSP]; */
+       // regs->usp    = gdb_regs[GDB_FR_USP];
+       regs->mcrh      = gdb_regs[GDB_FR_MCRH];
+       regs->mcrl      = gdb_regs[GDB_FR_MCRL];
+       regs->mcvf      = gdb_regs[GDB_FR_MCVF];
+       /* gdb_regs[GDB_FR_DUMMY0]; */
+       /* gdb_regs[GDB_FR_DUMMY1]; */
+
+       // regs->fpcr   = gdb_regs[GDB_FR_FPCR];
+       // regs->fs0    = gdb_regs[GDB_FR_FS0];
+}
+
+struct kgdb_arch arch_kgdb_ops = {
+       .gdb_bpt_instr  = { 0xff },
+       .flags          = KGDB_HW_BREAKPOINT,
+};
+
+static const unsigned char mn10300_kgdb_insn_sizes[256] =
+{
+       /* 1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+       1, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, /* 0 */
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 1 */
+       2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, /* 2 */
+       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, /* 3 */
+       1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, /* 4 */
+       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, /* 5 */
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6 */
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 7 */
+       2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* 8 */
+       2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* 9 */
+       2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* a */
+       2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* b */
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 2, /* c */
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d */
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* e */
+       0, 2, 2, 2, 2, 2, 2, 4, 0, 3, 0, 4, 0, 6, 7, 1  /* f */
+};
+
+/*
+ * Attempt to emulate single stepping by means of breakpoint instructions.
+ * Although there is a single-step trace flag in EPSW, its use is not
+ * sufficiently documented and is only intended for use with the JTAG debugger.
+ */
+static int kgdb_arch_do_singlestep(struct pt_regs *regs)
+{
+       unsigned long arg;
+       unsigned size;
+       u8 *pc = (u8 *)regs->pc, *sp = (u8 *)(regs + 1), cur;
+       u8 *x = NULL, *y = NULL;
+       int ret;
+
+       ret = probe_kernel_read(&cur, pc, 1);
+       if (ret < 0)
+               return ret;
+
+       size = mn10300_kgdb_insn_sizes[cur];
+       if (size > 0) {
+               x = pc + size;
+               goto set_x;
+       }
+
+       switch (cur) {
+               /* Bxx (d8,PC) */
+       case 0xc0 ... 0xca:
+               ret = probe_kernel_read(&arg, pc + 1, 1);
+               if (ret < 0)
+                       return ret;
+               x = pc + 2;
+               if (arg >= 0 && arg <= 2)
+                       goto set_x;
+               y = pc + (s8)arg;
+               goto set_x_and_y;
+
+               /* LXX (d8,PC) */
+       case 0xd0 ... 0xda:
+               x = pc + 1;
+               if (regs->pc == regs->lar)
+                       goto set_x;
+               y = (u8 *)regs->lar;
+               goto set_x_and_y;
+
+               /* SETLB - loads the next four bytes into the LIR register
+                * (which mustn't include a breakpoint instruction) */
+       case 0xdb:
+               x = pc + 5;
+               goto set_x;
+
+               /* JMP (d16,PC) or CALL (d16,PC) */
+       case 0xcc:
+       case 0xcd:
+               ret = probe_kernel_read(&arg, pc + 1, 2);
+               if (ret < 0)
+                       return ret;
+               x = pc + (s16)arg;
+               goto set_x;
+
+               /* JMP (d32,PC) or CALL (d32,PC) */
+       case 0xdc:
+       case 0xdd:
+               ret = probe_kernel_read(&arg, pc + 1, 4);
+               if (ret < 0)
+                       return ret;
+               x = pc + (s32)arg;
+               goto set_x;
+
+               /* RETF */
+       case 0xde:
+               x = (u8 *)regs->mdr;
+               goto set_x;
+
+               /* RET */
+       case 0xdf:
+               ret = probe_kernel_read(&arg, pc + 2, 1);
+               if (ret < 0)
+                       return ret;
+               ret = probe_kernel_read(&x, sp + (s8)arg, 4);
+               if (ret < 0)
+                       return ret;
+               goto set_x;
+
+       case 0xf0:
+               ret = probe_kernel_read(&cur, pc + 1, 1);
+               if (ret < 0)
+                       return ret;
+
+               if (cur >= 0xf0 && cur <= 0xf7) {
+                       /* JMP (An) / CALLS (An) */
+                       switch (cur & 3) {
+                       case 0: x = (u8 *)regs->a0; break;
+                       case 1: x = (u8 *)regs->a1; break;
+                       case 2: x = (u8 *)regs->a2; break;
+                       case 3: x = (u8 *)regs->a3; break;
+                       }
+                       goto set_x;
+               } else if (cur == 0xfc) {
+                       /* RETS */
+                       ret = probe_kernel_read(&x, sp, 4);
+                       if (ret < 0)
+                               return ret;
+                       goto set_x;
+               } else if (cur == 0xfd) {
+                       /* RTI */
+                       ret = probe_kernel_read(&x, sp + 4, 4);
+                       if (ret < 0)
+                               return ret;
+                       goto set_x;
+               } else {
+                       x = pc + 2;
+                       goto set_x;
+               }
+               break;
+
+               /* potential 3-byte conditional branches */
+       case 0xf8:
+               ret = probe_kernel_read(&cur, pc + 1, 1);
+               if (ret < 0)
+                       return ret;
+               x = pc + 3;
+
+               if (cur >= 0xe8 && cur <= 0xeb) {
+                       ret = probe_kernel_read(&arg, pc + 2, 1);
+                       if (ret < 0)
+                               return ret;
+                       if (arg >= 0 && arg <= 3)
+                               goto set_x;
+                       y = pc + (s8)arg;
+                       goto set_x_and_y;
+               }
+               goto set_x;
+
+       case 0xfa:
+               ret = probe_kernel_read(&cur, pc + 1, 1);
+               if (ret < 0)
+                       return ret;
+
+               if (cur == 0xff) {
+                       /* CALLS (d16,PC) */
+                       ret = probe_kernel_read(&arg, pc + 2, 2);
+                       if (ret < 0)
+                               return ret;
+                       x = pc + (s16)arg;
+                       goto set_x;
+               }
+
+               x = pc + 4;
+               goto set_x;
+
+       case 0xfc:
+               ret = probe_kernel_read(&cur, pc + 1, 1);
+               if (ret < 0)
+                       return ret;
+
+               if (cur == 0xff) {
+                       /* CALLS (d32,PC) */
+                       ret = probe_kernel_read(&arg, pc + 2, 4);
+                       if (ret < 0)
+                               return ret;
+                       x = pc + (s32)arg;
+                       goto set_x;
+               }
+
+               x = pc + 6;
+               goto set_x;
+       }
+
+       return 0;
+
+set_x:
+       kgdb_sstep_bp_addr[0] = x;
+       kgdb_sstep_bp_addr[1] = NULL;
+       ret = probe_kernel_read(&kgdb_sstep_bp[0], x, 1);
+       if (ret < 0)
+               return ret;
+       ret = probe_kernel_write(x, &arch_kgdb_ops.gdb_bpt_instr, 1);
+       if (ret < 0)
+               return ret;
+       kgdb_sstep_thread = current_thread_info();
+       debugger_local_cache_flushinv_one(x);
+       return ret;
+
+set_x_and_y:
+       kgdb_sstep_bp_addr[0] = x;
+       kgdb_sstep_bp_addr[1] = y;
+       ret = probe_kernel_read(&kgdb_sstep_bp[0], x, 1);
+       if (ret < 0)
+               return ret;
+       ret = probe_kernel_read(&kgdb_sstep_bp[1], y, 1);
+       if (ret < 0)
+               return ret;
+       ret = probe_kernel_write(x, &arch_kgdb_ops.gdb_bpt_instr, 1);
+       if (ret < 0)
+               return ret;
+       ret = probe_kernel_write(y, &arch_kgdb_ops.gdb_bpt_instr, 1);
+       if (ret < 0) {
+               probe_kernel_write(kgdb_sstep_bp_addr[0],
+                                  &kgdb_sstep_bp[0], 1);
+       } else {
+               kgdb_sstep_thread = current_thread_info();
+       }
+       debugger_local_cache_flushinv_one(x);
+       debugger_local_cache_flushinv_one(y);
+       return ret;
+}
+
+/*
+ * Remove emplaced single-step breakpoints, returning true if we hit one of
+ * them.
+ */
+static bool kgdb_arch_undo_singlestep(struct pt_regs *regs)
+{
+       bool hit = false;
+       u8 *x = kgdb_sstep_bp_addr[0], *y = kgdb_sstep_bp_addr[1];
+       u8 opcode;
+
+       if (kgdb_sstep_thread == current_thread_info()) {
+               if (x) {
+                       if (x == (u8 *)regs->pc)
+                               hit = true;
+                       if (probe_kernel_read(&opcode, x,
+                                             1) < 0 ||
+                           opcode != 0xff)
+                               BUG();
+                       probe_kernel_write(x, &kgdb_sstep_bp[0], 1);
+                       debugger_local_cache_flushinv_one(x);
+               }
+               if (y) {
+                       if (y == (u8 *)regs->pc)
+                               hit = true;
+                       if (probe_kernel_read(&opcode, y,
+                                             1) < 0 ||
+                           opcode != 0xff)
+                               BUG();
+                       probe_kernel_write(y, &kgdb_sstep_bp[1], 1);
+                       debugger_local_cache_flushinv_one(y);
+               }
+       }
+
+       kgdb_sstep_bp_addr[0] = NULL;
+       kgdb_sstep_bp_addr[1] = NULL;
+       kgdb_sstep_thread = NULL;
+       return hit;
+}
+
+/*
+ * Catch a single-step-pending thread being deleted and make sure the global
+ * single-step state is cleared.  At this point the breakpoints should have
+ * been removed by __switch_to().
+ */
+void free_thread_info(struct thread_info *ti)
+{
+       if (kgdb_sstep_thread == ti) {
+               kgdb_sstep_thread = NULL;
+
+               /* However, we may now be running in degraded mode, with most
+                * of the CPUs disabled until such a time as KGDB is reentered,
+                * so force immediate reentry */
+               kgdb_breakpoint();
+       }
+       kfree(ti);
+}
+
+/*
+ * Handle unknown packets and [CcsDk] packets
+ * - at this point breakpoints have been installed
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+                              char *remcom_in_buffer, char *remcom_out_buffer,
+                              struct pt_regs *regs)
+{
+       long addr;
+       char *ptr;
+
+       switch (remcom_in_buffer[0]) {
+       case 'c':
+       case 's':
+               /* try to read optional parameter, pc unchanged if no parm */
+               ptr = &remcom_in_buffer[1];
+               if (kgdb_hex2long(&ptr, &addr))
+                       regs->pc = addr;
+       case 'D':
+       case 'k':
+               atomic_set(&kgdb_cpu_doing_single_step, -1);
+
+               if (remcom_in_buffer[0] == 's') {
+                       kgdb_arch_do_singlestep(regs);
+                       kgdb_single_step = 1;
+                       atomic_set(&kgdb_cpu_doing_single_step,
+                                  raw_smp_processor_id());
+               }
+               return 0;
+       }
+       return -1; /* this means that we do not want to exit from the handler */
+}
+
+/*
+ * Handle event interception
+ * - returns 0 if the exception should be skipped, -ERROR otherwise.
+ */
+int debugger_intercept(enum exception_code excep, int signo, int si_code,
+                      struct pt_regs *regs)
+{
+       int ret;
+
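+       /* if we stopped on one of our own single-step breakpoints, present it
+        * to KGDB as an ordinary trace trap */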
+       if (kgdb_arch_undo_singlestep(regs)) {
+               excep = EXCEP_TRAP;
+               signo = SIGTRAP;
+               si_code = TRAP_TRACE;
+       }
+
+       ret = kgdb_handle_exception(excep, signo, si_code, regs);
+
+       debugger_local_cache_flushinv();
+
+       return ret;
+}
+
+/*
+ * Determine if we've hit a debugger special breakpoint
+ */
+int at_debugger_breakpoint(struct pt_regs *regs)
+{
+       return regs->pc == (unsigned long)&__arch_kgdb_breakpoint;
+}
+
+/*
+ * Initialise kgdb
+ */
+int kgdb_arch_init(void)
+{
+       return 0;
+}
+
+/*
+ * Nothing to do here when KGDB is torn down on this architecture.
+ */
+void kgdb_arch_exit(void)
+{
+}
+
+#ifdef CONFIG_SMP
+void debugger_nmi_interrupt(struct pt_regs *regs, enum exception_code code)
+{
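+       /* report this CPU into the debugger's CPU roundup */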
+       kgdb_nmicallback(arch_smp_processor_id(), regs);
+       debugger_local_cache_flushinv();
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+       smp_jump_to_debugger();
+}
+#endif
index 93c53739cfc94c58ba7d694af171dcfaf0f284df..efca426a2ed4e7241e4a8a74d7b29a49c29a2973 100644 (file)
@@ -119,6 +119,10 @@ static int mn10300_serial_request_port(struct uart_port *);
 static void mn10300_serial_config_port(struct uart_port *, int);
 static int mn10300_serial_verify_port(struct uart_port *,
                                        struct serial_struct *);
+#ifdef CONFIG_CONSOLE_POLL
+static void mn10300_serial_poll_put_char(struct uart_port *, unsigned char);
+static int mn10300_serial_poll_get_char(struct uart_port *);
+#endif
 
 static const struct uart_ops mn10300_serial_ops = {
        .tx_empty       = mn10300_serial_tx_empty,
@@ -138,6 +142,10 @@ static const struct uart_ops mn10300_serial_ops = {
        .request_port   = mn10300_serial_request_port,
        .config_port    = mn10300_serial_config_port,
        .verify_port    = mn10300_serial_verify_port,
+#ifdef CONFIG_CONSOLE_POLL
+       .poll_put_char  = mn10300_serial_poll_put_char,
+       .poll_get_char  = mn10300_serial_poll_get_char,
+#endif
 };
 
 static irqreturn_t mn10300_serial_interrupt(int irq, void *dev_id);
@@ -1634,3 +1642,70 @@ static int __init mn10300_serial_console_init(void)
 
 console_initcall(mn10300_serial_console_init);
 #endif
+
+#ifdef CONFIG_CONSOLE_POLL
+/*
+ * Polled character reception for the kernel debugger
+ */
+static int mn10300_serial_poll_get_char(struct uart_port *_port)
+{
+       struct mn10300_serial_port *port =
+               container_of(_port, struct mn10300_serial_port, uart);
+       unsigned ix;
+       u8 st, ch;
+
+       _enter("%s", port->name);
+
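+       /* characters that arrived with framing, parity or overrun errors are
+        * silently discarded */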
+       do {
+               /* pull chars out of the hat */
+               ix = port->rx_outp;
+               if (ix == port->rx_inp)
+                       return NO_POLL_CHAR;
+
+               ch = port->rx_buffer[ix++];
+               st = port->rx_buffer[ix++];
+               smp_rmb();
+               port->rx_outp = ix & (MNSC_BUFFER_SIZE - 1);
+
+       } while (st & (SC01STR_FEF | SC01STR_PEF | SC01STR_OEF));
+
+       return ch;
+}
+
+
+/*
+ * Polled character transmission for the kernel debugger
+ */
+static void mn10300_serial_poll_put_char(struct uart_port *_port,
+                                        unsigned char ch)
+{
+       struct mn10300_serial_port *port =
+               container_of(_port, struct mn10300_serial_port, uart);
+       u8 intr, tmp;
+
+       /* wait for the transmitter to finish anything it might be doing (and
+        * this includes the virtual DMA handler, so it might take a while) */
+       while (*port->_status & (SC01STR_TBF | SC01STR_TXF))
+               continue;
+
+       /* disable the Tx ready interrupt */
+       intr = *port->_intr;
+       *port->_intr = intr & ~SC01ICR_TI;
+       tmp = *port->_intr;
+
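+       /* send a CR ahead of each LF so that the remote console lines up */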
+       if (ch == 0x0a) {
+               *(u8 *) port->_txb = 0x0d;
+               while (*port->_status & SC01STR_TBF)
+                       continue;
+       }
+
+       *(u8 *) port->_txb = ch;
+       while (*port->_status & SC01STR_TBF)
+               continue;
+
+       /* restore the Tx interrupt flag */
+       *port->_intr = intr;
+       tmp = *port->_intr;
+}
+
+#endif /* CONFIG_CONSOLE_POLL */
index e1b14a6ed544675bf7929dc7861876a1a3bf2cc8..28eec3102535c86c91649b336aeb0b9f5677b75c 100644 (file)
@@ -135,7 +135,7 @@ void release_segments(struct mm_struct *mm)
 
 void machine_restart(char *cmd)
 {
-#ifdef CONFIG_GDBSTUB
+#ifdef CONFIG_KERNEL_DEBUGGER
        gdbstub_exit(0);
 #endif
 
@@ -148,14 +148,14 @@ void machine_restart(char *cmd)
 
 void machine_halt(void)
 {
-#ifdef CONFIG_GDBSTUB
+#ifdef CONFIG_KERNEL_DEBUGGER
        gdbstub_exit(0);
 #endif
 }
 
 void machine_power_off(void)
 {
-#ifdef CONFIG_GDBSTUB
+#ifdef CONFIG_KERNEL_DEBUGGER
        gdbstub_exit(0);
 #endif
 }
index 1ebb79f1650d74b2fb38407694f44f17a18771ab..51c02f97dceaa924aaba64383efcbb93032ad313 100644 (file)
@@ -439,6 +439,22 @@ int smp_nmi_call_function(smp_call_func_t func, void *info, int wait)
        return ret;
 }
 
+/**
+ * smp_jump_to_debugger - Make other CPUs enter the debugger by sending an IPI
+ *
+ * Send a non-maskable request to all other CPUs in the system, instructing
+ * them to jump into the debugger.  The caller is responsible for checking that
+ * the other CPUs responded to the instruction.
+ *
+ * The caller should make sure that this CPU's debugger IPI is disabled.
+ */
+void smp_jump_to_debugger(void)
+{
+       if (num_online_cpus() > 1)
+               /* Send a message to all other CPUs */
+               send_IPI_allbutself(DEBUGGER_NMI_IPI);
+}
+
 /**
  * stop_this_cpu - Callback to stop a CPU.
  * @unused: Callback context (ignored).
@@ -603,7 +619,7 @@ static void __init smp_cpu_init(void)
 /**
  * smp_prepare_cpu_init - Initialise CPU in startup_secondary
  *
- * Set interrupt level 0-6 setting and init ICR of gdbstub.
+ * Set up interrupt levels 0-6 and initialise the ICR of the kernel debugger.
  */
 void smp_prepare_cpu_init(void)
 {
@@ -622,15 +638,15 @@ void smp_prepare_cpu_init(void)
        for (loop = 0; loop < GxICR_NUM_IRQS; loop++)
                GxICR(loop) = GxICR_LEVEL_6 | GxICR_DETECT;
 
-#ifdef CONFIG_GDBSTUB
-       /* initialise GDB-stub */
+#ifdef CONFIG_KERNEL_DEBUGGER
+       /* initialise the kernel debugger interrupt */
        do {
                unsigned long flags;
                u16 tmp16;
 
                flags = arch_local_cli_save();
-               GxICR(GDB_NMI_IPI) = GxICR_NMI | GxICR_ENABLE | GxICR_DETECT;
-               tmp16 = GxICR(GDB_NMI_IPI);
+               GxICR(DEBUGGER_NMI_IPI) = GxICR_NMI | GxICR_ENABLE | GxICR_DETECT;
+               tmp16 = GxICR(DEBUGGER_NMI_IPI);
                arch_local_irq_restore(flags);
        } while (0);
 #endif
index 9074d0fb8788fbc5187d6d77b026a271c91050f5..de3e74fc9ea04980099b6a6c68546e311e7cb565 100644 (file)
@@ -39,11 +39,17 @@ ENTRY(__switch_to)
 
        # save prev context
        mov     __switch_back,d0
-       mov     d0,(THREAD_PC,a0)
        mov     sp,a2
        mov     a2,(THREAD_SP,a0)
        mov     a3,(THREAD_A3,a0)
 
+#ifdef CONFIG_KGDB
+       btst    0xff,(kgdb_single_step)
+       bne     __switch_to__lift_sstep_bp
+__switch_to__continue:
+#endif
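+       # d0 holds the resumption address; the KGDB path above may have
+       # redirected it so that the single-step breakpoints get reinstalled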
+       mov     d0,(THREAD_PC,a0)
+
        mov     (THREAD_A3,a1),a3
        mov     (THREAD_SP,a1),a2
 
@@ -68,3 +74,106 @@ ENTRY(__switch_to)
 __switch_back:
        and     ~EPSW_NMID,epsw
        ret     [d2,d3,a2,a3,exreg1],32
+
+#ifdef CONFIG_KGDB
+###############################################################################
+#
+# Lift the single-step breakpoints when the task being traced is switched out
+# A0 = prev
+# A1 = next
+#
+###############################################################################
+__switch_to__lift_sstep_bp:
+       add     -12,sp
+       mov     a0,e4
+       mov     a1,e5
+
+       # Clear the single-step flag to prevent us coming this way until we get
+       # switched back in
+       bclr    0xff,(kgdb_single_step)
+
+       # Remove first breakpoint
+       mov     (kgdb_sstep_bp_addr),a2
+       cmp     0,a2
+       beq     1f
+       movbu   (kgdb_sstep_bp),d0
+       movbu   d0,(a2)
+#if defined(CONFIG_MN10300_CACHE_FLUSH_ICACHE) || defined(CONFIG_MN10300_CACHE_INV_ICACHE)
+       mov     a2,d0
+       mov     a2,d1
+       add     1,d1
+       calls   flush_icache_range
+#endif
+1:
+
+       # Remove second breakpoint
+       mov     (kgdb_sstep_bp_addr+4),a2
+       cmp     0,a2
+       beq     2f
+       movbu   (kgdb_sstep_bp+1),d0
+       movbu   d0,(a2)
+#if defined(CONFIG_MN10300_CACHE_FLUSH_ICACHE) || defined(CONFIG_MN10300_CACHE_INV_ICACHE)
+       mov     a2,d0
+       mov     a2,d1
+       add     1,d1
+       calls   flush_icache_range
+#endif
+2:
+
+       # Change the resumption address and return
+       mov     __switch_back__reinstall_sstep_bp,d0
+       mov     e4,a0
+       mov     e5,a1
+       add     12,sp
+       bra     __switch_to__continue
+
+###############################################################################
+#
+# Reinstall the single-step breakpoints when the task being traced is switched
+# back in (A1 points to the new thread_struct).
+#
+###############################################################################
+__switch_back__reinstall_sstep_bp:
+       add     -12,sp
+       mov     a0,e4                   # save the return value
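+       # (0xff is the KGDB breakpoint instruction)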
+       mov     0xff,d3
+
+       # Reinstall first breakpoint
+       mov     (kgdb_sstep_bp_addr),a2
+       cmp     0,a2
+       beq     1f
+       movbu   (a2),d0
+       movbu   d0,(kgdb_sstep_bp)
+       movbu   d3,(a2)
+#if defined(CONFIG_MN10300_CACHE_FLUSH_ICACHE) || defined(CONFIG_MN10300_CACHE_INV_ICACHE)
+       mov     a2,d0
+       mov     a2,d1
+       add     1,d1
+       calls   flush_icache_range
+#endif
+1:
+
+       # Reinstall second breakpoint
+       mov     (kgdb_sstep_bp_addr+4),a2
+       cmp     0,a2
+       beq     2f
+       movbu   (a2),d0
+       movbu   d0,(kgdb_sstep_bp+1)
+       movbu   d3,(a2)
+#if defined(CONFIG_MN10300_CACHE_FLUSH_ICACHE) || defined(CONFIG_MN10300_CACHE_INV_ICACHE)
+       mov     a2,d0
+       mov     a2,d1
+       add     1,d1
+       calls   flush_icache_range
+#endif
+2:
+
+       mov     d3,(kgdb_single_step)
+
+       # Restore the return value (the previous thread_struct pointer)
+       mov     e4,a0
+       mov     a0,d0
+       add     12,sp
+       bra     __switch_back
+
+#endif /* CONFIG_KGDB */
index b90c3f160c77b0e598fe90578d9362de54e4a1f1..f03cb278828f400c1e66580be491c30da507e9b3 100644 (file)
@@ -38,8 +38,9 @@
 #include <asm/busctl-regs.h>
 #include <unit/leds.h>
 #include <asm/fpu.h>
-#include <asm/gdb-stub.h>
 #include <asm/sections.h>
+#include <asm/debugger.h>
+#include "internal.h"
 
 #if (CONFIG_INTERRUPT_VECTOR_BASE & 0xffffff)
 #error "INTERRUPT_VECTOR_BASE not aligned to 16MiB boundary!"
@@ -49,63 +50,169 @@ int kstack_depth_to_print = 24;
 
 spinlock_t die_lock = __SPIN_LOCK_UNLOCKED(die_lock);
 
-ATOMIC_NOTIFIER_HEAD(mn10300_die_chain);
+struct exception_to_signal_map {
+       u8      signo;
+       u32     si_code;
+};
+
+static const struct exception_to_signal_map exception_to_signal_map[256] = {
+       /* MMU exceptions */
+       [EXCEP_ITLBMISS >> 3]   = { 0, 0 },
+       [EXCEP_DTLBMISS >> 3]   = { 0, 0 },
+       [EXCEP_IAERROR >> 3]    = { 0, 0 },
+       [EXCEP_DAERROR >> 3]    = { 0, 0 },
+
+       /* system exceptions */
+       [EXCEP_TRAP >> 3]       = { SIGTRAP,    TRAP_BRKPT },
+       [EXCEP_ISTEP >> 3]      = { SIGTRAP,    TRAP_TRACE },   /* Monitor */
+       [EXCEP_IBREAK >> 3]     = { SIGTRAP,    TRAP_HWBKPT },  /* Monitor */
+       [EXCEP_OBREAK >> 3]     = { SIGTRAP,    TRAP_HWBKPT },  /* Monitor */
+       [EXCEP_PRIVINS >> 3]    = { SIGILL,     ILL_PRVOPC },
+       [EXCEP_UNIMPINS >> 3]   = { SIGILL,     ILL_ILLOPC },
+       [EXCEP_UNIMPEXINS >> 3] = { SIGILL,     ILL_ILLOPC },
+       [EXCEP_MEMERR >> 3]     = { SIGSEGV,    SEGV_ACCERR },
+       [EXCEP_MISALIGN >> 3]   = { SIGBUS,     BUS_ADRALN },
+       [EXCEP_BUSERROR >> 3]   = { SIGBUS,     BUS_ADRERR },
+       [EXCEP_ILLINSACC >> 3]  = { SIGSEGV,    SEGV_ACCERR },
+       [EXCEP_ILLDATACC >> 3]  = { SIGSEGV,    SEGV_ACCERR },
+       [EXCEP_IOINSACC >> 3]   = { SIGSEGV,    SEGV_ACCERR },
+       [EXCEP_PRIVINSACC >> 3] = { SIGSEGV,    SEGV_ACCERR }, /* userspace */
+       [EXCEP_PRIVDATACC >> 3] = { SIGSEGV,    SEGV_ACCERR }, /* userspace */
+       [EXCEP_DATINSACC >> 3]  = { SIGSEGV,    SEGV_ACCERR },
+       [EXCEP_DOUBLE_FAULT >> 3] = { SIGILL,   ILL_BADSTK },
+
+       /* FPU exceptions */
+       [EXCEP_FPU_DISABLED >> 3] = { SIGILL,   ILL_COPROC },
+       [EXCEP_FPU_UNIMPINS >> 3] = { SIGILL,   ILL_COPROC },
+       [EXCEP_FPU_OPERATION >> 3] = { SIGFPE,  FPE_INTDIV },
+
+       /* interrupts */
+       [EXCEP_WDT >> 3]        = { SIGALRM,    0 },
+       [EXCEP_NMI >> 3]        = { SIGQUIT,    0 },
+       [EXCEP_IRQ_LEVEL0 >> 3] = { SIGINT,     0 },
+       [EXCEP_IRQ_LEVEL1 >> 3] = { 0, 0 },
+       [EXCEP_IRQ_LEVEL2 >> 3] = { 0, 0 },
+       [EXCEP_IRQ_LEVEL3 >> 3] = { 0, 0 },
+       [EXCEP_IRQ_LEVEL4 >> 3] = { 0, 0 },
+       [EXCEP_IRQ_LEVEL5 >> 3] = { 0, 0 },
+       [EXCEP_IRQ_LEVEL6 >> 3] = { 0, 0 },
+
+       /* system calls */
+       [EXCEP_SYSCALL0 >> 3]   = { 0, 0 },
+       [EXCEP_SYSCALL1 >> 3]   = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL2 >> 3]   = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL3 >> 3]   = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL4 >> 3]   = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL5 >> 3]   = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL6 >> 3]   = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL7 >> 3]   = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL8 >> 3]   = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL9 >> 3]   = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL10 >> 3]  = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL11 >> 3]  = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL12 >> 3]  = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL13 >> 3]  = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL14 >> 3]  = { SIGILL,     ILL_ILLTRP },
+       [EXCEP_SYSCALL15 >> 3]  = { SIGABRT,    0 },
+};
 
 /*
- * These constants are for searching for possible module text
- * segments. MODULE_RANGE is a guess of how much space is likely
- * to be vmalloced.
+ * Handle kernel exceptions.
+ *
+ * See if there's a fixup handler we can force a jump to when an exception
+ * happens due to something kernel code did
  */
-#define MODULE_RANGE (8 * 1024 * 1024)
-
-#define DO_ERROR(signr, prologue, str, name)                   \
-asmlinkage void name(struct pt_regs *regs, u32 intcode)                \
-{                                                              \
-       prologue;                                               \
-       if (die_if_no_fixup(str, regs, intcode))                \
-               return;                                         \
-       force_sig(signr, current);                              \
-}
+int die_if_no_fixup(const char *str, struct pt_regs *regs,
+                   enum exception_code code)
+{
+       u8 opcode;
+       int signo, si_code;
+
+       if (user_mode(regs))
+               return 0;
+
+       peripheral_leds_display_exception(code);
+
+       signo = exception_to_signal_map[code >> 3].signo;
+       si_code = exception_to_signal_map[code >> 3].si_code;
+
+       switch (code) {
+               /* see if we can fixup the kernel accessing memory */
+       case EXCEP_ITLBMISS:
+       case EXCEP_DTLBMISS:
+       case EXCEP_IAERROR:
+       case EXCEP_DAERROR:
+       case EXCEP_MEMERR:
+       case EXCEP_MISALIGN:
+       case EXCEP_BUSERROR:
+       case EXCEP_ILLDATACC:
+       case EXCEP_IOINSACC:
+       case EXCEP_PRIVINSACC:
+       case EXCEP_PRIVDATACC:
+       case EXCEP_DATINSACC:
+               if (fixup_exception(regs))
+                       return 1;
+               break;
 
-#define DO_EINFO(signr, prologue, str, name, sicode)                   \
-asmlinkage void name(struct pt_regs *regs, u32 intcode)                        \
-{                                                                      \
-       siginfo_t info;                                                 \
-       prologue;                                                       \
-       if (die_if_no_fixup(str, regs, intcode))                        \
-               return;                                                 \
-       info.si_signo = signr;                                          \
-       if (signr == SIGILL && sicode == ILL_ILLOPC) {                  \
-               uint8_t opcode;                                         \
-               if (get_user(opcode, (uint8_t __user *)regs->pc) == 0)  \
-                       if (opcode == 0xff)                             \
-                               info.si_signo = SIGTRAP;                \
-       }                                                               \
-       info.si_errno = 0;                                              \
-       info.si_code = sicode;                                          \
-       info.si_addr = (void *) regs->pc;                               \
-       force_sig_info(info.si_signo, &info, current);                  \
+       case EXCEP_TRAP:
+       case EXCEP_UNIMPINS:
+               if (get_user(opcode, (uint8_t __user *)regs->pc) != 0)
+                       break;
+               if (opcode == 0xff) {
+                       if (notify_die(DIE_BREAKPOINT, str, regs, code, 0, 0))
+                               return 1;
+                       if (at_debugger_breakpoint(regs))
+                               regs->pc++;
+                       signo = SIGTRAP;
+                       si_code = TRAP_BRKPT;
+               }
+               break;
+
+       case EXCEP_SYSCALL1 ... EXCEP_SYSCALL14:
+               /* syscall return addr is _after_ the instruction */
+               regs->pc -= 2;
+               break;
+
+       case EXCEP_SYSCALL15:
+               if (report_bug(regs->pc, regs) == BUG_TRAP_TYPE_WARN)
+                       return 1;
+
+               /* syscall return addr is _after_ the instruction */
+               regs->pc -= 2;
+               break;
+
+       default:
+               break;
+       }
+
+       if (debugger_intercept(code, signo, si_code, regs) == 0)
+               return 1;
+
+       if (notify_die(DIE_GPF, str, regs, code, 0, 0))
+               return 1;
+
+       /* make the process die as the last resort */
+       die(str, regs, code);
 }
 
-DO_ERROR(SIGTRAP, {}, "trap",                  trap);
-DO_ERROR(SIGSEGV, {}, "ibreak",                        ibreak);
-DO_ERROR(SIGSEGV, {}, "obreak",                        obreak);
-DO_EINFO(SIGSEGV, {}, "access error",          access_error,   SEGV_ACCERR);
-DO_EINFO(SIGSEGV, {}, "insn access error",     insn_acc_error, SEGV_ACCERR);
-DO_EINFO(SIGSEGV, {}, "data access error",     data_acc_error, SEGV_ACCERR);
-DO_EINFO(SIGILL,  {}, "privileged opcode",     priv_op,        ILL_PRVOPC);
-DO_EINFO(SIGILL,  {}, "invalid opcode",                invalid_op,     ILL_ILLOPC);
-DO_EINFO(SIGILL,  {}, "invalid ex opcode",     invalid_exop,   ILL_ILLOPC);
-DO_EINFO(SIGBUS,  {}, "invalid address",       mem_error,      BUS_ADRERR);
-DO_EINFO(SIGBUS,  {}, "bus error",             bus_error,      BUS_ADRERR);
-
-DO_ERROR(SIGTRAP,
-#ifndef CONFIG_MN10300_USING_JTAG
-        DCR &= ~0x0001,
-#else
-        {},
-#endif
-        "single step", istep);
+/*
+ * General exception handler
+ */
+asmlinkage void handle_exception(struct pt_regs *regs, u32 intcode)
+{
+       siginfo_t info;
+
+       /* deal with kernel exceptions here */
+       if (die_if_no_fixup(NULL, regs, intcode))
+               return;
+
+       /* otherwise it's a userspace exception */
+       info.si_signo = exception_to_signal_map[intcode >> 3].signo;
+       info.si_code = exception_to_signal_map[intcode >> 3].si_code;
+       info.si_errno = 0;
+       info.si_addr = (void *) regs->pc;
+       force_sig_info(info.si_signo, &info, current);
+}
 
 /*
  * handle NMI
@@ -113,10 +220,8 @@ DO_ERROR(SIGTRAP,
 asmlinkage void nmi(struct pt_regs *regs, enum exception_code code)
 {
        /* see if gdbstub wants to deal with it */
-#ifdef CONFIG_GDBSTUB
-       if (gdbstub_intercept(regs, code))
+       if (debugger_intercept(code, SIGQUIT, 0, regs))
                return;
-#endif
 
        printk(KERN_WARNING "--- Register Dump ---\n");
        show_registers(regs);
@@ -128,29 +233,36 @@ asmlinkage void nmi(struct pt_regs *regs, enum exception_code code)
  */
 void show_trace(unsigned long *sp)
 {
-       unsigned long *stack, addr, module_start, module_end;
-       int i;
-
-       printk(KERN_EMERG "\nCall Trace:");
-
-       stack = sp;
-       i = 0;
-       module_start = VMALLOC_START;
-       module_end = VMALLOC_END;
+       unsigned long bottom, stack, addr, fp, raslot;
+
+       printk(KERN_EMERG "\nCall Trace:\n");
+
+       //stack = (unsigned long)sp;
+       asm("mov sp,%0" : "=a"(stack));
+       asm("mov a3,%0" : "=r"(fp));
+
+       raslot = ULONG_MAX;
+       bottom = (stack + THREAD_SIZE) & ~(THREAD_SIZE - 1);
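+       /* scan up to the top of the stack, following the frame pointer chain
+        * rooted in A3 so that genuine return-address slots can be told apart
+        * from stale text addresses (the latter are flagged with a '?') */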
+       for (; stack < bottom; stack += sizeof(addr)) {
+               addr = *(unsigned long *)stack;
+               if (stack == fp) {
+                       if (addr > stack && addr < bottom) {
+                               fp = addr;
+                               raslot = stack + sizeof(addr);
+                               continue;
+                       }
+                       fp = 0;
+                       raslot = ULONG_MAX;
+               }
 
-       while (((long) stack & (THREAD_SIZE - 1)) != 0) {
-               addr = *stack++;
                if (__kernel_text_address(addr)) {
-#if 1
                        printk(" [<%08lx>]", addr);
+                       if (stack >= raslot)
+                               raslot = ULONG_MAX;
+                       else
+                               printk(" ?");
                        print_symbol(" %s", addr);
                        printk("\n");
-#else
-                       if ((i % 6) == 0)
-                               printk(KERN_EMERG "  ");
-                       printk("[<%08lx>] ", addr);
-                       i++;
-#endif
                }
        }
 
@@ -322,86 +434,6 @@ void die(const char *str, struct pt_regs *regs, enum exception_code code)
        do_exit(SIGSEGV);
 }
 
-/*
- * see if there's a fixup handler we can force a jump to when an exception
- * happens due to something kernel code did
- */
-int die_if_no_fixup(const char *str, struct pt_regs *regs,
-                   enum exception_code code)
-{
-       if (user_mode(regs))
-               return 0;
-
-       peripheral_leds_display_exception(code);
-
-       switch (code) {
-               /* see if we can fixup the kernel accessing memory */
-       case EXCEP_ITLBMISS:
-       case EXCEP_DTLBMISS:
-       case EXCEP_IAERROR:
-       case EXCEP_DAERROR:
-       case EXCEP_MEMERR:
-       case EXCEP_MISALIGN:
-       case EXCEP_BUSERROR:
-       case EXCEP_ILLDATACC:
-       case EXCEP_IOINSACC:
-       case EXCEP_PRIVINSACC:
-       case EXCEP_PRIVDATACC:
-       case EXCEP_DATINSACC:
-               if (fixup_exception(regs))
-                       return 1;
-       case EXCEP_UNIMPINS:
-               if (regs->pc && *(uint8_t *)regs->pc == 0xff)
-                       if (notify_die(DIE_BREAKPOINT, str, regs, code, 0, 0))
-                               return 1;
-               break;
-       default:
-               break;
-       }
-
-       /* see if gdbstub wants to deal with it */
-#ifdef CONFIG_GDBSTUB
-       if (gdbstub_intercept(regs, code))
-               return 1;
-#endif
-
-       if (notify_die(DIE_GPF, str, regs, code, 0, 0))
-               return 1;
-
-       /* make the process die as the last resort */
-       die(str, regs, code);
-}
-
-/*
- * handle unsupported syscall instructions (syscall 1-15)
- */
-static asmlinkage void unsupported_syscall(struct pt_regs *regs,
-                                          enum exception_code code)
-{
-       struct task_struct *tsk = current;
-       siginfo_t info;
-
-       /* catch a kernel BUG() */
-       if (code == EXCEP_SYSCALL15 && !user_mode(regs)) {
-               if (report_bug(regs->pc, regs) == BUG_TRAP_TYPE_BUG) {
-#ifdef CONFIG_GDBSTUB
-                       gdbstub_intercept(regs, code);
-#endif
-               }
-       }
-
-       regs->pc -= 2; /* syscall return addr is _after_ the instruction */
-
-       die_if_no_fixup("An unsupported syscall insn was used by the kernel\n",
-                       regs, code);
-
-       info.si_signo   = SIGILL;
-       info.si_errno   = ENOSYS;
-       info.si_code    = ILL_ILLTRP;
-       info.si_addr    = (void *) regs->pc;
-       force_sig_info(SIGILL, &info, tsk);
-}
-
 /*
  * display the register file when the stack pointer gets clobbered
  */
@@ -481,10 +513,8 @@ asmlinkage void uninitialised_exception(struct pt_regs *regs,
 {
 
        /* see if gdbstub wants to deal with it */
-#ifdef CONFIG_GDBSTUB
-       if (gdbstub_intercept(regs, code))
+       if (debugger_intercept(code, SIGSYS, 0, regs) == 0)
                return;
-#endif
 
        peripheral_leds_display_exception(code);
        printk(KERN_EMERG "Uninitialised Exception 0x%04x\n", code & 0xFFFF);
@@ -549,43 +579,43 @@ void __init set_intr_stub(enum exception_code code, void *handler)
  */
 void __init trap_init(void)
 {
-       set_excp_vector(EXCEP_TRAP,             trap);
-       set_excp_vector(EXCEP_ISTEP,            istep);
-       set_excp_vector(EXCEP_IBREAK,           ibreak);
-       set_excp_vector(EXCEP_OBREAK,           obreak);
-
-       set_excp_vector(EXCEP_PRIVINS,          priv_op);
-       set_excp_vector(EXCEP_UNIMPINS,         invalid_op);
-       set_excp_vector(EXCEP_UNIMPEXINS,       invalid_exop);
-       set_excp_vector(EXCEP_MEMERR,           mem_error);
+       set_excp_vector(EXCEP_TRAP,             handle_exception);
+       set_excp_vector(EXCEP_ISTEP,            handle_exception);
+       set_excp_vector(EXCEP_IBREAK,           handle_exception);
+       set_excp_vector(EXCEP_OBREAK,           handle_exception);
+
+       set_excp_vector(EXCEP_PRIVINS,          handle_exception);
+       set_excp_vector(EXCEP_UNIMPINS,         handle_exception);
+       set_excp_vector(EXCEP_UNIMPEXINS,       handle_exception);
+       set_excp_vector(EXCEP_MEMERR,           handle_exception);
        set_excp_vector(EXCEP_MISALIGN,         misalignment);
-       set_excp_vector(EXCEP_BUSERROR,         bus_error);
-       set_excp_vector(EXCEP_ILLINSACC,        insn_acc_error);
-       set_excp_vector(EXCEP_ILLDATACC,        data_acc_error);
-       set_excp_vector(EXCEP_IOINSACC,         insn_acc_error);
-       set_excp_vector(EXCEP_PRIVINSACC,       insn_acc_error);
-       set_excp_vector(EXCEP_PRIVDATACC,       data_acc_error);
-       set_excp_vector(EXCEP_DATINSACC,        insn_acc_error);
-       set_excp_vector(EXCEP_FPU_UNIMPINS,     fpu_invalid_op);
+       set_excp_vector(EXCEP_BUSERROR,         handle_exception);
+       set_excp_vector(EXCEP_ILLINSACC,        handle_exception);
+       set_excp_vector(EXCEP_ILLDATACC,        handle_exception);
+       set_excp_vector(EXCEP_IOINSACC,         handle_exception);
+       set_excp_vector(EXCEP_PRIVINSACC,       handle_exception);
+       set_excp_vector(EXCEP_PRIVDATACC,       handle_exception);
+       set_excp_vector(EXCEP_DATINSACC,        handle_exception);
+       set_excp_vector(EXCEP_FPU_UNIMPINS,     handle_exception);
        set_excp_vector(EXCEP_FPU_OPERATION,    fpu_exception);
 
        set_excp_vector(EXCEP_NMI,              nmi);
 
-       set_excp_vector(EXCEP_SYSCALL1,         unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL2,         unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL3,         unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL4,         unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL5,         unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL6,         unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL7,         unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL8,         unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL9,         unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL10,        unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL11,        unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL12,        unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL13,        unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL14,        unsupported_syscall);
-       set_excp_vector(EXCEP_SYSCALL15,        unsupported_syscall);
+       set_excp_vector(EXCEP_SYSCALL1,         handle_exception);
+       set_excp_vector(EXCEP_SYSCALL2,         handle_exception);
+       set_excp_vector(EXCEP_SYSCALL3,         handle_exception);
+       set_excp_vector(EXCEP_SYSCALL4,         handle_exception);
+       set_excp_vector(EXCEP_SYSCALL5,         handle_exception);
+       set_excp_vector(EXCEP_SYSCALL6,         handle_exception);
+       set_excp_vector(EXCEP_SYSCALL7,         handle_exception);
+       set_excp_vector(EXCEP_SYSCALL8,         handle_exception);
+       set_excp_vector(EXCEP_SYSCALL9,         handle_exception);
+       set_excp_vector(EXCEP_SYSCALL10,        handle_exception);
+       set_excp_vector(EXCEP_SYSCALL11,        handle_exception);
+       set_excp_vector(EXCEP_SYSCALL12,        handle_exception);
+       set_excp_vector(EXCEP_SYSCALL13,        handle_exception);
+       set_excp_vector(EXCEP_SYSCALL14,        handle_exception);
+       set_excp_vector(EXCEP_SYSCALL15,        handle_exception);
 }
 
 /*
index c4fd923a55a09803b0c06f13485598968345c0d9..bfbe52691f2c8feb474be5af3d9eccaea1e4e287 100644 (file)
@@ -99,3 +99,49 @@ config MN10300_CACHE_INV_ICACHE
        help
          Set if we need the icache to be invalidated, even if the dcache is in
          write-through mode and doesn't need flushing.
+
+#
+# The kernel debugger gets its own separate cache flushing functions
+#
+config MN10300_DEBUGGER_CACHE_FLUSH_BY_TAG
+       def_bool y if KERNEL_DEBUGGER && \
+                       MN10300_CACHE_WBACK && \
+                       !MN10300_CACHE_SNOOP && \
+                       MN10300_CACHE_MANAGE_BY_TAG
+       help
+         Set if the debugger needs to flush the dcache and invalidate the
+         icache using the cache tag registers to make breakpoints work.
+
+config MN10300_DEBUGGER_CACHE_FLUSH_BY_REG
+       def_bool y if KERNEL_DEBUGGER && \
+                       MN10300_CACHE_WBACK && \
+                       !MN10300_CACHE_SNOOP && \
+                       MN10300_CACHE_MANAGE_BY_REG
+       help
+         Set if the debugger needs to flush the dcache and invalidate the
+         icache using automatic purge registers to make breakpoints work.
+
+config MN10300_DEBUGGER_CACHE_INV_BY_TAG
+       def_bool y if KERNEL_DEBUGGER && \
+                       MN10300_CACHE_WTHRU && \
+                       !MN10300_CACHE_SNOOP && \
+                       MN10300_CACHE_MANAGE_BY_TAG
+       help
+         Set if the debugger needs to invalidate the icache using the cache
+         tag registers to make breakpoints work.
+
+config MN10300_DEBUGGER_CACHE_INV_BY_REG
+       def_bool y if KERNEL_DEBUGGER && \
+                       MN10300_CACHE_WTHRU && \
+                       !MN10300_CACHE_SNOOP && \
+                       MN10300_CACHE_MANAGE_BY_REG
+       help
+         Set if the debugger needs to invalidate the icache using automatic
+         purge registers to make breakpoints work.
+
+config MN10300_DEBUGGER_CACHE_NO_FLUSH
+       def_bool y if KERNEL_DEBUGGER && \
+                       (MN10300_CACHE_DISABLED || MN10300_CACHE_SNOOP)
+       help
+         Set if the debugger does not need to flush the dcache and/or
+         invalidate the icache to make breakpoints work.
index 203fee23f7d70efebe0b6c11eb1bfdb6dc3cae6c..11f38466ac28e17a7e3c2518a5a9679a1ad35d3b 100644 (file)
@@ -13,6 +13,15 @@ cacheflush-$(CONFIG_MN10300_CACHE_INV_BY_REG) += cache-inv-by-reg.o
 cacheflush-$(CONFIG_MN10300_CACHE_FLUSH_BY_TAG) += cache-flush-by-tag.o
 cacheflush-$(CONFIG_MN10300_CACHE_FLUSH_BY_REG) += cache-flush-by-reg.o
 
+cacheflush-$(CONFIG_MN10300_DEBUGGER_CACHE_FLUSH_BY_TAG) += \
+       cache-dbg-flush-by-tag.o cache-dbg-inv-by-tag.o
+cacheflush-$(CONFIG_MN10300_DEBUGGER_CACHE_FLUSH_BY_REG) += \
+       cache-dbg-flush-by-reg.o
+cacheflush-$(CONFIG_MN10300_DEBUGGER_CACHE_INV_BY_TAG) += \
+       cache-dbg-inv-by-tag.o cache-dbg-inv.o
+cacheflush-$(CONFIG_MN10300_DEBUGGER_CACHE_INV_BY_REG) += \
+       cache-dbg-inv-by-reg.o cache-dbg-inv.o
+
 cacheflush-$(CONFIG_MN10300_CACHE_DISABLED) := cache-disabled.o
 
 obj-y := \
diff --git a/arch/mn10300/mm/cache-dbg-flush-by-reg.S b/arch/mn10300/mm/cache-dbg-flush-by-reg.S
new file mode 100644 (file)
index 0000000..665919f
--- /dev/null
@@ -0,0 +1,160 @@
+/* MN10300 CPU cache invalidation routines, using automatic purge registers
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/irqflags.h>
+#include <asm/cacheflush.h>
+#include "cache.inc"
+
+       .am33_2
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv(void)
+# Flush the entire data cache back to RAM and invalidate the icache
+#
+###############################################################################
+       ALIGN
+       .globl  debugger_local_cache_flushinv
+        .type  debugger_local_cache_flushinv,@function
+debugger_local_cache_flushinv:
+       #
+       # firstly flush the dcache
+       #
+       movhu   (CHCTR),d0
+       btst    CHCTR_DCEN|CHCTR_ICEN,d0
+       beq     debugger_local_cache_flushinv_end
+
+       mov     DCPGCR,a0
+
+       mov     epsw,d1
+       and     ~EPSW_IE,epsw
+       or      EPSW_NMID,epsw
+       nop
+
+       btst    CHCTR_DCEN,d0
+       beq     debugger_local_cache_flushinv_no_dcache
+
+       # wait for busy bit of area purge
+       setlb
+       mov     (a0),d0
+       btst    DCPGCR_DCPGBSY,d0
+       lne
+
+       # set mask
+       clr     d0
+       mov     d0,(DCPGMR)
+
+       # area purge
+       #
+       # DCPGCR = DCPGCR_DCP
+       #
+       mov     DCPGCR_DCP,d0
+       mov     d0,(a0)
+
+       # wait for busy bit of area purge
+       setlb
+       mov     (a0),d0
+       btst    DCPGCR_DCPGBSY,d0
+       lne
+
+debugger_local_cache_flushinv_no_dcache:
+       #
+       # secondly, invalidate the icache if it is enabled
+       #
+       mov     CHCTR,a0
+       movhu   (a0),d0
+       btst    CHCTR_ICEN,d0
+       beq     debugger_local_cache_flushinv_done
+
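+       # interrupts were already disabled above, so the macro need not do it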
+       invalidate_icache 0
+
+debugger_local_cache_flushinv_done:
+       mov     d1,epsw
+
+debugger_local_cache_flushinv_end:
+       ret     [],0
+       .size   debugger_local_cache_flushinv,.-debugger_local_cache_flushinv
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv_one(u8 *addr)
+#
+# Invalidate one particular cacheline if it's in the icache
+#
+###############################################################################
+       ALIGN
+       .globl  debugger_local_cache_flushinv_one
+       .type   debugger_local_cache_flushinv_one,@function
+debugger_local_cache_flushinv_one:
+       movhu   (CHCTR),d1
+       btst    CHCTR_DCEN|CHCTR_ICEN,d1
+       beq     debugger_local_cache_flushinv_one_end
+       btst    CHCTR_DCEN,d1
+       beq     debugger_local_cache_flushinv_one_no_dcache
+
+       # round cacheline addr down
+       and     L1_CACHE_TAG_MASK,d0
+       mov     d0,a1
+       mov     d0,d1
+
+       # determine the dcache purge control reg address
+       mov     DCACHE_PURGE(0,0),a0
+       and     L1_CACHE_TAG_ENTRY,d0
+       add     d0,a0
+
+       # retain valid entries in the cache
+       or      L1_CACHE_TAG_VALID,d1
+
+       # conditionally purge this line in all ways
+       mov     d1,(L1_CACHE_WAYDISP*0,a0)
+
+debugger_local_cache_flushinv_one_no_dcache:
+       #
+       # now try to flush the icache
+       #
+       mov     CHCTR,a0
+       movhu   (a0),d0
+       btst    CHCTR_ICEN,d0
+       beq     debugger_local_cache_flushinv_one_end
+
+       LOCAL_CLI_SAVE(d1)
+
+       mov     ICIVCR,a0
+
+       # wait for the invalidator to quiesce
+       setlb
+       mov     (a0),d0
+       btst    ICIVCR_ICIVBSY,d0
+       lne
+
+       # set the mask
+       mov     L1_CACHE_TAG_MASK,d0
+       mov     d0,(ICIVMR)
+
+       # invalidate the cache line at the given address
+       or      ICIVCR_ICI,a1
+       mov     a1,(a0)
+
+       # wait for the invalidator to quiesce again
+       setlb
+       mov     (a0),d0
+       btst    ICIVCR_ICIVBSY,d0
+       lne
+
+       LOCAL_IRQ_RESTORE(d1)
+
+debugger_local_cache_flushinv_one_end:
+       ret     [],0
+       .size   debugger_local_cache_flushinv_one,.-debugger_local_cache_flushinv_one
diff --git a/arch/mn10300/mm/cache-dbg-flush-by-tag.S b/arch/mn10300/mm/cache-dbg-flush-by-tag.S
new file mode 100644 (file)
index 0000000..bf56930
--- /dev/null
@@ -0,0 +1,114 @@
+/* MN10300 CPU cache invalidation routines, using direct tag flushing
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/irqflags.h>
+#include <asm/cacheflush.h>
+#include "cache.inc"
+
+       .am33_2
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv(void)
+#
+# Flush the entire data cache back to RAM and invalidate the icache
+#
+###############################################################################
+       ALIGN
+       .globl  debugger_local_cache_flushinv
+        .type  debugger_local_cache_flushinv,@function
+debugger_local_cache_flushinv:
+       #
+       # firstly flush the dcache
+       #
+       movhu   (CHCTR),d0
+       btst    CHCTR_DCEN|CHCTR_ICEN,d0
+       beq     debugger_local_cache_flushinv_end
+
+       btst    CHCTR_DCEN,d0
+       beq     debugger_local_cache_flushinv_no_dcache
+
+       # read the addresses tagged in the cache's tag RAM and attempt to flush
+       # those addresses specifically
+       # - we rely on the hardware to filter out invalid tag entry addresses
+       mov     DCACHE_TAG(0,0),a0              # dcache tag RAM access address
+       mov     DCACHE_PURGE(0,0),a1            # dcache purge request address
+       mov     L1_CACHE_NWAYS*L1_CACHE_NENTRIES,e0  # total number of entries
+
+mn10300_local_dcache_flush_loop:
+       mov     (a0),d0
+       and     L1_CACHE_TAG_MASK,d0
+       or      L1_CACHE_TAG_VALID,d0           # retain valid entries in the
+                                               # cache
+       mov     d0,(a1)                         # conditional purge
+
+       add     L1_CACHE_BYTES,a0
+       add     L1_CACHE_BYTES,a1
+       add     -1,e0
+       bne     mn10300_local_dcache_flush_loop
+
+debugger_local_cache_flushinv_no_dcache:
+       #
+       # secondly, invalidate the icache if it is enabled
+       #
+       mov     CHCTR,a0
+       movhu   (a0),d0
+       btst    CHCTR_ICEN,d0
+       beq     debugger_local_cache_flushinv_end
+
+       invalidate_icache 1
+
+debugger_local_cache_flushinv_end:
+       ret     [],0
+       .size   debugger_local_cache_flushinv,.-debugger_local_cache_flushinv
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv_one(u8 *addr)
+#
+# Invalidate one particular cacheline if it's in the icache
+#
+###############################################################################
+       ALIGN
+       .globl  debugger_local_cache_flushinv_one
+       .type   debugger_local_cache_flushinv_one,@function
+debugger_local_cache_flushinv_one:
+       movhu   (CHCTR),d1
+       btst    CHCTR_DCEN|CHCTR_ICEN,d1
+       beq     debugger_local_cache_flushinv_one_end
+       btst    CHCTR_DCEN,d1
+       beq     debugger_local_cache_flushinv_one_icache
+
+       # round cacheline addr down
+       and     L1_CACHE_TAG_MASK,d0
+       mov     d0,a1
+
+       # determine the dcache purge control reg address
+       mov     DCACHE_PURGE(0,0),a0
+       and     L1_CACHE_TAG_ENTRY,d0
+       add     d0,a0
+
+       # retain valid entries in the cache
+       or      L1_CACHE_TAG_VALID,a1
+
+       # conditionally purge this line in all ways
+       mov     a1,(L1_CACHE_WAYDISP*0,a0)
+
+       # now go and do the icache
+       bra     debugger_local_cache_flushinv_one_icache
+
+debugger_local_cache_flushinv_one_end:
+       ret     [],0
+       .size   debugger_local_cache_flushinv_one,.-debugger_local_cache_flushinv_one
diff --git a/arch/mn10300/mm/cache-dbg-inv-by-reg.S b/arch/mn10300/mm/cache-dbg-inv-by-reg.S
new file mode 100644 (file)
index 0000000..c4e6252
--- /dev/null
@@ -0,0 +1,69 @@
+/* MN10300 CPU cache invalidation routines, using automatic purge registers
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/cache.h>
+#include <asm/irqflags.h>
+#include <asm/cacheflush.h>
+#include "cache.inc"
+
+       .am33_2
+
+       .globl  debugger_local_cache_flushinv_one
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv_one(u8 *addr)
+#
+# Invalidate one particular cacheline if it's in the icache
+#
+###############################################################################
+       ALIGN
+       .globl  debugger_local_cache_flushinv_one
+       .type   debugger_local_cache_flushinv_one,@function
+debugger_local_cache_flushinv_one:
+       mov     d0,a1
+
+       mov     CHCTR,a0
+       movhu   (a0),d0
+       btst    CHCTR_ICEN,d0
+       beq     mn10300_local_icache_inv_range_reg_end
+
+       LOCAL_CLI_SAVE(d1)
+
+       mov     ICIVCR,a0
+
+       # wait for the invalidator to quiesce
+       setlb
+       mov     (a0),d0
+       btst    ICIVCR_ICIVBSY,d0
+       lne
+
+       # set the mask
+       mov     ~L1_CACHE_TAG_MASK,d0
+       mov     d0,(ICIVMR)
+
+       # invalidate the cache line at the given address
+       and     ~L1_CACHE_TAG_MASK,a1
+       or      ICIVCR_ICI,a1
+       mov     a1,(a0)
+
+       # wait for the invalidator to quiesce again
+       setlb
+       mov     (a0),d0
+       btst    ICIVCR_ICIVBSY,d0
+       lne
+
+       LOCAL_IRQ_RESTORE(d1)
+
+mn10300_local_icache_inv_range_reg_end:
+       ret     [],0
+       .size   debugger_local_cache_flushinv_one,.-debugger_local_cache_flushinv_one
diff --git a/arch/mn10300/mm/cache-dbg-inv-by-tag.S b/arch/mn10300/mm/cache-dbg-inv-by-tag.S
new file mode 100644 (file)
index 0000000..d8ec821
--- /dev/null
@@ -0,0 +1,120 @@
+/* MN10300 CPU cache invalidation routines, using direct tag flushing
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/irqflags.h>
+#include <asm/cacheflush.h>
+#include "cache.inc"
+
+       .am33_2
+
+       .globl  debugger_local_cache_flushinv_one_icache
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv_one(u8 *addr)
+#
+# Invalidate one particular cacheline if it's in the icache
+#
+###############################################################################
+       ALIGN
+       .globl  debugger_local_cache_flushinv_one_icache
+       .type   debugger_local_cache_flushinv_one_icache,@function
+debugger_local_cache_flushinv_one_icache:
+       movm    [d3,a2],(sp)
+
+       mov     CHCTR,a2
+       movhu   (a2),d0
+       btst    CHCTR_ICEN,d0
+       beq     debugger_local_cache_flushinv_one_icache_end
+
+       mov     d0,a1
+       and     L1_CACHE_TAG_MASK,a1
+
+       # read the tags from the tag RAM, and if they indicate a matching valid
+       # cache line then we invalidate that line
+       mov     ICACHE_TAG(0,0),a0
+       mov     a1,d0
+       and     L1_CACHE_TAG_ENTRY,d0
+       add     d0,a0                           # starting icache tag RAM
+                                               # access address
+
+       and     ~(L1_CACHE_DISPARITY-1),a1      # determine comparator base
+       or      L1_CACHE_TAG_VALID,a1
+       mov     L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_VALID,d1
+
+       LOCAL_CLI_SAVE(d3)
+
+       # disable the icache
+       movhu   (a2),d0
+       and     ~CHCTR_ICEN,d0
+       movhu   d0,(a2)
+
+       # and wait for it to calm down
+       setlb
+       movhu   (a2),d0
+       btst    CHCTR_ICBUSY,d0
+       lne
+
+       # check all the way tags for this cache entry
+       mov     (a0),d0                         # read the tag in the way 0 slot
+       xor     a1,d0
+       and     d1,d0
+       beq     debugger_local_icache_kill      # jump if matched
+
+       add     L1_CACHE_WAYDISP,a0
+       mov     (a0),d0                         # read the tag in the way 1 slot
+       xor     a1,d0
+       and     d1,d0
+       beq     debugger_local_icache_kill      # jump if matched
+
+       add     L1_CACHE_WAYDISP,a0
+       mov     (a0),d0                         # read the tag in the way 2 slot
+       xor     a1,d0
+       and     d1,d0
+       beq     debugger_local_icache_kill      # jump if matched
+
+       add     L1_CACHE_WAYDISP,a0
+       mov     (a0),d0                         # read the tag in the way 3 slot
+       xor     a1,d0
+       and     d1,d0
+       bne     debugger_local_icache_finish    # jump if not matched
+
+debugger_local_icache_kill:
+       mov     d0,(a0)                         # kill the tag (D0 is 0 at this point)
+
+debugger_local_icache_finish:
+       # wait for the cache to finish what it's doing
+       setlb
+       movhu   (a2),d0
+       btst    CHCTR_ICBUSY,d0
+       lne
+
+       # and reenable it
+       or      CHCTR_ICEN,d0
+       movhu   d0,(a2)
+       movhu   (a2),d0
+
+       # re-enable interrupts
+       LOCAL_IRQ_RESTORE(d3)
+
+debugger_local_cache_flushinv_one_icache_end:
+       ret     [d3,a2],8
+       .size   debugger_local_cache_flushinv_one_icache,.-debugger_local_cache_flushinv_one_icache
+
+#ifdef CONFIG_MN10300_DEBUGGER_CACHE_INV_BY_TAG
+       .globl  debugger_local_cache_flushinv_one
+       .type   debugger_local_cache_flushinv_one,@function
+debugger_local_cache_flushinv_one = debugger_local_cache_flushinv_one_icache
+#endif
diff --git a/arch/mn10300/mm/cache-dbg-inv.S b/arch/mn10300/mm/cache-dbg-inv.S
new file mode 100644 (file)
index 0000000..eba2d6d
--- /dev/null
@@ -0,0 +1,47 @@
+/* MN10300 CPU cache invalidation routines
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/irqflags.h>
+#include <asm/cacheflush.h>
+#include "cache.inc"
+
+       .am33_2
+
+       .globl  debugger_local_cache_flushinv
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv(void)
+#
+# Invalidate the entire icache
+#
+###############################################################################
+       ALIGN
+       .globl  debugger_local_cache_flushinv
+        .type  debugger_local_cache_flushinv,@function
+debugger_local_cache_flushinv:
+       #
+       # we only need to invalidate the icache in this cache mode
+       #
+       mov     CHCTR,a0
+       movhu   (a0),d0
+       btst    CHCTR_ICEN,d0
+       beq     debugger_local_cache_flushinv_end
+
+       invalidate_icache 1
+
+debugger_local_cache_flushinv_end:
+       ret     [],0
+       .size   debugger_local_cache_flushinv,.-debugger_local_cache_flushinv
index 5cd6a27dd63e04448bde98836e8eff937824240b..1ddc068492429fc5903f0f776d42c6798d7eb76f 100644 (file)
@@ -62,7 +62,7 @@ mn10300_local_dcache_flush:
 
 mn10300_local_dcache_flush_loop:
        mov     (a0),d0
-       and     L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0
+       and     L1_CACHE_TAG_MASK,d0
        or      L1_CACHE_TAG_VALID,d0           # retain valid entries in the
                                                # cache
        mov     d0,(a1)                         # conditional purge
@@ -112,11 +112,11 @@ mn10300_local_dcache_flush_range:
 1:
 
        # round start addr down
-       and     L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0
+       and     L1_CACHE_TAG_MASK,d0
        mov     d0,a1
 
        add     L1_CACHE_BYTES,d1                       # round end addr up
-       and     L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d1
+       and     L1_CACHE_TAG_MASK,d1
 
        # write a request to flush all instances of an address from the cache
        mov     DCACHE_PURGE(0,0),a0
@@ -215,12 +215,11 @@ mn10300_local_dcache_flush_inv_range:
        bra     mn10300_local_dcache_flush_inv
 1:
 
-       and     L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0      # round start
-                                                               # addr down
+       and     L1_CACHE_TAG_MASK,d0            # round start addr down
        mov     d0,a1
 
-       add     L1_CACHE_BYTES,d1                       # round end addr up
-       and     L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d1
+       add     L1_CACHE_BYTES,d1               # round end addr up
+       and     L1_CACHE_TAG_MASK,d1
 
        # write a request to flush and invalidate all instances of an address
        # from the cache
index c8950861ed779c5da127fbc562bc03ee58a72d55..a60825b91e7724fc0a7e6e54f17bd1059995a7fb 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/cache.h>
 #include <asm/irqflags.h>
 #include <asm/cacheflush.h>
+#include "cache.inc"
 
 #define mn10300_local_dcache_inv_range_intr_interval \
        +((1 << MN10300_DCACHE_INV_RANGE_INTR_LOG2_INTERVAL) - 1)
@@ -62,10 +63,7 @@ mn10300_local_icache_inv:
        btst    CHCTR_ICEN,d0
        beq     mn10300_local_icache_inv_end
 
-       # invalidate
-       or      CHCTR_ICINV,d0
-       movhu   d0,(a0)
-       movhu   (a0),d0
+       invalidate_icache 1
 
 mn10300_local_icache_inv_end:
        ret     [],0
@@ -87,11 +85,8 @@ mn10300_local_dcache_inv:
        btst    CHCTR_DCEN,d0
        beq     mn10300_local_dcache_inv_end
 
-       # invalidate
-       or      CHCTR_DCINV,d0
-       movhu   d0,(a0)
-       movhu   (a0),d0
-
+       invalidate_dcache 1
+
 mn10300_local_dcache_inv_end:
        ret     [],0
        .size   mn10300_local_dcache_inv,.-mn10300_local_dcache_inv
@@ -121,9 +116,9 @@ mn10300_local_dcache_inv_range:
        # and if they're not cacheline-aligned, we must flush any bits outside
        # the range that share cachelines with stuff inside the range
 #ifdef CONFIG_MN10300_CACHE_WBACK
-       btst    ~(L1_CACHE_BYTES-1),d0
+       btst    ~L1_CACHE_TAG_MASK,d0
        bne     1f
-       btst    ~(L1_CACHE_BYTES-1),d1
+       btst    ~L1_CACHE_TAG_MASK,d1
        beq     2f
 1:
        bra     mn10300_local_dcache_flush_inv_range
@@ -141,12 +136,11 @@ mn10300_local_dcache_inv_range:
        # writeback mode, in which case we would be in flush and invalidate by
        # now
 #ifndef CONFIG_MN10300_CACHE_WBACK
-       and     L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0      # round start
-                                                               # addr down
+       and     L1_CACHE_TAG_MASK,d0    # round start addr down
 
        mov     L1_CACHE_BYTES-1,d2
        add     d2,d1
-       and     L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d1      # round end addr up
+       and     L1_CACHE_TAG_MASK,d1    # round end addr up
 #endif /* !CONFIG_MN10300_CACHE_WBACK */
 
        sub     d0,d1,d2                # calculate the total size
index e9713b40c0ff4823ead75d4e81b09a71138e9531..ccedce9c144d30459b8d092959247d4713ee03f0 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/cache.h>
 #include <asm/irqflags.h>
 #include <asm/cacheflush.h>
+#include "cache.inc"
 
 #define mn10300_local_dcache_inv_range_intr_interval \
        +((1 << MN10300_DCACHE_INV_RANGE_INTR_LOG2_INTERVAL) - 1)
@@ -70,43 +71,7 @@ mn10300_local_icache_inv:
        btst    CHCTR_ICEN,d0
        beq     mn10300_local_icache_inv_end
 
-#if defined(CONFIG_AM33_2) || defined(CONFIG_AM33_3)
-       LOCAL_CLI_SAVE(d1)
-
-       # disable the icache
-       and     ~CHCTR_ICEN,d0
-       movhu   d0,(a0)
-
-       # and wait for it to calm down
-       setlb
-       movhu   (a0),d0
-       btst    CHCTR_ICBUSY,d0
-       lne
-
-       # invalidate
-       or      CHCTR_ICINV,d0
-       movhu   d0,(a0)
-
-       # wait for the cache to finish
-       mov     CHCTR,a0
-       setlb
-       movhu   (a0),d0
-       btst    CHCTR_ICBUSY,d0
-       lne
-
-       # and reenable it
-       and     ~CHCTR_ICINV,d0
-       or      CHCTR_ICEN,d0
-       movhu   d0,(a0)
-       movhu   (a0),d0
-
-       LOCAL_IRQ_RESTORE(d1)
-#else /* CONFIG_AM33_2 || CONFIG_AM33_3 */
-       # invalidate
-       or      CHCTR_ICINV,d0
-       movhu   d0,(a0)
-       movhu   (a0),d0
-#endif /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+       invalidate_icache 1
 
 mn10300_local_icache_inv_end:
        ret     [],0
@@ -128,43 +93,7 @@ mn10300_local_dcache_inv:
        btst    CHCTR_DCEN,d0
        beq     mn10300_local_dcache_inv_end
 
-#if defined(CONFIG_AM33_2) || defined(CONFIG_AM33_3)
-       LOCAL_CLI_SAVE(d1)
-
-       # disable the dcache
-       and     ~CHCTR_DCEN,d0
-       movhu   d0,(a0)
-
-       # and wait for it to calm down
-       setlb
-       movhu   (a0),d0
-       btst    CHCTR_DCBUSY,d0
-       lne
-
-       # invalidate
-       or      CHCTR_DCINV,d0
-       movhu   d0,(a0)
-
-       # wait for the cache to finish
-       mov     CHCTR,a0
-       setlb
-       movhu   (a0),d0
-       btst    CHCTR_DCBUSY,d0
-       lne
-
-       # and reenable it
-       and     ~CHCTR_DCINV,d0
-       or      CHCTR_DCEN,d0
-       movhu   d0,(a0)
-       movhu   (a0),d0
-
-       LOCAL_IRQ_RESTORE(d1)
-#else /* CONFIG_AM33_2 || CONFIG_AM33_3 */
-       # invalidate
-       or      CHCTR_DCINV,d0
-       movhu   d0,(a0)
-       movhu   (a0),d0
-#endif /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+       invalidate_dcache 1
 
 mn10300_local_dcache_inv_end:
        ret     [],0
@@ -195,9 +124,9 @@ mn10300_local_dcache_inv_range:
        # and if they're not cacheline-aligned, we must flush any bits outside
        # the range that share cachelines with stuff inside the range
 #ifdef CONFIG_MN10300_CACHE_WBACK
-       btst    ~(L1_CACHE_BYTES-1),d0
+       btst    ~L1_CACHE_TAG_MASK,d0
        bne     1f
-       btst    ~(L1_CACHE_BYTES-1),d1
+       btst    ~L1_CACHE_TAG_MASK,d1
        beq     2f
 1:
        bra     mn10300_local_dcache_flush_inv_range
@@ -212,11 +141,10 @@ mn10300_local_dcache_inv_range:
        beq     mn10300_local_dcache_inv_range_end
 
 #ifndef CONFIG_MN10300_CACHE_WBACK
-       and     L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0      # round start
-                                                               # addr down
+       and     L1_CACHE_TAG_MASK,d0            # round start addr down
 
        add     L1_CACHE_BYTES,d1               # round end addr up
-       and     L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d1
+       and     L1_CACHE_TAG_MASK,d1
 #endif /* !CONFIG_MN10300_CACHE_WBACK */
        mov     d0,a1
 
diff --git a/arch/mn10300/mm/cache.inc b/arch/mn10300/mm/cache.inc
new file mode 100644 (file)
index 0000000..394a119
--- /dev/null
@@ -0,0 +1,133 @@
+/* MN10300 CPU core caching macros -*- asm -*-
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+
+###############################################################################
+#
+# Invalidate the instruction cache.
+#      A0: Should hold CHCTR
+#      D0: Should have been read from CHCTR
+#      D1: Will be clobbered
+#
+# On some cores it is necessary to disable the icache whilst we do this.
+#
+###############################################################################
+       .macro invalidate_icache,disable_irq
+
+#if defined(CONFIG_AM33_2) || defined(CONFIG_AM33_3)
+       .if \disable_irq
+       # don't want an interrupt routine seeing a disabled cache
+       mov     epsw,d1
+       and     ~EPSW_IE,epsw
+       or      EPSW_NMID,epsw
+       nop
+       nop
+       .endif
+
+       # disable the icache
+       and     ~CHCTR_ICEN,d0
+       movhu   d0,(a0)
+
+       # and wait for it to calm down
+       setlb
+       movhu   (a0),d0
+       btst    CHCTR_ICBUSY,d0
+       lne
+
+       # invalidate
+       or      CHCTR_ICINV,d0
+       movhu   d0,(a0)
+
+       # wait for the cache to finish
+       setlb
+       movhu   (a0),d0
+       btst    CHCTR_ICBUSY,d0
+       lne
+
+       # and reenable it
+       or      CHCTR_ICEN,d0
+       movhu   d0,(a0)
+       movhu   (a0),d0
+
+       .if \disable_irq
+       LOCAL_IRQ_RESTORE(d1)
+       .endif
+
+#else /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+
+       # invalidate
+       or      CHCTR_ICINV,d0
+       movhu   d0,(a0)
+       movhu   (a0),d0
+
+#endif /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+       .endm
+
+###############################################################################
+#
+# Invalidate the data cache.
+#      A0: Should hold CHCTR
+#      D0: Should have been read from CHCTR
+#      D1: Will be clobbered
+#
+# On some cores it is necessary to disable the dcache whilst we do this.
+#
+###############################################################################
+       .macro invalidate_dcache,disable_irq
+
+#if defined(CONFIG_AM33_2) || defined(CONFIG_AM33_3)
+       .if \disable_irq
+       # don't want an interrupt routine seeing a disabled cache
+       mov     epsw,d1
+       and     ~EPSW_IE,epsw
+       or      EPSW_NMID,epsw
+       nop
+       nop
+       .endif
+       
+       # disable the dcache
+       and     ~CHCTR_DCEN,d0
+       movhu   d0,(a0)
+
+       # and wait for it to calm down
+       setlb
+       movhu   (a0),d0
+       btst    CHCTR_DCBUSY,d0
+       lne
+
+       # invalidate
+       or      CHCTR_DCINV,d0
+       movhu   d0,(a0)
+
+       # wait for the cache to finish
+       setlb
+       movhu   (a0),d0
+       btst    CHCTR_DCBUSY,d0
+       lne
+
+       # and reenable it
+       or      CHCTR_DCEN,d0
+       movhu   d0,(a0)
+       movhu   (a0),d0
+
+       .if \disable_irq
+       LOCAL_IRQ_RESTORE(d1)
+       .endif
+
+#else /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+
+       # invalidate
+       or      CHCTR_DCINV,d0
+       movhu   d0,(a0)
+       movhu   (a0),d0
+
+#endif /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+       .endm
index 59c3da49d9d9e4ad08108bf2f1eebe8182f46e0c..0945409a802219cb8b1d7f09ef5410941a4756c0 100644 (file)
@@ -28,8 +28,9 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/hardirq.h>
-#include <asm/gdb-stub.h>
 #include <asm/cpu-regs.h>
+#include <asm/debugger.h>
+#include <asm/gdb-stub.h>
 
 /*
  * Unlock any spinlocks which will prevent us from getting the
@@ -306,10 +307,8 @@ no_context:
        printk(" printing pc:\n");
        printk(KERN_ALERT "%08lx\n", regs->pc);
 
-#ifdef CONFIG_GDBSTUB
-       gdbstub_intercept(
-               regs, fault_code & 0x00010000 ? EXCEP_IAERROR : EXCEP_DAERROR);
-#endif
+       debugger_intercept(fault_code & 0x00010000 ? EXCEP_IAERROR : EXCEP_DAERROR,
+                          SIGSEGV, SEGV_ACCERR, regs);
 
        page = PTBR;
        page = ((unsigned long *) __va(page))[address >> 22];
index c1528004163ce8fe54418368ce3aea757e31104f..967d144f307e5572d24b06e78ff50298366ce331 100644 (file)
@@ -23,6 +23,7 @@
 #define L1_CACHE_TAG_DIRTY     0x00000008      /* data cache tag dirty bit */
 #define L1_CACHE_TAG_ENTRY     0x00000ff0      /* cache tag entry address mask */
 #define L1_CACHE_TAG_ADDRESS   0xfffff000      /* cache tag line address mask */
+#define L1_CACHE_TAG_MASK      +(L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY)
 
 /*
  * specification of the interval between interrupt checking intervals whilst
index cafd7b5b55b444836a7714793eb9725c3e4f5740..bcb5df2d892f99c76012c491707a64223fa61784 100644 (file)
@@ -29,6 +29,7 @@
 #define L1_CACHE_TAG_DIRTY     0x00000008      /* data cache tag dirty bit */
 #define L1_CACHE_TAG_ENTRY     0x00000fe0      /* cache tag entry address mask */
 #define L1_CACHE_TAG_ADDRESS   0xfffff000      /* cache tag line address mask */
+#define L1_CACHE_TAG_MASK      +(L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY)
 
 /*
  * specification of the interval between interrupt checking intervals whilst
index f4f4d700833affc00d9953871b441eae12227831..b7ed8d7a9b33a10e784722a3fa5b8a55f7eabbcb 100644 (file)
@@ -544,7 +544,7 @@ void __init mem_init(void)
 unsigned long *empty_zero_page __read_mostly;
 EXPORT_SYMBOL(empty_zero_page);
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
        int i,free = 0,total = 0,reserved = 0;
        int shared = 0, cached = 0;
index d17d04cfb2cd4095c77adb306fcb690161c09fbf..33794c1d92c349b24589694d9fd0d5c321ccf522 100644 (file)
@@ -821,7 +821,7 @@ cmds(struct pt_regs *excp)
                                memzcan();
                                break;
                        case 'i':
-                               show_mem();
+                               show_mem(0);
                                break;
                        default:
                                termch = cmd;
index 6d0e02c4fe09cdc318d6da849668495546d21897..4c31e2b6e71b8bd49451ab2d6f4381e82bfbd9ae 100644 (file)
@@ -75,7 +75,7 @@ void __init kmap_init(void)
        kmap_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE);
 }
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
        printk("Mem-info:\n");
        show_free_areas();
index 1a2b36f8866d88b7708722ff81b9026e5445f22e..de7d8e21e01d8980009de9840ccbd2c30690b2f1 100644 (file)
@@ -41,7 +41,7 @@
  * The normal show_free_areas() is too verbose on Tile, with dozens
  * of processors and often four NUMA zones each with high and lowmem.
  */
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
        struct zone *zone;
 
index 3dbe3709b69d0ce0fe556c4f400e2d4d07a5bce9..1fc02633f700a18af47db8e61a85cd55c0210d80 100644 (file)
@@ -55,7 +55,7 @@ early_param("initrd", early_initrd);
  */
 struct meminfo meminfo;
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
        int free = 0, total = 0, reserved = 0;
        int shared = 0, cached = 0, slab = 0, i;
index 455768a3eb9e8c916747024bc6cdfba101ab9919..2bef5705ce249d474a5d51dbab84b5c516383095 100644 (file)
@@ -371,12 +371,14 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
 
-void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
+void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
+                               unsigned long unaccounted_time)
 {
        unsigned long flags;
 
        spin_lock_irqsave(&blkg->stats_lock, flags);
        blkg->stats.time += time;
+       blkg->stats.unaccounted_time += unaccounted_time;
        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
@@ -604,6 +606,9 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
                                        blkg->stats.sectors, cb, dev);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
+       if (type == BLKIO_STAT_UNACCOUNTED_TIME)
+               return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+                                       blkg->stats.unaccounted_time, cb, dev);
        if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
                uint64_t sum = blkg->stats.avg_queue_size_sum;
                uint64_t samples = blkg->stats.avg_queue_size_samples;
@@ -1125,6 +1130,9 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
                        return blkio_read_blkg_stats(blkcg, cft, cb,
                                                BLKIO_STAT_QUEUED, 1);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
+               case BLKIO_PROP_unaccounted_time:
+                       return blkio_read_blkg_stats(blkcg, cft, cb,
+                                               BLKIO_STAT_UNACCOUNTED_TIME, 0);
                case BLKIO_PROP_dequeue:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
                                                BLKIO_STAT_DEQUEUE, 0);
@@ -1382,6 +1390,12 @@ struct cftype blkio_files[] = {
                                BLKIO_PROP_dequeue),
                .read_map = blkiocg_file_read_map,
        },
+       {
+               .name = "unaccounted_time",
+               .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+                               BLKIO_PROP_unaccounted_time),
+               .read_map = blkiocg_file_read_map,
+       },
 #endif
 };
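blkiocg_update_timeslice_used() now reports the unaccounted part of a slice alongside the charged time. A minimal, hypothetical caller under the new signature (used_sl and unaccounted_sl are illustrative names, not taken from this diff):

	/* charge 'used_sl' jiffies to the group and report 'unaccounted_sl'
	 * jiffies that should not be charged to it (illustrative sketch) */
	blkiocg_update_timeslice_used(blkg, used_sl, unaccounted_sl);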
 
index ea4861bdd549a58fb3884eab94c1c2395bae243a..10919fae2d3aa2e6ebc1e3cdc2d1636939c997ad 100644 (file)
@@ -49,6 +49,8 @@ enum stat_type {
        /* All the single valued stats go below this */
        BLKIO_STAT_TIME,
        BLKIO_STAT_SECTORS,
+       /* Time not charged to this cgroup */
+       BLKIO_STAT_UNACCOUNTED_TIME,
 #ifdef CONFIG_DEBUG_BLK_CGROUP
        BLKIO_STAT_AVG_QUEUE_SIZE,
        BLKIO_STAT_IDLE_TIME,
@@ -81,6 +83,7 @@ enum blkcg_file_name_prop {
        BLKIO_PROP_io_serviced,
        BLKIO_PROP_time,
        BLKIO_PROP_sectors,
+       BLKIO_PROP_unaccounted_time,
        BLKIO_PROP_io_service_time,
        BLKIO_PROP_io_wait_time,
        BLKIO_PROP_io_merged,
@@ -114,6 +117,8 @@ struct blkio_group_stats {
        /* total disk time and nr sectors dispatched by this group */
        uint64_t time;
        uint64_t sectors;
+       /* Time not charged to this cgroup */
+       uint64_t unaccounted_time;
        uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
 #ifdef CONFIG_DEBUG_BLK_CGROUP
        /* Sum of number of IOs queued across all samples */
@@ -240,7 +245,7 @@ static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
 
 #endif
 
-#define BLKIO_WEIGHT_MIN       100
+#define BLKIO_WEIGHT_MIN       10
 #define BLKIO_WEIGHT_MAX       1000
 #define BLKIO_WEIGHT_DEFAULT   500
 
@@ -293,7 +298,8 @@ extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
 extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
                                                void *key);
 void blkiocg_update_timeslice_used(struct blkio_group *blkg,
-                                       unsigned long time);
+                                       unsigned long time,
+                                       unsigned long unaccounted_time);
 void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes,
                                                bool direction, bool sync);
 void blkiocg_update_completion_stats(struct blkio_group *blkg,
@@ -319,7 +325,9 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
 static inline struct blkio_group *
 blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
 static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
-                                               unsigned long time) {}
+                                               unsigned long time,
+                                               unsigned long unaccounted_time)
+{}
 static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
                                uint64_t bytes, bool direction, bool sync) {}
 static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
index a63336d49f3004919fda91d13e4f7087789fd184..59b5c00c01262100c76abbf109583a4da37c0c54 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/writeback.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/fault-inject.h>
+#include <linux/list_sort.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -149,39 +150,29 @@ EXPORT_SYMBOL(blk_rq_init);
 static void req_bio_endio(struct request *rq, struct bio *bio,
                          unsigned int nbytes, int error)
 {
-       struct request_queue *q = rq->q;
-
-       if (&q->flush_rq != rq) {
-               if (error)
-                       clear_bit(BIO_UPTODATE, &bio->bi_flags);
-               else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-                       error = -EIO;
+       if (error)
+               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+       else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+               error = -EIO;
 
-               if (unlikely(nbytes > bio->bi_size)) {
-                       printk(KERN_ERR "%s: want %u bytes done, %u left\n",
-                              __func__, nbytes, bio->bi_size);
-                       nbytes = bio->bi_size;
-               }
+       if (unlikely(nbytes > bio->bi_size)) {
+               printk(KERN_ERR "%s: want %u bytes done, %u left\n",
+                      __func__, nbytes, bio->bi_size);
+               nbytes = bio->bi_size;
+       }
 
-               if (unlikely(rq->cmd_flags & REQ_QUIET))
-                       set_bit(BIO_QUIET, &bio->bi_flags);
+       if (unlikely(rq->cmd_flags & REQ_QUIET))
+               set_bit(BIO_QUIET, &bio->bi_flags);
 
-               bio->bi_size -= nbytes;
-               bio->bi_sector += (nbytes >> 9);
+       bio->bi_size -= nbytes;
+       bio->bi_sector += (nbytes >> 9);
 
-               if (bio_integrity(bio))
-                       bio_integrity_advance(bio, nbytes);
+       if (bio_integrity(bio))
+               bio_integrity_advance(bio, nbytes);
 
-               if (bio->bi_size == 0)
-                       bio_endio(bio, error);
-       } else {
-               /*
-                * Okay, this is the sequenced flush request in
-                * progress, just record the error;
-                */
-               if (error && !q->flush_err)
-                       q->flush_err = error;
-       }
+       /* don't actually finish bio if it's part of flush sequence */
+       if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+               bio_endio(bio, error);
 }
 
 void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -208,135 +199,43 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 EXPORT_SYMBOL(blk_dump_rq_flags);
 
 /*
- * "plug" the device if there are no outstanding requests: this will
- * force the transfer to start only after we have put all the requests
- * on the list.
- *
- * This is called with interrupts off and no requests on the queue and
- * with the queue lock held.
- */
-void blk_plug_device(struct request_queue *q)
+ * Make sure that plugs that were pending when this function was entered
+ * are now complete and requests pushed to the queue.
+ */
+static inline void queue_sync_plugs(struct request_queue *q)
 {
-       WARN_ON(!irqs_disabled());
-
        /*
-        * don't plug a stopped queue, it must be paired with blk_start_queue()
-        * which will restart the queueing
+        * If the current process is plugged and has barriers submitted,
+        * we will livelock if we don't unplug first.
         */
-       if (blk_queue_stopped(q))
-               return;
-
-       if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
-               mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
-               trace_block_plug(q);
-       }
-}
-EXPORT_SYMBOL(blk_plug_device);
-
-/**
- * blk_plug_device_unlocked - plug a device without queue lock held
- * @q:    The &struct request_queue to plug
- *
- * Description:
- *   Like @blk_plug_device(), but grabs the queue lock and disables
- *   interrupts.
- **/
-void blk_plug_device_unlocked(struct request_queue *q)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(q->queue_lock, flags);
-       blk_plug_device(q);
-       spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(blk_plug_device_unlocked);
-
-/*
- * remove the queue from the plugged list, if present. called with
- * queue lock held and interrupts disabled.
- */
-int blk_remove_plug(struct request_queue *q)
-{
-       WARN_ON(!irqs_disabled());
-
-       if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
-               return 0;
-
-       del_timer(&q->unplug_timer);
-       return 1;
+       blk_flush_plug(current);
 }
-EXPORT_SYMBOL(blk_remove_plug);
 
-/*
- * remove the plug and let it rip..
- */
-void __generic_unplug_device(struct request_queue *q)
+static void blk_delay_work(struct work_struct *work)
 {
-       if (unlikely(blk_queue_stopped(q)))
-               return;
-       if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
-               return;
+       struct request_queue *q;
 
-       q->request_fn(q);
+       q = container_of(work, struct request_queue, delay_work.work);
+       spin_lock_irq(q->queue_lock);
+       __blk_run_queue(q, false);
+       spin_unlock_irq(q->queue_lock);
 }
 
 /**
- * generic_unplug_device - fire a request queue
- * @q:    The &struct request_queue in question
+ * blk_delay_queue - restart queueing after defined interval
+ * @q:         The &struct request_queue in question
+ * @msecs:     Delay in msecs
  *
  * Description:
- *   Linux uses plugging to build bigger requests queues before letting
- *   the device have at them. If a queue is plugged, the I/O scheduler
- *   is still adding and merging requests on the queue. Once the queue
- *   gets unplugged, the request_fn defined for the queue is invoked and
- *   transfers started.
- **/
-void generic_unplug_device(struct request_queue *q)
-{
-       if (blk_queue_plugged(q)) {
-               spin_lock_irq(q->queue_lock);
-               __generic_unplug_device(q);
-               spin_unlock_irq(q->queue_lock);
-       }
-}
-EXPORT_SYMBOL(generic_unplug_device);
-
-static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
-                                  struct page *page)
-{
-       struct request_queue *q = bdi->unplug_io_data;
-
-       blk_unplug(q);
-}
-
-void blk_unplug_work(struct work_struct *work)
-{
-       struct request_queue *q =
-               container_of(work, struct request_queue, unplug_work);
-
-       trace_block_unplug_io(q);
-       q->unplug_fn(q);
-}
-
-void blk_unplug_timeout(unsigned long data)
-{
-       struct request_queue *q = (struct request_queue *)data;
-
-       trace_block_unplug_timer(q);
-       kblockd_schedule_work(q, &q->unplug_work);
-}
-
-void blk_unplug(struct request_queue *q)
+ *   Sometimes queueing needs to be postponed for a little while, to allow
+ *   resources to come back. This function will make sure that queueing is
+ *   restarted around the specified time.
+ */
+void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
-       /*
-        * devices don't necessarily have an ->unplug_fn defined
-        */
-       if (q->unplug_fn) {
-               trace_block_unplug_io(q);
-               q->unplug_fn(q);
-       }
+       schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
 }
-EXPORT_SYMBOL(blk_unplug);
+EXPORT_SYMBOL(blk_delay_queue);
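blk_delay_queue() gives drivers a replacement for the old plug timer when they run out of resources and need the queue restarted a little later. A minimal sketch of a request_fn using it; my_request_fn() and can_issue() are hypothetical driver helpers, not part of this diff:

	static void my_request_fn(struct request_queue *q)
	{
		struct request *rq;

		while ((rq = blk_fetch_request(q)) != NULL) {
			if (!can_issue(rq)) {
				/* out of resources: put the request back
				 * and have the queue retried in ~3 ms */
				blk_requeue_request(q, rq);
				blk_delay_queue(q, 3);
				break;
			}
			/* ... hand rq to the hardware ... */
		}
	}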
 
 /**
  * blk_start_queue - restart a previously stopped queue
@@ -372,7 +271,7 @@ EXPORT_SYMBOL(blk_start_queue);
  **/
 void blk_stop_queue(struct request_queue *q)
 {
-       blk_remove_plug(q);
+       cancel_delayed_work(&q->delay_work);
        queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
 EXPORT_SYMBOL(blk_stop_queue);
@@ -390,13 +289,16 @@ EXPORT_SYMBOL(blk_stop_queue);
  *     that its ->make_request_fn will not re-add plugging prior to calling
  *     this function.
  *
+ *     This function does not cancel any asynchronous activity arising
 *     out of elevator or throttling code. That would require elevator_exit()
+ *     and blk_throtl_exit() to be called with queue lock initialized.
+ *
  */
 void blk_sync_queue(struct request_queue *q)
 {
-       del_timer_sync(&q->unplug_timer);
        del_timer_sync(&q->timeout);
-       cancel_work_sync(&q->unplug_work);
-       throtl_shutdown_timer_wq(q);
+       cancel_delayed_work_sync(&q->delay_work);
+       queue_sync_plugs(q);
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
@@ -412,14 +314,9 @@ EXPORT_SYMBOL(blk_sync_queue);
  */
 void __blk_run_queue(struct request_queue *q, bool force_kblockd)
 {
-       blk_remove_plug(q);
-
        if (unlikely(blk_queue_stopped(q)))
                return;
 
-       if (elv_queue_empty(q))
-               return;
-
        /*
         * Only recurse once to avoid overrunning the stack, let the unplug
         * handling reinvoke the handler shortly if we already got there.
@@ -427,10 +324,8 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd)
        if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
                q->request_fn(q);
                queue_flag_clear(QUEUE_FLAG_REENTER, q);
-       } else {
-               queue_flag_set(QUEUE_FLAG_PLUGGED, q);
-               kblockd_schedule_work(q, &q->unplug_work);
-       }
+       } else
+               queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -457,6 +352,11 @@ void blk_put_queue(struct request_queue *q)
        kobject_put(&q->kobj);
 }
 
+/*
+ * Note: If a driver supplied the queue lock, it should not zap that lock
+ * unexpectedly as some queue cleanup components like elevator_exit() and
+ * blk_throtl_exit() need the queue lock.
+ */
 void blk_cleanup_queue(struct request_queue *q)
 {
        /*
@@ -475,6 +375,8 @@ void blk_cleanup_queue(struct request_queue *q)
        if (q->elevator)
                elevator_exit(q->elevator);
 
+       blk_throtl_exit(q);
+
        blk_put_queue(q);
 }
 EXPORT_SYMBOL(blk_cleanup_queue);
@@ -517,8 +419,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        if (!q)
                return NULL;
 
-       q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
-       q->backing_dev_info.unplug_io_data = q;
        q->backing_dev_info.ra_pages =
                        (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
        q->backing_dev_info.state = 0;
@@ -538,17 +438,24 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 
        setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
                    laptop_mode_timer_fn, (unsigned long) q);
-       init_timer(&q->unplug_timer);
        setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
        INIT_LIST_HEAD(&q->timeout_list);
-       INIT_LIST_HEAD(&q->pending_flushes);
-       INIT_WORK(&q->unplug_work, blk_unplug_work);
+       INIT_LIST_HEAD(&q->flush_queue[0]);
+       INIT_LIST_HEAD(&q->flush_queue[1]);
+       INIT_LIST_HEAD(&q->flush_data_in_flight);
+       INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
 
        kobject_init(&q->kobj, &blk_queue_ktype);
 
        mutex_init(&q->sysfs_lock);
        spin_lock_init(&q->__queue_lock);
 
+       /*
+        * By default initialize queue_lock to internal lock and driver can
+        * override it later if need be.
+        */
+       q->queue_lock = &q->__queue_lock;
+
        return q;
 }
 EXPORT_SYMBOL(blk_alloc_queue_node);
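Since the queue lock now defaults to the internal __queue_lock, supplying a driver lock is optional. A hedged sketch of both initialisation styles (my_request_fn and my_lock are illustrative names):

	static DEFINE_SPINLOCK(my_lock);
	struct request_queue *q;

	/* driver-supplied lock: overrides the internal one */
	q = blk_init_queue(my_request_fn, &my_lock);

	/* or pass NULL and keep the internal per-queue lock */
	q = blk_init_queue(my_request_fn, NULL);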
@@ -631,9 +538,11 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
        q->request_fn           = rfn;
        q->prep_rq_fn           = NULL;
        q->unprep_rq_fn         = NULL;
-       q->unplug_fn            = generic_unplug_device;
        q->queue_flags          = QUEUE_FLAG_DEFAULT;
-       q->queue_lock           = lock;
+
+       /* Override internal queue lock with supplied lock pointer */
+       if (lock)
+               q->queue_lock           = lock;
 
        /*
         * This also sets hw/phys segments, boundary and size
@@ -666,6 +575,8 @@ int blk_get_queue(struct request_queue *q)
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
+       BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
        if (rq->cmd_flags & REQ_ELVPRIV)
                elv_put_request(q, rq);
        mempool_free(rq, q->rq.rq_pool);
@@ -761,6 +672,25 @@ static void freed_request(struct request_queue *q, int sync, int priv)
                __freed_request(q, sync ^ 1);
 }
 
+/*
+ * Determine if elevator data should be initialized when allocating the
+ * request associated with @bio.
+ */
+static bool blk_rq_should_init_elevator(struct bio *bio)
+{
+       if (!bio)
+               return true;
+
+       /*
+        * Flush requests do not use the elevator so skip initialization.
+        * This allows a request to share the flush and elevator data.
+        */
+       if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
+               return false;
+
+       return true;
+}
+
 /*
  * Get a free request, queue_lock must be held.
  * Returns NULL on failure, with queue_lock held.
@@ -773,7 +703,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
        struct request_list *rl = &q->rq;
        struct io_context *ioc = NULL;
        const bool is_sync = rw_is_sync(rw_flags) != 0;
-       int may_queue, priv;
+       int may_queue, priv = 0;
 
        may_queue = elv_may_queue(q, rw_flags);
        if (may_queue == ELV_MQUEUE_NO)
@@ -817,9 +747,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
        rl->count[is_sync]++;
        rl->starved[is_sync] = 0;
 
-       priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
-       if (priv)
-               rl->elvpriv++;
+       if (blk_rq_should_init_elevator(bio)) {
+               priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+               if (priv)
+                       rl->elvpriv++;
+       }
 
        if (blk_queue_io_stat(q))
                rw_flags |= REQ_IO_STAT;
@@ -866,8 +798,8 @@ out:
 }
 
 /*
- * No available requests for this queue, unplug the device and wait for some
- * requests to become available.
+ * No available requests for this queue, wait for some requests to become
+ * available.
  *
  * Called with q->queue_lock held, and returns with it unlocked.
  */
@@ -888,7 +820,6 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 
                trace_block_sleeprq(q, bio, rw_flags & 1);
 
-               __generic_unplug_device(q);
                spin_unlock_irq(q->queue_lock);
                io_schedule();
 
@@ -1010,6 +941,13 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL(blk_requeue_request);
 
+static void add_acct_request(struct request_queue *q, struct request *rq,
+                            int where)
+{
+       drive_stat_acct(rq, 1);
+       __elv_add_request(q, rq, where);
+}
+
 /**
  * blk_insert_request - insert a special request into a request queue
  * @q:         request queue where request should be inserted
@@ -1052,8 +990,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
        if (blk_rq_tagged(rq))
                blk_queue_end_tag(q, rq);
 
-       drive_stat_acct(rq, 1);
-       __elv_add_request(q, rq, where, 0);
+       add_acct_request(q, rq, where);
        __blk_run_queue(q, false);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
@@ -1174,6 +1111,113 @@ void blk_add_request_payload(struct request *rq, struct page *page,
 }
 EXPORT_SYMBOL_GPL(blk_add_request_payload);
 
+static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
+                                  struct bio *bio)
+{
+       const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+
+       /*
+        * Debug stuff, kill later
+        */
+       if (!rq_mergeable(req)) {
+               blk_dump_rq_flags(req, "back");
+               return false;
+       }
+
+       if (!ll_back_merge_fn(q, req, bio))
+               return false;
+
+       trace_block_bio_backmerge(q, bio);
+
+       if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+               blk_rq_set_mixed_merge(req);
+
+       req->biotail->bi_next = bio;
+       req->biotail = bio;
+       req->__data_len += bio->bi_size;
+       req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
+
+       drive_stat_acct(req, 0);
+       return true;
+}
+
+static bool bio_attempt_front_merge(struct request_queue *q,
+                                   struct request *req, struct bio *bio)
+{
+       const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+       sector_t sector;
+
+       /*
+        * Debug stuff, kill later
+        */
+       if (!rq_mergeable(req)) {
+               blk_dump_rq_flags(req, "front");
+               return false;
+       }
+
+       if (!ll_front_merge_fn(q, req, bio))
+               return false;
+
+       trace_block_bio_frontmerge(q, bio);
+
+       if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+               blk_rq_set_mixed_merge(req);
+
+       sector = bio->bi_sector;
+
+       bio->bi_next = req->bio;
+       req->bio = bio;
+
+       /*
+        * may not be valid. if the low level driver said
+        * it didn't need a bounce buffer then it better
+        * not touch req->buffer either...
+        */
+       req->buffer = bio_data(bio);
+       req->__sector = bio->bi_sector;
+       req->__data_len += bio->bi_size;
+       req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
+
+       drive_stat_acct(req, 0);
+       return true;
+}
+
+/*
+ * Attempts to merge with the plugged list in the current process. Returns
+ * true if merge was successful, otherwise false.
+ */
+static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
+                              struct bio *bio)
+{
+       struct blk_plug *plug;
+       struct request *rq;
+       bool ret = false;
+
+       plug = tsk->plug;
+       if (!plug)
+               goto out;
+
+       list_for_each_entry_reverse(rq, &plug->list, queuelist) {
+               int el_ret;
+
+               if (rq->q != q)
+                       continue;
+
+               el_ret = elv_try_merge(rq, bio);
+               if (el_ret == ELEVATOR_BACK_MERGE) {
+                       ret = bio_attempt_back_merge(q, rq, bio);
+                       if (ret)
+                               break;
+               } else if (el_ret == ELEVATOR_FRONT_MERGE) {
+                       ret = bio_attempt_front_merge(q, rq, bio);
+                       if (ret)
+                               break;
+               }
+       }
+out:
+       return ret;
+}
+
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
        req->cpu = bio->bi_comp_cpu;
@@ -1189,26 +1233,12 @@ void init_request_from_bio(struct request *req, struct bio *bio)
        blk_rq_bio_prep(req->q, req, bio);
 }
 
-/*
- * Only disabling plugging for non-rotational devices if it does tagging
- * as well, otherwise we do need the proper merging
- */
-static inline bool queue_should_plug(struct request_queue *q)
-{
-       return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
-}
-
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
-       struct request *req;
-       int el_ret;
-       unsigned int bytes = bio->bi_size;
-       const unsigned short prio = bio_prio(bio);
        const bool sync = !!(bio->bi_rw & REQ_SYNC);
-       const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
-       const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
-       int where = ELEVATOR_INSERT_SORT;
-       int rw_flags;
+       struct blk_plug *plug;
+       int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
+       struct request *req;
 
        /*
         * low level driver can indicate that it wants pages above a
@@ -1217,78 +1247,36 @@ static int __make_request(struct request_queue *q, struct bio *bio)
         */
        blk_queue_bounce(q, &bio);
 
-       spin_lock_irq(q->queue_lock);
-
        if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
-               where = ELEVATOR_INSERT_FRONT;
+               spin_lock_irq(q->queue_lock);
+               where = ELEVATOR_INSERT_FLUSH;
                goto get_rq;
        }
 
-       if (elv_queue_empty(q))
-               goto get_rq;
-
-       el_ret = elv_merge(q, &req, bio);
-       switch (el_ret) {
-       case ELEVATOR_BACK_MERGE:
-               BUG_ON(!rq_mergeable(req));
-
-               if (!ll_back_merge_fn(q, req, bio))
-                       break;
-
-               trace_block_bio_backmerge(q, bio);
-
-               if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
-                       blk_rq_set_mixed_merge(req);
-
-               req->biotail->bi_next = bio;
-               req->biotail = bio;
-               req->__data_len += bytes;
-               req->ioprio = ioprio_best(req->ioprio, prio);
-               if (!blk_rq_cpu_valid(req))
-                       req->cpu = bio->bi_comp_cpu;
-               drive_stat_acct(req, 0);
-               elv_bio_merged(q, req, bio);
-               if (!attempt_back_merge(q, req))
-                       elv_merged_request(q, req, el_ret);
+       /*
+        * Check if we can merge with the plugged list before grabbing
+        * any locks.
+        */
+       if (attempt_plug_merge(current, q, bio))
                goto out;
 
-       case ELEVATOR_FRONT_MERGE:
-               BUG_ON(!rq_mergeable(req));
-
-               if (!ll_front_merge_fn(q, req, bio))
-                       break;
-
-               trace_block_bio_frontmerge(q, bio);
+       spin_lock_irq(q->queue_lock);
 
-               if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
-                       blk_rq_set_mixed_merge(req);
-                       req->cmd_flags &= ~REQ_FAILFAST_MASK;
-                       req->cmd_flags |= ff;
+       el_ret = elv_merge(q, &req, bio);
+       if (el_ret == ELEVATOR_BACK_MERGE) {
+               BUG_ON(req->cmd_flags & REQ_ON_PLUG);
+               if (bio_attempt_back_merge(q, req, bio)) {
+                       if (!attempt_back_merge(q, req))
+                               elv_merged_request(q, req, el_ret);
+                       goto out_unlock;
+               }
+       } else if (el_ret == ELEVATOR_FRONT_MERGE) {
+               BUG_ON(req->cmd_flags & REQ_ON_PLUG);
+               if (bio_attempt_front_merge(q, req, bio)) {
+                       if (!attempt_front_merge(q, req))
+                               elv_merged_request(q, req, el_ret);
+                       goto out_unlock;
                }
-
-               bio->bi_next = req->bio;
-               req->bio = bio;
-
-               /*
-                * may not be valid. if the low level driver said
-                * it didn't need a bounce buffer then it better
-                * not touch req->buffer either...
-                */
-               req->buffer = bio_data(bio);
-               req->__sector = bio->bi_sector;
-               req->__data_len += bytes;
-               req->ioprio = ioprio_best(req->ioprio, prio);
-               if (!blk_rq_cpu_valid(req))
-                       req->cpu = bio->bi_comp_cpu;
-               drive_stat_acct(req, 0);
-               elv_bio_merged(q, req, bio);
-               if (!attempt_front_merge(q, req))
-                       elv_merged_request(q, req, el_ret);
-               goto out;
-
-       /* ELV_NO_MERGE: elevator says don't/can't merge. */
-       default:
-               ;
        }
 
 get_rq:
@@ -1315,20 +1303,35 @@ get_rq:
         */
        init_request_from_bio(req, bio);
 
-       spin_lock_irq(q->queue_lock);
        if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
-           bio_flagged(bio, BIO_CPU_AFFINE))
-               req->cpu = blk_cpu_to_group(smp_processor_id());
-       if (queue_should_plug(q) && elv_queue_empty(q))
-               blk_plug_device(q);
-
-       /* insert the request into the elevator */
-       drive_stat_acct(req, 1);
-       __elv_add_request(q, req, where, 0);
+           bio_flagged(bio, BIO_CPU_AFFINE)) {
+               req->cpu = blk_cpu_to_group(get_cpu());
+               put_cpu();
+       }
+
+       plug = current->plug;
+       if (plug) {
+               if (!plug->should_sort && !list_empty(&plug->list)) {
+                       struct request *__rq;
+
+                       __rq = list_entry_rq(plug->list.prev);
+                       if (__rq->q != q)
+                               plug->should_sort = 1;
+               }
+               /*
+                * Debug flag, kill later
+                */
+               req->cmd_flags |= REQ_ON_PLUG;
+               list_add_tail(&req->queuelist, &plug->list);
+               drive_stat_acct(req, 1);
+       } else {
+               spin_lock_irq(q->queue_lock);
+               add_acct_request(q, req, where);
+               __blk_run_queue(q, false);
+out_unlock:
+               spin_unlock_irq(q->queue_lock);
+       }
 out:
-       if (unplug || !queue_should_plug(q))
-               __generic_unplug_device(q);
-       spin_unlock_irq(q->queue_lock);
        return 0;
 }
 
@@ -1731,9 +1734,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
         */
        BUG_ON(blk_queued_rq(rq));
 
-       drive_stat_acct(rq, 1);
-       __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
-
+       add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
        spin_unlock_irqrestore(q->queue_lock, flags);
 
        return 0;
@@ -1805,7 +1806,7 @@ static void blk_account_io_done(struct request *req)
         * normal IO on queueing nor completion.  Accounting the
         * containing request is enough.
         */
-       if (blk_do_io_stat(req) && req != &req->q->flush_rq) {
+       if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
                unsigned long duration = jiffies - req->start_time;
                const int rw = rq_data_dir(req);
                struct hd_struct *part;
@@ -2628,6 +2629,113 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 
+int kblockd_schedule_delayed_work(struct request_queue *q,
+                       struct delayed_work *dwork, unsigned long delay)
+{
+       return queue_delayed_work(kblockd_workqueue, dwork, delay);
+}
+EXPORT_SYMBOL(kblockd_schedule_delayed_work);
+
+#define PLUG_MAGIC     0x91827364
+
+void blk_start_plug(struct blk_plug *plug)
+{
+       struct task_struct *tsk = current;
+
+       plug->magic = PLUG_MAGIC;
+       INIT_LIST_HEAD(&plug->list);
+       plug->should_sort = 0;
+
+       /*
+        * If this is a nested plug, don't actually assign it. It will be
+        * flushed on its own.
+        */
+       if (!tsk->plug) {
+               /*
+                * Store ordering should not be needed here, since a potential
+                * preempt will imply a full memory barrier
+                */
+               tsk->plug = plug;
+       }
+}
+EXPORT_SYMBOL(blk_start_plug);
+
+static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+       struct request *rqa = container_of(a, struct request, queuelist);
+       struct request *rqb = container_of(b, struct request, queuelist);
+
+       return !(rqa->q == rqb->q);
+}
+
+static void flush_plug_list(struct blk_plug *plug)
+{
+       struct request_queue *q;
+       unsigned long flags;
+       struct request *rq;
+
+       BUG_ON(plug->magic != PLUG_MAGIC);
+
+       if (list_empty(&plug->list))
+               return;
+
+       if (plug->should_sort)
+               list_sort(NULL, &plug->list, plug_rq_cmp);
+
+       q = NULL;
+       local_irq_save(flags);
+       while (!list_empty(&plug->list)) {
+               rq = list_entry_rq(plug->list.next);
+               list_del_init(&rq->queuelist);
+               BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
+               BUG_ON(!rq->q);
+               if (rq->q != q) {
+                       if (q) {
+                               __blk_run_queue(q, false);
+                               spin_unlock(q->queue_lock);
+                       }
+                       q = rq->q;
+                       spin_lock(q->queue_lock);
+               }
+               rq->cmd_flags &= ~REQ_ON_PLUG;
+
+               /*
+                * rq is already accounted, so use raw insert
+                */
+               __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
+       }
+
+       if (q) {
+               __blk_run_queue(q, false);
+               spin_unlock(q->queue_lock);
+       }
+
+       BUG_ON(!list_empty(&plug->list));
+       local_irq_restore(flags);
+}
+
+static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
+{
+       flush_plug_list(plug);
+
+       if (plug == tsk->plug)
+               tsk->plug = NULL;
+}
+
+void blk_finish_plug(struct blk_plug *plug)
+{
+       if (plug)
+               __blk_finish_plug(current, plug);
+}
+EXPORT_SYMBOL(blk_finish_plug);
+
+void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
+{
+       __blk_finish_plug(tsk, plug);
+       tsk->plug = plug;
+}
+EXPORT_SYMBOL(__blk_flush_plug);
+
 int __init blk_dev_init(void)
 {
        BUILD_BUG_ON(__REQ_NR_BITS > 8 *
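The per-task plug replaces the old per-queue plug/unplug machinery: submitters batch requests on current->plug and flush them explicitly. A minimal usage sketch, assuming bio1 and bio2 have already been built:

	struct blk_plug plug;

	blk_start_plug(&plug);		/* requests now collect on current->plug */
	submit_bio(READ, bio1);
	submit_bio(READ, bio2);
	blk_finish_plug(&plug);		/* sort, merge and push them to the queue(s) */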
index cf1456a02acdf7f4fc7fb924ff1153dcb4620c06..7482b7fa863ba10b337d7547fb7040767e02b2da 100644 (file)
@@ -54,8 +54,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
        rq->end_io = done;
        WARN_ON(irqs_disabled());
        spin_lock_irq(q->queue_lock);
-       __elv_add_request(q, rq, where, 1);
-       __generic_unplug_device(q);
+       __elv_add_request(q, rq, where);
+       __blk_run_queue(q, false);
        /* the queue is stopped so it won't be plugged+unplugged */
        if (rq->cmd_type == REQ_TYPE_PM_RESUME)
                q->request_fn(q);
index b27d0208611b4d904e7fda9e4e66bd9a4c7411b4..93d5fd8e51ebfe1126746321af3a9f143831ac83 100644 (file)
@@ -1,6 +1,69 @@
 /*
  * Functions to sequence FLUSH and FUA writes.
+ *
+ * Copyright (C) 2011          Max Planck Institute for Gravitational Physics
+ * Copyright (C) 2011          Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * REQ_{FLUSH|FUA} requests are decomposed into sequences consisting of three
+ * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
+ * properties and hardware capability.
+ *
+ * If a request doesn't have data, only REQ_FLUSH makes sense, which
+ * indicates a simple flush request.  If there is data, REQ_FLUSH indicates
+ * that the device cache should be flushed before the data is executed, and
+ * REQ_FUA means that the data must be on non-volatile media on request
+ * completion.
+ *
+ * If the device doesn't have writeback cache, FLUSH and FUA don't make any
+ * difference.  The requests are either completed immediately if there's no
+ * data or executed as normal requests otherwise.
+ *
+ * If the device has writeback cache and supports FUA, REQ_FLUSH is
+ * translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
+ *
+ * If the device has writeback cache and doesn't support FUA, REQ_FLUSH is
+ * translated to PREFLUSH and REQ_FUA to POSTFLUSH.
+ *
+ * The actual execution of flush is double buffered.  Whenever a request
+ * needs to execute PRE or POSTFLUSH, it queues at
+ * q->flush_queue[q->flush_pending_idx].  Once certain criteria are met, a
+ * flush is issued and the pending_idx is toggled.  When the flush
+ * completes, all the requests which were pending proceed to the next
+ * step.  This allows arbitrary merging of different types of FLUSH/FUA
+ * requests.
+ *
+ * Currently, the following conditions are used to determine when to issue
+ * flush.
+ *
+ * C1. At any given time, only one flush shall be in progress.  This makes
+ *     double buffering sufficient.
+ *
+ * C2. Flush is deferred if any request is executing DATA of its sequence.
+ *     This avoids issuing separate POSTFLUSHes for requests which shared
+ *     PREFLUSH.
+ *
+ * C3. The second condition is ignored if there is a request which has
+ *     waited longer than FLUSH_PENDING_TIMEOUT.  This is to avoid
+ *     starvation in the unlikely case where there is a continuous stream of
+ *     FUA (without FLUSH) requests.
+ *
+ * For devices which support FUA, it isn't clear whether C2 (and thus C3)
+ * is beneficial.
+ *
+ * Note that a sequenced FLUSH/FUA request with DATA is completed twice.
+ * Once while executing DATA and again after the whole sequence is
+ * complete.  The first completion updates the contained bio but doesn't
+ * finish it so that the bio submitter is notified only after the whole
+ * sequence is complete.  This is implemented by testing REQ_FLUSH_SEQ in
+ * req_bio_endio().
+ *
+ * The above peculiarity requires that each FLUSH/FUA request has only one
+ * bio attached to it, which is guaranteed as they aren't allowed to be
+ * merged in the usual way.
  */
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/bio.h>
 
 /* FLUSH/FUA sequences */
 enum {
-       QUEUE_FSEQ_STARTED      = (1 << 0), /* flushing in progress */
-       QUEUE_FSEQ_PREFLUSH     = (1 << 1), /* pre-flushing in progress */
-       QUEUE_FSEQ_DATA         = (1 << 2), /* data write in progress */
-       QUEUE_FSEQ_POSTFLUSH    = (1 << 3), /* post-flushing in progress */
-       QUEUE_FSEQ_DONE         = (1 << 4),
+       REQ_FSEQ_PREFLUSH       = (1 << 0), /* pre-flushing in progress */
+       REQ_FSEQ_DATA           = (1 << 1), /* data write in progress */
+       REQ_FSEQ_POSTFLUSH      = (1 << 2), /* post-flushing in progress */
+       REQ_FSEQ_DONE           = (1 << 3),
+
+       REQ_FSEQ_ACTIONS        = REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA |
+                                 REQ_FSEQ_POSTFLUSH,
+
+       /*
+        * If flush has been pending longer than the following timeout,
+        * it's issued even if flush_data requests are still in flight.
+        */
+       FLUSH_PENDING_TIMEOUT   = 5 * HZ,
 };
 
-static struct request *queue_next_fseq(struct request_queue *q);
+static bool blk_kick_flush(struct request_queue *q);
 
-unsigned blk_flush_cur_seq(struct request_queue *q)
+static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
 {
-       if (!q->flush_seq)
-               return 0;
-       return 1 << ffz(q->flush_seq);
+       unsigned int policy = 0;
+
+       if (fflags & REQ_FLUSH) {
+               if (rq->cmd_flags & REQ_FLUSH)
+                       policy |= REQ_FSEQ_PREFLUSH;
+               if (blk_rq_sectors(rq))
+                       policy |= REQ_FSEQ_DATA;
+               if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
+                       policy |= REQ_FSEQ_POSTFLUSH;
+       }
+       return policy;
 }
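To illustrate the decomposition described in the header comment above: for a FLUSH|FUA write that carries data, on a queue advertising FLUSH but not FUA (writeback cache, no FUA support), blk_flush_policy() requires all three steps. A hedged, hypothetical snippet:

	unsigned int fflags = REQ_FLUSH;	/* q->flush_flags: FLUSH, no FUA */
	unsigned int policy = blk_flush_policy(fflags, rq);
	/* policy == REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA | REQ_FSEQ_POSTFLUSH,
	 * assuming rq has REQ_FLUSH|REQ_FUA set and a non-zero sector count */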
 
-static struct request *blk_flush_complete_seq(struct request_queue *q,
-                                             unsigned seq, int error)
+static unsigned int blk_flush_cur_seq(struct request *rq)
 {
-       struct request *next_rq = NULL;
-
-       if (error && !q->flush_err)
-               q->flush_err = error;
-
-       BUG_ON(q->flush_seq & seq);
-       q->flush_seq |= seq;
-
-       if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
-               /* not complete yet, queue the next flush sequence */
-               next_rq = queue_next_fseq(q);
-       } else {
-               /* complete this flush request */
-               __blk_end_request_all(q->orig_flush_rq, q->flush_err);
-               q->orig_flush_rq = NULL;
-               q->flush_seq = 0;
-
-               /* dispatch the next flush if there's one */
-               if (!list_empty(&q->pending_flushes)) {
-                       next_rq = list_entry_rq(q->pending_flushes.next);
-                       list_move(&next_rq->queuelist, &q->queue_head);
-               }
+       return 1 << ffz(rq->flush.seq);
+}
+
+static void blk_flush_restore_request(struct request *rq)
+{
+       /*
+        * After flush data completion, @rq->bio is %NULL but we need to
+        * complete the bio again.  @rq->biotail is guaranteed to equal the
+        * original @rq->bio.  Restore it.
+        */
+       rq->bio = rq->biotail;
+
+       /* make @rq a normal request */
+       rq->cmd_flags &= ~REQ_FLUSH_SEQ;
+       rq->end_io = NULL;
+}
+
+/**
+ * blk_flush_complete_seq - complete flush sequence
+ * @rq: FLUSH/FUA request being sequenced
+ * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
+ * @error: whether an error occurred
+ *
+ * @rq just completed @seq part of its flush sequence, record the
+ * completion and trigger the next step.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ *
+ * RETURNS:
+ * %true if requests were added to the dispatch queue, %false otherwise.
+ */
+static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
+                                  int error)
+{
+       struct request_queue *q = rq->q;
+       struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
+       bool queued = false;
+
+       BUG_ON(rq->flush.seq & seq);
+       rq->flush.seq |= seq;
+
+       if (likely(!error))
+               seq = blk_flush_cur_seq(rq);
+       else
+               seq = REQ_FSEQ_DONE;
+
+       switch (seq) {
+       case REQ_FSEQ_PREFLUSH:
+       case REQ_FSEQ_POSTFLUSH:
+               /* queue for flush */
+               if (list_empty(pending))
+                       q->flush_pending_since = jiffies;
+               list_move_tail(&rq->flush.list, pending);
+               break;
+
+       case REQ_FSEQ_DATA:
+               list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
+               list_add(&rq->queuelist, &q->queue_head);
+               queued = true;
+               break;
+
+       case REQ_FSEQ_DONE:
+               /*
+                * @rq was previously adjusted by blk_flush_issue() for
+                * flush sequencing and may already have gone through the
+                * flush data request completion path.  Restore @rq for
+                * normal completion and end it.
+                */
+               BUG_ON(!list_empty(&rq->queuelist));
+               list_del_init(&rq->flush.list);
+               blk_flush_restore_request(rq);
+               __blk_end_request_all(rq, error);
+               break;
+
+       default:
+               BUG();
        }
-       return next_rq;
+
+       return blk_kick_flush(q) | queued;
 }
 
-static void blk_flush_complete_seq_end_io(struct request_queue *q,
-                                         unsigned seq, int error)
+static void flush_end_io(struct request *flush_rq, int error)
 {
-       bool was_empty = elv_queue_empty(q);
-       struct request *next_rq;
+       struct request_queue *q = flush_rq->q;
+       struct list_head *running = &q->flush_queue[q->flush_running_idx];
+       bool queued = false;
+       struct request *rq, *n;
 
-       next_rq = blk_flush_complete_seq(q, seq, error);
+       BUG_ON(q->flush_pending_idx == q->flush_running_idx);
+
+       /* account completion of the flush request */
+       q->flush_running_idx ^= 1;
+       elv_completed_request(q, flush_rq);
+
+       /* and push the waiting requests to the next stage */
+       list_for_each_entry_safe(rq, n, running, flush.list) {
+               unsigned int seq = blk_flush_cur_seq(rq);
+
+               BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
+               queued |= blk_flush_complete_seq(rq, seq, error);
+       }
 
        /*
         * Moving a request silently to empty queue_head may stall the
@@ -70,127 +217,153 @@ static void blk_flush_complete_seq_end_io(struct request_queue *q,
         * from request completion path and calling directly into
         * request_fn may confuse the driver.  Always use kblockd.
         */
-       if (was_empty && next_rq)
+       if (queued)
                __blk_run_queue(q, true);
 }
 
-static void pre_flush_end_io(struct request *rq, int error)
+/**
+ * blk_kick_flush - consider issuing flush request
+ * @q: request_queue being kicked
+ *
+ * Flush related states of @q have changed, consider issuing flush request.
+ * Please read the comment at the top of this file for more info.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ *
+ * RETURNS:
+ * %true if flush was issued, %false otherwise.
+ */
+static bool blk_kick_flush(struct request_queue *q)
 {
-       elv_completed_request(rq->q, rq);
-       blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
+       struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
+       struct request *first_rq =
+               list_first_entry(pending, struct request, flush.list);
+
+       /* C1 described at the top of this file */
+       if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending))
+               return false;
+
+       /* C2 and C3 */
+       if (!list_empty(&q->flush_data_in_flight) &&
+           time_before(jiffies,
+                       q->flush_pending_since + FLUSH_PENDING_TIMEOUT))
+               return false;
+
+       /*
+        * Issue flush and toggle pending_idx.  This makes pending_idx
+        * different from running_idx, which means flush is in flight.
+        */
+       blk_rq_init(q, &q->flush_rq);
+       q->flush_rq.cmd_type = REQ_TYPE_FS;
+       q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
+       q->flush_rq.rq_disk = first_rq->rq_disk;
+       q->flush_rq.end_io = flush_end_io;
+
+       q->flush_pending_idx ^= 1;
+       elv_insert(q, &q->flush_rq, ELEVATOR_INSERT_REQUEUE);
+       return true;
 }
 
 static void flush_data_end_io(struct request *rq, int error)
 {
-       elv_completed_request(rq->q, rq);
-       blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
-}
+       struct request_queue *q = rq->q;
 
-static void post_flush_end_io(struct request *rq, int error)
-{
-       elv_completed_request(rq->q, rq);
-       blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
+       /*
+        * After populating an empty queue, kick it to avoid stall.  Read
+        * the comment in flush_end_io().
+        */
+       if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
+               __blk_run_queue(q, true);
 }
 
-static void init_flush_request(struct request *rq, struct gendisk *disk)
+/**
+ * blk_insert_flush - insert a new FLUSH/FUA request
+ * @rq: request to insert
+ *
+ * To be called from elv_insert() for %ELEVATOR_INSERT_FLUSH insertions.
+ * @rq is being submitted.  Analyze what needs to be done and put it on the
+ * right queue.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ */
+void blk_insert_flush(struct request *rq)
 {
-       rq->cmd_type = REQ_TYPE_FS;
-       rq->cmd_flags = WRITE_FLUSH;
-       rq->rq_disk = disk;
-}
+       struct request_queue *q = rq->q;
+       unsigned int fflags = q->flush_flags;   /* may change, cache */
+       unsigned int policy = blk_flush_policy(fflags, rq);
 
-static struct request *queue_next_fseq(struct request_queue *q)
-{
-       struct request *orig_rq = q->orig_flush_rq;
-       struct request *rq = &q->flush_rq;
+       BUG_ON(rq->end_io);
+       BUG_ON(!rq->bio || rq->bio != rq->biotail);
 
-       blk_rq_init(q, rq);
+       /*
+        * @policy now records what operations need to be done.  Adjust
+        * REQ_FLUSH and FUA for the driver.
+        */
+       rq->cmd_flags &= ~REQ_FLUSH;
+       if (!(fflags & REQ_FUA))
+               rq->cmd_flags &= ~REQ_FUA;
 
-       switch (blk_flush_cur_seq(q)) {
-       case QUEUE_FSEQ_PREFLUSH:
-               init_flush_request(rq, orig_rq->rq_disk);
-               rq->end_io = pre_flush_end_io;
-               break;
-       case QUEUE_FSEQ_DATA:
-               init_request_from_bio(rq, orig_rq->bio);
-               /*
-                * orig_rq->rq_disk may be different from
-                * bio->bi_bdev->bd_disk if orig_rq got here through
-                * remapping drivers.  Make sure rq->rq_disk points
-                * to the same one as orig_rq.
-                */
-               rq->rq_disk = orig_rq->rq_disk;
-               rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
-               rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
-               rq->end_io = flush_data_end_io;
-               break;
-       case QUEUE_FSEQ_POSTFLUSH:
-               init_flush_request(rq, orig_rq->rq_disk);
-               rq->end_io = post_flush_end_io;
-               break;
-       default:
-               BUG();
+       /*
+        * If there's data but flush is not necessary, the request can be
+        * processed directly without going through flush machinery.  Queue
+        * for normal execution.
+        */
+       if ((policy & REQ_FSEQ_DATA) &&
+           !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
+               list_add(&rq->queuelist, &q->queue_head);
+               return;
        }
 
-       elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
-       return rq;
+       /*
+        * @rq should go through flush machinery.  Mark it part of flush
+        * sequence and submit for further processing.
+        */
+       memset(&rq->flush, 0, sizeof(rq->flush));
+       INIT_LIST_HEAD(&rq->flush.list);
+       rq->cmd_flags |= REQ_FLUSH_SEQ;
+       rq->end_io = flush_data_end_io;
+
+       blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
 }
 
-struct request *blk_do_flush(struct request_queue *q, struct request *rq)
+/**
+ * blk_abort_flushes - @q is being aborted, abort flush requests
+ * @q: request_queue being aborted
+ *
+ * To be called from elv_abort_queue().  @q is being aborted.  Prepare all
+ * FLUSH/FUA requests for abortion.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ */
+void blk_abort_flushes(struct request_queue *q)
 {
-       unsigned int fflags = q->flush_flags; /* may change, cache it */
-       bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
-       bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
-       bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
-       unsigned skip = 0;
+       struct request *rq, *n;
+       int i;
 
        /*
-        * Special case.  If there's data but flush is not necessary,
-        * the request can be issued directly.
-        *
-        * Flush w/o data should be able to be issued directly too but
-        * currently some drivers assume that rq->bio contains
-        * non-zero data if it isn't NULL and empty FLUSH requests
-        * getting here usually have bio's without data.
+        * Requests in flight for data are already owned by the dispatch
+        * queue or the device driver.  Just restore for normal completion.
         */
-       if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
-               rq->cmd_flags &= ~REQ_FLUSH;
-               if (!has_fua)
-                       rq->cmd_flags &= ~REQ_FUA;
-               return rq;
+       list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) {
+               list_del_init(&rq->flush.list);
+               blk_flush_restore_request(rq);
        }
 
        /*
-        * Sequenced flushes can't be processed in parallel.  If
-        * another one is already in progress, queue for later
-        * processing.
+        * We need to give away requests on flush queues.  Restore for
+        * normal completion and put them on the dispatch queue.
         */
-       if (q->flush_seq) {
-               list_move_tail(&rq->queuelist, &q->pending_flushes);
-               return NULL;
+       for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) {
+               list_for_each_entry_safe(rq, n, &q->flush_queue[i],
+                                        flush.list) {
+                       list_del_init(&rq->flush.list);
+                       blk_flush_restore_request(rq);
+                       list_add_tail(&rq->queuelist, &q->queue_head);
+               }
        }
-
-       /*
-        * Start a new flush sequence
-        */
-       q->flush_err = 0;
-       q->flush_seq |= QUEUE_FSEQ_STARTED;
-
-       /* adjust FLUSH/FUA of the original request and stash it away */
-       rq->cmd_flags &= ~REQ_FLUSH;
-       if (!has_fua)
-               rq->cmd_flags &= ~REQ_FUA;
-       blk_dequeue_request(rq);
-       q->orig_flush_rq = rq;
-
-       /* skip unneded sequences and return the first one */
-       if (!do_preflush)
-               skip |= QUEUE_FSEQ_PREFLUSH;
-       if (!blk_rq_sectors(rq))
-               skip |= QUEUE_FSEQ_DATA;
-       if (!do_postflush)
-               skip |= QUEUE_FSEQ_POSTFLUSH;
-       return blk_flush_complete_seq(q, skip, 0);
 }
 
 static void bio_end_flush(struct bio *bio, int err)
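
The new flush machinery above turns each FLUSH/FUA request into a bitmask of
sequence steps (REQ_FSEQ_PREFLUSH, REQ_FSEQ_DATA, REQ_FSEQ_POSTFLUSH) and
double-buffers waiting requests across q->flush_queue[2], toggling
flush_pending_idx whenever the shared q->flush_rq is issued.  The mask comes
from blk_flush_policy(), whose body is not part of this hunk; the following is
only a sketch of how such a policy could be derived from the request flags and
the queue's advertised flush_flags, and the exact guard conditions are an
assumption, not the verbatim kernel function.

    static unsigned int example_flush_policy(unsigned int fflags,
                                             struct request *rq)
    {
            unsigned int policy = 0;

            /* A data step is needed whenever the request carries payload. */
            if (blk_rq_sectors(rq))
                    policy |= REQ_FSEQ_DATA;

            if (fflags & REQ_FLUSH) {
                    /* Writeback cache present: honor the explicit flush. */
                    if (rq->cmd_flags & REQ_FLUSH)
                            policy |= REQ_FSEQ_PREFLUSH;
                    /* No native FUA: emulate it with a flush after the data. */
                    if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
                            policy |= REQ_FSEQ_POSTFLUSH;
            }
            return policy;
    }

A request whose mask reduces to REQ_FSEQ_DATA alone bypasses the flush
machinery entirely, as the fast path in blk_insert_flush() above shows.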
index bd3e8df4d5e2b45a0ae89d90de025cfbaa12eed9..25de73e4759b82d624c2a89b545baf6ea859f624 100644 (file)
@@ -136,8 +136,6 @@ static void bio_batch_end_io(struct bio *bio, int err)
  *
  * Description:
  *  Generate and issue a number of bios with zero-filled pages.
- *  Send barrier at the beginning and at the end if requested. This guarantie
- *  correct request ordering. Empty barrier allow us to avoid post queue flush.
  */
 
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
index ea85e20d5e9462be965f3f117bd7c3efcf51aa57..cfcc37cb222b63722bc048814c0f392205d1a898 100644 (file)
@@ -465,3 +465,9 @@ int attempt_front_merge(struct request_queue *q, struct request *rq)
 
        return 0;
 }
+
+int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+                         struct request *next)
+{
+       return attempt_merge(q, rq, next);
+}
index 36c8c1f2af18088fb5fa34f4889d2fead2556600..1fa7692935976f4e60393e8d7289acf1c56c74c3 100644 (file)
@@ -164,24 +164,9 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
        blk_queue_congestion_threshold(q);
        q->nr_batching = BLK_BATCH_REQ;
 
-       q->unplug_thresh = 4;           /* hmm */
-       q->unplug_delay = msecs_to_jiffies(3);  /* 3 milliseconds */
-       if (q->unplug_delay == 0)
-               q->unplug_delay = 1;
-
-       q->unplug_timer.function = blk_unplug_timeout;
-       q->unplug_timer.data = (unsigned long)q;
-
        blk_set_default_limits(&q->limits);
        blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
 
-       /*
-        * If the caller didn't supply a lock, fall back to our embedded
-        * per-queue locks
-        */
-       if (!q->queue_lock)
-               q->queue_lock = &q->__queue_lock;
-
        /*
         * by default assume old behaviour and bounce for any highmem page
         */
index 41fb69150b4d3c6758b0f5da6ff2005d4d7a0a3f..261c75c665ae381a4c12dd444f7ed3b847c155a0 100644 (file)
@@ -471,8 +471,6 @@ static void blk_release_queue(struct kobject *kobj)
 
        blk_sync_queue(q);
 
-       blk_throtl_exit(q);
-
        if (rl->rq_pool)
                mempool_destroy(rl->rq_pool);
 
index e36cc10a346c83bfd233a0b71421486180518e2f..5352bdafbcf0fe6dcf761e6c1f4f36b12492f64e 100644 (file)
@@ -102,7 +102,7 @@ struct throtl_data
        /* Work for dispatching throttled bios */
        struct delayed_work throtl_work;
 
-       atomic_t limits_changed;
+       bool limits_changed;
 };
 
 enum tg_state_flags {
@@ -201,6 +201,7 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
        RB_CLEAR_NODE(&tg->rb_node);
        bio_list_init(&tg->bio_lists[0]);
        bio_list_init(&tg->bio_lists[1]);
+       td->limits_changed = false;
 
        /*
         * Take the initial reference that will be released on destroy
@@ -737,34 +738,36 @@ static void throtl_process_limit_change(struct throtl_data *td)
        struct throtl_grp *tg;
        struct hlist_node *pos, *n;
 
-       if (!atomic_read(&td->limits_changed))
+       if (!td->limits_changed)
                return;
 
-       throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));
+       xchg(&td->limits_changed, false);
 
-       /*
-        * Make sure updates from throtl_update_blkio_group_read_bps() group
-        * of functions to tg->limits_changed are visible. We do not
-        * want update td->limits_changed to be visible but update to
-        * tg->limits_changed not being visible yet on this cpu. Hence
-        * the read barrier.
-        */
-       smp_rmb();
+       throtl_log(td, "limits changed");
 
        hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
-               if (throtl_tg_on_rr(tg) && tg->limits_changed) {
-                       throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
-                               " riops=%u wiops=%u", tg->bps[READ],
-                               tg->bps[WRITE], tg->iops[READ],
-                               tg->iops[WRITE]);
+               if (!tg->limits_changed)
+                       continue;
+
+               if (!xchg(&tg->limits_changed, false))
+                       continue;
+
+               throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
+                       " riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE],
+                       tg->iops[READ], tg->iops[WRITE]);
+
+               /*
+                * Restart the slices for both READ and WRITES. It
+                * Restart the slices for both READ and WRITE. It
+                * might happen that a group's limits are dropped
+                * suddenly and we don't want to account recently
+                * dispatched IO at the new low rate.
+               throtl_start_new_slice(td, tg, 0);
+               throtl_start_new_slice(td, tg, 1);
+
+               if (throtl_tg_on_rr(tg))
                        tg_update_disptime(td, tg);
-                       tg->limits_changed = false;
-               }
        }
-
-       smp_mb__before_atomic_dec();
-       atomic_dec(&td->limits_changed);
-       smp_mb__after_atomic_dec();
 }
 
 /* Dispatch throttled bios. Should be called without queue lock held. */
@@ -774,6 +777,7 @@ static int throtl_dispatch(struct request_queue *q)
        unsigned int nr_disp = 0;
        struct bio_list bio_list_on_stack;
        struct bio *bio;
+       struct blk_plug plug;
 
        spin_lock_irq(q->queue_lock);
 
@@ -802,9 +806,10 @@ out:
         * immediate dispatch
         */
        if (nr_disp) {
+               blk_start_plug(&plug);
                while((bio = bio_list_pop(&bio_list_on_stack)))
                        generic_make_request(bio);
-               blk_unplug(q);
+               blk_finish_plug(&plug);
        }
        return nr_disp;
 }
@@ -825,7 +830,8 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
 
        struct delayed_work *dwork = &td->throtl_work;
 
-       if (total_nr_queued(td) > 0) {
+       /* schedule work if limits changed even if no bio is queued */
+       if (total_nr_queued(td) > 0 || td->limits_changed) {
                /*
                 * We might have a work scheduled to be executed in future.
                 * Cancel that and schedule a new one.
@@ -898,6 +904,15 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg)
        spin_unlock_irqrestore(td->queue->queue_lock, flags);
 }
 
+static void throtl_update_blkio_group_common(struct throtl_data *td,
+                               struct throtl_grp *tg)
+{
+       xchg(&tg->limits_changed, true);
+       xchg(&td->limits_changed, true);
+       /* Schedule a work now to process the limit change */
+       throtl_schedule_delayed_work(td, 0);
+}
+
 /*
  * For all update functions, key should be a valid pointer because these
  * update functions are called under blkcg_lock, that means, blkg is
@@ -911,64 +926,43 @@ static void throtl_update_blkio_group_read_bps(void *key,
                                struct blkio_group *blkg, u64 read_bps)
 {
        struct throtl_data *td = key;
+       struct throtl_grp *tg = tg_of_blkg(blkg);
 
-       tg_of_blkg(blkg)->bps[READ] = read_bps;
-       /* Make sure read_bps is updated before setting limits_changed */
-       smp_wmb();
-       tg_of_blkg(blkg)->limits_changed = true;
-
-       /* Make sure tg->limits_changed is updated before td->limits_changed */
-       smp_mb__before_atomic_inc();
-       atomic_inc(&td->limits_changed);
-       smp_mb__after_atomic_inc();
-
-       /* Schedule a work now to process the limit change */
-       throtl_schedule_delayed_work(td, 0);
+       tg->bps[READ] = read_bps;
+       throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_bps(void *key,
                                struct blkio_group *blkg, u64 write_bps)
 {
        struct throtl_data *td = key;
+       struct throtl_grp *tg = tg_of_blkg(blkg);
 
-       tg_of_blkg(blkg)->bps[WRITE] = write_bps;
-       smp_wmb();
-       tg_of_blkg(blkg)->limits_changed = true;
-       smp_mb__before_atomic_inc();
-       atomic_inc(&td->limits_changed);
-       smp_mb__after_atomic_inc();
-       throtl_schedule_delayed_work(td, 0);
+       tg->bps[WRITE] = write_bps;
+       throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_read_iops(void *key,
                        struct blkio_group *blkg, unsigned int read_iops)
 {
        struct throtl_data *td = key;
+       struct throtl_grp *tg = tg_of_blkg(blkg);
 
-       tg_of_blkg(blkg)->iops[READ] = read_iops;
-       smp_wmb();
-       tg_of_blkg(blkg)->limits_changed = true;
-       smp_mb__before_atomic_inc();
-       atomic_inc(&td->limits_changed);
-       smp_mb__after_atomic_inc();
-       throtl_schedule_delayed_work(td, 0);
+       tg->iops[READ] = read_iops;
+       throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_iops(void *key,
                        struct blkio_group *blkg, unsigned int write_iops)
 {
        struct throtl_data *td = key;
+       struct throtl_grp *tg = tg_of_blkg(blkg);
 
-       tg_of_blkg(blkg)->iops[WRITE] = write_iops;
-       smp_wmb();
-       tg_of_blkg(blkg)->limits_changed = true;
-       smp_mb__before_atomic_inc();
-       atomic_inc(&td->limits_changed);
-       smp_mb__after_atomic_inc();
-       throtl_schedule_delayed_work(td, 0);
+       tg->iops[WRITE] = write_iops;
+       throtl_update_blkio_group_common(td, tg);
 }
 
-void throtl_shutdown_timer_wq(struct request_queue *q)
+static void throtl_shutdown_wq(struct request_queue *q)
 {
        struct throtl_data *td = q->td;
 
@@ -1009,20 +1003,28 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
                /*
                 * There is already another bio queued in same dir. No
                 * need to update dispatch time.
-                * Still update the disptime if rate limits on this group
-                * were changed.
                 */
-               if (!tg->limits_changed)
-                       update_disptime = false;
-               else
-                       tg->limits_changed = false;
-
+               update_disptime = false;
                goto queue_bio;
+
        }
 
        /* Bio is with-in rate limit of group */
        if (tg_may_dispatch(td, tg, bio, NULL)) {
                throtl_charge_bio(tg, bio);
+
+               /*
+                * We need to trim the slice even when bios are not being
+                * queued, otherwise a bio may go unqueued for a long time
+                * while the slice keeps extending and trim is never called.
+                * If limits are then reduced suddenly, all the IO dispatched
+                * so far would be accounted at the new low rate and newly
+                * queued IO would get a really long dispatch time.
+                *
+                * So keep on trimming the slice even if no bio is queued.
+                */
+               throtl_trim_slice(td, tg, rw);
                goto out;
        }
 
@@ -1058,7 +1060,7 @@ int blk_throtl_init(struct request_queue *q)
 
        INIT_HLIST_HEAD(&td->tg_list);
        td->tg_service_tree = THROTL_RB_ROOT;
-       atomic_set(&td->limits_changed, 0);
+       td->limits_changed = false;
 
        /* Init root group */
        tg = &td->root_tg;
@@ -1070,6 +1072,7 @@ int blk_throtl_init(struct request_queue *q)
        /* Practically unlimited BW */
        tg->bps[0] = tg->bps[1] = -1;
        tg->iops[0] = tg->iops[1] = -1;
+       td->limits_changed = false;
 
        /*
         * Set root group reference to 2. One reference will be dropped when
@@ -1102,7 +1105,7 @@ void blk_throtl_exit(struct request_queue *q)
 
        BUG_ON(!td);
 
-       throtl_shutdown_timer_wq(q);
+       throtl_shutdown_wq(q);
 
        spin_lock_irq(q->queue_lock);
        throtl_release_tgs(td);
@@ -1132,7 +1135,7 @@ void blk_throtl_exit(struct request_queue *q)
         * update limits through cgroup and another work got queued, cancel
         * it.
         */
-       throtl_shutdown_timer_wq(q);
+       throtl_shutdown_wq(q);
        throtl_td_free(td);
 }
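
The conversion above replaces the atomic_t counter and the explicit
smp_wmb()/smp_mb__* pairs with plain bools flipped through xchg().  xchg()
implies a full memory barrier and returns the old value, so the producer's
data stores are ordered before the flag becomes visible and the consumer
claims the flag atomically; an update racing with the dispatch loop is
neither lost nor applied twice.  A minimal sketch of that handoff, with
illustrative names that are not taken from blk-throttle.c:

    #include <linux/types.h>  /* bool, u64; xchg() comes from the arch headers */

    static bool limit_pending;
    static u64 limit_value;

    static void apply_limit(u64 val);       /* hypothetical helper */

    /* Producer: publish the new value, then raise the flag. */
    static void set_limit(u64 val)
    {
            limit_value = val;
            xchg(&limit_pending, true);
    }

    /* Consumer: claim and clear the flag in one atomic step. */
    static void poll_limit(void)
    {
            if (!xchg(&limit_pending, false))
                    return;
            apply_limit(limit_value);
    }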
 
index 2db8f32838e7336d1a53bbddc8a4c01ec0c3489f..c8db371a921d55760e2d9494c25a0f52c16bd659 100644 (file)
@@ -18,8 +18,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
 
-void blk_unplug_work(struct work_struct *work);
-void blk_unplug_timeout(unsigned long data);
 void blk_rq_timed_out_timer(unsigned long data);
 void blk_delete_timer(struct request *);
 void blk_add_timer(struct request *);
@@ -51,21 +49,17 @@ static inline void blk_clear_rq_complete(struct request *rq)
  */
 #define ELV_ON_HASH(rq)                (!hlist_unhashed(&(rq)->hash))
 
-struct request *blk_do_flush(struct request_queue *q, struct request *rq);
+void blk_insert_flush(struct request *rq);
+void blk_abort_flushes(struct request_queue *q);
 
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
        struct request *rq;
 
        while (1) {
-               while (!list_empty(&q->queue_head)) {
+               if (!list_empty(&q->queue_head)) {
                        rq = list_entry_rq(q->queue_head.next);
-                       if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) ||
-                           rq == &q->flush_rq)
-                               return rq;
-                       rq = blk_do_flush(q, rq);
-                       if (rq)
-                               return rq;
+                       return rq;
                }
 
                if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
@@ -109,6 +103,8 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
                      struct bio *bio);
 int attempt_back_merge(struct request_queue *q, struct request *rq);
 int attempt_front_merge(struct request_queue *q, struct request *rq);
+int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+                               struct request *next);
 void blk_recalc_rq_segments(struct request *rq);
 void blk_rq_set_mixed_merge(struct request *rq);
 
index ea83a4f0c27dfda658ee41d87979a2d57c58450d..7785169f3c8f86df03887fe98d9af988b260e8ea 100644 (file)
@@ -54,9 +54,9 @@ static const int cfq_hist_divisor = 4;
 #define CFQQ_SEEKY(cfqq)       (hweight32(cfqq->seek_history) > 32/8)
 
 #define RQ_CIC(rq)             \
-       ((struct cfq_io_context *) (rq)->elevator_private)
-#define RQ_CFQQ(rq)            (struct cfq_queue *) ((rq)->elevator_private2)
-#define RQ_CFQG(rq)            (struct cfq_group *) ((rq)->elevator_private3)
+       ((struct cfq_io_context *) (rq)->elevator_private[0])
+#define RQ_CFQQ(rq)            (struct cfq_queue *) ((rq)->elevator_private[1])
+#define RQ_CFQG(rq)            (struct cfq_group *) ((rq)->elevator_private[2])
 
 static struct kmem_cache *cfq_pool;
 static struct kmem_cache *cfq_ioc_pool;
@@ -146,7 +146,6 @@ struct cfq_queue {
        struct cfq_rb_root *service_tree;
        struct cfq_queue *new_cfqq;
        struct cfq_group *cfqg;
-       struct cfq_group *orig_cfqg;
        /* Number of sectors dispatched from queue in single dispatch round */
        unsigned long nr_sectors;
 };
@@ -179,6 +178,8 @@ struct cfq_group {
        /* group service_tree key */
        u64 vdisktime;
        unsigned int weight;
+       unsigned int new_weight;
+       bool needs_update;
 
        /* number of cfqq currently on this group */
        int nr_cfqq;
@@ -238,6 +239,7 @@ struct cfq_data {
        struct rb_root prio_trees[CFQ_PRIO_LISTS];
 
        unsigned int busy_queues;
+       unsigned int busy_sync_queues;
 
        int rq_in_driver;
        int rq_in_flight[2];
@@ -285,7 +287,6 @@ struct cfq_data {
        unsigned int cfq_slice_idle;
        unsigned int cfq_group_idle;
        unsigned int cfq_latency;
-       unsigned int cfq_group_isolation;
 
        unsigned int cic_index;
        struct list_head cic_list;
@@ -501,13 +502,6 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
        }
 }
 
-static int cfq_queue_empty(struct request_queue *q)
-{
-       struct cfq_data *cfqd = q->elevator->elevator_data;
-
-       return !cfqd->rq_queued;
-}
-
 /*
  * Scale schedule slice based on io priority. Use the sync time slice only
  * if a queue is marked sync and has sync io queued. A sync queue with async
@@ -558,15 +552,13 @@ static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
 
 static void update_min_vdisktime(struct cfq_rb_root *st)
 {
-       u64 vdisktime = st->min_vdisktime;
        struct cfq_group *cfqg;
 
        if (st->left) {
                cfqg = rb_entry_cfqg(st->left);
-               vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
+               st->min_vdisktime = max_vdisktime(st->min_vdisktime,
+                                                 cfqg->vdisktime);
        }
-
-       st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
 }
 
 /*
@@ -863,7 +855,27 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 }
 
 static void
-cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
+cfq_update_group_weight(struct cfq_group *cfqg)
+{
+       BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
+       if (cfqg->needs_update) {
+               cfqg->weight = cfqg->new_weight;
+               cfqg->needs_update = false;
+       }
+}
+
+static void
+cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+       BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
+
+       cfq_update_group_weight(cfqg);
+       __cfq_group_service_tree_add(st, cfqg);
+       st->total_weight += cfqg->weight;
+}
+
+static void
+cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
        struct cfq_rb_root *st = &cfqd->grp_service_tree;
        struct cfq_group *__cfqg;
@@ -884,13 +896,19 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
                cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
        } else
                cfqg->vdisktime = st->min_vdisktime;
+       cfq_group_service_tree_add(st, cfqg);
+}
 
-       __cfq_group_service_tree_add(st, cfqg);
-       st->total_weight += cfqg->weight;
+static void
+cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+       st->total_weight -= cfqg->weight;
+       if (!RB_EMPTY_NODE(&cfqg->rb_node))
+               cfq_rb_erase(&cfqg->rb_node, st);
 }
 
 static void
-cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
+cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
        struct cfq_rb_root *st = &cfqd->grp_service_tree;
 
@@ -902,14 +920,13 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
                return;
 
        cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
-       st->total_weight -= cfqg->weight;
-       if (!RB_EMPTY_NODE(&cfqg->rb_node))
-               cfq_rb_erase(&cfqg->rb_node, st);
+       cfq_group_service_tree_del(st, cfqg);
        cfqg->saved_workload_slice = 0;
        cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
 }
 
-static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
+static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
+                                               unsigned int *unaccounted_time)
 {
        unsigned int slice_used;
 
@@ -928,8 +945,13 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
                                        1);
        } else {
                slice_used = jiffies - cfqq->slice_start;
-               if (slice_used > cfqq->allocated_slice)
+               if (slice_used > cfqq->allocated_slice) {
+                       *unaccounted_time = slice_used - cfqq->allocated_slice;
                        slice_used = cfqq->allocated_slice;
+               }
+               if (time_after(cfqq->slice_start, cfqq->dispatch_start))
+                       *unaccounted_time += cfqq->slice_start -
+                                       cfqq->dispatch_start;
        }
 
        return slice_used;
@@ -939,12 +961,12 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
                                struct cfq_queue *cfqq)
 {
        struct cfq_rb_root *st = &cfqd->grp_service_tree;
-       unsigned int used_sl, charge;
+       unsigned int used_sl, charge, unaccounted_sl = 0;
        int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
                        - cfqg->service_tree_idle.count;
 
        BUG_ON(nr_sync < 0);
-       used_sl = charge = cfq_cfqq_slice_usage(cfqq);
+       used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
 
        if (iops_mode(cfqd))
                charge = cfqq->slice_dispatch;
@@ -952,9 +974,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
                charge = cfqq->allocated_slice;
 
        /* Can't update vdisktime while group is on service tree */
-       cfq_rb_erase(&cfqg->rb_node, st);
+       cfq_group_service_tree_del(st, cfqg);
        cfqg->vdisktime += cfq_scale_slice(charge, cfqg);
-       __cfq_group_service_tree_add(st, cfqg);
+       /* If a new weight was requested, update now, off tree */
+       cfq_group_service_tree_add(st, cfqg);
 
        /* This group is being expired. Save the context */
        if (time_after(cfqd->workload_expires, jiffies)) {
@@ -970,7 +993,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
        cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u"
                        " sect=%u", used_sl, cfqq->slice_dispatch, charge,
                        iops_mode(cfqd), cfqq->nr_sectors);
-       cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
+       cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
+                                         unaccounted_sl);
        cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
 }
 
@@ -985,7 +1009,9 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
 void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
                                        unsigned int weight)
 {
-       cfqg_of_blkg(blkg)->weight = weight;
+       struct cfq_group *cfqg = cfqg_of_blkg(blkg);
+       cfqg->new_weight = weight;
+       cfqg->needs_update = true;
 }
 
 static struct cfq_group *
@@ -1187,32 +1213,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        int new_cfqq = 1;
        int group_changed = 0;
 
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-       if (!cfqd->cfq_group_isolation
-           && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD
-           && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) {
-               /* Move this cfq to root group */
-               cfq_log_cfqq(cfqd, cfqq, "moving to root group");
-               if (!RB_EMPTY_NODE(&cfqq->rb_node))
-                       cfq_group_service_tree_del(cfqd, cfqq->cfqg);
-               cfqq->orig_cfqg = cfqq->cfqg;
-               cfqq->cfqg = &cfqd->root_group;
-               cfqd->root_group.ref++;
-               group_changed = 1;
-       } else if (!cfqd->cfq_group_isolation
-                  && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
-               /* cfqq is sequential now needs to go to its original group */
-               BUG_ON(cfqq->cfqg != &cfqd->root_group);
-               if (!RB_EMPTY_NODE(&cfqq->rb_node))
-                       cfq_group_service_tree_del(cfqd, cfqq->cfqg);
-               cfq_put_cfqg(cfqq->cfqg);
-               cfqq->cfqg = cfqq->orig_cfqg;
-               cfqq->orig_cfqg = NULL;
-               group_changed = 1;
-               cfq_log_cfqq(cfqd, cfqq, "moved to origin group");
-       }
-#endif
-
        service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
                                                cfqq_type(cfqq));
        if (cfq_class_idle(cfqq)) {
@@ -1284,7 +1284,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        service_tree->count++;
        if ((add_front || !new_cfqq) && !group_changed)
                return;
-       cfq_group_service_tree_add(cfqd, cfqq->cfqg);
+       cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
 }
 
 static struct cfq_queue *
@@ -1372,6 +1372,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
        BUG_ON(cfq_cfqq_on_rr(cfqq));
        cfq_mark_cfqq_on_rr(cfqq);
        cfqd->busy_queues++;
+       if (cfq_cfqq_sync(cfqq))
+               cfqd->busy_sync_queues++;
 
        cfq_resort_rr_list(cfqd, cfqq);
 }
@@ -1395,9 +1397,11 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
                cfqq->p_root = NULL;
        }
 
-       cfq_group_service_tree_del(cfqd, cfqq->cfqg);
+       cfq_group_notify_queue_del(cfqd, cfqq->cfqg);
        BUG_ON(!cfqd->busy_queues);
        cfqd->busy_queues--;
+       if (cfq_cfqq_sync(cfqq))
+               cfqd->busy_sync_queues--;
 }
 
 /*
@@ -2405,22 +2409,34 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
         * Does this cfqq already have too much IO in flight?
         */
        if (cfqq->dispatched >= max_dispatch) {
+               bool promote_sync = false;
                /*
                 * idle queue must always only have a single IO in flight
                 */
                if (cfq_class_idle(cfqq))
                        return false;
 
+               /*
+                * If there is only one sync queue, we can ignore the async
+                * queues here and give the sync queue no dispatch limit,
+                * because a sync queue can preempt an async queue anyway,
+                * so limiting it makes no sense. This is useful for the
+                * aiostress test.
+                */
+               if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1)
+                       promote_sync = true;
+
                /*
                 * We have other queues, don't allow more IO from this one
                 */
-               if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq))
+               if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) &&
+                               !promote_sync)
                        return false;
 
                /*
                 * Sole queue user, no limit
                 */
-               if (cfqd->busy_queues == 1)
+               if (cfqd->busy_queues == 1 || promote_sync)
                        max_dispatch = -1;
                else
                        /*
@@ -2542,7 +2558,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
 static void cfq_put_queue(struct cfq_queue *cfqq)
 {
        struct cfq_data *cfqd = cfqq->cfqd;
-       struct cfq_group *cfqg, *orig_cfqg;
+       struct cfq_group *cfqg;
 
        BUG_ON(cfqq->ref <= 0);
 
@@ -2554,7 +2570,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
        BUG_ON(rb_first(&cfqq->sort_list));
        BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
        cfqg = cfqq->cfqg;
-       orig_cfqg = cfqq->orig_cfqg;
 
        if (unlikely(cfqd->active_queue == cfqq)) {
                __cfq_slice_expired(cfqd, cfqq, 0);
@@ -2564,8 +2579,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
        BUG_ON(cfq_cfqq_on_rr(cfqq));
        kmem_cache_free(cfq_pool, cfqq);
        cfq_put_cfqg(cfqg);
-       if (orig_cfqg)
-               cfq_put_cfqg(orig_cfqg);
 }
 
 /*
@@ -3613,12 +3626,12 @@ static void cfq_put_request(struct request *rq)
 
                put_io_context(RQ_CIC(rq)->ioc);
 
-               rq->elevator_private = NULL;
-               rq->elevator_private2 = NULL;
+               rq->elevator_private[0] = NULL;
+               rq->elevator_private[1] = NULL;
 
                /* Put down rq reference on cfqg */
                cfq_put_cfqg(RQ_CFQG(rq));
-               rq->elevator_private3 = NULL;
+               rq->elevator_private[2] = NULL;
 
                cfq_put_queue(cfqq);
        }
@@ -3705,13 +3718,12 @@ new_queue:
        }
 
        cfqq->allocated[rw]++;
-       cfqq->ref++;
-       rq->elevator_private = cic;
-       rq->elevator_private2 = cfqq;
-       rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
 
+       cfqq->ref++;
+       rq->elevator_private[0] = cic;
+       rq->elevator_private[1] = cfqq;
+       rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
        spin_unlock_irqrestore(q->queue_lock, flags);
-
        return 0;
 
 queue_fail:
@@ -3953,7 +3965,6 @@ static void *cfq_init_queue(struct request_queue *q)
        cfqd->cfq_slice_idle = cfq_slice_idle;
        cfqd->cfq_group_idle = cfq_group_idle;
        cfqd->cfq_latency = 1;
-       cfqd->cfq_group_isolation = 0;
        cfqd->hw_tag = -1;
        /*
         * we optimistically start assuming sync ops weren't delayed in last
@@ -4029,7 +4040,6 @@ SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
 SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
-SHOW_FUNCTION(cfq_group_isolation_show, cfqd->cfq_group_isolation, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)                        \
@@ -4063,7 +4073,6 @@ STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
                UINT_MAX, 0);
 STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
-STORE_FUNCTION(cfq_group_isolation_store, &cfqd->cfq_group_isolation, 0, 1, 0);
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -4081,7 +4090,6 @@ static struct elv_fs_entry cfq_attrs[] = {
        CFQ_ATTR(slice_idle),
        CFQ_ATTR(group_idle),
        CFQ_ATTR(low_latency),
-       CFQ_ATTR(group_isolation),
        __ATTR_NULL
 };
 
@@ -4096,7 +4104,6 @@ static struct elevator_type iosched_cfq = {
                .elevator_add_req_fn =          cfq_insert_request,
                .elevator_activate_req_fn =     cfq_activate_request,
                .elevator_deactivate_req_fn =   cfq_deactivate_request,
-               .elevator_queue_empty_fn =      cfq_queue_empty,
                .elevator_completed_req_fn =    cfq_completed_request,
                .elevator_former_req_fn =       elv_rb_former_request,
                .elevator_latter_req_fn =       elv_rb_latter_request,
index 54a6d90f8e8c914564546c1e3f699a298e8cc0e9..2a155927e37ce50a936019cecab31805945c4e45 100644 (file)
@@ -16,9 +16,9 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg,
 }
 
 static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg,
-                       unsigned long time)
+                       unsigned long time, unsigned long unaccounted_time)
 {
-       blkiocg_update_timeslice_used(blkg, time);
+       blkiocg_update_timeslice_used(blkg, time, unaccounted_time);
 }
 
 static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg)
@@ -85,7 +85,7 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg,
                        unsigned long dequeue) {}
 
 static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg,
-                       unsigned long time) {}
+                       unsigned long time, unsigned long unaccounted_time) {}
 static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
 static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg,
                                bool direction, bool sync) {}
index b547cbca7b23a55dd9d1f0b444bed07d297fc646..5139c0ea1864a858b6072febcf636a71e9bc6f13 100644 (file)
@@ -326,14 +326,6 @@ dispatch_request:
        return 1;
 }
 
-static int deadline_queue_empty(struct request_queue *q)
-{
-       struct deadline_data *dd = q->elevator->elevator_data;
-
-       return list_empty(&dd->fifo_list[WRITE])
-               && list_empty(&dd->fifo_list[READ]);
-}
-
 static void deadline_exit_queue(struct elevator_queue *e)
 {
        struct deadline_data *dd = e->elevator_data;
@@ -445,7 +437,6 @@ static struct elevator_type iosched_deadline = {
                .elevator_merge_req_fn =        deadline_merged_requests,
                .elevator_dispatch_fn =         deadline_dispatch_requests,
                .elevator_add_req_fn =          deadline_add_request,
-               .elevator_queue_empty_fn =      deadline_queue_empty,
                .elevator_former_req_fn =       elv_rb_former_request,
                .elevator_latter_req_fn =       elv_rb_latter_request,
                .elevator_init_fn =             deadline_init_queue,
index 236e93c1f46ce54d0f2ac40c48a85289f1d568fb..c387d3168734c21d74336b536819d35dc77719a5 100644 (file)
@@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_rq_merge_ok);
 
-static inline int elv_try_merge(struct request *__rq, struct bio *bio)
+int elv_try_merge(struct request *__rq, struct bio *bio)
 {
        int ret = ELEVATOR_NO_MERGE;
 
@@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
        struct list_head *entry;
        int stop_flags;
 
+       BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
        if (q->last_merge == rq)
                q->last_merge = NULL;
 
@@ -519,6 +521,40 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
        return ELEVATOR_NO_MERGE;
 }
 
+/*
+ * Attempt to do an insertion back merge. Only check for the case where
+ * we can append 'rq' to an existing request, so we can throw 'rq' away
+ * afterwards.
+ *
+ * Returns true if we merged, false otherwise
+ */
+static bool elv_attempt_insert_merge(struct request_queue *q,
+                                    struct request *rq)
+{
+       struct request *__rq;
+
+       if (blk_queue_nomerges(q))
+               return false;
+
+       /*
+        * First try one-hit cache.
+        */
+       if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
+               return true;
+
+       if (blk_queue_noxmerges(q))
+               return false;
+
+       /*
+        * See if our hash lookup can find a potential backmerge.
+        */
+       __rq = elv_rqhash_find(q, blk_rq_pos(rq));
+       if (__rq && blk_attempt_req_merge(q, __rq, rq))
+               return true;
+
+       return false;
+}
+
 void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 {
        struct elevator_queue *e = q->elevator;
@@ -536,14 +572,18 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
                             struct request *next)
 {
        struct elevator_queue *e = q->elevator;
+       const int next_sorted = next->cmd_flags & REQ_SORTED;
 
-       if (e->ops->elevator_merge_req_fn)
+       if (next_sorted && e->ops->elevator_merge_req_fn)
                e->ops->elevator_merge_req_fn(q, rq, next);
 
        elv_rqhash_reposition(q, rq);
-       elv_rqhash_del(q, next);
 
-       q->nr_sorted--;
+       if (next_sorted) {
+               elv_rqhash_del(q, next);
+               q->nr_sorted--;
+       }
+
        q->last_merge = rq;
 }
 
@@ -617,21 +657,12 @@ void elv_quiesce_end(struct request_queue *q)
 
 void elv_insert(struct request_queue *q, struct request *rq, int where)
 {
-       int unplug_it = 1;
-
        trace_block_rq_insert(q, rq);
 
        rq->q = q;
 
        switch (where) {
        case ELEVATOR_INSERT_REQUEUE:
-               /*
-                * Most requeues happen because of a busy condition,
-                * don't force unplug of the queue for that case.
-                * Clear unplug_it and fall through.
-                */
-               unplug_it = 0;
-
        case ELEVATOR_INSERT_FRONT:
                rq->cmd_flags |= REQ_SOFTBARRIER;
                list_add(&rq->queuelist, &q->queue_head);
@@ -654,6 +685,14 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
                __blk_run_queue(q, false);
                break;
 
+       case ELEVATOR_INSERT_SORT_MERGE:
+               /*
+                * If we succeed in merging this request with one in the
+                * queue already, we are done - rq has now been freed,
+                * so no need to do anything further.
+                */
+               if (elv_attempt_insert_merge(q, rq))
+                       break;
        case ELEVATOR_INSERT_SORT:
                BUG_ON(rq->cmd_type != REQ_TYPE_FS &&
                       !(rq->cmd_flags & REQ_DISCARD));
@@ -673,24 +712,21 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
                q->elevator->ops->elevator_add_req_fn(q, rq);
                break;
 
+       case ELEVATOR_INSERT_FLUSH:
+               rq->cmd_flags |= REQ_SOFTBARRIER;
+               blk_insert_flush(rq);
+               break;
        default:
                printk(KERN_ERR "%s: bad insertion point %d\n",
                       __func__, where);
                BUG();
        }
-
-       if (unplug_it && blk_queue_plugged(q)) {
-               int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
-                               - queue_in_flight(q);
-
-               if (nrq >= q->unplug_thresh)
-                       __generic_unplug_device(q);
-       }
 }
 
-void __elv_add_request(struct request_queue *q, struct request *rq, int where,
-                      int plug)
+void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
+       BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
        if (rq->cmd_flags & REQ_SOFTBARRIER) {
                /* barriers are scheduling boundary, update end_sector */
                if (rq->cmd_type == REQ_TYPE_FS ||
@@ -702,38 +738,20 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where,
                    where == ELEVATOR_INSERT_SORT)
                where = ELEVATOR_INSERT_BACK;
 
-       if (plug)
-               blk_plug_device(q);
-
        elv_insert(q, rq, where);
 }
 EXPORT_SYMBOL(__elv_add_request);
 
-void elv_add_request(struct request_queue *q, struct request *rq, int where,
-                    int plug)
+void elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
        unsigned long flags;
 
        spin_lock_irqsave(q->queue_lock, flags);
-       __elv_add_request(q, rq, where, plug);
+       __elv_add_request(q, rq, where);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(elv_add_request);
 
-int elv_queue_empty(struct request_queue *q)
-{
-       struct elevator_queue *e = q->elevator;
-
-       if (!list_empty(&q->queue_head))
-               return 0;
-
-       if (e->ops->elevator_queue_empty_fn)
-               return e->ops->elevator_queue_empty_fn(q);
-
-       return 1;
-}
-EXPORT_SYMBOL(elv_queue_empty);
-
 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 {
        struct elevator_queue *e = q->elevator;
@@ -759,7 +777,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
        if (e->ops->elevator_set_req_fn)
                return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
 
-       rq->elevator_private = NULL;
+       rq->elevator_private[0] = NULL;
        return 0;
 }
 
@@ -785,6 +803,8 @@ void elv_abort_queue(struct request_queue *q)
 {
        struct request *rq;
 
+       blk_abort_flushes(q);
+
        while (!list_empty(&q->queue_head)) {
                rq = list_entry_rq(q->queue_head.next);
                rq->cmd_flags |= REQ_QUIET;
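
ELEVATOR_INSERT_SORT_MERGE exists so that a caller which has been collecting
requests privately (presumably the on-stack plugging added elsewhere in this
series) can hand them back to the elevator and still get request-to-request
merging, since bio-level merging was skipped while the requests were held.
A hypothetical drain loop using it might look like the sketch below; the
batch list and the helper name are illustrative only.

    #include <linux/blkdev.h>
    #include <linux/elevator.h>

    static void example_drain(struct request_queue *q, struct list_head *batch)
    {
            spin_lock_irq(q->queue_lock);
            while (!list_empty(batch)) {
                    struct request *rq = list_entry_rq(batch->next);

                    list_del_init(&rq->queuelist);
                    /* Merged requests are freed by the elevator; the rest
                     * are sorted into the queue as usual. */
                    __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
            }
            __blk_run_queue(q, false);
            spin_unlock_irq(q->queue_lock);
    }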
index cbf1112a885c0c715e4d38a3dfad91f74cd1f1fa..c91a2dac6b6b282f5104c781b8f9509dda42f38d 100644 (file)
@@ -1158,14 +1158,14 @@ static int diskstats_show(struct seq_file *seqf, void *v)
                           "%u %lu %lu %llu %u %u %u %u\n",
                           MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
                           disk_name(gp, hd->partno, buf),
-                          part_stat_read(hd, ios[0]),
-                          part_stat_read(hd, merges[0]),
-                          (unsigned long long)part_stat_read(hd, sectors[0]),
-                          jiffies_to_msecs(part_stat_read(hd, ticks[0])),
-                          part_stat_read(hd, ios[1]),
-                          part_stat_read(hd, merges[1]),
-                          (unsigned long long)part_stat_read(hd, sectors[1]),
-                          jiffies_to_msecs(part_stat_read(hd, ticks[1])),
+                          part_stat_read(hd, ios[READ]),
+                          part_stat_read(hd, merges[READ]),
+                          (unsigned long long)part_stat_read(hd, sectors[READ]),
+                          jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
+                          part_stat_read(hd, ios[WRITE]),
+                          part_stat_read(hd, merges[WRITE]),
+                          (unsigned long long)part_stat_read(hd, sectors[WRITE]),
+                          jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
                           part_in_flight(hd),
                           jiffies_to_msecs(part_stat_read(hd, io_ticks)),
                           jiffies_to_msecs(part_stat_read(hd, time_in_queue))
@@ -1494,7 +1494,7 @@ void disk_block_events(struct gendisk *disk)
 void disk_unblock_events(struct gendisk *disk)
 {
        if (disk->ev)
-               __disk_unblock_events(disk, true);
+               __disk_unblock_events(disk, false);
 }
 
 /**
index 232c4b38cd3769d31e0c79700a49f42915c683a6..06389e9ef96d552836a1509ddf86745d82c09762 100644 (file)
@@ -39,13 +39,6 @@ static void noop_add_request(struct request_queue *q, struct request *rq)
        list_add_tail(&rq->queuelist, &nd->queue);
 }
 
-static int noop_queue_empty(struct request_queue *q)
-{
-       struct noop_data *nd = q->elevator->elevator_data;
-
-       return list_empty(&nd->queue);
-}
-
 static struct request *
 noop_former_request(struct request_queue *q, struct request *rq)
 {
@@ -90,7 +83,6 @@ static struct elevator_type elevator_noop = {
                .elevator_merge_req_fn          = noop_merged_requests,
                .elevator_dispatch_fn           = noop_dispatch,
                .elevator_add_req_fn            = noop_add_request,
-               .elevator_queue_empty_fn        = noop_queue_empty,
                .elevator_former_req_fn         = noop_former_request,
                .elevator_latter_req_fn         = noop_latter_request,
                .elevator_init_fn               = noop_init_queue,
index 1f286ab461d3d62471dbaa5323cd468b1080a842..79882104e431a283235b3e1cdee24343429366eb 100644 (file)
@@ -140,13 +140,14 @@ static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo)
        return 0;
 }
 
-static int DAC960_media_changed(struct gendisk *disk)
+static unsigned int DAC960_check_events(struct gendisk *disk,
+                                       unsigned int clearing)
 {
        DAC960_Controller_T *p = disk->queue->queuedata;
        int drive_nr = (long)disk->private_data;
 
        if (!p->LogicalDriveInitiallyAccessible[drive_nr])
-               return 1;
+               return DISK_EVENT_MEDIA_CHANGE;
        return 0;
 }
 
@@ -163,7 +164,7 @@ static const struct block_device_operations DAC960_BlockDeviceOperations = {
        .owner                  = THIS_MODULE,
        .open                   = DAC960_open,
        .getgeo                 = DAC960_getgeo,
-       .media_changed          = DAC960_media_changed,
+       .check_events           = DAC960_check_events,
        .revalidate_disk        = DAC960_revalidate_disk,
 };
 
@@ -2546,6 +2547,7 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
        disk->major = MajorNumber;
        disk->first_minor = n << DAC960_MaxPartitionsBits;
        disk->fops = &DAC960_BlockDeviceOperations;
+       disk->events = DISK_EVENT_MEDIA_CHANGE;
    }
   /*
     Indicate the Block Device Registration completed successfully,
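
This and the following legacy drivers are converted from the old
->media_changed() hook, which returned 0/1, to ->check_events(), which
returns a mask of DISK_EVENT_* bits and receives the mask of events user
space is asking to clear; the driver also advertises what it can report in
disk->events at setup time.  For a driver that can only detect media change
the translation is almost mechanical.  A hedged sketch with hypothetical
names (foo_*), not code from any of the drivers below:

    #include <linux/module.h>
    #include <linux/blkdev.h>
    #include <linux/genhd.h>

    struct foo_device {
            bool media_changed;             /* hypothetical driver state */
    };

    static unsigned int foo_check_events(struct gendisk *disk,
                                         unsigned int clearing)
    {
            struct foo_device *fd = disk->private_data;

            /* Report a pending media change as an event bit. */
            return fd->media_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
    }

    static const struct block_device_operations foo_fops = {
            .owner          = THIS_MODULE,
            .check_events   = foo_check_events,
    };

At probe time the driver would also set disk->events = DISK_EVENT_MEDIA_CHANGE,
mirroring the DAC960 hunk above, so the events layer knows which bits it may
poll for.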
index 363855ca376e6792f9684e44a4eea3b1c2abcb2d..456c0cc90dcfd58d87de3d34ace5dda354795112 100644 (file)
@@ -1658,12 +1658,12 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
 }
 
 /*
- * floppy-change is never called from an interrupt, so we can relax a bit
+ * check_events is never called from an interrupt, so we can relax a bit
  * here, sleep etc. Note that floppy-on tries to set current_DOR to point
  * to the desired drive, but it will probably not survive the sleep if
  * several floppies are used at the same time: thus the loop.
  */
-static int amiga_floppy_change(struct gendisk *disk)
+static unsigned amiga_check_events(struct gendisk *disk, unsigned int clearing)
 {
        struct amiga_floppy_struct *p = disk->private_data;
        int drive = p - unit;
@@ -1686,7 +1686,7 @@ static int amiga_floppy_change(struct gendisk *disk)
                p->dirty = 0;
                writepending = 0; /* if this was true before, too bad! */
                writefromint = 0;
-               return 1;
+               return DISK_EVENT_MEDIA_CHANGE;
        }
        return 0;
 }
@@ -1697,7 +1697,7 @@ static const struct block_device_operations floppy_fops = {
        .release        = floppy_release,
        .ioctl          = fd_ioctl,
        .getgeo         = fd_getgeo,
-       .media_changed  = amiga_floppy_change,
+       .check_events   = amiga_check_events,
 };
 
 static int __init fd_probe_drives(void)
@@ -1736,6 +1736,7 @@ static int __init fd_probe_drives(void)
                disk->major = FLOPPY_MAJOR;
                disk->first_minor = drive;
                disk->fops = &floppy_fops;
+               disk->events = DISK_EVENT_MEDIA_CHANGE;
                sprintf(disk->disk_name, "fd%d", drive);
                disk->private_data = &unit[drive];
                set_capacity(disk, 880*2);
index 605a67e40bbfc902609233ebc52d22f8bc008e23..c871eae14120423f2cd618742730a6f6b51edeb3 100644 (file)
@@ -1324,23 +1324,24 @@ static void finish_fdc_done( int dummy )
  * due to unrecognised disk changes.
  */
 
-static int check_floppy_change(struct gendisk *disk)
+static unsigned int floppy_check_events(struct gendisk *disk,
+                                       unsigned int clearing)
 {
        struct atari_floppy_struct *p = disk->private_data;
        unsigned int drive = p - unit;
        if (test_bit (drive, &fake_change)) {
                /* simulated change (e.g. after formatting) */
-               return 1;
+               return DISK_EVENT_MEDIA_CHANGE;
        }
        if (test_bit (drive, &changed_floppies)) {
                /* surely changed (the WP signal changed at least once) */
-               return 1;
+               return DISK_EVENT_MEDIA_CHANGE;
        }
        if (UD.wpstat) {
                /* WP is on -> could be changed: to be sure, buffers should be
                 * invalidated...
                 */
-               return 1;
+               return DISK_EVENT_MEDIA_CHANGE;
        }
 
        return 0;
@@ -1570,7 +1571,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
                 * or the next access will revalidate - and clear UDT :-(
                 */
 
-               if (check_floppy_change(disk))
+               if (floppy_check_events(disk, 0))
                        floppy_revalidate(disk);
 
                if (UD.flags & FTD_MSG)
@@ -1904,7 +1905,7 @@ static const struct block_device_operations floppy_fops = {
        .open           = floppy_unlocked_open,
        .release        = floppy_release,
        .ioctl          = fd_ioctl,
-       .media_changed  = check_floppy_change,
+       .check_events   = floppy_check_events,
        .revalidate_disk= floppy_revalidate,
 };
 
@@ -1963,6 +1964,7 @@ static int __init atari_floppy_init (void)
                unit[i].disk->first_minor = i;
                sprintf(unit[i].disk->disk_name, "fd%d", i);
                unit[i].disk->fops = &floppy_fops;
+               unit[i].disk->events = DISK_EVENT_MEDIA_CHANGE;
                unit[i].disk->private_data = &unit[i];
                unit[i].disk->queue = blk_init_queue(do_fd_request,
                                        &ataflop_lock);
index 9279272b3732719c9b660e6abc6146dd6dfee960..35658f445fca4a494071438ad0f4788bab936231 100644 (file)
@@ -3170,12 +3170,6 @@ static void do_cciss_request(struct request_queue *q)
        int sg_index = 0;
        int chained = 0;
 
-       /* We call start_io here in case there is a command waiting on the
-        * queue that has not been sent.
-        */
-       if (blk_queue_plugged(q))
-               goto startio;
-
       queue:
        creq = blk_peek_request(q);
        if (!creq)
index 946dad4caef37c63615d151fecabc8a7ac5e0d4e..b2fceb53e8092bd28355f7678ea4e92351952299 100644 (file)
@@ -911,9 +911,6 @@ static void do_ida_request(struct request_queue *q)
        struct scatterlist tmp_sg[SG_MAX];
        int i, dir, seg;
 
-       if (blk_queue_plugged(q))
-               goto startio;
-
 queue_next:
        creq = blk_peek_request(q);
        if (!creq)
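
With the per-queue plug gone, the blk_queue_plugged() gate at the top of
these request functions no longer means anything: the queue is never
"plugged" from the driver's point of view, so request_fn simply drains
whatever the elevator hands out until it runs out of work or resources.
A hedged sketch of the resulting pattern; the foo_* helpers are hypothetical.

    #include <linux/blkdev.h>

    static bool foo_can_queue(struct request *rq);  /* hypothetical resource check */
    static void foo_submit(struct request *rq);     /* hypothetical hardware submit */

    /* Called with q->queue_lock held, as for any request_fn. */
    static void foo_request_fn(struct request_queue *q)
    {
            struct request *rq;

            while ((rq = blk_peek_request(q)) != NULL) {
                    if (!foo_can_queue(rq))
                            break;
                    blk_start_request(rq);
                    foo_submit(rq);
            }
    }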
index ba95cba192be8d1d00f7b7e9b47f1cb05d460470..aca302492ff20a25e65ac645a601dbf51ada2f33 100644 (file)
@@ -80,7 +80,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 
        if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
                rw |= REQ_FUA;
-       rw |= REQ_UNPLUG | REQ_SYNC;
+       rw |= REQ_SYNC;
 
        bio = bio_alloc(GFP_NOIO, 1);
        bio->bi_bdev = bdev->md_bdev;
@@ -689,8 +689,6 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
                }
        }
 
-       drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
-
        /* always (try to) flush bitmap to stable storage */
        drbd_md_flush(mdev);
 
index fd42832f785b86a6056e3c4efceccaeab37f04bd..0645ca829a94163c57f4ee5ac43e96aaa344870d 100644 (file)
@@ -840,7 +840,6 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
        for (i = 0; i < num_pages; i++)
                bm_page_io_async(mdev, b, i, rw);
 
-       drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
        wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);
 
        if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
index 3803a03489372911290cd539988da5e779ac85d2..b0bd27dfc1e8a88deab0c49705b3ed201da7309c 100644 (file)
@@ -377,7 +377,7 @@ union p_header {
 #define DP_HARDBARRIER       1 /* deprecated */
 #define DP_RW_SYNC           2 /* equals REQ_SYNC    */
 #define DP_MAY_SET_IN_SYNC    4
-#define DP_UNPLUG             8 /* equals REQ_UNPLUG  */
+#define DP_UNPLUG             8 /* not used anymore   */
 #define DP_FUA               16 /* equals REQ_FUA     */
 #define DP_FLUSH             32 /* equals REQ_FLUSH   */
 #define DP_DISCARD           64 /* equals REQ_DISCARD */
@@ -2382,20 +2382,6 @@ static inline int drbd_queue_order_type(struct drbd_conf *mdev)
        return QUEUE_ORDERED_NONE;
 }
 
-static inline void drbd_blk_run_queue(struct request_queue *q)
-{
-       if (q && q->unplug_fn)
-               q->unplug_fn(q);
-}
-
-static inline void drbd_kick_lo(struct drbd_conf *mdev)
-{
-       if (get_ldev(mdev)) {
-               drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev));
-               put_ldev(mdev);
-       }
-}
-
 static inline void drbd_md_flush(struct drbd_conf *mdev)
 {
        int r;
index 29cd0dc9fe4f8c9fe6731aaaf6f85220b7bf5db0..8a43ce0edeed12f1007cfeb9474ed857fc8f39cd 100644 (file)
@@ -2477,12 +2477,11 @@ static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
 {
        if (mdev->agreed_pro_version >= 95)
                return  (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
-                       (bi_rw & REQ_UNPLUG ? DP_UNPLUG : 0) |
                        (bi_rw & REQ_FUA ? DP_FUA : 0) |
                        (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
                        (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
        else
-               return bi_rw & (REQ_SYNC | REQ_UNPLUG) ? DP_RW_SYNC : 0;
+               return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
 }
 
 /* Used to send write requests
@@ -2719,35 +2718,6 @@ static int drbd_release(struct gendisk *gd, fmode_t mode)
        return 0;
 }
 
-static void drbd_unplug_fn(struct request_queue *q)
-{
-       struct drbd_conf *mdev = q->queuedata;
-
-       /* unplug FIRST */
-       spin_lock_irq(q->queue_lock);
-       blk_remove_plug(q);
-       spin_unlock_irq(q->queue_lock);
-
-       /* only if connected */
-       spin_lock_irq(&mdev->req_lock);
-       if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) {
-               D_ASSERT(mdev->state.role == R_PRIMARY);
-               if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) {
-                       /* add to the data.work queue,
-                        * unless already queued.
-                        * XXX this might be a good addition to drbd_queue_work
-                        * anyways, to detect "double queuing" ... */
-                       if (list_empty(&mdev->unplug_work.list))
-                               drbd_queue_work(&mdev->data.work,
-                                               &mdev->unplug_work);
-               }
-       }
-       spin_unlock_irq(&mdev->req_lock);
-
-       if (mdev->state.disk >= D_INCONSISTENT)
-               drbd_kick_lo(mdev);
-}
-
 static void drbd_set_defaults(struct drbd_conf *mdev)
 {
        /* This way we get a compile error when sync_conf grows,
@@ -3222,9 +3192,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
        blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE);
        blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
        blk_queue_merge_bvec(q, drbd_merge_bvec);
-       q->queue_lock = &mdev->req_lock; /* needed since we use */
-               /* plugging on a queue, that actually has no requests! */
-       q->unplug_fn = drbd_unplug_fn;
+       q->queue_lock = &mdev->req_lock;
 
        mdev->md_io_page = alloc_page(GFP_KERNEL);
        if (!mdev->md_io_page)
index 24487d4fb20297e6676a91e60c9525f26092e315..8e68be939debefb6e5960ed38c2dd42d67c3a18d 100644 (file)
@@ -187,15 +187,6 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int
        return NULL;
 }
 
-/* kick lower level device, if we have more than (arbitrary number)
- * reference counts on it, which typically are locally submitted io
- * requests.  don't use unacked_cnt, so we speed up proto A and B, too. */
-static void maybe_kick_lo(struct drbd_conf *mdev)
-{
-       if (atomic_read(&mdev->local_cnt) >= mdev->net_conf->unplug_watermark)
-               drbd_kick_lo(mdev);
-}
-
 static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
 {
        struct drbd_epoch_entry *e;
@@ -219,7 +210,6 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
        LIST_HEAD(reclaimed);
        struct drbd_epoch_entry *e, *t;
 
-       maybe_kick_lo(mdev);
        spin_lock_irq(&mdev->req_lock);
        reclaim_net_ee(mdev, &reclaimed);
        spin_unlock_irq(&mdev->req_lock);
@@ -436,8 +426,7 @@ void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
        while (!list_empty(head)) {
                prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(&mdev->req_lock);
-               drbd_kick_lo(mdev);
-               schedule();
+               io_schedule();
                finish_wait(&mdev->ee_wait, &wait);
                spin_lock_irq(&mdev->req_lock);
        }
@@ -1111,8 +1100,6 @@ next_bio:
        /* > e->sector, unless this is the first bio */
        bio->bi_sector = sector;
        bio->bi_bdev = mdev->ldev->backing_bdev;
-       /* we special case some flags in the multi-bio case, see below
-        * (REQ_UNPLUG) */
        bio->bi_rw = rw;
        bio->bi_private = e;
        bio->bi_end_io = drbd_endio_sec;
@@ -1141,13 +1128,8 @@ next_bio:
                bios = bios->bi_next;
                bio->bi_next = NULL;
 
-               /* strip off REQ_UNPLUG unless it is the last bio */
-               if (bios)
-                       bio->bi_rw &= ~REQ_UNPLUG;
-
                drbd_generic_make_request(mdev, fault_type, bio);
        } while (bios);
-       maybe_kick_lo(mdev);
        return 0;
 
 fail:
@@ -1167,9 +1149,6 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
 
        inc_unacked(mdev);
 
-       if (mdev->net_conf->wire_protocol != DRBD_PROT_C)
-               drbd_kick_lo(mdev);
-
        mdev->current_epoch->barrier_nr = p->barrier;
        rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
 
@@ -1636,12 +1615,11 @@ static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
 {
        if (mdev->agreed_pro_version >= 95)
                return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
-                       (dpf & DP_UNPLUG ? REQ_UNPLUG : 0) |
                        (dpf & DP_FUA ? REQ_FUA : 0) |
                        (dpf & DP_FLUSH ? REQ_FUA : 0) |
                        (dpf & DP_DISCARD ? REQ_DISCARD : 0);
        else
-               return dpf & DP_RW_SYNC ? (REQ_SYNC | REQ_UNPLUG) : 0;
+               return dpf & DP_RW_SYNC ? REQ_SYNC : 0;
 }
 
 /* mirrored write */
@@ -3556,9 +3534,6 @@ static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 
 static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
-       if (mdev->state.disk >= D_INCONSISTENT)
-               drbd_kick_lo(mdev);
-
        /* Make sure we've acked all the TCP data associated
         * with the data requests being unplugged */
        drbd_tcp_quickack(mdev->data.socket);
index 11a75d32a2e27f0d78c26b511921780f4d336264..ad3fc6228f27924d3523976fd3a5a4486c3a5b4a 100644 (file)
@@ -960,10 +960,6 @@ allocate_barrier:
                        bio_endio(req->private_bio, -EIO);
        }
 
-       /* we need to plug ALWAYS since we possibly need to kick lo_dev.
-        * we plug after submit, so we won't miss an unplug event */
-       drbd_plug_device(mdev);
-
        return 0;
 
 fail_conflicting:
index 34f224b018b37b1e1781134fedf67179799ab20e..e027446590d3752d63d2301c7d65e17d749f7b13 100644 (file)
@@ -792,7 +792,6 @@ int drbd_resync_finished(struct drbd_conf *mdev)
                 * queue (or even the read operations for those packets
                 * is not finished by now).   Retry in 100ms. */
 
-               drbd_kick_lo(mdev);
                __set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(HZ / 10);
                w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
index defdb5013ea3444f272e83084d378964eca621ba..53586fa5ae1b098686e6171ea821fa21dc1bdb1f 100644 (file)
@@ -45,24 +45,6 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev,
                generic_make_request(bio);
 }
 
-static inline void drbd_plug_device(struct drbd_conf *mdev)
-{
-       struct request_queue *q;
-       q = bdev_get_queue(mdev->this_bdev);
-
-       spin_lock_irq(q->queue_lock);
-
-/* XXX the check on !blk_queue_plugged is redundant,
- * implicitly checked in blk_plug_device */
-
-       if (!blk_queue_plugged(q)) {
-               blk_plug_device(q);
-               del_timer(&q->unplug_timer);
-               /* unplugging should not happen automatically... */
-       }
-       spin_unlock_irq(q->queue_lock);
-}
-
 static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm)
 {
         return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK)
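
Taken together, the drbd hunks remove every trace of queue plugging from the driver: drbd_plug_device(), drbd_unplug_fn(), drbd_kick_lo() and the maybe_kick_lo() watermark check are gone, the DP_UNPLUG wire flag remains only so the bit value stays reserved, and waiters call io_schedule() instead of kicking the lower device by hand. Any batching a submitter still wants now comes from the per-task on-stack plug the reworked block layer provides; a sketch under that assumption (helper and call site are hypothetical):

    #include <linux/blkdev.h>
    #include <linux/bio.h>

    static void submit_bio_batch(struct bio **bios, int n)
    {
            struct blk_plug plug;
            int i;

            blk_start_plug(&plug);          /* plug lives on this task's stack */
            for (i = 0; i < n; i++)
                    submit_bio(WRITE, bios[i]);
            blk_finish_plug(&plug);         /* flushed here, or on schedule() */
    }
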
index 77fc76f8aea91b79be3f8cfc6c640e3e23748de0..301d7a9a41a6b51be0ae02f6ca92b2fb063316a7 100644 (file)
@@ -3770,13 +3770,14 @@ out2:
 /*
  * Check if the disk has been changed or if a change has been faked.
  */
-static int check_floppy_change(struct gendisk *disk)
+static unsigned int floppy_check_events(struct gendisk *disk,
+                                       unsigned int clearing)
 {
        int drive = (long)disk->private_data;
 
        if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
            test_bit(FD_VERIFY_BIT, &UDRS->flags))
-               return 1;
+               return DISK_EVENT_MEDIA_CHANGE;
 
        if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
                lock_fdc(drive, false);
@@ -3788,7 +3789,7 @@ static int check_floppy_change(struct gendisk *disk)
            test_bit(FD_VERIFY_BIT, &UDRS->flags) ||
            test_bit(drive, &fake_change) ||
            drive_no_geom(drive))
-               return 1;
+               return DISK_EVENT_MEDIA_CHANGE;
        return 0;
 }
 
@@ -3837,7 +3838,6 @@ static int __floppy_read_block_0(struct block_device *bdev)
        bio.bi_end_io = floppy_rb0_complete;
 
        submit_bio(READ, &bio);
-       generic_unplug_device(bdev_get_queue(bdev));
        process_fd_request();
        wait_for_completion(&complete);
 
@@ -3898,7 +3898,7 @@ static const struct block_device_operations floppy_fops = {
        .release                = floppy_release,
        .ioctl                  = fd_ioctl,
        .getgeo                 = fd_getgeo,
-       .media_changed          = check_floppy_change,
+       .check_events           = floppy_check_events,
        .revalidate_disk        = floppy_revalidate,
 };
 
@@ -4205,6 +4205,7 @@ static int __init floppy_init(void)
                disks[dr]->major = FLOPPY_MAJOR;
                disks[dr]->first_minor = TOMINOR(dr);
                disks[dr]->fops = &floppy_fops;
+               disks[dr]->events = DISK_EVENT_MEDIA_CHANGE;
                sprintf(disks[dr]->disk_name, "fd%d", dr);
 
                init_timer(&motor_off_timer[dr]);
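
The floppy conversion shows the full ->media_changed to ->check_events pattern that repeats through the rest of this series: the callback gains a "clearing" mask, returns a bitmask of DISK_EVENT_* flags instead of a bool, and the driver advertises which events it can report through disk->events so the block core knows to poll it. A minimal sketch for a hypothetical removable-media driver:

    #include <linux/blkdev.h>
    #include <linux/genhd.h>

    struct mydrv_unit {                     /* hypothetical driver state */
            bool media_changed;
    };

    static unsigned int mydrv_check_events(struct gendisk *disk,
                                           unsigned int clearing)
    {
            struct mydrv_unit *u = disk->private_data;

            return u->media_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
    }

    static const struct block_device_operations mydrv_fops = {
            .check_events   = mydrv_check_events,
    };

    /* at probe time:
     *      disk->fops   = &mydrv_fops;
     *      disk->events = DISK_EVENT_MEDIA_CHANGE;
     */
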
index dbf31ec9114db6a23c270be8d49e3836cbc2684a..a076a14ca72d848fbf144042a7272038eab22390 100644 (file)
@@ -540,17 +540,6 @@ out:
        return 0;
 }
 
-/*
- * kick off io on the underlying address space
- */
-static void loop_unplug(struct request_queue *q)
-{
-       struct loop_device *lo = q->queuedata;
-
-       queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
-       blk_run_address_space(lo->lo_backing_file->f_mapping);
-}
-
 struct switch_request {
        struct file *file;
        struct completion wait;
@@ -917,7 +906,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
         */
        blk_queue_make_request(lo->lo_queue, loop_make_request);
        lo->lo_queue->queuedata = lo;
-       lo->lo_queue->unplug_fn = loop_unplug;
 
        if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
                blk_queue_flush(lo->lo_queue, REQ_FLUSH);
@@ -1019,7 +1007,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 
        kthread_stop(lo->lo_thread);
 
-       lo->lo_queue->unplug_fn = NULL;
        lo->lo_backing_file = NULL;
 
        loop_release_xfer(lo);
@@ -1636,9 +1623,6 @@ out:
 
 static void loop_free(struct loop_device *lo)
 {
-       if (!lo->lo_queue->queue_lock)
-               lo->lo_queue->queue_lock = &lo->lo_queue->__queue_lock;
-
        blk_cleanup_queue(lo->lo_queue);
        put_disk(lo->lo_disk);
        list_del(&lo->lo_list);
index 62cec6afd7adf560f6792c68949f71fcc8e6f9ea..2f2ccf6862519c4bb44493ccab140cbbdab37a93 100644 (file)
@@ -172,7 +172,8 @@ module_param_array(drive3, int, NULL, 0);
 static int pcd_open(struct cdrom_device_info *cdi, int purpose);
 static void pcd_release(struct cdrom_device_info *cdi);
 static int pcd_drive_status(struct cdrom_device_info *cdi, int slot_nr);
-static int pcd_media_changed(struct cdrom_device_info *cdi, int slot_nr);
+static unsigned int pcd_check_events(struct cdrom_device_info *cdi,
+                                    unsigned int clearing, int slot_nr);
 static int pcd_tray_move(struct cdrom_device_info *cdi, int position);
 static int pcd_lock_door(struct cdrom_device_info *cdi, int lock);
 static int pcd_drive_reset(struct cdrom_device_info *cdi);
@@ -257,10 +258,11 @@ static int pcd_block_ioctl(struct block_device *bdev, fmode_t mode,
        return ret;
 }
 
-static int pcd_block_media_changed(struct gendisk *disk)
+static unsigned int pcd_block_check_events(struct gendisk *disk,
+                                          unsigned int clearing)
 {
        struct pcd_unit *cd = disk->private_data;
-       return cdrom_media_changed(&cd->info);
+       return cdrom_check_events(&cd->info, clearing);
 }
 
 static const struct block_device_operations pcd_bdops = {
@@ -268,14 +270,14 @@ static const struct block_device_operations pcd_bdops = {
        .open           = pcd_block_open,
        .release        = pcd_block_release,
        .ioctl          = pcd_block_ioctl,
-       .media_changed  = pcd_block_media_changed,
+       .check_events   = pcd_block_check_events,
 };
 
 static struct cdrom_device_ops pcd_dops = {
        .open           = pcd_open,
        .release        = pcd_release,
        .drive_status   = pcd_drive_status,
-       .media_changed  = pcd_media_changed,
+       .check_events   = pcd_check_events,
        .tray_move      = pcd_tray_move,
        .lock_door      = pcd_lock_door,
        .get_mcn        = pcd_get_mcn,
@@ -318,6 +320,7 @@ static void pcd_init_units(void)
                disk->first_minor = unit;
                strcpy(disk->disk_name, cd->name);      /* umm... */
                disk->fops = &pcd_bdops;
+               disk->events = DISK_EVENT_MEDIA_CHANGE;
        }
 }
 
@@ -502,13 +505,14 @@ static int pcd_packet(struct cdrom_device_info *cdi, struct packet_command *cgc)
 
 #define DBMSG(msg)     ((verbose>1)?(msg):NULL)
 
-static int pcd_media_changed(struct cdrom_device_info *cdi, int slot_nr)
+static unsigned int pcd_check_events(struct cdrom_device_info *cdi,
+                                    unsigned int clearing, int slot_nr)
 {
        struct pcd_unit *cd = cdi->handle;
        int res = cd->changed;
        if (res)
                cd->changed = 0;
-       return res;
+       return res ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int pcd_lock_door(struct cdrom_device_info *cdi, int lock)
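
CD-ROM drivers are converted at two levels: cdrom_device_ops grows a ->check_events() hook (with the extra slot_nr argument), and the block-level ->check_events() forwards to cdrom_check_events() so the shared cdrom layer can fold the driver's answer in with its own state. Sketch for a hypothetical driver:

    #include <linux/cdrom.h>
    #include <linux/genhd.h>

    struct mycd {                           /* hypothetical driver state */
            struct cdrom_device_info info;
            bool changed;
    };

    static unsigned int mycd_check_events(struct cdrom_device_info *cdi,
                                          unsigned int clearing, int slot_nr)
    {
            struct mycd *cd = cdi->handle;
            bool changed = cd->changed;

            cd->changed = false;
            return changed ? DISK_EVENT_MEDIA_CHANGE : 0;
    }

    static unsigned int mycd_blk_check_events(struct gendisk *disk,
                                              unsigned int clearing)
    {
            struct mycd *cd = disk->private_data;

            return cdrom_check_events(&cd->info, clearing);
    }
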
index c0ee1558b9bba105859f29f288bb4310e8ad4f3d..21dfdb7768695cfe8d9517dc59f4cf7a5b1df279 100644 (file)
@@ -794,7 +794,7 @@ static int pd_release(struct gendisk *p, fmode_t mode)
        return 0;
 }
 
-static int pd_check_media(struct gendisk *p)
+static unsigned int pd_check_events(struct gendisk *p, unsigned int clearing)
 {
        struct pd_unit *disk = p->private_data;
        int r;
@@ -803,7 +803,7 @@ static int pd_check_media(struct gendisk *p)
        pd_special_command(disk, pd_media_check);
        r = disk->changed;
        disk->changed = 0;
-       return r;
+       return r ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int pd_revalidate(struct gendisk *p)
@@ -822,7 +822,7 @@ static const struct block_device_operations pd_fops = {
        .release        = pd_release,
        .ioctl          = pd_ioctl,
        .getgeo         = pd_getgeo,
-       .media_changed  = pd_check_media,
+       .check_events   = pd_check_events,
        .revalidate_disk= pd_revalidate
 };
 
@@ -837,6 +837,7 @@ static void pd_probe_drive(struct pd_unit *disk)
        p->fops = &pd_fops;
        p->major = major;
        p->first_minor = (disk - pd) << PD_BITS;
+       p->events = DISK_EVENT_MEDIA_CHANGE;
        disk->gd = p;
        p->private_data = disk;
        p->queue = pd_queue;
index 635f25dd9e1082c9d1099c0ccca8142436e8f38d..7adeb1edbf43faf02b5f68f79d93f6d839baa9e4 100644 (file)
@@ -243,7 +243,8 @@ static struct pf_unit units[PF_UNITS];
 static int pf_identify(struct pf_unit *pf);
 static void pf_lock(struct pf_unit *pf, int func);
 static void pf_eject(struct pf_unit *pf);
-static int pf_check_media(struct gendisk *disk);
+static unsigned int pf_check_events(struct gendisk *disk,
+                                   unsigned int clearing);
 
 static char pf_scratch[512];   /* scratch block buffer */
 
@@ -270,7 +271,7 @@ static const struct block_device_operations pf_fops = {
        .release        = pf_release,
        .ioctl          = pf_ioctl,
        .getgeo         = pf_getgeo,
-       .media_changed  = pf_check_media,
+       .check_events   = pf_check_events,
 };
 
 static void __init pf_init_units(void)
@@ -293,6 +294,7 @@ static void __init pf_init_units(void)
                disk->first_minor = unit;
                strcpy(disk->disk_name, pf->name);
                disk->fops = &pf_fops;
+               disk->events = DISK_EVENT_MEDIA_CHANGE;
                if (!(*drives[unit])[D_PRT])
                        pf_drive_count++;
        }
@@ -377,9 +379,9 @@ static int pf_release(struct gendisk *disk, fmode_t mode)
 
 }
 
-static int pf_check_media(struct gendisk *disk)
+static unsigned int pf_check_events(struct gendisk *disk, unsigned int clearing)
 {
-       return 1;
+       return DISK_EVENT_MEDIA_CHANGE;
 }
 
 static inline int status_reg(struct pf_unit *pf)
index 77d70eebb6b2121e49b94b7626383b31552a2a35..07a382eaf0a86b8b0a8f9f30a57dfe3e3e68f6a9 100644 (file)
@@ -1606,8 +1606,6 @@ static int kcdrwd(void *foobar)
                                        min_sleep_time = pkt->sleep_time;
                        }
 
-                       generic_unplug_device(bdev_get_queue(pd->bdev));
-
                        VPRINTK("kcdrwd: sleeping\n");
                        residue = schedule_timeout(min_sleep_time);
                        VPRINTK("kcdrwd: wake up\n");
@@ -2796,7 +2794,8 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
        return ret;
 }
 
-static int pkt_media_changed(struct gendisk *disk)
+static unsigned int pkt_check_events(struct gendisk *disk,
+                                    unsigned int clearing)
 {
        struct pktcdvd_device *pd = disk->private_data;
        struct gendisk *attached_disk;
@@ -2806,9 +2805,9 @@ static int pkt_media_changed(struct gendisk *disk)
        if (!pd->bdev)
                return 0;
        attached_disk = pd->bdev->bd_disk;
-       if (!attached_disk)
+       if (!attached_disk || !attached_disk->fops->check_events)
                return 0;
-       return attached_disk->fops->media_changed(attached_disk);
+       return attached_disk->fops->check_events(attached_disk, clearing);
 }
 
 static const struct block_device_operations pktcdvd_ops = {
@@ -2816,7 +2815,7 @@ static const struct block_device_operations pktcdvd_ops = {
        .open =                 pkt_open,
        .release =              pkt_close,
        .ioctl =                pkt_ioctl,
-       .media_changed =        pkt_media_changed,
+       .check_events =         pkt_check_events,
 };
 
 static char *pktcdvd_devnode(struct gendisk *gd, mode_t *mode)
@@ -2889,6 +2888,10 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
        if (ret)
                goto out_new_dev;
 
+       /* inherit events of the host device */
+       disk->events = pd->bdev->bd_disk->events;
+       disk->async_events = pd->bdev->bd_disk->async_events;
+
        add_disk(disk);
 
        pkt_sysfs_dev_new(pd);
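
pktcdvd stacks on top of another disk, so besides forwarding ->check_events() to the host drive it now copies that drive's events and async_events masks when the packet device is set up; without the inherited mask the block core would never poll the pktcdvd gendisk even though the drive underneath can report media changes. The forwarding part, as a sketch (the lookup helper is hypothetical; pktcdvd itself goes through pd->bdev->bd_disk):

    #include <linux/genhd.h>

    struct gendisk *stacked_get_lower_disk(struct gendisk *disk);  /* hypothetical */

    static unsigned int stacked_check_events(struct gendisk *disk,
                                             unsigned int clearing)
    {
            struct gendisk *lower = stacked_get_lower_disk(disk);

            if (!lower || !lower->fops->check_events)
                    return 0;
            return lower->fops->check_events(lower, clearing);
    }
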
index 75333d0a3327de785f520cdeddbb4a45b83f1a90..24a482f2fbd60fcfd0917374bd6197a76d6ddf85 100644 (file)
@@ -741,11 +741,12 @@ static int floppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
        return 0;
 }
 
-static int floppy_check_change(struct gendisk *disk)
+static unsigned int floppy_check_events(struct gendisk *disk,
+                                       unsigned int clearing)
 {
        struct floppy_state *fs = disk->private_data;
 
-       return fs->ejected;
+       return fs->ejected ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int floppy_revalidate(struct gendisk *disk)
@@ -772,7 +773,7 @@ static const struct block_device_operations floppy_fops = {
        .release         = floppy_release,
        .ioctl           = floppy_ioctl,
        .getgeo          = floppy_getgeo,
-       .media_changed   = floppy_check_change,
+       .check_events    = floppy_check_events,
        .revalidate_disk = floppy_revalidate,
 };
 
@@ -857,6 +858,7 @@ static int __devinit swim_floppy_init(struct swim_priv *swd)
                swd->unit[drive].disk->first_minor = drive;
                sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive);
                swd->unit[drive].disk->fops = &floppy_fops;
+               swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE;
                swd->unit[drive].disk->private_data = &swd->unit[drive];
                swd->unit[drive].disk->queue = swd->queue;
                set_capacity(swd->unit[drive].disk, 2880);
index bf3a5b8592990ccf640b4221973ff84114b893df..4c10f56facbff8b5c0e56615c0ebc79a1179903f 100644 (file)
@@ -250,7 +250,8 @@ static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
                        unsigned int cmd, unsigned long param);
 static int floppy_open(struct block_device *bdev, fmode_t mode);
 static int floppy_release(struct gendisk *disk, fmode_t mode);
-static int floppy_check_change(struct gendisk *disk);
+static unsigned int floppy_check_events(struct gendisk *disk,
+                                       unsigned int clearing);
 static int floppy_revalidate(struct gendisk *disk);
 
 static bool swim3_end_request(int err, unsigned int nr_bytes)
@@ -975,10 +976,11 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
        return 0;
 }
 
-static int floppy_check_change(struct gendisk *disk)
+static unsigned int floppy_check_events(struct gendisk *disk,
+                                       unsigned int clearing)
 {
        struct floppy_state *fs = disk->private_data;
-       return fs->ejected;
+       return fs->ejected ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int floppy_revalidate(struct gendisk *disk)
@@ -1025,7 +1027,7 @@ static const struct block_device_operations floppy_fops = {
        .open           = floppy_unlocked_open,
        .release        = floppy_release,
        .ioctl          = floppy_ioctl,
-       .media_changed  = floppy_check_change,
+       .check_events   = floppy_check_events,
        .revalidate_disk= floppy_revalidate,
 };
 
@@ -1161,6 +1163,7 @@ static int __devinit swim3_attach(struct macio_dev *mdev, const struct of_device
        disk->major = FLOPPY_MAJOR;
        disk->first_minor = i;
        disk->fops = &floppy_fops;
+       disk->events = DISK_EVENT_MEDIA_CHANGE;
        disk->private_data = &floppy_states[i];
        disk->queue = swim3_queue;
        disk->flags |= GENHD_FL_REMOVABLE;
index 9ae3bb713286f0d0d3a959a81f7f4b4db5cde1db..68b9430c7cfe56f7f23330cb152b103aef3098b9 100644 (file)
@@ -1788,7 +1788,8 @@ static int ub_bd_revalidate(struct gendisk *disk)
  *
  * The return code is bool!
  */
-static int ub_bd_media_changed(struct gendisk *disk)
+static unsigned int ub_bd_check_events(struct gendisk *disk,
+                                      unsigned int clearing)
 {
        struct ub_lun *lun = disk->private_data;
 
@@ -1806,10 +1807,10 @@ static int ub_bd_media_changed(struct gendisk *disk)
         */
        if (ub_sync_tur(lun->udev, lun) != 0) {
                lun->changed = 1;
-               return 1;
+               return DISK_EVENT_MEDIA_CHANGE;
        }
 
-       return lun->changed;
+       return lun->changed ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static const struct block_device_operations ub_bd_fops = {
@@ -1817,7 +1818,7 @@ static const struct block_device_operations ub_bd_fops = {
        .open           = ub_bd_unlocked_open,
        .release        = ub_bd_release,
        .ioctl          = ub_bd_ioctl,
-       .media_changed  = ub_bd_media_changed,
+       .check_events   = ub_bd_check_events,
        .revalidate_disk = ub_bd_revalidate,
 };
 
@@ -2333,6 +2334,7 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum)
        disk->major = UB_MAJOR;
        disk->first_minor = lun->id * UB_PARTS_PER_LUN;
        disk->fops = &ub_bd_fops;
+       disk->events = DISK_EVENT_MEDIA_CHANGE;
        disk->private_data = lun;
        disk->driverfs_dev = &sc->intf->dev;
 
index 8be57151f5d6570cd9b9c9a629618bc23ae3466b..031ca720d926623e810368399359e6a4dcf1b02d 100644 (file)
@@ -241,8 +241,7 @@ static void dump_dmastat(struct cardinfo *card, unsigned int dmastat)
  *
  * Whenever IO on the active page completes, the Ready page is activated
  * and the ex-Active page is cleaned out and made Ready.
- * Otherwise the Ready page is only activated when it becomes full, or
- * when mm_unplug_device is called via the unplug_io_fn.
+ * Otherwise the Ready page is only activated when it becomes full.
  *
  * If a request arrives while both pages are full, it is queued, and b_rdev is
  * overloaded to record whether it was a read or a write.
@@ -333,17 +332,6 @@ static inline void reset_page(struct mm_page *page)
        page->biotail = &page->bio;
 }
 
-static void mm_unplug_device(struct request_queue *q)
-{
-       struct cardinfo *card = q->queuedata;
-       unsigned long flags;
-
-       spin_lock_irqsave(&card->lock, flags);
-       if (blk_remove_plug(q))
-               activate(card);
-       spin_unlock_irqrestore(&card->lock, flags);
-}
-
 /*
  * If there is room on Ready page, take
  * one bh off list and add it.
@@ -535,7 +523,6 @@ static int mm_make_request(struct request_queue *q, struct bio *bio)
        *card->biotail = bio;
        bio->bi_next = NULL;
        card->biotail = &bio->bi_next;
-       blk_plug_device(q);
        spin_unlock_irq(&card->lock);
 
        return 0;
@@ -779,20 +766,10 @@ static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo)
        return 0;
 }
 
-/*
- * Future support for removable devices
- */
-static int mm_check_change(struct gendisk *disk)
-{
-/*  struct cardinfo *dev = disk->private_data; */
-       return 0;
-}
-
 static const struct block_device_operations mm_fops = {
        .owner          = THIS_MODULE,
        .getgeo         = mm_getgeo,
        .revalidate_disk = mm_revalidate,
-       .media_changed  = mm_check_change,
 };
 
 static int __devinit mm_pci_probe(struct pci_dev *dev,
@@ -907,7 +884,6 @@ static int __devinit mm_pci_probe(struct pci_dev *dev,
        blk_queue_make_request(card->queue, mm_make_request);
        card->queue->queue_lock = &card->lock;
        card->queue->queuedata = card;
-       card->queue->unplug_fn = mm_unplug_device;
 
        tasklet_init(&card->tasklet, process_page, (unsigned long)card);
 
index 2c590a796aa1480bb24f2ce4e54814d971c56b76..73354b081ed3e8a629c849f3aaac865c1e1649e1 100644 (file)
@@ -867,12 +867,12 @@ static void ace_request(struct request_queue * q)
        }
 }
 
-static int ace_media_changed(struct gendisk *gd)
+static unsigned int ace_check_events(struct gendisk *gd, unsigned int clearing)
 {
        struct ace_device *ace = gd->private_data;
-       dev_dbg(ace->dev, "ace_media_changed(): %i\n", ace->media_change);
+       dev_dbg(ace->dev, "ace_check_events(): %i\n", ace->media_change);
 
-       return ace->media_change;
+       return ace->media_change ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int ace_revalidate_disk(struct gendisk *gd)
@@ -953,7 +953,7 @@ static const struct block_device_operations ace_fops = {
        .owner = THIS_MODULE,
        .open = ace_open,
        .release = ace_release,
-       .media_changed = ace_media_changed,
+       .check_events = ace_check_events,
        .revalidate_disk = ace_revalidate_disk,
        .getgeo = ace_getgeo,
 };
@@ -1005,6 +1005,7 @@ static int __devinit ace_setup(struct ace_device *ace)
        ace->gd->major = ace_major;
        ace->gd->first_minor = ace->id * ACE_NUM_MINORS;
        ace->gd->fops = &ace_fops;
+       ace->gd->events = DISK_EVENT_MEDIA_CHANGE;
        ace->gd->queue = ace->queue;
        ace->gd->private_data = ace;
        snprintf(ace->gd->disk_name, 32, "xs%c", ace->id + 'a');
index 64a21461c408f9853d6c6199bc1ed9e38fb842ca..b2b034fea34e6b8eabf7054928e34bc4d6fc0e17 100644 (file)
@@ -395,10 +395,12 @@ static int gdrom_drivestatus(struct cdrom_device_info *cd_info, int ignore)
        return CDS_NO_INFO;
 }
 
-static int gdrom_mediachanged(struct cdrom_device_info *cd_info, int ignore)
+static unsigned int gdrom_check_events(struct cdrom_device_info *cd_info,
+                                      unsigned int clearing, int ignore)
 {
        /* check the sense key */
-       return (__raw_readb(GDROM_ERROR_REG) & 0xF0) == 0x60;
+       return (__raw_readb(GDROM_ERROR_REG) & 0xF0) == 0x60 ?
+               DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 /* reset the G1 bus */
@@ -483,7 +485,7 @@ static struct cdrom_device_ops gdrom_ops = {
        .open                   = gdrom_open,
        .release                = gdrom_release,
        .drive_status           = gdrom_drivestatus,
-       .media_changed          = gdrom_mediachanged,
+       .check_events           = gdrom_check_events,
        .get_last_session       = gdrom_get_last_session,
        .reset                  = gdrom_hardreset,
        .audio_ioctl            = gdrom_audio_ioctl,
@@ -509,9 +511,10 @@ static int gdrom_bdops_release(struct gendisk *disk, fmode_t mode)
        return 0;
 }
 
-static int gdrom_bdops_mediachanged(struct gendisk *disk)
+static unsigned int gdrom_bdops_check_events(struct gendisk *disk,
+                                            unsigned int clearing)
 {
-       return cdrom_media_changed(gd.cd_info);
+       return cdrom_check_events(gd.cd_info, clearing);
 }
 
 static int gdrom_bdops_ioctl(struct block_device *bdev, fmode_t mode,
@@ -530,7 +533,7 @@ static const struct block_device_operations gdrom_bdops = {
        .owner                  = THIS_MODULE,
        .open                   = gdrom_bdops_open,
        .release                = gdrom_bdops_release,
-       .media_changed          = gdrom_bdops_mediachanged,
+       .check_events           = gdrom_bdops_check_events,
        .ioctl                  = gdrom_bdops_ioctl,
 };
 
@@ -800,6 +803,7 @@ static int __devinit probe_gdrom(struct platform_device *devptr)
                goto probe_fail_cdrom_register;
        }
        gd.disk->fops = &gdrom_bdops;
+       gd.disk->events = DISK_EVENT_MEDIA_CHANGE;
        /* latch on to the interrupt */
        err = gdrom_set_interrupt_handlers();
        if (err)
index be73a9b493a69970135663ffd4ed7a83cb568cb9..4e874c5fa60595036a7e10b26257384715b06b56 100644 (file)
@@ -186,10 +186,11 @@ static int viocd_blk_ioctl(struct block_device *bdev, fmode_t mode,
        return ret;
 }
 
-static int viocd_blk_media_changed(struct gendisk *disk)
+static unsigned int viocd_blk_check_events(struct gendisk *disk,
+                                          unsigned int clearing)
 {
        struct disk_info *di = disk->private_data;
-       return cdrom_media_changed(&di->viocd_info);
+       return cdrom_check_events(&di->viocd_info, clearing);
 }
 
 static const struct block_device_operations viocd_fops = {
@@ -197,7 +198,7 @@ static const struct block_device_operations viocd_fops = {
        .open =                 viocd_blk_open,
        .release =              viocd_blk_release,
        .ioctl =                viocd_blk_ioctl,
-       .media_changed =        viocd_blk_media_changed,
+       .check_events =         viocd_blk_check_events,
 };
 
 static int viocd_open(struct cdrom_device_info *cdi, int purpose)
@@ -320,7 +321,8 @@ static void do_viocd_request(struct request_queue *q)
        }
 }
 
-static int viocd_media_changed(struct cdrom_device_info *cdi, int disc_nr)
+static unsigned int viocd_check_events(struct cdrom_device_info *cdi,
+                                      unsigned int clearing, int disc_nr)
 {
        struct viocd_waitevent we;
        HvLpEvent_Rc hvrc;
@@ -340,7 +342,7 @@ static int viocd_media_changed(struct cdrom_device_info *cdi, int disc_nr)
        if (hvrc != 0) {
                pr_warning("bad rc on HvCallEvent_signalLpEventFast %d\n",
                           (int)hvrc);
-               return -EIO;
+               return 0;
        }
 
        wait_for_completion(&we.com);
@@ -354,7 +356,7 @@ static int viocd_media_changed(struct cdrom_device_info *cdi, int disc_nr)
                return 0;
        }
 
-       return we.changed;
+       return we.changed ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int viocd_lock_door(struct cdrom_device_info *cdi, int locking)
@@ -550,7 +552,7 @@ static int viocd_audio_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
 static struct cdrom_device_ops viocd_dops = {
        .open = viocd_open,
        .release = viocd_release,
-       .media_changed = viocd_media_changed,
+       .check_events = viocd_check_events,
        .lock_door = viocd_lock_door,
        .generic_packet = viocd_packet,
        .audio_ioctl = viocd_audio_ioctl,
@@ -624,6 +626,7 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
        gendisk->queue = q;
        gendisk->fops = &viocd_fops;
        gendisk->flags = GENHD_FL_CD|GENHD_FL_REMOVABLE;
+       gendisk->events = DISK_EVENT_MEDIA_CHANGE;
        set_capacity(gendisk, 0);
        gendisk->private_data = d;
        d->viocd_disk = gendisk;
index 4c95b5fd9df32622d3116b7c4360794876b0407b..799e1490cf240964962650140bc1aca3631f3691 100644 (file)
@@ -1073,6 +1073,9 @@ int drm_mode_getresources(struct drm_device *dev, void *data,
        uint32_t __user *encoder_id;
        struct drm_mode_group *mode_group;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
 
        /*
@@ -1244,6 +1247,9 @@ int drm_mode_getcrtc(struct drm_device *dev,
        struct drm_mode_object *obj;
        int ret = 0;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
 
        obj = drm_mode_object_find(dev, crtc_resp->crtc_id,
@@ -1312,6 +1318,9 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
        uint64_t __user *prop_values;
        uint32_t __user *encoder_ptr;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        memset(&u_mode, 0, sizeof(struct drm_mode_modeinfo));
 
        DRM_DEBUG_KMS("[CONNECTOR:%d:?]\n", out_resp->connector_id);
@@ -1431,6 +1440,9 @@ int drm_mode_getencoder(struct drm_device *dev, void *data,
        struct drm_encoder *encoder;
        int ret = 0;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
        obj = drm_mode_object_find(dev, enc_resp->encoder_id,
                                   DRM_MODE_OBJECT_ENCODER);
@@ -1486,6 +1498,9 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
        int ret = 0;
        int i;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
        obj = drm_mode_object_find(dev, crtc_req->crtc_id,
                                   DRM_MODE_OBJECT_CRTC);
@@ -1603,6 +1618,9 @@ int drm_mode_cursor_ioctl(struct drm_device *dev,
        struct drm_crtc *crtc;
        int ret = 0;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        if (!req->flags) {
                DRM_ERROR("no operation set\n");
                return -EINVAL;
@@ -1667,6 +1685,9 @@ int drm_mode_addfb(struct drm_device *dev,
        struct drm_framebuffer *fb;
        int ret = 0;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        if ((config->min_width > r->width) || (r->width > config->max_width)) {
                DRM_ERROR("mode new framebuffer width not within limits\n");
                return -EINVAL;
@@ -1724,6 +1745,9 @@ int drm_mode_rmfb(struct drm_device *dev,
        int ret = 0;
        int found = 0;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
        obj = drm_mode_object_find(dev, *id, DRM_MODE_OBJECT_FB);
        /* TODO check that we really get a framebuffer back. */
@@ -1780,6 +1804,9 @@ int drm_mode_getfb(struct drm_device *dev,
        struct drm_framebuffer *fb;
        int ret = 0;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
        obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB);
        if (!obj) {
@@ -1813,6 +1840,9 @@ int drm_mode_dirtyfb_ioctl(struct drm_device *dev,
        int num_clips;
        int ret = 0;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
        obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB);
        if (!obj) {
@@ -1996,6 +2026,9 @@ int drm_mode_attachmode_ioctl(struct drm_device *dev,
        struct drm_mode_modeinfo *umode = &mode_cmd->mode;
        int ret = 0;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
 
        obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR);
@@ -2042,6 +2075,9 @@ int drm_mode_detachmode_ioctl(struct drm_device *dev,
        struct drm_mode_modeinfo *umode = &mode_cmd->mode;
        int ret = 0;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
 
        obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR);
@@ -2211,6 +2247,9 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev,
        uint64_t __user *values_ptr;
        uint32_t __user *blob_length_ptr;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
        obj = drm_mode_object_find(dev, out_resp->prop_id, DRM_MODE_OBJECT_PROPERTY);
        if (!obj) {
@@ -2333,6 +2372,9 @@ int drm_mode_getblob_ioctl(struct drm_device *dev,
        int ret = 0;
        void *blob_ptr;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
        obj = drm_mode_object_find(dev, out_resp->blob_id, DRM_MODE_OBJECT_BLOB);
        if (!obj) {
@@ -2393,6 +2435,9 @@ int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
        int ret = -EINVAL;
        int i;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
 
        obj = drm_mode_object_find(dev, out_resp->connector_id, DRM_MODE_OBJECT_CONNECTOR);
@@ -2509,6 +2554,9 @@ int drm_mode_gamma_set_ioctl(struct drm_device *dev,
        int size;
        int ret = 0;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
        obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
        if (!obj) {
@@ -2560,6 +2608,9 @@ int drm_mode_gamma_get_ioctl(struct drm_device *dev,
        int size;
        int ret = 0;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        mutex_lock(&dev->mode_config.mutex);
        obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
        if (!obj) {
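
Every mode-setting ioctl in drm_crtc.c now starts with the same guard: these calls only make sense on a KMS driver, so they fail with -EINVAL before touching dev->mode_config when DRIVER_MODESET is not set, instead of walking mode object lists that were never initialised. The shape of each handler after the change (example function, not from the patch):

    #include "drmP.h"
    #include "drm_crtc.h"

    int drm_mode_example_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file_priv)
    {
            if (!drm_core_check_feature(dev, DRIVER_MODESET))
                    return -EINVAL;         /* UMS driver: no mode objects exist */

            mutex_lock(&dev->mode_config.mutex);
            /* ... look up and operate on mode objects ... */
            mutex_unlock(&dev->mode_config.mutex);
            return 0;
    }
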
index 57ce27c9a747799dfb5065a056747d1b2d30bcab..74e4ff578017b90d3e0649219d71a87163fb5b19 100644 (file)
@@ -499,11 +499,12 @@ EXPORT_SYMBOL(drm_gem_vm_open);
 void drm_gem_vm_close(struct vm_area_struct *vma)
 {
        struct drm_gem_object *obj = vma->vm_private_data;
+       struct drm_device *dev = obj->dev;
 
-       mutex_lock(&obj->dev->struct_mutex);
+       mutex_lock(&dev->struct_mutex);
        drm_vm_close_locked(vma);
        drm_gem_object_unreference(obj);
-       mutex_unlock(&obj->dev->struct_mutex);
+       mutex_unlock(&dev->struct_mutex);
 }
 EXPORT_SYMBOL(drm_gem_vm_close);
 
index 7f6912a16761a12c360310fd60e45d375d17966e..904d7e9c8e4752ffa738ee33b0f884e305825a12 100644 (file)
@@ -280,6 +280,9 @@ int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
                if (dev->driver->dumb_create)
                        req->value = 1;
                break;
+       case DRM_CAP_VBLANK_HIGH_CRTC:
+               req->value = 1;
+               break;
        default:
                return -EINVAL;
        }
index a34ef97d3c817b33012f17d344e5d3f84df0db42..741457bd1c46ef1f54af12296e348a053430329e 100644 (file)
@@ -1125,7 +1125,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 {
        union drm_wait_vblank *vblwait = data;
        int ret = 0;
-       unsigned int flags, seq, crtc;
+       unsigned int flags, seq, crtc, high_crtc;
 
        if ((!drm_dev_to_irq(dev)) || (!dev->irq_enabled))
                return -EINVAL;
@@ -1134,16 +1134,21 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
                return -EINVAL;
 
        if (vblwait->request.type &
-           ~(_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK)) {
+           ~(_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK |
+             _DRM_VBLANK_HIGH_CRTC_MASK)) {
                DRM_ERROR("Unsupported type value 0x%x, supported mask 0x%x\n",
                          vblwait->request.type,
-                         (_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK));
+                         (_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK |
+                          _DRM_VBLANK_HIGH_CRTC_MASK));
                return -EINVAL;
        }
 
        flags = vblwait->request.type & _DRM_VBLANK_FLAGS_MASK;
-       crtc = flags & _DRM_VBLANK_SECONDARY ? 1 : 0;
-
+       high_crtc = (vblwait->request.type & _DRM_VBLANK_HIGH_CRTC_MASK);
+       if (high_crtc)
+               crtc = high_crtc >> _DRM_VBLANK_HIGH_CRTC_SHIFT;
+       else
+               crtc = flags & _DRM_VBLANK_SECONDARY ? 1 : 0;
        if (crtc >= dev->num_crtcs)
                return -EINVAL;
 
index 09e0327fc6cee13288ba6950eb3cf765f6be1146..87c8e29465e30a7963afdd03c1aa0666ed89d55d 100644 (file)
@@ -892,7 +892,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
                seq_printf(m, "Render p-state limit: %d\n",
                           rp_state_limits & 0xff);
                seq_printf(m, "CAGF: %dMHz\n", ((rpstat & GEN6_CAGF_MASK) >>
-                                               GEN6_CAGF_SHIFT) * 100);
+                                               GEN6_CAGF_SHIFT) * 50);
                seq_printf(m, "RP CUR UP EI: %dus\n", rpupei &
                           GEN6_CURICONT_MASK);
                seq_printf(m, "RP CUR UP: %dus\n", rpcurup &
@@ -908,15 +908,15 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 
                max_freq = (rp_state_cap & 0xff0000) >> 16;
                seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
-                          max_freq * 100);
+                          max_freq * 50);
 
                max_freq = (rp_state_cap & 0xff00) >> 8;
                seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
-                          max_freq * 100);
+                          max_freq * 50);
 
                max_freq = rp_state_cap & 0xff;
                seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
-                          max_freq * 100);
+                          max_freq * 50);
 
                __gen6_gt_force_wake_put(dev_priv);
        } else {
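
The *100 to *50 changes here (and the matching one in gen6_enable_rps further down) fix the unit of the GEN6 frequency fields: the RP and CAGF ratios count in steps of 50 MHz, not 100 MHz, so a raw field value of 22 means 1100 MHz. A one-line helper expressing that assumption:

    #include <linux/types.h>

    /* GEN6 RP/CAGF ratio fields are in units of 50 MHz. */
    static inline unsigned int gen6_ratio_to_mhz(u32 ratio)
    {
            return ratio * 50;              /* e.g. 22 -> 1100 MHz */
    }
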
index c4c2855d002d950beb6ecebf3eec0f8d7a8712a1..7ce3f353af33657e612f40887a89aefb9b936dda 100644 (file)
@@ -224,7 +224,7 @@ i915_gem_dumb_create(struct drm_file *file,
                     struct drm_mode_create_dumb *args)
 {
        /* have to work out size/pitch and return them */
-       args->pitch = ALIGN(args->width & ((args->bpp + 1) / 8), 64);
+       args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
        args->size = args->pitch * args->height;
        return i915_gem_create(file, dev,
                               args->size, &args->handle);
@@ -1356,9 +1356,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
        if (!obj->fault_mappable)
                return;
 
-       unmap_mapping_range(obj->base.dev->dev_mapping,
-                           (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
-                           obj->base.size, 1);
+       if (obj->base.dev->dev_mapping)
+               unmap_mapping_range(obj->base.dev->dev_mapping,
+                                   (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
+                                   obj->base.size, 1);
 
        obj->fault_mappable = false;
 }
@@ -1796,8 +1797,10 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
                return;
 
        spin_lock(&file_priv->mm.lock);
-       list_del(&request->client_list);
-       request->file_priv = NULL;
+       if (request->file_priv) {
+               list_del(&request->client_list);
+               request->file_priv = NULL;
+       }
        spin_unlock(&file_priv->mm.lock);
 }
 
@@ -2217,13 +2220,18 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring,
 {
        int ret;
 
+       if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
+               return 0;
+
        trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);
 
        ret = ring->flush(ring, invalidate_domains, flush_domains);
        if (ret)
                return ret;
 
-       i915_gem_process_flushing_list(ring, flush_domains);
+       if (flush_domains & I915_GEM_GPU_DOMAINS)
+               i915_gem_process_flushing_list(ring, flush_domains);
+
        return 0;
 }
 
@@ -2579,8 +2587,23 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
                reg = &dev_priv->fence_regs[obj->fence_reg];
                list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
 
-               if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
-                       pipelined = NULL;
+               if (obj->tiling_changed) {
+                       ret = i915_gem_object_flush_fence(obj, pipelined);
+                       if (ret)
+                               return ret;
+
+                       if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
+                               pipelined = NULL;
+
+                       if (pipelined) {
+                               reg->setup_seqno =
+                                       i915_gem_next_request_seqno(pipelined);
+                               obj->last_fenced_seqno = reg->setup_seqno;
+                               obj->last_fenced_ring = pipelined;
+                       }
+
+                       goto update;
+               }
 
                if (!pipelined) {
                        if (reg->setup_seqno) {
@@ -2599,31 +2622,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
                        ret = i915_gem_object_flush_fence(obj, pipelined);
                        if (ret)
                                return ret;
-               } else if (obj->tiling_changed) {
-                       if (obj->fenced_gpu_access) {
-                               if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
-                                       ret = i915_gem_flush_ring(obj->ring,
-                                                                 0, obj->base.write_domain);
-                                       if (ret)
-                                               return ret;
-                               }
-
-                               obj->fenced_gpu_access = false;
-                       }
-               }
-
-               if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
-                       pipelined = NULL;
-               BUG_ON(!pipelined && reg->setup_seqno);
-
-               if (obj->tiling_changed) {
-                       if (pipelined) {
-                               reg->setup_seqno =
-                                       i915_gem_next_request_seqno(pipelined);
-                               obj->last_fenced_seqno = reg->setup_seqno;
-                               obj->last_fenced_ring = pipelined;
-                       }
-                       goto update;
                }
 
                return 0;
@@ -3606,6 +3604,8 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
                return;
        }
 
+       trace_i915_gem_object_destroy(obj);
+
        if (obj->base.map_list.map)
                i915_gem_free_mmap_offset(obj);
 
@@ -3615,8 +3615,6 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
        kfree(obj->page_cpu_valid);
        kfree(obj->bit_17);
        kfree(obj);
-
-       trace_i915_gem_object_destroy(obj);
 }
 
 void i915_gem_free_object(struct drm_gem_object *gem_obj)
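
Two of the i915_gem.c fixes are worth spelling out. The dumb_create pitch calculation used a bitwise AND where a multiply was intended and did not round bits-per-pixel up to whole bytes: for a 1024-pixel-wide, 32 bpp buffer the old expression gives 1024 & ((32 + 1) / 8) = 1024 & 4 = 0, a zero pitch, while the corrected form gives 1024 * ((32 + 7) / 8) = 4096 bytes, which ALIGN(..., 64) leaves unchanged. unmap_mapping_range() is likewise skipped when dev_mapping has already gone away during teardown. A userspace-style mirror of the corrected pitch math (illustrative only):

    /* Pitch of a dumb buffer: bytes per pixel rounded up, row aligned to 64. */
    static unsigned int dumb_pitch(unsigned int width, unsigned int bpp)
    {
            unsigned int cpp = (bpp + 7) / 8;

            return (width * cpp + 63) & ~63u;
    }
    /* dumb_pitch(1024, 32) == 4096; the old "width & cpp" form yielded 0. */
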
index 7ff7f933ddf17e1395ab7140be773bda10a7132b..20a4cc5b818f51632ad50a94bdfa4092c3f1a454 100644 (file)
@@ -367,6 +367,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                uint32_t __iomem *reloc_entry;
                void __iomem *reloc_page;
 
+               /* We can't wait for rendering with pagefaults disabled */
+               if (obj->active && in_atomic())
+                       return -EFAULT;
+
                ret = i915_gem_object_set_to_gtt_domain(obj, 1);
                if (ret)
                        return ret;
@@ -440,15 +444,24 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
                             struct list_head *objects)
 {
        struct drm_i915_gem_object *obj;
-       int ret;
-
+       int ret = 0;
+
+       /* This is the fast path and we cannot handle a pagefault whilst
+        * holding the struct mutex lest the user pass in the relocations
+        * contained within a mmaped bo. For in such a case, the page
+        * fault handler would call i915_gem_fault() and we would try to
+        * acquire the struct mutex again. Obviously this is bad and so
+        * lockdep complains vehemently.
+        */
+       pagefault_disable();
        list_for_each_entry(obj, objects, exec_list) {
                ret = i915_gem_execbuffer_relocate_object(obj, eb);
                if (ret)
-                       return ret;
+                       break;
        }
+       pagefault_enable();
 
-       return 0;
+       return ret;
 }
 
 static int
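
The execbuffer change makes the relocation walk a real fast path: page faults are disabled around it, so touching user memory that is not resident (possibly a mmapped GEM object whose fault handler would need struct_mutex again) fails immediately with -EFAULT instead of recursing into i915_gem_fault() under the lock. Callers are then expected to drop the mutex and retry through a slower copy that may fault safely. The general idiom, with hypothetical type and helper names:

    #include <linux/uaccess.h>
    #include <linux/mutex.h>

    struct my_dev { struct mutex struct_mutex; /* ... */ };  /* hypothetical */
    int relocate_fast(struct my_dev *dev);                   /* hypothetical */
    int relocate_slow(struct my_dev *dev);                    /* hypothetical */

    static int relocate_objects(struct my_dev *dev)
    {
            int ret;

            pagefault_disable();            /* user copies fail fast with -EFAULT */
            ret = relocate_fast(dev);       /* runs under dev->struct_mutex */
            pagefault_enable();

            if (ret == -EFAULT) {
                    mutex_unlock(&dev->struct_mutex);
                    ret = relocate_slow(dev);   /* may fault and sleep safely */
                    mutex_lock(&dev->struct_mutex);
            }
            return ret;
    }
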
index 3106c0dc838959d8054e9fbd06a244fbf8154b7a..432fc04c6bffc1b58d8da1645a8a5687049a9bc2 100644 (file)
@@ -1516,9 +1516,10 @@ static void intel_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe,
 
        reg = PIPECONF(pipe);
        val = I915_READ(reg);
-       val |= PIPECONF_ENABLE;
-       I915_WRITE(reg, val);
-       POSTING_READ(reg);
+       if (val & PIPECONF_ENABLE)
+               return;
+
+       I915_WRITE(reg, val | PIPECONF_ENABLE);
        intel_wait_for_vblank(dev_priv->dev, pipe);
 }
 
@@ -1552,9 +1553,10 @@ static void intel_disable_pipe(struct drm_i915_private *dev_priv,
 
        reg = PIPECONF(pipe);
        val = I915_READ(reg);
-       val &= ~PIPECONF_ENABLE;
-       I915_WRITE(reg, val);
-       POSTING_READ(reg);
+       if ((val & PIPECONF_ENABLE) == 0)
+               return;
+
+       I915_WRITE(reg, val & ~PIPECONF_ENABLE);
        intel_wait_for_pipe_off(dev_priv->dev, pipe);
 }
 
@@ -1577,9 +1579,10 @@ static void intel_enable_plane(struct drm_i915_private *dev_priv,
 
        reg = DSPCNTR(plane);
        val = I915_READ(reg);
-       val |= DISPLAY_PLANE_ENABLE;
-       I915_WRITE(reg, val);
-       POSTING_READ(reg);
+       if (val & DISPLAY_PLANE_ENABLE)
+               return;
+
+       I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE);
        intel_wait_for_vblank(dev_priv->dev, pipe);
 }
 
@@ -1610,9 +1613,10 @@ static void intel_disable_plane(struct drm_i915_private *dev_priv,
 
        reg = DSPCNTR(plane);
        val = I915_READ(reg);
-       val &= ~DISPLAY_PLANE_ENABLE;
-       I915_WRITE(reg, val);
-       POSTING_READ(reg);
+       if ((val & DISPLAY_PLANE_ENABLE) == 0)
+               return;
+
+       I915_WRITE(reg, val & ~DISPLAY_PLANE_ENABLE);
        intel_flush_display_plane(dev_priv, plane);
        intel_wait_for_vblank(dev_priv->dev, pipe);
 }
@@ -1769,7 +1773,6 @@ static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
                        return;
 
                I915_WRITE(DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN);
-               POSTING_READ(DPFC_CONTROL);
                intel_wait_for_vblank(dev, intel_crtc->pipe);
        }
 
@@ -1861,7 +1864,6 @@ static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
                        return;
 
                I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN);
-               POSTING_READ(ILK_DPFC_CONTROL);
                intel_wait_for_vblank(dev, intel_crtc->pipe);
        }
 
@@ -3883,10 +3885,7 @@ static bool g4x_compute_srwm(struct drm_device *dev,
                              display, cursor);
 }
 
-static inline bool single_plane_enabled(unsigned int mask)
-{
-       return mask && (mask & -mask) == 0;
-}
+#define single_plane_enabled(mask) is_power_of_2(mask)
 
 static void g4x_update_wm(struct drm_device *dev)
 {
@@ -5777,7 +5776,6 @@ static void intel_increase_pllclock(struct drm_crtc *crtc)
 
                dpll &= ~DISPLAY_RATE_SELECT_FPA1;
                I915_WRITE(dpll_reg, dpll);
-               POSTING_READ(dpll_reg);
                intel_wait_for_vblank(dev, pipe);
 
                dpll = I915_READ(dpll_reg);
@@ -5821,7 +5819,6 @@ static void intel_decrease_pllclock(struct drm_crtc *crtc)
 
                dpll |= DISPLAY_RATE_SELECT_FPA1;
                I915_WRITE(dpll_reg, dpll);
-               dpll = I915_READ(dpll_reg);
                intel_wait_for_vblank(dev, pipe);
                dpll = I915_READ(dpll_reg);
                if (!(dpll & DISPLAY_RATE_SELECT_FPA1))
@@ -6933,7 +6930,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
                DRM_ERROR("timeout waiting for pcode mailbox to finish\n");
        if (pcu_mbox & (1<<31)) { /* OC supported */
                max_freq = pcu_mbox & 0xff;
-               DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 100);
+               DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50);
        }
 
        /* In units of 100MHz */
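
One detail in the intel_display.c hunks: the open-coded single_plane_enabled() helper is replaced by is_power_of_2(). "Exactly one plane enabled" means exactly one bit set in the mask, which is what is_power_of_2() tests; the removed inline's "(mask & -mask) == 0" check is never true for a non-zero mask, so as written it always returned false and the single-plane watermark path was never taken. For reference, the kernel helper as defined in <linux/log2.h>:

    static inline __attribute__((const))
    bool is_power_of_2(unsigned long n)
    {
            return (n != 0 && ((n & (n - 1)) == 0));
    }
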
index d29e33f815d7ce8e4392e1c81704e2628c58fe51..0daefca5cbb83591ec83ffc391e3303b2f3ca50b 100644 (file)
@@ -1957,9 +1957,9 @@ intel_dp_init(struct drm_device *dev, int output_reg)
                                        DP_NO_AUX_HANDSHAKE_LINK_TRAINING;
                } else {
                        /* if this fails, presume the device is a ghost */
-                       DRM_ERROR("failed to retrieve link info\n");
-                       intel_dp_destroy(&intel_connector->base);
+                       DRM_INFO("failed to retrieve link info, disabling eDP\n");
                        intel_dp_encoder_destroy(&intel_dp->base.base);
+                       intel_dp_destroy(&intel_connector->base);
                        return;
                }
        }
index 789c47801ba89bf1cf7577b5c46e01cc1f3f0e32..e9e6f71418a43122c3545d712c2ea3268861c2c4 100644 (file)
@@ -65,62 +65,60 @@ render_ring_flush(struct intel_ring_buffer *ring,
        u32 cmd;
        int ret;
 
-       if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
+       /*
+        * read/write caches:
+        *
+        * I915_GEM_DOMAIN_RENDER is always invalidated, but is
+        * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
+        * also flushed at 2d versus 3d pipeline switches.
+        *
+        * read-only caches:
+        *
+        * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
+        * MI_READ_FLUSH is set, and is always flushed on 965.
+        *
+        * I915_GEM_DOMAIN_COMMAND may not exist?
+        *
+        * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
+        * invalidated when MI_EXE_FLUSH is set.
+        *
+        * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
+        * invalidated with every MI_FLUSH.
+        *
+        * TLBs:
+        *
+        * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
+        * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
+        * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
+        * are flushed at any MI_FLUSH.
+        */
+
+       cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
+       if ((invalidate_domains|flush_domains) &
+           I915_GEM_DOMAIN_RENDER)
+               cmd &= ~MI_NO_WRITE_FLUSH;
+       if (INTEL_INFO(dev)->gen < 4) {
                /*
-                * read/write caches:
-                *
-                * I915_GEM_DOMAIN_RENDER is always invalidated, but is
-                * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
-                * also flushed at 2d versus 3d pipeline switches.
-                *
-                * read-only caches:
-                *
-                * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
-                * MI_READ_FLUSH is set, and is always flushed on 965.
-                *
-                * I915_GEM_DOMAIN_COMMAND may not exist?
-                *
-                * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
-                * invalidated when MI_EXE_FLUSH is set.
-                *
-                * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
-                * invalidated with every MI_FLUSH.
-                *
-                * TLBs:
-                *
-                * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
-                * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
-                * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
-                * are flushed at any MI_FLUSH.
+                * On the 965, the sampler cache always gets flushed
+                * and this bit is reserved.
                 */
+               if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
+                       cmd |= MI_READ_FLUSH;
+       }
+       if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
+               cmd |= MI_EXE_FLUSH;
 
-               cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
-               if ((invalidate_domains|flush_domains) &
-                   I915_GEM_DOMAIN_RENDER)
-                       cmd &= ~MI_NO_WRITE_FLUSH;
-               if (INTEL_INFO(dev)->gen < 4) {
-                       /*
-                        * On the 965, the sampler cache always gets flushed
-                        * and this bit is reserved.
-                        */
-                       if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
-                               cmd |= MI_READ_FLUSH;
-               }
-               if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
-                       cmd |= MI_EXE_FLUSH;
-
-               if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
-                   (IS_G4X(dev) || IS_GEN5(dev)))
-                       cmd |= MI_INVALIDATE_ISP;
+       if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
+           (IS_G4X(dev) || IS_GEN5(dev)))
+               cmd |= MI_INVALIDATE_ISP;
 
-               ret = intel_ring_begin(ring, 2);
-               if (ret)
-                       return ret;
+       ret = intel_ring_begin(ring, 2);
+       if (ret)
+               return ret;
 
-               intel_ring_emit(ring, cmd);
-               intel_ring_emit(ring, MI_NOOP);
-               intel_ring_advance(ring);
-       }
+       intel_ring_emit(ring, cmd);
+       intel_ring_emit(ring, MI_NOOP);
+       intel_ring_advance(ring);
 
        return 0;
 }
@@ -568,9 +566,6 @@ bsd_ring_flush(struct intel_ring_buffer *ring,
 {
        int ret;
 
-       if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0)
-               return 0;
-
        ret = intel_ring_begin(ring, 2);
        if (ret)
                return ret;
@@ -1056,9 +1051,6 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
        uint32_t cmd;
        int ret;
 
-       if (((invalidate | flush) & I915_GEM_GPU_DOMAINS) == 0)
-               return 0;
-
        ret = intel_ring_begin(ring, 4);
        if (ret)
                return ret;
@@ -1230,9 +1222,6 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
        uint32_t cmd;
        int ret;
 
-       if (((invalidate | flush) & I915_GEM_DOMAIN_RENDER) == 0)
-               return 0;
-
        ret = blt_ring_begin(ring, 4);
        if (ret)
                return ret;
index 3cd3234ba0af4abbe698a9e8ed02a33a2fe7eb84..10e41af6b0269d24210a945762ddead3e8a26786 100644 (file)
@@ -957,7 +957,11 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode
        /* adjust pixel clock as needed */
        adjusted_clock = atombios_adjust_pll(crtc, mode, pll, ss_enabled, &ss);
 
-       if (ASIC_IS_AVIVO(rdev))
+       if (radeon_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+               /* TV seems to prefer the legacy algo on some boards */
+               radeon_compute_pll_legacy(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div,
+                                         &ref_div, &post_div);
+       else if (ASIC_IS_AVIVO(rdev))
                radeon_compute_pll_avivo(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div,
                                         &ref_div, &post_div);
        else
index cf7c8d5b4ec24a431a81c776c367a1a54e4f29f3..cf602e2d0718ed21539dafd566405c77b3d9a4ac 100644 (file)
@@ -448,7 +448,7 @@ static uint16_t combios_get_table_offset(struct drm_device *dev,
 
 bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)
 {
-       int edid_info;
+       int edid_info, size;
        struct edid *edid;
        unsigned char *raw;
        edid_info = combios_get_table_offset(rdev->ddev, COMBIOS_HARDCODED_EDID_TABLE);
@@ -456,11 +456,12 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)
                return false;
 
        raw = rdev->bios + edid_info;
-       edid = kmalloc(EDID_LENGTH * (raw[0x7e] + 1), GFP_KERNEL);
+       size = EDID_LENGTH * (raw[0x7e] + 1);
+       edid = kmalloc(size, GFP_KERNEL);
        if (edid == NULL)
                return false;
 
-       memcpy((unsigned char *)edid, raw, EDID_LENGTH * (raw[0x7e] + 1));
+       memcpy((unsigned char *)edid, raw, size);
 
        if (!drm_edid_is_valid(edid)) {
                kfree(edid);
@@ -468,6 +469,7 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)
        }
 
        rdev->mode_info.bios_hardcoded_edid = edid;
+       rdev->mode_info.bios_hardcoded_edid_size = size;
        return true;
 }
 
@@ -475,8 +477,17 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)
 struct edid *
 radeon_bios_get_hardcoded_edid(struct radeon_device *rdev)
 {
-       if (rdev->mode_info.bios_hardcoded_edid)
-               return rdev->mode_info.bios_hardcoded_edid;
+       struct edid *edid;
+
+       if (rdev->mode_info.bios_hardcoded_edid) {
+               edid = kmalloc(rdev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL);
+               if (edid) {
+                       memcpy((unsigned char *)edid,
+                              (unsigned char *)rdev->mode_info.bios_hardcoded_edid,
+                              rdev->mode_info.bios_hardcoded_edid_size);
+                       return edid;
+               }
+       }
        return NULL;
 }
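
After this change radeon_bios_get_hardcoded_edid() returns a freshly allocated copy of the hardcoded EDID instead of the cached pointer, so the caller owns the buffer. A hypothetical caller sketch (use_hardcoded_edid() is a stand-in for whatever consumes it):

	struct edid *edid = radeon_bios_get_hardcoded_edid(rdev);

	if (edid) {
		use_hardcoded_edid(connector, edid);	/* hypothetical consumer */
		kfree(edid);				/* caller frees the copy */
	}
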
 
index 28c7961cd19bc986540dd0b0413dba2f5bc94ffe..2ef6d513506404075307c5f5865e091b8e4653ea 100644 (file)
@@ -633,6 +633,8 @@ static int radeon_vga_mode_valid(struct drm_connector *connector,
 static enum drm_connector_status
 radeon_vga_detect(struct drm_connector *connector, bool force)
 {
+       struct drm_device *dev = connector->dev;
+       struct radeon_device *rdev = dev->dev_private;
        struct radeon_connector *radeon_connector = to_radeon_connector(connector);
        struct drm_encoder *encoder;
        struct drm_encoder_helper_funcs *encoder_funcs;
@@ -683,6 +685,17 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
 
        if (ret == connector_status_connected)
                ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, true);
+
+       /* RN50 and some RV100 asics in servers often have a hardcoded EDID in the
+        * vbios to deal with KVMs. If we have one and are not able to detect a monitor
+        * by other means, assume the CRT is connected and use that EDID.
+        */
+       if ((!rdev->is_atom_bios) &&
+           (ret == connector_status_disconnected) &&
+           rdev->mode_info.bios_hardcoded_edid_size) {
+               ret = connector_status_connected;
+       }
+
        radeon_connector_update_scratch_regs(connector, ret);
        return ret;
 }
@@ -794,6 +807,8 @@ static int radeon_dvi_get_modes(struct drm_connector *connector)
 static enum drm_connector_status
 radeon_dvi_detect(struct drm_connector *connector, bool force)
 {
+       struct drm_device *dev = connector->dev;
+       struct radeon_device *rdev = dev->dev_private;
        struct radeon_connector *radeon_connector = to_radeon_connector(connector);
        struct drm_encoder *encoder = NULL;
        struct drm_encoder_helper_funcs *encoder_funcs;
@@ -833,8 +848,6 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
                         * you don't really know what's connected to which port as both are digital.
                         */
                        if (radeon_connector->shared_ddc && (ret == connector_status_connected)) {
-                               struct drm_device *dev = connector->dev;
-                               struct radeon_device *rdev = dev->dev_private;
                                struct drm_connector *list_connector;
                                struct radeon_connector *list_radeon_connector;
                                list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) {
@@ -899,6 +912,19 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
                ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, true);
        }
 
+       /* RN50 and some RV100 asics in servers often have a hardcoded EDID in the
+        * vbios to deal with KVMs. If we have one and are not able to detect a monitor
+        * by other means, assume the DFP is connected and use that EDID.  In most
+        * cases the DVI port is actually a virtual KVM port connected to the service
+        * processor.
+        */
+       if ((!rdev->is_atom_bios) &&
+           (ret == connector_status_disconnected) &&
+           rdev->mode_info.bios_hardcoded_edid_size) {
+               radeon_connector->use_digital = true;
+               ret = connector_status_connected;
+       }
+
 out:
        /* updated in get modes as well since we need to know if it's analog or digital */
        radeon_connector_update_scratch_regs(connector, ret);
index e4582814bb78d604db3184e7134690447211e3ea..9c57538231d5f2c9b42c1516797472e149583f04 100644 (file)
@@ -239,6 +239,7 @@ struct radeon_mode_info {
        struct drm_property *underscan_vborder_property;
        /* hardcoded DFP edid from BIOS */
        struct edid *bios_hardcoded_edid;
+       int bios_hardcoded_edid_size;
 
        /* pointer to fbdev info structure */
        struct radeon_fbdev *rfbdev;
index 2aed03bde4b2ad3bf40963d7acc1f66afa843da2..08de669e025ab9bddf0bd118a457288bbb2b49e0 100644 (file)
@@ -365,12 +365,14 @@ static ssize_t radeon_set_pm_profile(struct device *dev,
                else if (strncmp("high", buf, strlen("high")) == 0)
                        rdev->pm.profile = PM_PROFILE_HIGH;
                else {
-                       DRM_ERROR("invalid power profile!\n");
+                       count = -EINVAL;
                        goto fail;
                }
                radeon_pm_update_profile(rdev);
                radeon_pm_set_clocks(rdev);
-       }
+       } else
+               count = -EINVAL;
+
 fail:
        mutex_unlock(&rdev->pm.mutex);
 
@@ -413,7 +415,7 @@ static ssize_t radeon_set_pm_method(struct device *dev,
                mutex_unlock(&rdev->pm.mutex);
                cancel_delayed_work_sync(&rdev->pm.dynpm_idle_work);
        } else {
-               DRM_ERROR("invalid power method!\n");
+               count = -EINVAL;
                goto fail;
        }
        radeon_pm_compute_clocks(rdev);
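
Both hunks above stop logging unknown sysfs input with DRM_ERROR and instead fail the write by returning -EINVAL through count, the usual ->store() convention. A minimal sketch of that convention with a hypothetical attribute:

	static ssize_t example_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t count)
	{
		if (strncmp("low", buf, strlen("low")) &&
		    strncmp("high", buf, strlen("high")))
			return -EINVAL;		/* reject unknown input, no log spam */
		/* ... apply the requested setting ... */
		return count;			/* whole write consumed on success   */
	}
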
index e88a2cf177110f37fdd6bfc6d1801184ec2b94f4..6f218e014e9940a4021533159dd72e9b64fac5f5 100644 (file)
@@ -233,8 +233,7 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special)
 
        drive->hwif->rq = NULL;
 
-       elv_add_request(drive->queue, &drive->sense_rq,
-                       ELEVATOR_INSERT_FRONT, 0);
+       elv_add_request(drive->queue, &drive->sense_rq, ELEVATOR_INSERT_FRONT);
        return 0;
 }
 EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
index 0c73fe39a236bb6d6e05f37814c646098a57b715..fd1e117991373853d5b51d8827f99157fabd3b55 100644 (file)
@@ -258,17 +258,10 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq)
        if (time_after(jiffies, info->write_timeout))
                return 0;
        else {
-               struct request_queue *q = drive->queue;
-               unsigned long flags;
-
                /*
-                * take a breather relying on the unplug timer to kick us again
+                * take a breather
                 */
-
-               spin_lock_irqsave(q->queue_lock, flags);
-               blk_plug_device(q);
-               spin_unlock_irqrestore(q->queue_lock, flags);
-
+               blk_delay_queue(drive->queue, 1);
                return 1;
        }
 }
@@ -1177,7 +1170,7 @@ static struct cdrom_device_ops ide_cdrom_dops = {
        .open                   = ide_cdrom_open_real,
        .release                = ide_cdrom_release_real,
        .drive_status           = ide_cdrom_drive_status,
-       .media_changed          = ide_cdrom_check_media_change_real,
+       .check_events           = ide_cdrom_check_events_real,
        .tray_move              = ide_cdrom_tray_move,
        .lock_door              = ide_cdrom_lock_door,
        .select_speed           = ide_cdrom_select_speed,
@@ -1514,8 +1507,6 @@ static int ide_cdrom_setup(ide_drive_t *drive)
        blk_queue_dma_alignment(q, 31);
        blk_queue_update_dma_pad(q, 15);
 
-       q->unplug_delay = max((1 * HZ) / 1000, 1);
-
        drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
        drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id);
 
@@ -1702,10 +1693,11 @@ static int idecd_ioctl(struct block_device *bdev, fmode_t mode,
 }
 
 
-static int idecd_media_changed(struct gendisk *disk)
+static unsigned int idecd_check_events(struct gendisk *disk,
+                                      unsigned int clearing)
 {
        struct cdrom_info *info = ide_drv_g(disk, cdrom_info);
-       return cdrom_media_changed(&info->devinfo);
+       return cdrom_check_events(&info->devinfo, clearing);
 }
 
 static int idecd_revalidate_disk(struct gendisk *disk)
@@ -1723,7 +1715,7 @@ static const struct block_device_operations idecd_ops = {
        .open                   = idecd_open,
        .release                = idecd_release,
        .ioctl                  = idecd_ioctl,
-       .media_changed          = idecd_media_changed,
+       .check_events           = idecd_check_events,
        .revalidate_disk        = idecd_revalidate_disk
 };
 
@@ -1790,6 +1782,7 @@ static int ide_cd_probe(ide_drive_t *drive)
        ide_cd_read_toc(drive, &sense);
        g->fops = &idecd_ops;
        g->flags |= GENHD_FL_REMOVABLE;
+       g->events = DISK_EVENT_MEDIA_CHANGE;
        add_disk(g);
        return 0;
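
The ide-cd conversion above replaces ->media_changed with ->check_events plus DISK_EVENT_MEDIA_CHANGE on the gendisk, and swaps the old unplug-timer "breather" for blk_delay_queue(), which simply re-runs the queue after the given delay. A minimal sketch of that delayed-retry pattern (drive_is_busy() is a hypothetical condition):

	/* sketch: back off and let the block layer retry dispatch shortly */
	static int cd_breathe(ide_drive_t *drive)
	{
		if (drive_is_busy(drive)) {
			blk_delay_queue(drive->queue, 1);	/* retry in ~1 ms */
			return 1;
		}
		return 0;
	}
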
 
index 93a3cf1b0f3f8c61b974b9b16a190502f5d4d038..1efc936f5b6672e268a38db439c63831fd59b71f 100644 (file)
@@ -111,7 +111,8 @@ int cdrom_check_status(ide_drive_t *, struct request_sense *);
 int ide_cdrom_open_real(struct cdrom_device_info *, int);
 void ide_cdrom_release_real(struct cdrom_device_info *);
 int ide_cdrom_drive_status(struct cdrom_device_info *, int);
-int ide_cdrom_check_media_change_real(struct cdrom_device_info *, int);
+unsigned int ide_cdrom_check_events_real(struct cdrom_device_info *,
+                                        unsigned int clearing, int slot_nr);
 int ide_cdrom_tray_move(struct cdrom_device_info *, int);
 int ide_cdrom_lock_door(struct cdrom_device_info *, int);
 int ide_cdrom_select_speed(struct cdrom_device_info *, int);
index 766b3deeb23c75f51eb5618a90e891d2fb220d90..2a6bc50e8a41e8918da5ad8aca49686d68da318c 100644 (file)
@@ -79,8 +79,8 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr)
        return CDS_DRIVE_NOT_READY;
 }
 
-int ide_cdrom_check_media_change_real(struct cdrom_device_info *cdi,
-                                      int slot_nr)
+unsigned int ide_cdrom_check_events_real(struct cdrom_device_info *cdi,
+                                        unsigned int clearing, int slot_nr)
 {
        ide_drive_t *drive = cdi->handle;
        int retval;
@@ -89,9 +89,9 @@ int ide_cdrom_check_media_change_real(struct cdrom_device_info *cdi,
                (void) cdrom_check_status(drive, NULL);
                retval = (drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED) ? 1 : 0;
                drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
-               return retval;
+               return retval ? DISK_EVENT_MEDIA_CHANGE : 0;
        } else {
-               return -EINVAL;
+               return 0;
        }
 }
 
index 35c4b43585e3418ebb7ad1b74c94550d5fbcfb86..c4ffd4888939a7f4e67ee7f1191a08d485c85e44 100644 (file)
@@ -285,11 +285,12 @@ static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
        return 0;
 }
 
-static int ide_gd_media_changed(struct gendisk *disk)
+static unsigned int ide_gd_check_events(struct gendisk *disk,
+                                       unsigned int clearing)
 {
        struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
        ide_drive_t *drive = idkp->drive;
-       int ret;
+       bool ret;
 
        /* do not scan partitions twice if this is a removable device */
        if (drive->dev_flags & IDE_DFLAG_ATTACH) {
@@ -297,10 +298,10 @@ static int ide_gd_media_changed(struct gendisk *disk)
                return 0;
        }
 
-       ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
+       ret = drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED;
        drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
 
-       return ret;
+       return ret ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static void ide_gd_unlock_native_capacity(struct gendisk *disk)
@@ -318,7 +319,7 @@ static int ide_gd_revalidate_disk(struct gendisk *disk)
        struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
        ide_drive_t *drive = idkp->drive;
 
-       if (ide_gd_media_changed(disk))
+       if (ide_gd_check_events(disk, 0))
                drive->disk_ops->get_capacity(drive);
 
        set_capacity(disk, ide_gd_capacity(drive));
@@ -340,7 +341,7 @@ static const struct block_device_operations ide_gd_ops = {
        .release                = ide_gd_release,
        .ioctl                  = ide_gd_ioctl,
        .getgeo                 = ide_gd_getgeo,
-       .media_changed          = ide_gd_media_changed,
+       .check_events           = ide_gd_check_events,
        .unlock_native_capacity = ide_gd_unlock_native_capacity,
        .revalidate_disk        = ide_gd_revalidate_disk
 };
@@ -412,6 +413,7 @@ static int ide_gd_probe(ide_drive_t *drive)
        if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
                g->flags = GENHD_FL_REMOVABLE;
        g->fops = &ide_gd_ops;
+       g->events = DISK_EVENT_MEDIA_CHANGE;
        add_disk(g);
        return 0;
 
index 999dac054bccc708dee92756b2ec4eb361e590bd..f4077840d3abdbc027585617b8379304865dae81 100644 (file)
@@ -549,8 +549,6 @@ plug_device_2:
 
        if (rq)
                blk_requeue_request(q, rq);
-       if (!elv_queue_empty(q))
-               blk_plug_device(q);
 }
 
 void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
@@ -562,8 +560,6 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
 
        if (rq)
                blk_requeue_request(q, rq);
-       if (!elv_queue_empty(q))
-               blk_plug_device(q);
 
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
index 88a380c5a4708bd9f396a52da5dd47128137f74f..6ab9ab2a5081eba538b43abf738f549562cb166b 100644 (file)
@@ -52,7 +52,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
        rq->cmd[0] = REQ_UNPARK_HEADS;
        rq->cmd_len = 1;
        rq->cmd_type = REQ_TYPE_SPECIAL;
-       elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1);
+       elv_add_request(q, rq, ELEVATOR_INSERT_FRONT);
 
 out:
        return;
index a2ce0b2da281855a583dc0119621ca098d272651..5c9362792f1d928d71ded12a61b919c9ee25e7be 100644 (file)
@@ -347,7 +347,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
                        atomic_inc(&bitmap->pending_writes);
                        set_buffer_locked(bh);
                        set_buffer_mapped(bh);
-                       submit_bh(WRITE | REQ_UNPLUG | REQ_SYNC, bh);
+                       submit_bh(WRITE | REQ_SYNC, bh);
                        bh = bh->b_this_page;
                }
 
@@ -1339,8 +1339,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
                        prepare_to_wait(&bitmap->overflow_wait, &__wait,
                                        TASK_UNINTERRUPTIBLE);
                        spin_unlock_irq(&bitmap->lock);
-                       md_unplug(bitmap->mddev);
-                       schedule();
+                       io_schedule();
                        finish_wait(&bitmap->overflow_wait, &__wait);
                        continue;
                }
index 4e054bd91664a8047968d51ec313958013b4a8af..2c62c1169f78afa27109532d1970ec201d082cae 100644 (file)
@@ -991,11 +991,6 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
        clone->bi_destructor = dm_crypt_bio_destructor;
 }
 
-static void kcryptd_unplug(struct crypt_config *cc)
-{
-       blk_unplug(bdev_get_queue(cc->dev->bdev));
-}
-
 static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 {
        struct crypt_config *cc = io->target->private;
@@ -1008,10 +1003,8 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
         * one in order to decrypt the whole bio data *afterwards*.
         */
        clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs);
-       if (!clone) {
-               kcryptd_unplug(cc);
+       if (!clone)
                return 1;
-       }
 
        crypt_inc_pending(io);
 
index 136d4f71a1162509abc40cd10b96ed6085c75186..76a5af00a26b5b52ce45a72bc0e58d6744d33468 100644 (file)
@@ -352,7 +352,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
        BUG_ON(num_regions > DM_IO_MAX_REGIONS);
 
        if (sync)
-               rw |= REQ_SYNC | REQ_UNPLUG;
+               rw |= REQ_SYNC;
 
        /*
         * For multiple regions we need to be careful to rewind
index 924f5f0084c27191604907eaa9983a2bf687b6c1..1bb73a13ca4003d841446efbe70ffda93a62061d 100644 (file)
@@ -37,13 +37,6 @@ struct dm_kcopyd_client {
        unsigned int nr_pages;
        unsigned int nr_free_pages;
 
-       /*
-        * Block devices to unplug.
-        * Non-NULL pointer means that a block device has some pending requests
-        * and needs to be unplugged.
-        */
-       struct block_device *unplug[2];
-
        struct dm_io_client *io_client;
 
        wait_queue_head_t destroyq;
@@ -315,31 +308,6 @@ static int run_complete_job(struct kcopyd_job *job)
        return 0;
 }
 
-/*
- * Unplug the block device at the specified index.
- */
-static void unplug(struct dm_kcopyd_client *kc, int rw)
-{
-       if (kc->unplug[rw] != NULL) {
-               blk_unplug(bdev_get_queue(kc->unplug[rw]));
-               kc->unplug[rw] = NULL;
-       }
-}
-
-/*
- * Prepare block device unplug. If there's another device
- * to be unplugged at the same array index, we unplug that
- * device first.
- */
-static void prepare_unplug(struct dm_kcopyd_client *kc, int rw,
-                          struct block_device *bdev)
-{
-       if (likely(kc->unplug[rw] == bdev))
-               return;
-       unplug(kc, rw);
-       kc->unplug[rw] = bdev;
-}
-
 static void complete_io(unsigned long error, void *context)
 {
        struct kcopyd_job *job = (struct kcopyd_job *) context;
@@ -386,16 +354,10 @@ static int run_io_job(struct kcopyd_job *job)
                .client = job->kc->io_client,
        };
 
-       if (job->rw == READ) {
+       if (job->rw == READ)
                r = dm_io(&io_req, 1, &job->source, NULL);
-               prepare_unplug(job->kc, READ, job->source.bdev);
-       } else {
-               if (job->num_dests > 1)
-                       io_req.bi_rw |= REQ_UNPLUG;
+       else
                r = dm_io(&io_req, job->num_dests, job->dests, NULL);
-               if (!(io_req.bi_rw & REQ_UNPLUG))
-                       prepare_unplug(job->kc, WRITE, job->dests[0].bdev);
-       }
 
        return r;
 }
@@ -466,6 +428,7 @@ static void do_work(struct work_struct *work)
 {
        struct dm_kcopyd_client *kc = container_of(work,
                                        struct dm_kcopyd_client, kcopyd_work);
+       struct blk_plug plug;
 
        /*
         * The order that these are called is *very* important.
@@ -473,18 +436,12 @@ static void do_work(struct work_struct *work)
         * Pages jobs when successful will jump onto the io jobs
         * list.  io jobs call wake when they complete and it all
         * starts again.
-        *
-        * Note that io_jobs add block devices to the unplug array,
-        * this array is cleared with "unplug" calls. It is thus
-        * forbidden to run complete_jobs after io_jobs and before
-        * unplug because the block device could be destroyed in
-        * job completion callback.
         */
+       blk_start_plug(&plug);
        process_jobs(&kc->complete_jobs, kc, run_complete_job);
        process_jobs(&kc->pages_jobs, kc, run_pages_job);
        process_jobs(&kc->io_jobs, kc, run_io_job);
-       unplug(kc, READ);
-       unplug(kc, WRITE);
+       blk_finish_plug(&plug);
 }
 
 /*
@@ -665,8 +622,6 @@ int dm_kcopyd_client_create(unsigned int nr_pages,
        INIT_LIST_HEAD(&kc->io_jobs);
        INIT_LIST_HEAD(&kc->pages_jobs);
 
-       memset(kc->unplug, 0, sizeof(kc->unplug));
-
        kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
        if (!kc->job_pool)
                goto bad_slab;
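
The kcopyd rework above drops the hand-rolled per-client unplug bookkeeping in favour of the on-stack plugging this series introduces: requests submitted between blk_start_plug() and blk_finish_plug() are batched on the task and dispatched when the plug is finished (or when the task sleeps). A minimal sketch of the pattern with a hypothetical submission loop:

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	/* sketch: batch a burst of bios behind an on-stack plug */
	static void submit_batch(struct bio **bios, int nr)
	{
		struct blk_plug plug;
		int i;

		blk_start_plug(&plug);			/* start per-task plugging       */
		for (i = 0; i < nr; i++)
			submit_bio(READ, bios[i]);	/* queued behind the plug        */
		blk_finish_plug(&plug);			/* flush everything queued above */
	}
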
index b9e1e15ef11cb3d45f53b8c06a0dd6b709fa1164..5ef136cdba91dd4a891f00920029962a54185d63 100644 (file)
@@ -394,7 +394,7 @@ static void raid_unplug(struct dm_target_callbacks *cb)
 {
        struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
 
-       md_raid5_unplug_device(rs->md.private);
+       md_raid5_kick_device(rs->md.private);
 }
 
 /*
index dee326775c6064b045c8cf523a25a528d9882caf..976ad4688afc2ee03189e208663429a74f759109 100644 (file)
@@ -842,8 +842,6 @@ static void do_mirror(struct work_struct *work)
        do_reads(ms, &reads);
        do_writes(ms, &writes);
        do_failures(ms, &failures);
-
-       dm_table_unplug_all(ms->ti->table);
 }
 
 /*-----------------------------------------------------------------
index 38e4eb1bb9656ba565150a48d68594836f51c4f9..416d4e258df6df536d2cd7d38795e8e58b94efbc 100644 (file)
@@ -55,6 +55,7 @@ struct dm_table {
        struct dm_target *targets;
 
        unsigned discards_supported:1;
+       unsigned integrity_supported:1;
 
        /*
         * Indicates the rw permissions for the new logical
@@ -859,7 +860,7 @@ int dm_table_alloc_md_mempools(struct dm_table *t)
                return -EINVAL;
        }
 
-       t->mempools = dm_alloc_md_mempools(type);
+       t->mempools = dm_alloc_md_mempools(type, t->integrity_supported);
        if (!t->mempools)
                return -ENOMEM;
 
@@ -935,8 +936,10 @@ static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device
        struct dm_dev_internal *dd;
 
        list_for_each_entry(dd, devices, list)
-               if (bdev_get_integrity(dd->dm_dev.bdev))
+               if (bdev_get_integrity(dd->dm_dev.bdev)) {
+                       t->integrity_supported = 1;
                        return blk_integrity_register(dm_disk(md), NULL);
+               }
 
        return 0;
 }
@@ -1275,29 +1278,6 @@ int dm_table_any_busy_target(struct dm_table *t)
        return 0;
 }
 
-void dm_table_unplug_all(struct dm_table *t)
-{
-       struct dm_dev_internal *dd;
-       struct list_head *devices = dm_table_get_devices(t);
-       struct dm_target_callbacks *cb;
-
-       list_for_each_entry(dd, devices, list) {
-               struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
-               char b[BDEVNAME_SIZE];
-
-               if (likely(q))
-                       blk_unplug(q);
-               else
-                       DMWARN_LIMIT("%s: Cannot unplug nonexistent device %s",
-                                    dm_device_name(t->md),
-                                    bdevname(dd->dm_dev.bdev, b));
-       }
-
-       list_for_each_entry(cb, &t->target_callbacks, list)
-               if (cb->unplug_fn)
-                       cb->unplug_fn(cb);
-}
-
 struct mapped_device *dm_table_get_md(struct dm_table *t)
 {
        return t->md;
@@ -1345,4 +1325,3 @@ EXPORT_SYMBOL(dm_table_get_mode);
 EXPORT_SYMBOL(dm_table_get_md);
 EXPORT_SYMBOL(dm_table_put);
 EXPORT_SYMBOL(dm_table_get);
-EXPORT_SYMBOL(dm_table_unplug_all);
index eaa3af0e0632af0c96c5c17566024265578e2303..0cf68b478878f327598fb756fe2da173d5443468 100644 (file)
@@ -477,7 +477,8 @@ static void start_io_acct(struct dm_io *io)
        cpu = part_stat_lock();
        part_round_stats(cpu, &dm_disk(md)->part0);
        part_stat_unlock();
-       dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]);
+       atomic_set(&dm_disk(md)->part0.in_flight[rw],
+               atomic_inc_return(&md->pending[rw]));
 }
 
 static void end_io_acct(struct dm_io *io)
@@ -497,8 +498,8 @@ static void end_io_acct(struct dm_io *io)
         * After this is decremented the bio must not be touched if it is
         * a flush.
         */
-       dm_disk(md)->part0.in_flight[rw] = pending =
-               atomic_dec_return(&md->pending[rw]);
+       pending = atomic_dec_return(&md->pending[rw]);
+       atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
        pending += atomic_read(&md->pending[rw^0x1]);
 
        /* nudge anyone waiting on suspend queue */
@@ -807,8 +808,6 @@ void dm_requeue_unmapped_request(struct request *clone)
        dm_unprep_request(rq);
 
        spin_lock_irqsave(q->queue_lock, flags);
-       if (elv_queue_empty(q))
-               blk_plug_device(q);
        blk_requeue_request(q, rq);
        spin_unlock_irqrestore(q->queue_lock, flags);
 
@@ -1613,10 +1612,10 @@ static void dm_request_fn(struct request_queue *q)
         * number of in-flight I/Os after the queue is stopped in
         * dm_suspend().
         */
-       while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
+       while (!blk_queue_stopped(q)) {
                rq = blk_peek_request(q);
                if (!rq)
-                       goto plug_and_out;
+                       goto delay_and_out;
 
                /* always use block 0 to find the target for flushes for now */
                pos = 0;
@@ -1627,7 +1626,7 @@ static void dm_request_fn(struct request_queue *q)
                BUG_ON(!dm_target_is_valid(ti));
 
                if (ti->type->busy && ti->type->busy(ti))
-                       goto plug_and_out;
+                       goto delay_and_out;
 
                blk_start_request(rq);
                clone = rq->special;
@@ -1647,11 +1646,8 @@ requeued:
        BUG_ON(!irqs_disabled());
        spin_lock(q->queue_lock);
 
-plug_and_out:
-       if (!elv_queue_empty(q))
-               /* Some requests still remain, retry later */
-               blk_plug_device(q);
-
+delay_and_out:
+       blk_delay_queue(q, HZ / 10);
 out:
        dm_table_put(map);
 
@@ -1680,20 +1676,6 @@ static int dm_lld_busy(struct request_queue *q)
        return r;
 }
 
-static void dm_unplug_all(struct request_queue *q)
-{
-       struct mapped_device *md = q->queuedata;
-       struct dm_table *map = dm_get_live_table(md);
-
-       if (map) {
-               if (dm_request_based(md))
-                       generic_unplug_device(q);
-
-               dm_table_unplug_all(map);
-               dm_table_put(map);
-       }
-}
-
 static int dm_any_congested(void *congested_data, int bdi_bits)
 {
        int r = bdi_bits;
@@ -1817,7 +1799,6 @@ static void dm_init_md_queue(struct mapped_device *md)
        md->queue->backing_dev_info.congested_data = md;
        blk_queue_make_request(md->queue, dm_request);
        blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
-       md->queue->unplug_fn = dm_unplug_all;
        blk_queue_merge_bvec(md->queue, dm_merge_bvec);
        blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
 }
@@ -2263,8 +2244,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
        int r = 0;
        DECLARE_WAITQUEUE(wait, current);
 
-       dm_unplug_all(md->queue);
-
        add_wait_queue(&md->wait, &wait);
 
        while (1) {
@@ -2539,7 +2518,6 @@ int dm_resume(struct mapped_device *md)
 
        clear_bit(DMF_SUSPENDED, &md->flags);
 
-       dm_table_unplug_all(map);
        r = 0;
 out:
        dm_table_put(map);
@@ -2643,9 +2621,10 @@ int dm_noflush_suspending(struct dm_target *ti)
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type)
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
 {
        struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
+       unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS;
 
        if (!pools)
                return NULL;
@@ -2662,13 +2641,18 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type)
        if (!pools->tio_pool)
                goto free_io_pool_and_out;
 
-       pools->bs = (type == DM_TYPE_BIO_BASED) ?
-                   bioset_create(16, 0) : bioset_create(MIN_IOS, 0);
+       pools->bs = bioset_create(pool_size, 0);
        if (!pools->bs)
                goto free_tio_pool_and_out;
 
+       if (integrity && bioset_integrity_create(pools->bs, pool_size))
+               goto free_bioset_and_out;
+
        return pools;
 
+free_bioset_and_out:
+       bioset_free(pools->bs);
+
 free_tio_pool_and_out:
        mempool_destroy(pools->tio_pool);
 
index 0c2dd5f4af7658936a2c44bfa62a6aa774e56723..1aaf16746da86f6bfb9c9ffaab334841c044d92a 100644 (file)
@@ -149,7 +149,7 @@ void dm_kcopyd_exit(void);
 /*
  * Mempool operations
  */
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type);
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity);
 void dm_free_md_mempools(struct dm_md_mempools *pools);
 
 #endif
index 0ed7f6bc2a7fb4ea9cd30d7879d4d4d1ab7a134e..abfb59a61ede75a25bc8f459456ed57eaf7acb71 100644 (file)
@@ -87,22 +87,6 @@ static int linear_mergeable_bvec(struct request_queue *q,
        return maxsectors << 9;
 }
 
-static void linear_unplug(struct request_queue *q)
-{
-       mddev_t *mddev = q->queuedata;
-       linear_conf_t *conf;
-       int i;
-
-       rcu_read_lock();
-       conf = rcu_dereference(mddev->private);
-
-       for (i=0; i < mddev->raid_disks; i++) {
-               struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
-               blk_unplug(r_queue);
-       }
-       rcu_read_unlock();
-}
-
 static int linear_congested(void *data, int bits)
 {
        mddev_t *mddev = data;
@@ -224,11 +208,9 @@ static int linear_run (mddev_t *mddev)
        md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 
        blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
-       mddev->queue->unplug_fn = linear_unplug;
        mddev->queue->backing_dev_info.congested_fn = linear_congested;
        mddev->queue->backing_dev_info.congested_data = mddev;
-       md_integrity_register(mddev);
-       return 0;
+       return md_integrity_register(mddev);
 }
 
 static void free_conf(struct rcu_head *head)
index d5ad7723b1727ea0609241ffd77b9b99ac5a009a..06ecea751a39b8ec53935bcdfb331453d2e01b19 100644 (file)
@@ -780,8 +780,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
        bio->bi_end_io = super_written;
 
        atomic_inc(&mddev->pending_writes);
-       submit_bio(REQ_WRITE | REQ_SYNC | REQ_UNPLUG | REQ_FLUSH | REQ_FUA,
-                  bio);
+       submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio);
 }
 
 void md_super_wait(mddev_t *mddev)
@@ -809,7 +808,7 @@ int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
        struct completion event;
        int ret;
 
-       rw |= REQ_SYNC | REQ_UNPLUG;
+       rw |= REQ_SYNC;
 
        bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
                rdev->meta_bdev : rdev->bdev;
@@ -1804,8 +1803,12 @@ int md_integrity_register(mddev_t *mddev)
                        mdname(mddev));
                return -EINVAL;
        }
-       printk(KERN_NOTICE "md: data integrity on %s enabled\n",
-               mdname(mddev));
+       printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
+       if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
+               printk(KERN_ERR "md: failed to create integrity pool for %s\n",
+                      mdname(mddev));
+               return -EINVAL;
+       }
        return 0;
 }
 EXPORT_SYMBOL(md_integrity_register);
@@ -4817,7 +4820,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                __md_stop_writes(mddev);
                md_stop(mddev);
                mddev->queue->merge_bvec_fn = NULL;
-               mddev->queue->unplug_fn = NULL;
                mddev->queue->backing_dev_info.congested_fn = NULL;
 
                /* tell userspace to handle 'inactive' */
@@ -6692,8 +6694,6 @@ EXPORT_SYMBOL_GPL(md_allow_write);
 
 void md_unplug(mddev_t *mddev)
 {
-       if (mddev->queue)
-               blk_unplug(mddev->queue);
        if (mddev->plug)
                mddev->plug->unplug_fn(mddev->plug);
 }
@@ -6876,7 +6876,6 @@ void md_do_sync(mddev_t *mddev)
                     >= mddev->resync_max - mddev->curr_resync_completed
                            )) {
                        /* time to update curr_resync_completed */
-                       md_unplug(mddev);
                        wait_event(mddev->recovery_wait,
                                   atomic_read(&mddev->recovery_active) == 0);
                        mddev->curr_resync_completed = j;
@@ -6952,7 +6951,6 @@ void md_do_sync(mddev_t *mddev)
                 * about not overloading the IO subsystem. (things like an
                 * e2fsck being done on the RAID array should execute fast)
                 */
-               md_unplug(mddev);
                cond_resched();
 
                currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
@@ -6971,8 +6969,6 @@ void md_do_sync(mddev_t *mddev)
         * this also signals 'finished resyncing' to md_stop
         */
  out:
-       md_unplug(mddev);
-
        wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
        /* tell personality that we are finished */
index 3a62d440e27b8105bb778d36950575f77fdd28a3..c35890990985632646483a97e849277023850b1a 100644 (file)
@@ -106,36 +106,6 @@ static void multipath_end_request(struct bio *bio, int error)
        rdev_dec_pending(rdev, conf->mddev);
 }
 
-static void unplug_slaves(mddev_t *mddev)
-{
-       multipath_conf_t *conf = mddev->private;
-       int i;
-
-       rcu_read_lock();
-       for (i=0; i<mddev->raid_disks; i++) {
-               mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
-               if (rdev && !test_bit(Faulty, &rdev->flags)
-                   && atomic_read(&rdev->nr_pending)) {
-                       struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
-                       atomic_inc(&rdev->nr_pending);
-                       rcu_read_unlock();
-
-                       blk_unplug(r_queue);
-
-                       rdev_dec_pending(rdev, mddev);
-                       rcu_read_lock();
-               }
-       }
-       rcu_read_unlock();
-}
-
-static void multipath_unplug(struct request_queue *q)
-{
-       unplug_slaves(q->queuedata);
-}
-
-
 static int multipath_make_request(mddev_t *mddev, struct bio * bio)
 {
        multipath_conf_t *conf = mddev->private;
@@ -345,7 +315,7 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
                        p->rdev = rdev;
                        goto abort;
                }
-               md_integrity_register(mddev);
+               err = md_integrity_register(mddev);
        }
 abort:
 
@@ -517,10 +487,12 @@ static int multipath_run (mddev_t *mddev)
         */
        md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
 
-       mddev->queue->unplug_fn = multipath_unplug;
        mddev->queue->backing_dev_info.congested_fn = multipath_congested;
        mddev->queue->backing_dev_info.congested_data = mddev;
-       md_integrity_register(mddev);
+
+       if (md_integrity_register(mddev))
+               goto out_free_conf;
+
        return 0;
 
 out_free_conf:
index c0ac457f1218ca52a2d6d237be051a70198f0936..e86bf3682e1e29c65b61f0ca7d3b00f94533046a 100644 (file)
 #include "raid0.h"
 #include "raid5.h"
 
-static void raid0_unplug(struct request_queue *q)
-{
-       mddev_t *mddev = q->queuedata;
-       raid0_conf_t *conf = mddev->private;
-       mdk_rdev_t **devlist = conf->devlist;
-       int raid_disks = conf->strip_zone[0].nb_dev;
-       int i;
-
-       for (i=0; i < raid_disks; i++) {
-               struct request_queue *r_queue = bdev_get_queue(devlist[i]->bdev);
-
-               blk_unplug(r_queue);
-       }
-}
-
 static int raid0_congested(void *data, int bits)
 {
        mddev_t *mddev = data;
@@ -272,7 +257,6 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
                       mdname(mddev),
                       (unsigned long long)smallest->sectors);
        }
-       mddev->queue->unplug_fn = raid0_unplug;
        mddev->queue->backing_dev_info.congested_fn = raid0_congested;
        mddev->queue->backing_dev_info.congested_data = mddev;
 
@@ -395,8 +379,7 @@ static int raid0_run(mddev_t *mddev)
 
        blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
        dump_zones(mddev);
-       md_integrity_register(mddev);
-       return 0;
+       return md_integrity_register(mddev);
 }
 
 static int raid0_stop(mddev_t *mddev)
index 06cd712807d0c2d81c053e352a5f0cc2e73ce1b3..c2a21ae56d977d6249f9c8501f124e50c48f15dd 100644 (file)
 #define        NR_RAID1_BIOS 256
 
 
-static void unplug_slaves(mddev_t *mddev);
-
 static void allow_barrier(conf_t *conf);
 static void lower_barrier(conf_t *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
        struct pool_info *pi = data;
-       r1bio_t *r1_bio;
        int size = offsetof(r1bio_t, bios[pi->raid_disks]);
 
        /* allocate a r1bio with room for raid_disks entries in the bios array */
-       r1_bio = kzalloc(size, gfp_flags);
-       if (!r1_bio && pi->mddev)
-               unplug_slaves(pi->mddev);
-
-       return r1_bio;
+       return kzalloc(size, gfp_flags);
 }
 
 static void r1bio_pool_free(void *r1_bio, void *data)
@@ -91,10 +84,8 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
        int i, j;
 
        r1_bio = r1bio_pool_alloc(gfp_flags, pi);
-       if (!r1_bio) {
-               unplug_slaves(pi->mddev);
+       if (!r1_bio)
                return NULL;
-       }
 
        /*
         * Allocate bios : 1 for reading, n-1 for writing
@@ -520,37 +511,6 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
        return new_disk;
 }
 
-static void unplug_slaves(mddev_t *mddev)
-{
-       conf_t *conf = mddev->private;
-       int i;
-
-       rcu_read_lock();
-       for (i=0; i<mddev->raid_disks; i++) {
-               mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
-               if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
-                       struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
-                       atomic_inc(&rdev->nr_pending);
-                       rcu_read_unlock();
-
-                       blk_unplug(r_queue);
-
-                       rdev_dec_pending(rdev, mddev);
-                       rcu_read_lock();
-               }
-       }
-       rcu_read_unlock();
-}
-
-static void raid1_unplug(struct request_queue *q)
-{
-       mddev_t *mddev = q->queuedata;
-
-       unplug_slaves(mddev);
-       md_wakeup_thread(mddev->thread);
-}
-
 static int raid1_congested(void *data, int bits)
 {
        mddev_t *mddev = data;
@@ -580,23 +540,16 @@ static int raid1_congested(void *data, int bits)
 }
 
 
-static int flush_pending_writes(conf_t *conf)
+static void flush_pending_writes(conf_t *conf)
 {
        /* Any writes that have been queued but are awaiting
         * bitmap updates get flushed here.
-        * We return 1 if any requests were actually submitted.
         */
-       int rv = 0;
-
        spin_lock_irq(&conf->device_lock);
 
        if (conf->pending_bio_list.head) {
                struct bio *bio;
                bio = bio_list_get(&conf->pending_bio_list);
-               /* Only take the spinlock to quiet a warning */
-               spin_lock(conf->mddev->queue->queue_lock);
-               blk_remove_plug(conf->mddev->queue);
-               spin_unlock(conf->mddev->queue->queue_lock);
                spin_unlock_irq(&conf->device_lock);
                /* flush any pending bitmap writes to
                 * disk before proceeding w/ I/O */
@@ -608,10 +561,14 @@ static int flush_pending_writes(conf_t *conf)
                        generic_make_request(bio);
                        bio = next;
                }
-               rv = 1;
        } else
                spin_unlock_irq(&conf->device_lock);
-       return rv;
+}
+
+static void md_kick_device(mddev_t *mddev)
+{
+       blk_flush_plug(current);
+       md_wakeup_thread(mddev->thread);
 }
 
 /* Barriers....
@@ -643,8 +600,7 @@ static void raise_barrier(conf_t *conf)
 
        /* Wait until no block IO is waiting */
        wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
-                           conf->resync_lock,
-                           raid1_unplug(conf->mddev->queue));
+                           conf->resync_lock, md_kick_device(conf->mddev));
 
        /* block any new IO from starting */
        conf->barrier++;
@@ -652,8 +608,7 @@ static void raise_barrier(conf_t *conf)
        /* Now wait for all pending IO to complete */
        wait_event_lock_irq(conf->wait_barrier,
                            !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-                           conf->resync_lock,
-                           raid1_unplug(conf->mddev->queue));
+                           conf->resync_lock, md_kick_device(conf->mddev));
 
        spin_unlock_irq(&conf->resync_lock);
 }
@@ -675,7 +630,7 @@ static void wait_barrier(conf_t *conf)
                conf->nr_waiting++;
                wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
                                    conf->resync_lock,
-                                   raid1_unplug(conf->mddev->queue));
+                                   md_kick_device(conf->mddev));
                conf->nr_waiting--;
        }
        conf->nr_pending++;
@@ -712,7 +667,7 @@ static void freeze_array(conf_t *conf)
                            conf->nr_pending == conf->nr_queued+1,
                            conf->resync_lock,
                            ({ flush_pending_writes(conf);
-                              raid1_unplug(conf->mddev->queue); }));
+                              md_kick_device(conf->mddev); }));
        spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(conf_t *conf)
@@ -962,7 +917,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                atomic_inc(&r1_bio->remaining);
                spin_lock_irqsave(&conf->device_lock, flags);
                bio_list_add(&conf->pending_bio_list, mbio);
-               blk_plug_device_unlocked(mddev->queue);
                spin_unlock_irqrestore(&conf->device_lock, flags);
        }
        r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
@@ -971,7 +925,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        /* In case raid1d snuck in to freeze_array */
        wake_up(&conf->wait_barrier);
 
-       if (do_sync)
+       if (do_sync || !bitmap)
                md_wakeup_thread(mddev->thread);
 
        return 0;
@@ -1178,7 +1132,7 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
                        p->rdev = rdev;
                        goto abort;
                }
-               md_integrity_register(mddev);
+               err = md_integrity_register(mddev);
        }
 abort:
 
@@ -1561,7 +1515,6 @@ static void raid1d(mddev_t *mddev)
        unsigned long flags;
        conf_t *conf = mddev->private;
        struct list_head *head = &conf->retry_list;
-       int unplug=0;
        mdk_rdev_t *rdev;
 
        md_check_recovery(mddev);
@@ -1569,7 +1522,7 @@ static void raid1d(mddev_t *mddev)
        for (;;) {
                char b[BDEVNAME_SIZE];
 
-               unplug += flush_pending_writes(conf);
+               flush_pending_writes(conf);
 
                spin_lock_irqsave(&conf->device_lock, flags);
                if (list_empty(head)) {
@@ -1583,10 +1536,9 @@ static void raid1d(mddev_t *mddev)
 
                mddev = r1_bio->mddev;
                conf = mddev->private;
-               if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
+               if (test_bit(R1BIO_IsSync, &r1_bio->state))
                        sync_request_write(mddev, r1_bio);
-                       unplug = 1;
-               } else {
+               else {
                        int disk;
 
                        /* we got a read error. Maybe the drive is bad.  Maybe just
@@ -1636,14 +1588,11 @@ static void raid1d(mddev_t *mddev)
                                bio->bi_end_io = raid1_end_read_request;
                                bio->bi_rw = READ | do_sync;
                                bio->bi_private = r1_bio;
-                               unplug = 1;
                                generic_make_request(bio);
                        }
                }
                cond_resched();
        }
-       if (unplug)
-               unplug_slaves(mddev);
 }
 
 
@@ -2066,11 +2015,9 @@ static int run(mddev_t *mddev)
 
        md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
 
-       mddev->queue->unplug_fn = raid1_unplug;
        mddev->queue->backing_dev_info.congested_fn = raid1_congested;
        mddev->queue->backing_dev_info.congested_data = mddev;
-       md_integrity_register(mddev);
-       return 0;
+       return md_integrity_register(mddev);
 }
 
 static int stop(mddev_t *mddev)
index 747d061d8e05817878ac102a6438714d181e4356..f7b62370b37446a96d5595e8f247b84b3689b145 100644 (file)
  */
 #define        NR_RAID10_BIOS 256
 
-static void unplug_slaves(mddev_t *mddev);
-
 static void allow_barrier(conf_t *conf);
 static void lower_barrier(conf_t *conf);
 
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
        conf_t *conf = data;
-       r10bio_t *r10_bio;
        int size = offsetof(struct r10bio_s, devs[conf->copies]);
 
        /* allocate a r10bio with room for raid_disks entries in the bios array */
-       r10_bio = kzalloc(size, gfp_flags);
-       if (!r10_bio && conf->mddev)
-               unplug_slaves(conf->mddev);
-
-       return r10_bio;
+       return kzalloc(size, gfp_flags);
 }
 
 static void r10bio_pool_free(void *r10_bio, void *data)
@@ -106,10 +99,8 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
        int nalloc;
 
        r10_bio = r10bio_pool_alloc(gfp_flags, conf);
-       if (!r10_bio) {
-               unplug_slaves(conf->mddev);
+       if (!r10_bio)
                return NULL;
-       }
 
        if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
                nalloc = conf->copies; /* resync */
@@ -597,37 +588,6 @@ rb_out:
        return disk;
 }
 
-static void unplug_slaves(mddev_t *mddev)
-{
-       conf_t *conf = mddev->private;
-       int i;
-
-       rcu_read_lock();
-       for (i=0; i < conf->raid_disks; i++) {
-               mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
-               if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
-                       struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
-                       atomic_inc(&rdev->nr_pending);
-                       rcu_read_unlock();
-
-                       blk_unplug(r_queue);
-
-                       rdev_dec_pending(rdev, mddev);
-                       rcu_read_lock();
-               }
-       }
-       rcu_read_unlock();
-}
-
-static void raid10_unplug(struct request_queue *q)
-{
-       mddev_t *mddev = q->queuedata;
-
-       unplug_slaves(q->queuedata);
-       md_wakeup_thread(mddev->thread);
-}
-
 static int raid10_congested(void *data, int bits)
 {
        mddev_t *mddev = data;
@@ -649,23 +609,16 @@ static int raid10_congested(void *data, int bits)
        return ret;
 }
 
-static int flush_pending_writes(conf_t *conf)
+static void flush_pending_writes(conf_t *conf)
 {
        /* Any writes that have been queued but are awaiting
         * bitmap updates get flushed here.
-        * We return 1 if any requests were actually submitted.
         */
-       int rv = 0;
-
        spin_lock_irq(&conf->device_lock);
 
        if (conf->pending_bio_list.head) {
                struct bio *bio;
                bio = bio_list_get(&conf->pending_bio_list);
-               /* Spinlock only taken to quiet a warning */
-               spin_lock(conf->mddev->queue->queue_lock);
-               blk_remove_plug(conf->mddev->queue);
-               spin_unlock(conf->mddev->queue->queue_lock);
                spin_unlock_irq(&conf->device_lock);
                /* flush any pending bitmap writes to disk
                 * before proceeding w/ I/O */
@@ -677,11 +630,16 @@ static int flush_pending_writes(conf_t *conf)
                        generic_make_request(bio);
                        bio = next;
                }
-               rv = 1;
        } else
                spin_unlock_irq(&conf->device_lock);
-       return rv;
 }
+
+static void md_kick_device(mddev_t *mddev)
+{
+       blk_flush_plug(current);
+       md_wakeup_thread(mddev->thread);
+}
+
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -711,8 +669,7 @@ static void raise_barrier(conf_t *conf, int force)
 
        /* Wait until no block IO is waiting (unless 'force') */
        wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-                           conf->resync_lock,
-                           raid10_unplug(conf->mddev->queue));
+                           conf->resync_lock, md_kick_device(conf->mddev));
 
        /* block any new IO from starting */
        conf->barrier++;
@@ -720,8 +677,7 @@ static void raise_barrier(conf_t *conf, int force)
        /* No wait for all pending IO to complete */
        wait_event_lock_irq(conf->wait_barrier,
                            !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-                           conf->resync_lock,
-                           raid10_unplug(conf->mddev->queue));
+                           conf->resync_lock, md_kick_device(conf->mddev));
 
        spin_unlock_irq(&conf->resync_lock);
 }
@@ -742,7 +698,7 @@ static void wait_barrier(conf_t *conf)
                conf->nr_waiting++;
                wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
                                    conf->resync_lock,
-                                   raid10_unplug(conf->mddev->queue));
+                                   md_kick_device(conf->mddev));
                conf->nr_waiting--;
        }
        conf->nr_pending++;
@@ -779,7 +735,7 @@ static void freeze_array(conf_t *conf)
                            conf->nr_pending == conf->nr_queued+1,
                            conf->resync_lock,
                            ({ flush_pending_writes(conf);
-                              raid10_unplug(conf->mddev->queue); }));
+                              md_kick_device(conf->mddev); }));
        spin_unlock_irq(&conf->resync_lock);
 }
 
@@ -974,7 +930,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                atomic_inc(&r10_bio->remaining);
                spin_lock_irqsave(&conf->device_lock, flags);
                bio_list_add(&conf->pending_bio_list, mbio);
-               blk_plug_device_unlocked(mddev->queue);
                spin_unlock_irqrestore(&conf->device_lock, flags);
        }
 
@@ -991,7 +946,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        /* In case raid10d snuck in to freeze_array */
        wake_up(&conf->wait_barrier);
 
-       if (do_sync)
+       if (do_sync || !mddev->bitmap)
                md_wakeup_thread(mddev->thread);
 
        return 0;
@@ -1233,7 +1188,7 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
                        p->rdev = rdev;
                        goto abort;
                }
-               md_integrity_register(mddev);
+               err = md_integrity_register(mddev);
        }
 abort:
 
@@ -1684,7 +1639,6 @@ static void raid10d(mddev_t *mddev)
        unsigned long flags;
        conf_t *conf = mddev->private;
        struct list_head *head = &conf->retry_list;
-       int unplug=0;
        mdk_rdev_t *rdev;
 
        md_check_recovery(mddev);
@@ -1692,7 +1646,7 @@ static void raid10d(mddev_t *mddev)
        for (;;) {
                char b[BDEVNAME_SIZE];
 
-               unplug += flush_pending_writes(conf);
+               flush_pending_writes(conf);
 
                spin_lock_irqsave(&conf->device_lock, flags);
                if (list_empty(head)) {
@@ -1706,13 +1660,11 @@ static void raid10d(mddev_t *mddev)
 
                mddev = r10_bio->mddev;
                conf = mddev->private;
-               if (test_bit(R10BIO_IsSync, &r10_bio->state)) {
+               if (test_bit(R10BIO_IsSync, &r10_bio->state))
                        sync_request_write(mddev, r10_bio);
-                       unplug = 1;
-               } else  if (test_bit(R10BIO_IsRecover, &r10_bio->state)) {
+               else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
                        recovery_request_write(mddev, r10_bio);
-                       unplug = 1;
-               } else {
+               else {
                        int mirror;
                        /* we got a read error. Maybe the drive is bad.  Maybe just
                         * the block and we can fix it.
@@ -1759,14 +1711,11 @@ static void raid10d(mddev_t *mddev)
                                bio->bi_rw = READ | do_sync;
                                bio->bi_private = r10_bio;
                                bio->bi_end_io = raid10_end_read_request;
-                               unplug = 1;
                                generic_make_request(bio);
                        }
                }
                cond_resched();
        }
-       if (unplug)
-               unplug_slaves(mddev);
 }
 
 
@@ -2377,7 +2326,6 @@ static int run(mddev_t *mddev)
        md_set_array_sectors(mddev, size);
        mddev->resync_max_sectors = size;
 
-       mddev->queue->unplug_fn = raid10_unplug;
        mddev->queue->backing_dev_info.congested_fn = raid10_congested;
        mddev->queue->backing_dev_info.congested_data = mddev;
 
@@ -2395,7 +2343,10 @@ static int run(mddev_t *mddev)
 
        if (conf->near_copies < conf->raid_disks)
                blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
-       md_integrity_register(mddev);
+
+       if (md_integrity_register(mddev))
+               goto out_free_conf;
+
        return 0;
 
 out_free_conf:
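The raid10 hunks above replace the queue's unplug_fn callback with md_kick_device(): a task about to sleep on the barrier first flushes its own on-stack plug and wakes the md thread, so bios it has already queued keep moving while it waits. A minimal sketch of that wait-with-kick shape, reusing the conf_t fields and the md_kick_device() helper introduced above (kick_and_wait() itself is a hypothetical name, not part of the patch):

static void kick_and_wait(conf_t *conf)
{
	spin_lock_irq(&conf->resync_lock);
	/* wait_event_lock_irq(): roughly, drop resync_lock, run the kick,
	 * sleep, retake the lock, then recheck the condition */
	wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
			    conf->resync_lock,
			    md_kick_device(conf->mddev));
	spin_unlock_irq(&conf->resync_lock);
}

This is what wait_barrier() above does; the only change from the old code is that the kick flushes the caller's plug instead of unplugging every member disk.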
index 78536fdbd87fec133894a10bdabd027061cee0eb..e867ee42b15239707c0dfede4be71d2bc9a72e20 100644 (file)
@@ -433,8 +433,6 @@ static int has_failed(raid5_conf_t *conf)
        return 0;
 }
 
-static void unplug_slaves(mddev_t *mddev);
-
 static struct stripe_head *
 get_active_stripe(raid5_conf_t *conf, sector_t sector,
                  int previous, int noblock, int noquiesce)
@@ -463,8 +461,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
                                                     < (conf->max_nr_stripes *3/4)
                                                     || !conf->inactive_blocked),
                                                    conf->device_lock,
-                                                   md_raid5_unplug_device(conf)
-                                       );
+                                                   md_raid5_kick_device(conf));
                                conf->inactive_blocked = 0;
                        } else
                                init_stripe(sh, sector, previous);
@@ -1473,8 +1470,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
                wait_event_lock_irq(conf->wait_for_stripe,
                                    !list_empty(&conf->inactive_list),
                                    conf->device_lock,
-                                   unplug_slaves(conf->mddev)
-                       );
+                                   blk_flush_plug(current));
                osh = get_free_stripe(conf);
                spin_unlock_irq(&conf->device_lock);
                atomic_set(&nsh->count, 1);
@@ -3645,58 +3641,19 @@ static void activate_bit_delay(raid5_conf_t *conf)
        }
 }
 
-static void unplug_slaves(mddev_t *mddev)
+void md_raid5_kick_device(raid5_conf_t *conf)
 {
-       raid5_conf_t *conf = mddev->private;
-       int i;
-       int devs = max(conf->raid_disks, conf->previous_raid_disks);
-
-       rcu_read_lock();
-       for (i = 0; i < devs; i++) {
-               mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
-               if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
-                       struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
-                       atomic_inc(&rdev->nr_pending);
-                       rcu_read_unlock();
-
-                       blk_unplug(r_queue);
-
-                       rdev_dec_pending(rdev, mddev);
-                       rcu_read_lock();
-               }
-       }
-       rcu_read_unlock();
-}
-
-void md_raid5_unplug_device(raid5_conf_t *conf)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&conf->device_lock, flags);
-
-       if (plugger_remove_plug(&conf->plug)) {
-               conf->seq_flush++;
-               raid5_activate_delayed(conf);
-       }
+       blk_flush_plug(current);
+       raid5_activate_delayed(conf);
        md_wakeup_thread(conf->mddev->thread);
-
-       spin_unlock_irqrestore(&conf->device_lock, flags);
-
-       unplug_slaves(conf->mddev);
 }
-EXPORT_SYMBOL_GPL(md_raid5_unplug_device);
+EXPORT_SYMBOL_GPL(md_raid5_kick_device);
 
 static void raid5_unplug(struct plug_handle *plug)
 {
        raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
-       md_raid5_unplug_device(conf);
-}
 
-static void raid5_unplug_queue(struct request_queue *q)
-{
-       mddev_t *mddev = q->queuedata;
-       md_raid5_unplug_device(mddev->private);
+       md_raid5_kick_device(conf);
 }
 
 int md_raid5_congested(mddev_t *mddev, int bits)
@@ -4100,7 +4057,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
                                 * add failed due to overlap.  Flush everything
                                 * and wait a while
                                 */
-                               md_raid5_unplug_device(conf);
+                               md_raid5_kick_device(conf);
                                release_stripe(sh);
                                schedule();
                                goto retry;
@@ -4365,7 +4322,6 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 
        if (sector_nr >= max_sector) {
                /* just being told to finish up .. nothing much to do */
-               unplug_slaves(mddev);
 
                if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
                        end_reshape(conf);
@@ -4569,7 +4525,6 @@ static void raid5d(mddev_t *mddev)
        spin_unlock_irq(&conf->device_lock);
 
        async_tx_issue_pending_all();
-       unplug_slaves(mddev);
 
        pr_debug("--- raid5d inactive\n");
 }
@@ -5204,7 +5159,7 @@ static int run(mddev_t *mddev)
 
                mddev->queue->backing_dev_info.congested_data = mddev;
                mddev->queue->backing_dev_info.congested_fn = raid5_congested;
-               mddev->queue->unplug_fn = raid5_unplug_queue;
+               mddev->queue->queue_lock = &conf->device_lock;
 
                chunk_size = mddev->chunk_sectors << 9;
                blk_queue_io_min(mddev->queue, chunk_size);
index 2ace0582b4098f102cb294aac4355c025ad4723e..8d563a4f022a778a6d9354a3c1bf514c6f5e10b6 100644 (file)
@@ -503,6 +503,6 @@ static inline int algorithm_is_DDF(int layout)
 }
 
 extern int md_raid5_congested(mddev_t *mddev, int bits);
-extern void md_raid5_unplug_device(raid5_conf_t *conf);
+extern void md_raid5_kick_device(raid5_conf_t *conf);
 extern int raid5_set_cache_size(mddev_t *mddev, int size);
 #endif
index ae7cad1858987f489b3b0b24a3dcaff64d143dd6..47ec5bc0ed2167b080652ade6b47804dbab49ee0 100644 (file)
@@ -695,20 +695,22 @@ static int i2o_block_ioctl(struct block_device *bdev, fmode_t mode,
 };
 
 /**
- *     i2o_block_media_changed - Have we seen a media change?
+ *     i2o_block_check_events - Have we seen a media change?
  *     @disk: gendisk which should be verified
+ *     @clearing: events being cleared
  *
  *     Verifies if the media has changed.
  *
  *     Returns DISK_EVENT_MEDIA_CHANGE if the media was changed or 0 otherwise.
  */
-static int i2o_block_media_changed(struct gendisk *disk)
+static unsigned int i2o_block_check_events(struct gendisk *disk,
+                                          unsigned int clearing)
 {
        struct i2o_block_device *p = disk->private_data;
 
        if (p->media_change_flag) {
                p->media_change_flag = 0;
-               return 1;
+               return DISK_EVENT_MEDIA_CHANGE;
        }
        return 0;
 }
@@ -895,11 +897,7 @@ static void i2o_block_request_fn(struct request_queue *q)
 {
        struct request *req;
 
-       while (!blk_queue_plugged(q)) {
-               req = blk_peek_request(q);
-               if (!req)
-                       break;
-
+       while ((req = blk_peek_request(q)) != NULL) {
                if (req->cmd_type == REQ_TYPE_FS) {
                        struct i2o_block_delayed_request *dreq;
                        struct i2o_block_request *ireq = req->special;
@@ -950,7 +948,7 @@ static const struct block_device_operations i2o_block_fops = {
        .ioctl = i2o_block_ioctl,
        .compat_ioctl = i2o_block_ioctl,
        .getgeo = i2o_block_getgeo,
-       .media_changed = i2o_block_media_changed
+       .check_events = i2o_block_check_events,
 };
 
 /**
@@ -1002,6 +1000,7 @@ static struct i2o_block_device *i2o_block_device_alloc(void)
        gd->major = I2O_MAJOR;
        gd->queue = queue;
        gd->fops = &i2o_block_fops;
+       gd->events = DISK_EVENT_MEDIA_CHANGE;
        gd->private_data = dev;
 
        dev->gd = gd;
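The i2o hunks above show the three pieces of the media_changed to check_events conversion that recurs throughout this merge: the callback gains a clearing mask and returns an event mask, .check_events replaces .media_changed in the block_device_operations, and the gendisk advertises which events it reports. A compact, self-contained sketch of the same pattern for a hypothetical driver "foo" (none of the foo_* names exist in the tree):

#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>

struct foo_dev {
	unsigned long	media_changed;	/* bit 0 set by the device's event path */
};

static unsigned int foo_check_events(struct gendisk *disk, unsigned int clearing)
{
	struct foo_dev *p = disk->private_data;

	/* report an event mask instead of the old 0/1 media_changed result */
	return test_and_clear_bit(0, &p->media_changed) ?
		DISK_EVENT_MEDIA_CHANGE : 0;
}

static const struct block_device_operations foo_fops = {
	.owner		= THIS_MODULE,
	.check_events	= foo_check_events,	/* was .media_changed */
};

static void foo_init_disk(struct gendisk *gd, struct foo_dev *p)
{
	gd->fops	 = &foo_fops;
	gd->events	 = DISK_EVENT_MEDIA_CHANGE;	/* events this disk can signal */
	gd->private_data = p;
}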
index 4e42d030e09724324e9eb6ec475103dff83a86af..2ae727568df92b9edeaf330ea46751d6e0493474 100644 (file)
@@ -55,8 +55,7 @@ static int mmc_queue_thread(void *d)
 
                spin_lock_irq(q->queue_lock);
                set_current_state(TASK_INTERRUPTIBLE);
-               if (!blk_queue_plugged(q))
-                       req = blk_fetch_request(q);
+               req = blk_fetch_request(q);
                mq->req = req;
                spin_unlock_irq(q->queue_lock);
 
index 794bfd962266f3c55c95e24c60c768a9f5b261c0..4d2df2f76ea0daa718215c179e88fe2b45249516 100644 (file)
@@ -1917,7 +1917,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
                return;
        }
        /* Now we try to fetch requests from the request queue */
-       while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) {
+       while ((req = blk_peek_request(queue))) {
                if (basedev->features & DASD_FEATURE_READONLY &&
                    rq_data_dir(req) == WRITE) {
                        DBF_DEV_EVENT(DBF_ERR, basedev,
index 55d2d0f4eabc9cbd67a4a6fe1638305f883beb00..83cea9a55e2f8c10df077710a66f23760a7431e6 100644 (file)
 static DEFINE_MUTEX(tape_block_mutex);
 static int tapeblock_open(struct block_device *, fmode_t);
 static int tapeblock_release(struct gendisk *, fmode_t);
-static int tapeblock_medium_changed(struct gendisk *);
+static unsigned int tapeblock_check_events(struct gendisk *, unsigned int);
 static int tapeblock_revalidate_disk(struct gendisk *);
 
 static const struct block_device_operations tapeblock_fops = {
        .owner           = THIS_MODULE,
        .open            = tapeblock_open,
        .release         = tapeblock_release,
-       .media_changed   = tapeblock_medium_changed,
+       .check_events    = tapeblock_check_events,
        .revalidate_disk = tapeblock_revalidate_disk,
 };
 
@@ -161,7 +161,6 @@ tapeblock_requeue(struct work_struct *work) {
 
        spin_lock_irq(&device->blk_data.request_queue_lock);
        while (
-               !blk_queue_plugged(queue) &&
                blk_peek_request(queue) &&
                nr_queued < TAPEBLOCK_MIN_REQUEUE
        ) {
@@ -237,6 +236,7 @@ tapeblock_setup_device(struct tape_device * device)
        disk->major = tapeblock_major;
        disk->first_minor = device->first_minor;
        disk->fops = &tapeblock_fops;
+       disk->events = DISK_EVENT_MEDIA_CHANGE;
        disk->private_data = tape_get_device(device);
        disk->queue = blkdat->request_queue;
        set_capacity(disk, 0);
@@ -340,8 +340,8 @@ tapeblock_revalidate_disk(struct gendisk *disk)
        return 0;
 }
 
-static int
-tapeblock_medium_changed(struct gendisk *disk)
+static unsigned int
+tapeblock_check_events(struct gendisk *disk, unsigned int clearing)
 {
        struct tape_device *device;
 
@@ -349,7 +349,7 @@ tapeblock_medium_changed(struct gendisk *disk)
        DBF_LH(6, "tapeblock_medium_changed(%p) = %d\n",
                device, device->blk_data.medium_changed);
 
-       return device->blk_data.medium_changed;
+       return device->blk_data.medium_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 /*
index 2d63c8ad1442b6d99a07d0c10c4cb02b4292d850..6d5c7ff43f5bdaf1984a62696e8acb03cdead3c5 100644 (file)
@@ -67,6 +67,13 @@ static struct scsi_host_sg_pool scsi_sg_pools[] = {
 
 struct kmem_cache *scsi_sdb_cache;
 
+/*
+ * When to reinvoke queueing after a resource shortage. It's 3 msecs to
+ * not change behaviour from the previous unplug mechanism, experimentation
+ * may prove this needs changing.
+ */
+#define SCSI_QUEUE_DELAY       3
+
 static void scsi_run_queue(struct request_queue *q);
 
 /*
@@ -149,14 +156,7 @@ static int __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
        /*
         * Requeue this command.  It will go before all other commands
         * that are already in the queue.
-        *
-        * NOTE: there is magic here about the way the queue is plugged if
-        * we have no outstanding commands.
-        * 
-        * Although we *don't* plug the queue, we call the request
-        * function.  The SCSI request function detects the blocked condition
-        * and plugs the queue appropriately.
-         */
+        */
        spin_lock_irqsave(q->queue_lock, flags);
        blk_requeue_request(q, cmd->request);
        spin_unlock_irqrestore(q->queue_lock, flags);
@@ -1226,11 +1226,11 @@ int scsi_prep_return(struct request_queue *q, struct request *req, int ret)
        case BLKPREP_DEFER:
                /*
                 * If we defer, the blk_peek_request() returns NULL, but the
-                * queue must be restarted, so we plug here if no returning
-                * command will automatically do that.
+                * queue must be restarted, so we schedule a callback to happen
+                * shortly.
                 */
                if (sdev->device_busy == 0)
-                       blk_plug_device(q);
+                       blk_delay_queue(q, SCSI_QUEUE_DELAY);
                break;
        default:
                req->cmd_flags |= REQ_DONTPREP;
@@ -1269,7 +1269,7 @@ static inline int scsi_dev_queue_ready(struct request_queue *q,
                                   sdev_printk(KERN_INFO, sdev,
                                   "unblocking device at zero depth\n"));
                } else {
-                       blk_plug_device(q);
+                       blk_delay_queue(q, SCSI_QUEUE_DELAY);
                        return 0;
                }
        }
@@ -1499,7 +1499,7 @@ static void scsi_request_fn(struct request_queue *q)
         * the host is no longer able to accept any more requests.
         */
        shost = sdev->host;
-       while (!blk_queue_plugged(q)) {
+       for (;;) {
                int rtn;
                /*
                 * get next queueable request.  We do this early to make sure
@@ -1578,15 +1578,8 @@ static void scsi_request_fn(struct request_queue *q)
                 */
                rtn = scsi_dispatch_cmd(cmd);
                spin_lock_irq(q->queue_lock);
-               if(rtn) {
-                       /* we're refusing the command; because of
-                        * the way locks get dropped, we need to 
-                        * check here if plugging is required */
-                       if(sdev->device_busy == 0)
-                               blk_plug_device(q);
-
-                       break;
-               }
+               if (rtn)
+                       goto out_delay;
        }
 
        goto out;
@@ -1605,9 +1598,10 @@ static void scsi_request_fn(struct request_queue *q)
        spin_lock_irq(q->queue_lock);
        blk_requeue_request(q, req);
        sdev->device_busy--;
-       if(sdev->device_busy == 0)
-               blk_plug_device(q);
- out:
+out_delay:
+       if (sdev->device_busy == 0)
+               blk_delay_queue(q, SCSI_QUEUE_DELAY);
+out:
        /* must be careful here...if we trigger the ->remove() function
         * we cannot be holding the q lock */
        spin_unlock_irq(q->queue_lock);
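With queue plugging gone, a request_fn can no longer test blk_queue_plugged() and rely on a later unplug to call it again; the scsi_lib hunks above instead drain the queue until it is empty and, on a resource shortage, ask the block layer to re-run the queue after SCSI_QUEUE_DELAY msecs. A stripped-down sketch of that shape (foo_request_fn(), foo_device_busy() and foo_dispatch() are hypothetical; blk_peek_request(), blk_start_request() and blk_delay_queue() are the block-layer calls used above):

#include <linux/blkdev.h>

#define FOO_QUEUE_DELAY	3	/* msecs, mirroring SCSI_QUEUE_DELAY above */

static bool foo_device_busy(struct request_queue *q);	/* hypothetical */
static void foo_dispatch(struct request *req);		/* hypothetical */

/* called with q->queue_lock held, as request_fns are */
static void foo_request_fn(struct request_queue *q)
{
	struct request *req;

	while ((req = blk_peek_request(q)) != NULL) {
		if (foo_device_busy(q)) {
			/* cannot start it now: have the queue re-run shortly */
			blk_delay_queue(q, FOO_QUEUE_DELAY);
			break;
		}
		blk_start_request(req);		/* dequeue the request */
		foo_dispatch(req);
	}
}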
index 5c3ccfc6b6220d99e08f21d24458e23866876392..2941d2d92c9472349da1a29edd103e5094aa295b 100644 (file)
@@ -3913,7 +3913,7 @@ fc_bsg_request_handler(struct request_queue *q, struct Scsi_Host *shost,
        if (!get_device(dev))
                return;
 
-       while (!blk_queue_plugged(q)) {
+       while (1) {
                if (rport && (rport->port_state == FC_PORTSTATE_BLOCKED) &&
                    !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT))
                        break;
index 927e99cb72250c61154639ea8ca1eae2dd32b198..c6fcf76cade549f2fb880d976c839b915e0b9dc3 100644 (file)
@@ -173,11 +173,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
        int ret;
        int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
 
-       while (!blk_queue_plugged(q)) {
-               req = blk_fetch_request(q);
-               if (!req)
-                       break;
-
+       while ((req = blk_fetch_request(q)) != NULL) {
                spin_unlock_irq(q->queue_lock);
 
                handler = to_sas_internal(shost->transportt)->f->smp_handler;
index 6e02f1b0c46fc9574874d2a68a1b2dffd2e456f3..af789937be4ed8fd48a5041739290c290122d7c2 100644 (file)
@@ -124,7 +124,8 @@ static void blkvsc_shutdown(struct device *device);
 
 static int blkvsc_open(struct block_device *bdev,  fmode_t mode);
 static int blkvsc_release(struct gendisk *disk, fmode_t mode);
-static int blkvsc_media_changed(struct gendisk *gd);
+static unsigned int blkvsc_check_events(struct gendisk *gd,
+                                       unsigned int clearing);
 static int blkvsc_revalidate_disk(struct gendisk *gd);
 static int blkvsc_getgeo(struct block_device *bd, struct hd_geometry *hg);
 static int blkvsc_ioctl(struct block_device *bd, fmode_t mode,
@@ -155,7 +156,7 @@ static const struct block_device_operations block_ops = {
        .owner = THIS_MODULE,
        .open = blkvsc_open,
        .release = blkvsc_release,
-       .media_changed = blkvsc_media_changed,
+       .check_events = blkvsc_check_events,
        .revalidate_disk = blkvsc_revalidate_disk,
        .getgeo = blkvsc_getgeo,
        .ioctl  = blkvsc_ioctl,
@@ -357,6 +358,7 @@ static int blkvsc_probe(struct device *device)
        else
                blkdev->gd->first_minor = 0;
        blkdev->gd->fops = &block_ops;
+       blkdev->gd->events = DISK_EVENT_MEDIA_CHANGE;
        blkdev->gd->private_data = blkdev;
        blkdev->gd->driverfs_dev = &(blkdev->device_ctx->device);
        sprintf(blkdev->gd->disk_name, "hd%c", 'a' + devnum);
@@ -1337,10 +1339,11 @@ static int blkvsc_release(struct gendisk *disk, fmode_t mode)
        return 0;
 }
 
-static int blkvsc_media_changed(struct gendisk *gd)
+static unsigned int blkvsc_check_events(struct gendisk *gd,
+                                       unsigned int clearing)
 {
        DPRINT_DBG(BLKVSC_DRV, "- enter\n");
-       return 1;
+       return DISK_EVENT_MEDIA_CHANGE;
 }
 
 static int blkvsc_revalidate_disk(struct gendisk *gd)
index e1851f00be568f07e17fbd6c45070bdc9d01de93..842cd9214a5ec120989e8a339ab158433dff5b91 100644 (file)
@@ -381,10 +381,10 @@ static int cyasblkdev_blk_ioctl(
        return -ENOTTY;
 }
 
-/* Media_changed block_device opp
+/* check_events block_device op
  * this one is called by kernel to confirm if the media really changed
  * as we indicated by issuing check_disk_change() call */
-int cyasblkdev_media_changed(struct gendisk *gd)
+unsigned int cyasblkdev_check_events(struct gendisk *gd, unsigned int clearing)
 {
        struct cyasblkdev_blk_data *bd;
 
@@ -402,7 +402,7 @@ int cyasblkdev_media_changed(struct gendisk *gd)
                #endif
        }
 
-       /* return media change state "1" yes, 0 no */
+       /* return media change state - DISK_EVENT_MEDIA_CHANGE yes, 0 no */
        return 0;
 }
 
@@ -432,7 +432,7 @@ static struct block_device_operations cyasblkdev_bdops = {
        .ioctl                  = cyasblkdev_blk_ioctl,
        /* .getgeo              = cyasblkdev_blk_getgeo, */
        /* added to support media removal( real and simulated) media */
-       .media_changed  = cyasblkdev_media_changed,
+       .check_events           = cyasblkdev_check_events,
        /* added to support media removal( real and simulated) media */
        .revalidate_disk = cyasblkdev_revalidate_disk,
        .owner                  = THIS_MODULE,
@@ -1090,6 +1090,7 @@ static int cyasblkdev_add_disks(int bus_num,
                bd->user_disk_0->first_minor = devidx << CYASBLKDEV_SHIFT;
                bd->user_disk_0->minors = 8;
                bd->user_disk_0->fops = &cyasblkdev_bdops;
+               bd->user_disk_0->events = DISK_EVENT_MEDIA_CHANGE;
                bd->user_disk_0->private_data = bd;
                bd->user_disk_0->queue = bd->queue.queue;
                bd->dbgprn_flags = DBGPRN_RD_RQ;
@@ -1190,6 +1191,7 @@ static int cyasblkdev_add_disks(int bus_num,
                bd->user_disk_1->first_minor = (devidx + 1) << CYASBLKDEV_SHIFT;
                bd->user_disk_1->minors = 8;
                bd->user_disk_1->fops = &cyasblkdev_bdops;
+               bd->user_disk_1->events = DISK_EVENT_MEDIA_CHANGE;
                bd->user_disk_1->private_data = bd;
                bd->user_disk_1->queue = bd->queue.queue;
                bd->dbgprn_flags = DBGPRN_RD_RQ;
@@ -1278,6 +1280,7 @@ static int cyasblkdev_add_disks(int bus_num,
                                (devidx + 2) << CYASBLKDEV_SHIFT;
                        bd->system_disk->minors = 8;
                        bd->system_disk->fops = &cyasblkdev_bdops;
+                       bd->system_disk->events = DISK_EVENT_MEDIA_CHANGE;
                        bd->system_disk->private_data = bd;
                        bd->system_disk->queue = bd->queue.queue;
                        /* don't search for vfat
index 3df570db0e4ff21cd57f4f0c229bf555f9b4fbdd..eb0afec046e115d99d729788875e2bcc9c6a6ebd 100644 (file)
@@ -391,9 +391,8 @@ static int iblock_do_task(struct se_task *task)
 {
        struct se_device *dev = task->task_se_cmd->se_dev;
        struct iblock_req *req = IBLOCK_REQ(task);
-       struct iblock_dev *ibd = (struct iblock_dev *)req->ib_dev;
-       struct request_queue *q = bdev_get_queue(ibd->ibd_bd);
        struct bio *bio = req->ib_bio, *nbio = NULL;
+       struct blk_plug plug;
        int rw;
 
        if (task->task_data_direction == DMA_TO_DEVICE) {
@@ -411,6 +410,7 @@ static int iblock_do_task(struct se_task *task)
                rw = READ;
        }
 
+       blk_start_plug(&plug);
        while (bio) {
                nbio = bio->bi_next;
                bio->bi_next = NULL;
@@ -420,9 +420,8 @@ static int iblock_do_task(struct se_task *task)
                submit_bio(rw, bio);
                bio = nbio;
        }
+       blk_finish_plug(&plug);
 
-       if (q->unplug_fn)
-               q->unplug_fn(q);
        return PYX_TRANSPORT_SENT_TO_TRANSPORT;
 }
 
index 81f13958e751095d869a2935460163d8111f4098..43db715f15026d491d87cc1d3c7e443ddcf2f341 100644 (file)
@@ -306,7 +306,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = {
 
 static void sysrq_handle_showmem(int key)
 {
-       show_mem();
+       show_mem(0);
 }
 static struct sysrq_key_op sysrq_showmem_op = {
        .handler        = sysrq_handle_showmem,
index 6dd3c68c13add15f4a0452f06694fbd7a2c06cf4..d6b342b5b423ce69fc989cdf3593fa892d7f9167 100644 (file)
@@ -600,7 +600,7 @@ static void fn_scroll_back(struct vc_data *vc)
 
 static void fn_show_mem(struct vc_data *vc)
 {
-       show_mem();
+       show_mem(0);
 }
 
 static void fn_show_state(struct vc_data *vc)
index 92444e94f84254387eb88efb50d47330033e6987..d5250c5aae21e10c560180e1042234d3a4d91f25 100644 (file)
@@ -72,7 +72,6 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
 static const struct address_space_operations adfs_aops = {
        .readpage       = adfs_readpage,
        .writepage      = adfs_writepage,
-       .sync_page      = block_sync_page,
        .write_begin    = adfs_write_begin,
        .write_end      = generic_write_end,
        .bmap           = _adfs_bmap
index 0a90dcd46de28d33f2768fde829ac7800abdc9e4..acf321b70fcd1a8522da3658449ebcaed3a9e733 100644 (file)
@@ -429,7 +429,6 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations affs_aops = {
        .readpage = affs_readpage,
        .writepage = affs_writepage,
-       .sync_page = block_sync_page,
        .write_begin = affs_write_begin,
        .write_end = generic_write_end,
        .bmap = _affs_bmap
@@ -786,7 +785,6 @@ out:
 const struct address_space_operations affs_aops_ofs = {
        .readpage = affs_readpage_ofs,
        //.writepage = affs_writepage_ofs,
-       //.sync_page = affs_sync_page_ofs,
        .write_begin = affs_write_begin_ofs,
        .write_end = affs_write_end_ofs
 };
index ebb6a22e4e1b36dfb530fba6406aaa42b5a9c14b..e29ec485af255822b8414be128fc8ef66da9a204 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -34,8 +34,6 @@
 #include <linux/security.h>
 #include <linux/eventfd.h>
 #include <linux/blkdev.h>
-#include <linux/mempool.h>
-#include <linux/hash.h>
 #include <linux/compat.h>
 
 #include <asm/kmap_types.h>
@@ -65,14 +63,6 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
-#define AIO_BATCH_HASH_BITS    3 /* allocated on-stack, so don't go crazy */
-#define AIO_BATCH_HASH_SIZE    (1 << AIO_BATCH_HASH_BITS)
-struct aio_batch_entry {
-       struct hlist_node list;
-       struct address_space *mapping;
-};
-mempool_t *abe_pool;
-
 static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
@@ -86,8 +76,7 @@ static int __init aio_setup(void)
        kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
        aio_wq = alloc_workqueue("aio", 0, 1);  /* used to limit concurrency */
-       abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
-       BUG_ON(!aio_wq || !abe_pool);
+       BUG_ON(!aio_wq);
 
        pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
 
@@ -1525,57 +1514,8 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
        return 0;
 }
 
-static void aio_batch_add(struct address_space *mapping,
-                         struct hlist_head *batch_hash)
-{
-       struct aio_batch_entry *abe;
-       struct hlist_node *pos;
-       unsigned bucket;
-
-       bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
-       hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
-               if (abe->mapping == mapping)
-                       return;
-       }
-
-       abe = mempool_alloc(abe_pool, GFP_KERNEL);
-
-       /*
-        * we should be using igrab here, but
-        * we don't want to hammer on the global
-        * inode spinlock just to take an extra
-        * reference on a file that we must already
-        * have a reference to.
-        *
-        * When we're called, we always have a reference
-        * on the file, so we must always have a reference
-        * on the inode, so ihold() is safe here.
-        */
-       ihold(mapping->host);
-       abe->mapping = mapping;
-       hlist_add_head(&abe->list, &batch_hash[bucket]);
-       return;
-}
-
-static void aio_batch_free(struct hlist_head *batch_hash)
-{
-       struct aio_batch_entry *abe;
-       struct hlist_node *pos, *n;
-       int i;
-
-       for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
-               hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
-                       blk_run_address_space(abe->mapping);
-                       iput(abe->mapping->host);
-                       hlist_del(&abe->list);
-                       mempool_free(abe, abe_pool);
-               }
-       }
-}
-
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-                        struct iocb *iocb, struct hlist_head *batch_hash,
-                        bool compat)
+                        struct iocb *iocb, bool compat)
 {
        struct kiocb *req;
        struct file *file;
@@ -1666,11 +1606,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                        ;
        }
        spin_unlock_irq(&ctx->ctx_lock);
-       if (req->ki_opcode == IOCB_CMD_PREAD ||
-           req->ki_opcode == IOCB_CMD_PREADV ||
-           req->ki_opcode == IOCB_CMD_PWRITE ||
-           req->ki_opcode == IOCB_CMD_PWRITEV)
-               aio_batch_add(file->f_mapping, batch_hash);
 
        aio_put_req(req);       /* drop extra ref to req */
        return 0;
@@ -1687,7 +1622,7 @@ long do_io_submit(aio_context_t ctx_id, long nr,
        struct kioctx *ctx;
        long ret = 0;
        int i;
-       struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
+       struct blk_plug plug;
 
        if (unlikely(nr < 0))
                return -EINVAL;
@@ -1704,6 +1639,8 @@ long do_io_submit(aio_context_t ctx_id, long nr,
                return -EINVAL;
        }
 
+       blk_start_plug(&plug);
+
        /*
         * AKPM: should this return a partial result if some of the IOs were
         * successfully submitted?
@@ -1722,11 +1659,11 @@ long do_io_submit(aio_context_t ctx_id, long nr,
                        break;
                }
 
-               ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash, compat);
+               ret = io_submit_one(ctx, user_iocb, &tmp, compat);
                if (ret)
                        break;
        }
-       aio_batch_free(batch_hash);
+       blk_finish_plug(&plug);
 
        put_ioctx(ctx);
        return i ? i : ret;
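The aio hunks above drop the per-mapping batch hash entirely: do_io_submit() now brackets the whole submission loop with an on-stack plug, and blk_finish_plug() pushes out everything the individual iocbs queued in one go. The same pattern as a minimal sketch for an arbitrary bio chain (my_submit_chain() is a hypothetical name; blk_start_plug(), blk_finish_plug() and submit_bio() are the interfaces used throughout this merge):

#include <linux/blkdev.h>
#include <linux/bio.h>

static void my_submit_chain(struct bio *bio)
{
	struct blk_plug plug;

	blk_start_plug(&plug);			/* bios collect on the task's plug */
	while (bio) {
		struct bio *next = bio->bi_next;

		bio->bi_next = NULL;
		submit_bio(bio->bi_rw, bio);	/* held and merged until the unplug */
		bio = next;
	}
	blk_finish_plug(&plug);			/* one flush down to the driver queues */
}

The per-bio cost stays low and the price of kicking the device is paid once per batch rather than once per submitter, which is what the removed aio_batch_* machinery approximated by hand.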
index b1d0c794747b7537dbade4895eccae70aa78cc5e..06457ed8f3e7c5fc258fc8cdc7f0f3bf88eeddf4 100644 (file)
@@ -75,7 +75,6 @@ static const struct inode_operations befs_dir_inode_operations = {
 
 static const struct address_space_operations befs_aops = {
        .readpage       = befs_readpage,
-       .sync_page      = block_sync_page,
        .bmap           = befs_bmap,
 };
 
index eb67edd0f8ea3f39c112faebfed1a877b37f686d..f20e8a71062f4d2bc15e1daacf44398b8cab4906 100644 (file)
@@ -186,7 +186,6 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations bfs_aops = {
        .readpage       = bfs_readpage,
        .writepage      = bfs_writepage,
-       .sync_page      = block_sync_page,
        .write_begin    = bfs_write_begin,
        .write_end      = generic_write_end,
        .bmap           = bfs_bmap,
index e49cce234c653162560796710ee125134053183d..9c5e6b2cd11a84fa3f04611b35cebb6e2232b186 100644 (file)
@@ -761,6 +761,9 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
        unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
 
+       if (bs->bio_integrity_pool)
+               return 0;
+
        bs->bio_integrity_pool =
                mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
 
index 4cf2a52fbc54c1ee4a20b9647db2dc5e430ca766..4d6d4b6c2bf1d2d8122e6383e2a469be8dae8fbf 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -43,7 +43,7 @@ static mempool_t *bio_split_pool __read_mostly;
  * unsigned short
  */
 #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
+static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
        BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
 };
 #undef BV
@@ -1636,9 +1636,6 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
        if (!bs->bio_pool)
                goto bad;
 
-       if (bioset_integrity_create(bs, pool_size))
-               goto bad;
-
        if (!biovec_create_pools(bs, pool_size))
                return bs;
 
@@ -1656,12 +1653,10 @@ static void __init biovec_init_slabs(void)
                int size;
                struct biovec_slab *bvs = bvec_slabs + i;
 
-#ifndef CONFIG_BLK_DEV_INTEGRITY
                if (bvs->nr_vecs <= BIO_INLINE_VECS) {
                        bvs->slab = NULL;
                        continue;
                }
-#endif
 
                size = bvs->nr_vecs * sizeof(struct bio_vec);
                bvs->slab = kmem_cache_create(bvs->name, size, 0,
@@ -1684,6 +1679,9 @@ static int __init init_bio(void)
        if (!fs_bio_set)
                panic("bio: can't allocate bios\n");
 
+       if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
+               panic("bio: can't create integrity pool\n");
+
        bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
                                                     sizeof(struct bio_pair));
        if (!bio_split_pool)
index 889287019599a861d90a51d6808cfb2d032fc255..7d02afb2b7f4b5f404277936d52ab7f1cb885215 100644 (file)
@@ -1087,6 +1087,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        if (!disk)
                goto out;
 
+       disk_block_events(disk);
        mutex_lock_nested(&bdev->bd_mutex, for_part);
        if (!bdev->bd_openers) {
                bdev->bd_disk = disk;
@@ -1108,10 +1109,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                                         */
                                        disk_put_part(bdev->bd_part);
                                        bdev->bd_part = NULL;
-                                       module_put(disk->fops->owner);
-                                       put_disk(disk);
                                        bdev->bd_disk = NULL;
                                        mutex_unlock(&bdev->bd_mutex);
+                                       disk_unblock_events(disk);
+                                       module_put(disk->fops->owner);
+                                       put_disk(disk);
                                        goto restart;
                                }
                                if (ret)
@@ -1148,9 +1150,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                        bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
                }
        } else {
-               module_put(disk->fops->owner);
-               put_disk(disk);
-               disk = NULL;
                if (bdev->bd_contains == bdev) {
                        if (bdev->bd_disk->fops->open) {
                                ret = bdev->bd_disk->fops->open(bdev, mode);
@@ -1160,11 +1159,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                        if (bdev->bd_invalidated)
                                rescan_partitions(bdev->bd_disk, bdev);
                }
+               /* only one opener holds refs to the module and disk */
+               module_put(disk->fops->owner);
+               put_disk(disk);
        }
        bdev->bd_openers++;
        if (for_part)
                bdev->bd_part_count++;
        mutex_unlock(&bdev->bd_mutex);
+       disk_unblock_events(disk);
        return 0;
 
  out_clear:
@@ -1177,10 +1180,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        bdev->bd_contains = NULL;
  out_unlock_bdev:
        mutex_unlock(&bdev->bd_mutex);
- out:
-       if (disk)
-               module_put(disk->fops->owner);
+       disk_unblock_events(disk);
+       module_put(disk->fops->owner);
        put_disk(disk);
+ out:
        bdput(bdev);
 
        return ret;
@@ -1446,14 +1449,13 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
                if (bdev_free) {
                        if (bdev->bd_write_holder) {
                                disk_unblock_events(bdev->bd_disk);
-                               bdev->bd_write_holder = false;
-                       } else
                                disk_check_events(bdev->bd_disk);
+                               bdev->bd_write_holder = false;
+                       }
                }
 
                mutex_unlock(&bdev->bd_mutex);
-       } else
-               disk_check_events(bdev->bd_disk);
+       }
 
        return __blkdev_put(bdev, mode, 0);
 }
@@ -1527,7 +1529,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
 static const struct address_space_operations def_blk_aops = {
        .readpage       = blkdev_readpage,
        .writepage      = blkdev_writepage,
-       .sync_page      = block_sync_page,
        .write_begin    = blkdev_write_begin,
        .write_end      = blkdev_write_end,
        .writepages     = generic_writepages,
index 100b07f021b406a97d9f24ae04e3665418eca833..830d261d0e6b9a7faba479160edb72df3d8a6d2a 100644 (file)
@@ -847,7 +847,6 @@ static const struct address_space_operations btree_aops = {
        .writepages     = btree_writepages,
        .releasepage    = btree_releasepage,
        .invalidatepage = btree_invalidatepage,
-       .sync_page      = block_sync_page,
 #ifdef CONFIG_MIGRATION
        .migratepage    = btree_migratepage,
 #endif
@@ -1330,82 +1329,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
        return ret;
 }
 
-/*
- * this unplugs every device on the box, and it is only used when page
- * is null
- */
-static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-       struct btrfs_device *device;
-       struct btrfs_fs_info *info;
-
-       info = (struct btrfs_fs_info *)bdi->unplug_io_data;
-       list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
-               if (!device->bdev)
-                       continue;
-
-               bdi = blk_get_backing_dev_info(device->bdev);
-               if (bdi->unplug_io_fn)
-                       bdi->unplug_io_fn(bdi, page);
-       }
-}
-
-static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-       struct inode *inode;
-       struct extent_map_tree *em_tree;
-       struct extent_map *em;
-       struct address_space *mapping;
-       u64 offset;
-
-       /* the generic O_DIRECT read code does this */
-       if (1 || !page) {
-               __unplug_io_fn(bdi, page);
-               return;
-       }
-
-       /*
-        * page->mapping may change at any time.  Get a consistent copy
-        * and use that for everything below
-        */
-       smp_mb();
-       mapping = page->mapping;
-       if (!mapping)
-               return;
-
-       inode = mapping->host;
-
-       /*
-        * don't do the expensive searching for a small number of
-        * devices
-        */
-       if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
-               __unplug_io_fn(bdi, page);
-               return;
-       }
-
-       offset = page_offset(page);
-
-       em_tree = &BTRFS_I(inode)->extent_tree;
-       read_lock(&em_tree->lock);
-       em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
-       read_unlock(&em_tree->lock);
-       if (!em) {
-               __unplug_io_fn(bdi, page);
-               return;
-       }
-
-       if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-               free_extent_map(em);
-               __unplug_io_fn(bdi, page);
-               return;
-       }
-       offset = offset - em->start;
-       btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
-                         em->block_start + offset, page);
-       free_extent_map(em);
-}
-
 /*
  * If this fails, caller must call bdi_destroy() to get rid of the
  * bdi again.
@@ -1420,8 +1343,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
                return err;
 
        bdi->ra_pages   = default_backing_dev_info.ra_pages;
-       bdi->unplug_io_fn       = btrfs_unplug_io_fn;
-       bdi->unplug_io_data     = info;
        bdi->congested_fn       = btrfs_congested_fn;
        bdi->congested_data     = info;
        return 0;
index 714adc4ac4c24eaae26900bb9e862bc8b874432b..b5b92824a27137980287ad2418c565b425dd1bfa 100644 (file)
@@ -2188,7 +2188,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        unsigned long nr_written = 0;
 
        if (wbc->sync_mode == WB_SYNC_ALL)
-               write_flags = WRITE_SYNC_PLUG;
+               write_flags = WRITE_SYNC;
        else
                write_flags = WRITE;
 
index 512c3d1da083add52ddac41d85e1b694dbfd18f3..119520bdb9a540d8dc0f9bcf94d66b01df0af068 100644 (file)
@@ -7340,7 +7340,6 @@ static const struct address_space_operations btrfs_aops = {
        .writepage      = btrfs_writepage,
        .writepages     = btrfs_writepages,
        .readpages      = btrfs_readpages,
-       .sync_page      = block_sync_page,
        .direct_IO      = btrfs_direct_IO,
        .invalidatepage = btrfs_invalidatepage,
        .releasepage    = btrfs_releasepage,
index dd13eb81ee4011df4d52103665d30465fe696306..9d554e8e6583e62a277eaa906ebd8dbe03d0f39f 100644 (file)
@@ -162,7 +162,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
        struct bio *cur;
        int again = 0;
        unsigned long num_run;
-       unsigned long num_sync_run;
        unsigned long batch_run = 0;
        unsigned long limit;
        unsigned long last_waited = 0;
@@ -173,11 +172,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
        limit = btrfs_async_submit_limit(fs_info);
        limit = limit * 2 / 3;
 
-       /* we want to make sure that every time we switch from the sync
-        * list to the normal list, we unplug
-        */
-       num_sync_run = 0;
-
 loop:
        spin_lock(&device->io_lock);
 
@@ -223,15 +217,6 @@ loop_lock:
 
        spin_unlock(&device->io_lock);
 
-       /*
-        * if we're doing the regular priority list, make sure we unplug
-        * for any high prio bios we've sent down
-        */
-       if (pending_bios == &device->pending_bios && num_sync_run > 0) {
-               num_sync_run = 0;
-               blk_run_backing_dev(bdi, NULL);
-       }
-
        while (pending) {
 
                rmb();
@@ -259,19 +244,11 @@ loop_lock:
 
                BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 
-               if (cur->bi_rw & REQ_SYNC)
-                       num_sync_run++;
-
                submit_bio(cur->bi_rw, cur);
                num_run++;
                batch_run++;
-               if (need_resched()) {
-                       if (num_sync_run) {
-                               blk_run_backing_dev(bdi, NULL);
-                               num_sync_run = 0;
-                       }
+               if (need_resched())
                        cond_resched();
-               }
 
                /*
                 * we made progress, there is more work to do and the bdi
@@ -304,13 +281,8 @@ loop_lock:
                                 * against it before looping
                                 */
                                last_waited = ioc->last_waited;
-                               if (need_resched()) {
-                                       if (num_sync_run) {
-                                               blk_run_backing_dev(bdi, NULL);
-                                               num_sync_run = 0;
-                                       }
+                               if (need_resched())
                                        cond_resched();
-                               }
                                continue;
                        }
                        spin_lock(&device->io_lock);
@@ -323,22 +295,6 @@ loop_lock:
                }
        }
 
-       if (num_sync_run) {
-               num_sync_run = 0;
-               blk_run_backing_dev(bdi, NULL);
-       }
-       /*
-        * IO has already been through a long path to get here.  Checksumming,
-        * async helper threads, perhaps compression.  We've done a pretty
-        * good job of collecting a batch of IO and should just unplug
-        * the device right away.
-        *
-        * This will help anyone who is waiting on the IO, they might have
-        * already unplugged, but managed to do so before the bio they
-        * cared about found its way down here.
-        */
-       blk_run_backing_dev(bdi, NULL);
-
        cond_resched();
        if (again)
                goto loop;
@@ -2955,7 +2911,7 @@ static int find_live_mirror(struct map_lookup *map, int first, int num,
 static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
                             u64 logical, u64 *length,
                             struct btrfs_multi_bio **multi_ret,
-                            int mirror_num, struct page *unplug_page)
+                            int mirror_num)
 {
        struct extent_map *em;
        struct map_lookup *map;
@@ -2987,11 +2943,6 @@ again:
        em = lookup_extent_mapping(em_tree, logical, *length);
        read_unlock(&em_tree->lock);
 
-       if (!em && unplug_page) {
-               kfree(multi);
-               return 0;
-       }
-
        if (!em) {
                printk(KERN_CRIT "unable to find logical %llu len %llu\n",
                       (unsigned long long)logical,
@@ -3047,13 +2998,13 @@ again:
                *length = em->len - offset;
        }
 
-       if (!multi_ret && !unplug_page)
+       if (!multi_ret)
                goto out;
 
        num_stripes = 1;
        stripe_index = 0;
        if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-               if (unplug_page || (rw & REQ_WRITE))
+               if (rw & REQ_WRITE)
                        num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
@@ -3075,7 +3026,7 @@ again:
                stripe_index = do_div(stripe_nr, factor);
                stripe_index *= map->sub_stripes;
 
-               if (unplug_page || (rw & REQ_WRITE))
+               if (rw & REQ_WRITE)
                        num_stripes = map->sub_stripes;
                else if (mirror_num)
                        stripe_index += mirror_num - 1;
@@ -3095,22 +3046,10 @@ again:
        BUG_ON(stripe_index >= map->num_stripes);
 
        for (i = 0; i < num_stripes; i++) {
-               if (unplug_page) {
-                       struct btrfs_device *device;
-                       struct backing_dev_info *bdi;
-
-                       device = map->stripes[stripe_index].dev;
-                       if (device->bdev) {
-                               bdi = blk_get_backing_dev_info(device->bdev);
-                               if (bdi->unplug_io_fn)
-                                       bdi->unplug_io_fn(bdi, unplug_page);
-                       }
-               } else {
-                       multi->stripes[i].physical =
-                               map->stripes[stripe_index].physical +
-                               stripe_offset + stripe_nr * map->stripe_len;
-                       multi->stripes[i].dev = map->stripes[stripe_index].dev;
-               }
+               multi->stripes[i].physical =
+                       map->stripes[stripe_index].physical +
+                       stripe_offset + stripe_nr * map->stripe_len;
+               multi->stripes[i].dev = map->stripes[stripe_index].dev;
                stripe_index++;
        }
        if (multi_ret) {
@@ -3128,7 +3067,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
                      struct btrfs_multi_bio **multi_ret, int mirror_num)
 {
        return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
-                                mirror_num, NULL);
+                                mirror_num);
 }
 
 int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -3196,14 +3135,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        return 0;
 }
 
-int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
-                     u64 logical, struct page *page)
-{
-       u64 length = PAGE_CACHE_SIZE;
-       return __btrfs_map_block(map_tree, READ, logical, &length,
-                                NULL, 0, page);
-}
-
 static void end_bio_multi_stripe(struct bio *bio, int err)
 {
        struct btrfs_multi_bio *multi = bio->bi_private;
index 2219a76e2caf08415b2e207bc23466d4154d35e0..2e6b1a387b7eb136870ee0ab38c9bc86a794264a 100644 (file)
@@ -54,23 +54,15 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 }
 EXPORT_SYMBOL(init_buffer);
 
-static int sync_buffer(void *word)
+static int sleep_on_buffer(void *word)
 {
-       struct block_device *bd;
-       struct buffer_head *bh
-               = container_of(word, struct buffer_head, b_state);
-
-       smp_mb();
-       bd = bh->b_bdev;
-       if (bd)
-               blk_run_address_space(bd->bd_inode->i_mapping);
        io_schedule();
        return 0;
 }
 
 void __lock_buffer(struct buffer_head *bh)
 {
-       wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer,
+       wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
                                                        TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_buffer);
@@ -90,7 +82,7 @@ EXPORT_SYMBOL(unlock_buffer);
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
-       wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+       wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__wait_on_buffer);
 
@@ -749,10 +741,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 {
        struct buffer_head *bh;
        struct list_head tmp;
-       struct address_space *mapping, *prev_mapping = NULL;
+       struct address_space *mapping;
        int err = 0, err2;
+       struct blk_plug plug;
 
        INIT_LIST_HEAD(&tmp);
+       blk_start_plug(&plug);
 
        spin_lock(lock);
        while (!list_empty(list)) {
@@ -775,7 +769,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
                                 * still in flight on potentially older
                                 * contents.
                                 */
-                               write_dirty_buffer(bh, WRITE_SYNC_PLUG);
+                               write_dirty_buffer(bh, WRITE_SYNC);
 
                                /*
                                 * Kick off IO for the previous mapping. Note
@@ -783,16 +777,16 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
                                 * wait_on_buffer() will do that for us
                                 * through sync_buffer().
                                 */
-                               if (prev_mapping && prev_mapping != mapping)
-                                       blk_run_address_space(prev_mapping);
-                               prev_mapping = mapping;
-
                                brelse(bh);
                                spin_lock(lock);
                        }
                }
        }
 
+       spin_unlock(lock);
+       blk_finish_plug(&plug);
+       spin_lock(lock);
+
        while (!list_empty(&tmp)) {
                bh = BH_ENTRY(tmp.prev);
                get_bh(bh);
@@ -1614,14 +1608,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
  * prevents this contention from occurring.
  *
  * If block_write_full_page() is called with wbc->sync_mode ==
- * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
- * causes the writes to be flagged as synchronous writes, but the
- * block device queue will NOT be unplugged, since usually many pages
- * will be pushed to the out before the higher-level caller actually
- * waits for the writes to be completed.  The various wait functions,
- * such as wait_on_writeback_range() will ultimately call sync_page()
- * which will ultimately call blk_run_backing_dev(), which will end up
- * unplugging the device queue.
+ * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
+ * causes the writes to be flagged as synchronous writes.
  */
 static int __block_write_full_page(struct inode *inode, struct page *page,
                        get_block_t *get_block, struct writeback_control *wbc,
@@ -1634,7 +1622,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
        const unsigned blocksize = 1 << inode->i_blkbits;
        int nr_underway = 0;
        int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
-                       WRITE_SYNC_PLUG : WRITE);
+                       WRITE_SYNC : WRITE);
 
        BUG_ON(!PageLocked(page));
 
@@ -3138,17 +3126,6 @@ out:
 }
 EXPORT_SYMBOL(try_to_free_buffers);
 
-void block_sync_page(struct page *page)
-{
-       struct address_space *mapping;
-
-       smp_mb();
-       mapping = page_mapping(page);
-       if (mapping)
-               blk_run_backing_dev(mapping->backing_dev_info, page);
-}
-EXPORT_SYMBOL(block_sync_page);
-
 /*
  * There are no bdflush tunables left.  But distributions are
  * still running obsolete flush daemons, so we terminate them here.
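The rewritten comment above reduces the old unplug story to one rule: under WB_SYNC_ALL the buffers are simply submitted with WRITE_SYNC and nothing has to unplug a queue afterwards. A small sketch of how a buffer write path now picks and uses its flags, mirroring write_dirty_buffer()/fsync_buffers_list() above (foo_write_dirty_buffer() is a hypothetical name):

#include <linux/fs.h>		/* WRITE, WRITE_SYNC */
#include <linux/writeback.h>	/* struct writeback_control, WB_SYNC_ALL */
#include <linux/buffer_head.h>

static void foo_write_dirty_buffer(struct buffer_head *bh,
				   struct writeback_control *wbc)
{
	int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;

	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);			/* reference dropped by the end_io */
		bh->b_end_io = end_buffer_write_sync;
		submit_bh(rw, bh);		/* WRITE_SYNC replaces WRITE_SYNC_PLUG */
	} else {
		unlock_buffer(bh);
	}
}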
index e964b1cd5dd092bda83d274da53248a50dfd0ba3..c27d236738fc08ff80d32eceabc8f412140afbb2 100644 (file)
@@ -1569,34 +1569,6 @@ int cifs_fsync(struct file *file, int datasync)
        return rc;
 }
 
-/* static void cifs_sync_page(struct page *page)
-{
-       struct address_space *mapping;
-       struct inode *inode;
-       unsigned long index = page->index;
-       unsigned int rpages = 0;
-       int rc = 0;
-
-       cFYI(1, "sync page %p", page);
-       mapping = page->mapping;
-       if (!mapping)
-               return 0;
-       inode = mapping->host;
-       if (!inode)
-               return; */
-
-/*     fill in rpages then
-       result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
-
-/*     cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
-
-#if 0
-       if (rc < 0)
-               return rc;
-       return 0;
-#endif
-} */
-
 /*
  * As file closes, flush all cached write data for this inode checking
  * for write behind errors.
@@ -2510,7 +2482,6 @@ const struct address_space_operations cifs_addr_ops = {
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .invalidatepage = cifs_invalidate_page,
-       /* .sync_page = cifs_sync_page, */
        /* .direct_IO = */
 };
 
@@ -2528,6 +2499,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .invalidatepage = cifs_invalidate_page,
-       /* .sync_page = cifs_sync_page, */
        /* .direct_IO = */
 };
index dcb5577cde1de8ebdf57106a91248c17db81b823..ac5f164170e386fe242eb758345173c6e8e8697c 100644 (file)
@@ -1110,11 +1110,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
            ((rw & READ) || (dio->result == dio->size)))
                ret = -EIOCBQUEUED;
 
-       if (ret != -EIOCBQUEUED) {
-               /* All IO is now issued, send it on its way */
-               blk_run_address_space(inode->i_mapping);
+       if (ret != -EIOCBQUEUED)
                dio_await_completion(dio);
-       }
 
        /*
         * Sync will always be dropping the final ref and completing the
@@ -1176,7 +1173,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        struct dio *dio;
 
        if (rw & WRITE)
-               rw = WRITE_ODIRECT_PLUG;
+               rw = WRITE_ODIRECT;
 
        if (bdev)
                bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
index a8e7797b947795f2ef70a05c82a204fd493ba472..9c13412e6c99c78d11c3a89a42e007c82fb1b58b 100644 (file)
@@ -23,7 +23,6 @@ static sector_t _efs_bmap(struct address_space *mapping, sector_t block)
 }
 static const struct address_space_operations efs_aops = {
        .readpage = efs_readpage,
-       .sync_page = block_sync_page,
        .bmap = _efs_bmap
 };
 
index 0c713cfbebf062c312427f8d88e9a1adb0a3a87c..8472c098445ddbe332c0ee33aa10988cb03a9b15 100644 (file)
@@ -823,7 +823,6 @@ const struct address_space_operations exofs_aops = {
        .direct_IO      = NULL, /* TODO: Should be trivial to do */
 
        /* With these NULL has special meaning or default is not exported */
-       .sync_page      = NULL,
        .get_xip_mem    = NULL,
        .migratepage    = NULL,
        .launder_page   = NULL,
index 40ad210a5049a6eed9b184b3c30750ce6951fa46..c47f706878b5f1c8befcd65e5dc9c68b46382200 100644 (file)
@@ -860,7 +860,6 @@ const struct address_space_operations ext2_aops = {
        .readpage               = ext2_readpage,
        .readpages              = ext2_readpages,
        .writepage              = ext2_writepage,
-       .sync_page              = block_sync_page,
        .write_begin            = ext2_write_begin,
        .write_end              = ext2_write_end,
        .bmap                   = ext2_bmap,
@@ -880,7 +879,6 @@ const struct address_space_operations ext2_nobh_aops = {
        .readpage               = ext2_readpage,
        .readpages              = ext2_readpages,
        .writepage              = ext2_nobh_writepage,
-       .sync_page              = block_sync_page,
        .write_begin            = ext2_nobh_write_begin,
        .write_end              = nobh_write_end,
        .bmap                   = ext2_bmap,
index ae94f6d949f526d04fc0e2dcb8983622eeab8fa0..fe2541d250e44d0f9fe8a79f7ecbe39f5fbca34c 100644 (file)
@@ -1894,7 +1894,6 @@ static const struct address_space_operations ext3_ordered_aops = {
        .readpage               = ext3_readpage,
        .readpages              = ext3_readpages,
        .writepage              = ext3_ordered_writepage,
-       .sync_page              = block_sync_page,
        .write_begin            = ext3_write_begin,
        .write_end              = ext3_ordered_write_end,
        .bmap                   = ext3_bmap,
@@ -1910,7 +1909,6 @@ static const struct address_space_operations ext3_writeback_aops = {
        .readpage               = ext3_readpage,
        .readpages              = ext3_readpages,
        .writepage              = ext3_writeback_writepage,
-       .sync_page              = block_sync_page,
        .write_begin            = ext3_write_begin,
        .write_end              = ext3_writeback_write_end,
        .bmap                   = ext3_bmap,
@@ -1926,7 +1924,6 @@ static const struct address_space_operations ext3_journalled_aops = {
        .readpage               = ext3_readpage,
        .readpages              = ext3_readpages,
        .writepage              = ext3_journalled_writepage,
-       .sync_page              = block_sync_page,
        .write_begin            = ext3_write_begin,
        .write_end              = ext3_journalled_write_end,
        .set_page_dirty         = ext3_journalled_set_page_dirty,
index 9f7f9e49914fa775709d5c99e805440fd4ff0f9f..9297ad46c4658ee3d7e05198754dc14789db8c2e 100644 (file)
@@ -3903,7 +3903,6 @@ static const struct address_space_operations ext4_ordered_aops = {
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
-       .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_ordered_write_end,
        .bmap                   = ext4_bmap,
@@ -3919,7 +3918,6 @@ static const struct address_space_operations ext4_writeback_aops = {
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
-       .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_writeback_write_end,
        .bmap                   = ext4_bmap,
@@ -3935,7 +3933,6 @@ static const struct address_space_operations ext4_journalled_aops = {
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
-       .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_journalled_write_end,
        .set_page_dirty         = ext4_journalled_set_page_dirty,
@@ -3951,7 +3948,6 @@ static const struct address_space_operations ext4_da_aops = {
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
        .writepages             = ext4_da_writepages,
-       .sync_page              = block_sync_page,
        .write_begin            = ext4_da_write_begin,
        .write_end              = ext4_da_write_end,
        .bmap                   = ext4_bmap,
index 955cc309142fe983dc41d95467b1f5a66d7a6b69..e2cd90e4bb7c9e20cd0c2d274ac6f0b372eda5ba 100644 (file)
@@ -310,8 +310,7 @@ static int io_submit_init(struct ext4_io_submit *io,
        io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
 
        io->io_bio = bio;
-       io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
-                       WRITE_SYNC_PLUG : WRITE);
+       io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
        io->io_next_block = bh->b_blocknr;
        return 0;
 }
index 0e277ec4b6120663795086b3a0cc80c4d1fcc813..8d68690bdcf1b2f26bb318972218930daa9676a6 100644 (file)
@@ -236,7 +236,6 @@ static const struct address_space_operations fat_aops = {
        .readpages      = fat_readpages,
        .writepage      = fat_writepage,
        .writepages     = fat_writepages,
-       .sync_page      = block_sync_page,
        .write_begin    = fat_write_begin,
        .write_end      = fat_write_end,
        .direct_IO      = fat_direct_IO,
index 1429f3ae1e868f2cb6f066542c3675d1d09134cd..5d318c44f8554bdbf5304a707557e413089b2913 100644 (file)
@@ -44,7 +44,6 @@ static sector_t               vxfs_bmap(struct address_space *, sector_t);
 const struct address_space_operations vxfs_aops = {
        .readpage =             vxfs_readpage,
        .bmap =                 vxfs_bmap,
-       .sync_page =            block_sync_page,
 };
 
 inline void
index 051b1a084528b382201ae2a5dd3f7c536a60122c..cc6ec4b2f0ffed9c05959b149614a8a1e19e467d 100644 (file)
@@ -870,7 +870,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 
        fc->bdi.name = "fuse";
        fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
-       fc->bdi.unplug_io_fn = default_unplug_io_fn;
        /* fuse does it's own writeback accounting */
        fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
 
index aad77e4f61b574a3b29bb253a01e29046c70da55..c71995b111bf6f4347ffb313692c4ee66ff6f74e 100644 (file)
@@ -1117,7 +1117,6 @@ static const struct address_space_operations gfs2_writeback_aops = {
        .writepages = gfs2_writeback_writepages,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
-       .sync_page = block_sync_page,
        .write_begin = gfs2_write_begin,
        .write_end = gfs2_write_end,
        .bmap = gfs2_bmap,
@@ -1133,7 +1132,6 @@ static const struct address_space_operations gfs2_ordered_aops = {
        .writepage = gfs2_ordered_writepage,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
-       .sync_page = block_sync_page,
        .write_begin = gfs2_write_begin,
        .write_end = gfs2_write_end,
        .set_page_dirty = gfs2_set_page_dirty,
@@ -1151,7 +1149,6 @@ static const struct address_space_operations gfs2_jdata_aops = {
        .writepages = gfs2_jdata_writepages,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
-       .sync_page = block_sync_page,
        .write_begin = gfs2_write_begin,
        .write_end = gfs2_write_end,
        .set_page_dirty = gfs2_set_page_dirty,
index e7ed31f858dda0b9219391adb94ab639885b3673..5b102c1887fd92ffd9b1c14dc01458f112736973 100644 (file)
@@ -121,7 +121,7 @@ __acquires(&sdp->sd_ail_lock)
                        lock_buffer(bh);
                        if (test_clear_buffer_dirty(bh)) {
                                bh->b_end_io = end_buffer_write_sync;
-                               submit_bh(WRITE_SYNC_PLUG, bh);
+                               submit_bh(WRITE_SYNC, bh);
                        } else {
                                unlock_buffer(bh);
                                brelse(bh);
@@ -647,7 +647,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
                lock_buffer(bh);
                if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
                        bh->b_end_io = end_buffer_write_sync;
-                       submit_bh(WRITE_SYNC_PLUG, bh);
+                       submit_bh(WRITE_SYNC, bh);
                } else {
                        unlock_buffer(bh);
                        brelse(bh);
index e919abf25ecde693dcb361412ea7438d5a5f7510..51d27f00ebb47ad1b7c5be178c2ad630e6fbefd0 100644 (file)
@@ -204,7 +204,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
                }
 
                gfs2_log_unlock(sdp);
-               submit_bh(WRITE_SYNC_PLUG, bh);
+               submit_bh(WRITE_SYNC, bh);
                gfs2_log_lock(sdp);
 
                n = 0;
@@ -214,7 +214,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
                        gfs2_log_unlock(sdp);
                        lock_buffer(bd2->bd_bh);
                        bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
-                       submit_bh(WRITE_SYNC_PLUG, bh);
+                       submit_bh(WRITE_SYNC, bh);
                        gfs2_log_lock(sdp);
                        if (++n >= num)
                                break;
@@ -356,7 +356,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
                sdp->sd_log_num_revoke--;
 
                if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
-                       submit_bh(WRITE_SYNC_PLUG, bh);
+                       submit_bh(WRITE_SYNC, bh);
 
                        bh = gfs2_log_get_buf(sdp);
                        mh = (struct gfs2_meta_header *)bh->b_data;
@@ -373,7 +373,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
        }
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 
-       submit_bh(WRITE_SYNC_PLUG, bh);
+       submit_bh(WRITE_SYNC, bh);
 }
 
 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
@@ -575,7 +575,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
        ptr = bh_log_ptr(bh);
        
        get_bh(bh);
-       submit_bh(WRITE_SYNC_PLUG, bh);
+       submit_bh(WRITE_SYNC, bh);
        gfs2_log_lock(sdp);
        while(!list_empty(list)) {
                bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
@@ -601,7 +601,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
                } else {
                        bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
                }
-               submit_bh(WRITE_SYNC_PLUG, bh1);
+               submit_bh(WRITE_SYNC, bh1);
                gfs2_log_lock(sdp);
                ptr += 2;
        }
index 01d97f4865535cb11accdd4689dddfa2c5ece66a..675349b5a1335de5e7c8f4ef22730fbed46b7463 100644 (file)
@@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
        struct buffer_head *bh, *head;
        int nr_underway = 0;
        int write_op = REQ_META |
-               (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE);
+               (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 
        BUG_ON(!PageLocked(page));
        BUG_ON(!page_has_buffers(page));
@@ -94,7 +94,6 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 const struct address_space_operations gfs2_meta_aops = {
        .writepage = gfs2_aspace_writepage,
        .releasepage = gfs2_releasepage,
-       .sync_page = block_sync_page,
 };
 
 /**
index dffb4e996643557f04ae1c83292bd729ef5adada..fff16c968e67705a4d82e0bfcda6fca54aa005b0 100644 (file)
@@ -150,7 +150,6 @@ static int hfs_writepages(struct address_space *mapping,
 const struct address_space_operations hfs_btree_aops = {
        .readpage       = hfs_readpage,
        .writepage      = hfs_writepage,
-       .sync_page      = block_sync_page,
        .write_begin    = hfs_write_begin,
        .write_end      = generic_write_end,
        .bmap           = hfs_bmap,
@@ -160,7 +159,6 @@ const struct address_space_operations hfs_btree_aops = {
 const struct address_space_operations hfs_aops = {
        .readpage       = hfs_readpage,
        .writepage      = hfs_writepage,
-       .sync_page      = block_sync_page,
        .write_begin    = hfs_write_begin,
        .write_end      = generic_write_end,
        .bmap           = hfs_bmap,
index a8df651747f0eadaa8af44dffda657f2d64aea3d..b248a6cfcad93bb76631f7756b07b4ee6aff3d7e 100644 (file)
@@ -146,7 +146,6 @@ static int hfsplus_writepages(struct address_space *mapping,
 const struct address_space_operations hfsplus_btree_aops = {
        .readpage       = hfsplus_readpage,
        .writepage      = hfsplus_writepage,
-       .sync_page      = block_sync_page,
        .write_begin    = hfsplus_write_begin,
        .write_end      = generic_write_end,
        .bmap           = hfsplus_bmap,
@@ -156,7 +155,6 @@ const struct address_space_operations hfsplus_btree_aops = {
 const struct address_space_operations hfsplus_aops = {
        .readpage       = hfsplus_readpage,
        .writepage      = hfsplus_writepage,
-       .sync_page      = block_sync_page,
        .write_begin    = hfsplus_write_begin,
        .write_end      = generic_write_end,
        .bmap           = hfsplus_bmap,
index 2dbae20450f8f7953109d781b0faff8a933354c5..9b9eb6933e43d8620a38ec744f77d4d8712e3b9a 100644 (file)
@@ -119,7 +119,6 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations hpfs_aops = {
        .readpage = hpfs_readpage,
        .writepage = hpfs_writepage,
-       .sync_page = block_sync_page,
        .write_begin = hpfs_write_begin,
        .write_end = generic_write_end,
        .bmap = _hpfs_bmap
index a0f3833c0dbf578ae0f6477007ae443eae9b728b..3db5ba4568fc8efd30025a9e9906eb01a47f9c45 100644 (file)
@@ -1158,7 +1158,6 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
 
 static const struct address_space_operations isofs_aops = {
        .readpage = isofs_readpage,
-       .sync_page = block_sync_page,
        .bmap = _isofs_bmap
 };
 
index 34a4861c14b85d493a8b653c4ce700a4c580842e..da871ee084d365780f8a3f78078550e48d3f12ea 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/bio.h>
+#include <linux/blkdev.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -294,7 +295,7 @@ void journal_commit_transaction(journal_t *journal)
        int first_tag = 0;
        int tag_flag;
        int i;
-       int write_op = WRITE_SYNC;
+       struct blk_plug plug;
 
        /*
         * First job: lock down the current transaction and wait for
@@ -327,13 +328,6 @@ void journal_commit_transaction(journal_t *journal)
        spin_lock(&journal->j_state_lock);
        commit_transaction->t_state = T_LOCKED;
 
-       /*
-        * Use plugged writes here, since we want to submit several before
-        * we unplug the device. We don't do explicit unplugging in here,
-        * instead we rely on sync_buffer() doing the unplug for us.
-        */
-       if (commit_transaction->t_synchronous_commit)
-               write_op = WRITE_SYNC_PLUG;
        spin_lock(&commit_transaction->t_handle_lock);
        while (commit_transaction->t_updates) {
                DEFINE_WAIT(wait);
@@ -418,8 +412,10 @@ void journal_commit_transaction(journal_t *journal)
         * Now start flushing things to disk, in the order they appear
         * on the transaction lists.  Data blocks go first.
         */
+       blk_start_plug(&plug);
        err = journal_submit_data_buffers(journal, commit_transaction,
-                                         write_op);
+                                         WRITE_SYNC);
+       blk_finish_plug(&plug);
 
        /*
         * Wait for all previously submitted IO to complete.
@@ -480,7 +476,9 @@ void journal_commit_transaction(journal_t *journal)
                err = 0;
        }
 
-       journal_write_revoke_records(journal, commit_transaction, write_op);
+       blk_start_plug(&plug);
+
+       journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC);
 
        /*
         * If we found any dirty or locked buffers, then we should have
@@ -650,7 +648,7 @@ start_journal_io:
                                clear_buffer_dirty(bh);
                                set_buffer_uptodate(bh);
                                bh->b_end_io = journal_end_buffer_io_sync;
-                               submit_bh(write_op, bh);
+                               submit_bh(WRITE_SYNC, bh);
                        }
                        cond_resched();
 
@@ -661,6 +659,8 @@ start_journal_io:
                }
        }
 
+       blk_finish_plug(&plug);
+
        /* Lo and behold: we have just managed to send a transaction to
            the log.  Before we can commit it, wait for the IO so far to
            complete.  Control buffers being written are on the
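
The jbd hunks show that the plug is per-task state rather than a per-request flag: everything submitted between blk_start_plug() and blk_finish_plug(), including submissions made inside called helpers, is batched on the task's plug list. A hedged sketch of that shape; the two helpers are hypothetical stand-ins for journal_submit_data_buffers() and the wait loop that follows it:

#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/list.h>

/* Hypothetical helpers, standing in for the journal's data submission and
 * wait phases. */
extern void example_submit_buffers(struct list_head *buffers, int rw);
extern void example_wait_for_buffers(struct list_head *buffers);

static void example_commit_data(struct list_head *buffers)
{
        struct blk_plug plug;

        /*
         * Everything the helper submits with submit_bh(WRITE_SYNC, ...) lands
         * on this task's plug list and is dispatched together when the plug
         * is finished, so there is no longer a separate "plugged" write flag
         * to select.
         */
        blk_start_plug(&plug);
        example_submit_buffers(buffers, WRITE_SYNC);
        blk_finish_plug(&plug);

        example_wait_for_buffers(buffers);
}
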
index f3ad1598b20128bc3acaaa1bd81e7ece1e27e270..fa36d7662b217455a99a83f93e35899dbf950e4a 100644 (file)
@@ -137,9 +137,9 @@ static int journal_submit_commit_record(journal_t *journal,
        if (journal->j_flags & JBD2_BARRIER &&
            !JBD2_HAS_INCOMPAT_FEATURE(journal,
                                       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
-               ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh);
+               ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh);
        else
-               ret = submit_bh(WRITE_SYNC_PLUG, bh);
+               ret = submit_bh(WRITE_SYNC, bh);
 
        *cbh = bh;
        return ret;
@@ -329,7 +329,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        int tag_bytes = journal_tag_bytes(journal);
        struct buffer_head *cbh = NULL; /* For transactional checksums */
        __u32 crc32_sum = ~0;
-       int write_op = WRITE_SYNC;
+       struct blk_plug plug;
 
        /*
         * First job: lock down the current transaction and wait for
@@ -363,13 +363,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        write_lock(&journal->j_state_lock);
        commit_transaction->t_state = T_LOCKED;
 
-       /*
-        * Use plugged writes here, since we want to submit several before
-        * we unplug the device. We don't do explicit unplugging in here,
-        * instead we rely on sync_buffer() doing the unplug for us.
-        */
-       if (commit_transaction->t_synchronous_commit)
-               write_op = WRITE_SYNC_PLUG;
        trace_jbd2_commit_locking(journal, commit_transaction);
        stats.run.rs_wait = commit_transaction->t_max_wait;
        stats.run.rs_locked = jiffies;
@@ -469,8 +462,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        if (err)
                jbd2_journal_abort(journal, err);
 
+       blk_start_plug(&plug);
        jbd2_journal_write_revoke_records(journal, commit_transaction,
-                                         write_op);
+                                         WRITE_SYNC);
+       blk_finish_plug(&plug);
 
        jbd_debug(3, "JBD: commit phase 2\n");
 
@@ -497,6 +492,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        err = 0;
        descriptor = NULL;
        bufs = 0;
+       blk_start_plug(&plug);
        while (commit_transaction->t_buffers) {
 
                /* Find the next buffer to be journaled... */
@@ -658,7 +654,7 @@ start_journal_io:
                                clear_buffer_dirty(bh);
                                set_buffer_uptodate(bh);
                                bh->b_end_io = journal_end_buffer_io_sync;
-                               submit_bh(write_op, bh);
+                               submit_bh(WRITE_SYNC, bh);
                        }
                        cond_resched();
                        stats.run.rs_blocks_logged += bufs;
@@ -699,6 +695,8 @@ start_journal_io:
                        __jbd2_journal_abort_hard(journal);
        }
 
+       blk_finish_plug(&plug);
+
        /* Lo and behold: we have just managed to send a transaction to
            the log.  Before we can commit it, wait for the IO so far to
            complete.  Control buffers being written are on the
index 9978803ceedc519ea90a84bbe04f7a77b8bc5b4e..eddbb373209e9c2dc013657bc041d44badb0aa29 100644 (file)
@@ -352,7 +352,6 @@ const struct address_space_operations jfs_aops = {
        .readpages      = jfs_readpages,
        .writepage      = jfs_writepage,
        .writepages     = jfs_writepages,
-       .sync_page      = block_sync_page,
        .write_begin    = jfs_write_begin,
        .write_end      = nobh_write_end,
        .bmap           = jfs_bmap,
index 48b44bd8267b960e7e5bd330521f9da93d6d1a6f..6740d34cd82b802e948b8760fcf13954a8a12ad2 100644 (file)
@@ -583,7 +583,6 @@ static void metapage_invalidatepage(struct page *page, unsigned long offset)
 const struct address_space_operations jfs_metapage_aops = {
        .readpage       = metapage_readpage,
        .writepage      = metapage_writepage,
-       .sync_page      = block_sync_page,
        .releasepage    = metapage_releasepage,
        .invalidatepage = metapage_invalidatepage,
        .set_page_dirty = __set_page_dirty_nobuffers,
index 723bc5bca09ae3837448c3a286e50ff38e427576..1adc8d455f0ea2d66237436184766c5ed120688f 100644 (file)
@@ -39,7 +39,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
        bio.bi_end_io = request_complete;
 
        submit_bio(rw, &bio);
-       generic_unplug_device(bdev_get_queue(bdev));
        wait_for_completion(&complete);
        return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO;
 }
@@ -168,7 +167,6 @@ static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
        }
        len = PAGE_ALIGN(len);
        __bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
-       generic_unplug_device(bdev_get_queue(logfs_super(sb)->s_bdev));
 }
 
 
index ae0b83f476a63be6a51bdef77e10504b29bd9f99..adcdc0a4e182673ef953e5bafd8e040987df6948 100644 (file)
@@ -399,7 +399,6 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block)
 static const struct address_space_operations minix_aops = {
        .readpage = minix_readpage,
        .writepage = minix_writepage,
-       .sync_page = block_sync_page,
        .write_begin = minix_write_begin,
        .write_end = generic_write_end,
        .bmap = minix_bmap
index d78455a81ec979a734f28d7874b6654e5a087e7e..0afc809e46e09b53cb767e56795fba3139c4b181 100644 (file)
@@ -364,6 +364,9 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
        sector_t last_block_in_bio = 0;
        struct buffer_head map_bh;
        unsigned long first_logical_block = 0;
+       struct blk_plug plug;
+
+       blk_start_plug(&plug);
 
        map_bh.b_state = 0;
        map_bh.b_size = 0;
@@ -385,6 +388,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
        BUG_ON(!list_empty(pages));
        if (bio)
                mpage_bio_submit(READ, bio);
+       blk_finish_plug(&plug);
        return 0;
 }
 EXPORT_SYMBOL(mpage_readpages);
@@ -666,8 +670,11 @@ int
 mpage_writepages(struct address_space *mapping,
                struct writeback_control *wbc, get_block_t get_block)
 {
+       struct blk_plug plug;
        int ret;
 
+       blk_start_plug(&plug);
+
        if (!get_block)
                ret = generic_writepages(mapping, wbc);
        else {
@@ -682,6 +689,7 @@ mpage_writepages(struct address_space *mapping,
                if (mpd.bio)
                        mpage_bio_submit(WRITE, mpd.bio);
        }
+       blk_finish_plug(&plug);
        return ret;
 }
 EXPORT_SYMBOL(mpage_writepages);
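
Because mpage_readpages() and mpage_writepages() now plug internally, a filesystem that forwards its ->readpages to mpage_readpages() gets the whole readahead batch submitted under one plug with no change of its own. A minimal sketch, assuming an illustrative get_block callback:

#include <linux/fs.h>
#include <linux/mpage.h>

/* Illustrative get_block callback; any existing one would do. */
extern int example_get_block(struct inode *inode, sector_t block,
                             struct buffer_head *bh_result, int create);

static int example_readpages(struct file *file, struct address_space *mapping,
                             struct list_head *pages, unsigned nr_pages)
{
        /* mpage_readpages() now plugs around the whole batch, so the bios it
         * builds for these pages can be merged before the queue runs. */
        return mpage_readpages(mapping, pages, nr_pages, example_get_block);
}
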
index 85f7baa15f5dd8fa1eac905b8c8840c73b0294fa..609cd223eea85abf39232688e2ccbd9dad94ee68 100644 (file)
 #include "page.h"
 #include "btnode.h"
 
-
-static const struct address_space_operations def_btnode_aops = {
-       .sync_page              = block_sync_page,
-};
-
 void nilfs_btnode_cache_init(struct address_space *btnc,
                             struct backing_dev_info *bdi)
 {
-       nilfs_mapping_init(btnc, bdi, &def_btnode_aops);
+       nilfs_mapping_init(btnc, bdi);
 }
 
 void nilfs_btnode_cache_clear(struct address_space *btnc)
index caf9a6a3fb54f0e0dd4cd63ccf103fd6aef20a70..1c2a3e23f8b2dec6b1f895098239864d7f5e9946 100644 (file)
@@ -49,7 +49,6 @@
 #include "ifile.h"
 
 static const struct address_space_operations def_gcinode_aops = {
-       .sync_page              = block_sync_page,
 };
 
 /*
index d5625be236a8c430e8b025b542cdcae98ce5f9b4..c0aa27490c027f40c40a12951caf38720f0ff417 100644 (file)
@@ -280,7 +280,6 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 const struct address_space_operations nilfs_aops = {
        .writepage              = nilfs_writepage,
        .readpage               = nilfs_readpage,
-       .sync_page              = block_sync_page,
        .writepages             = nilfs_writepages,
        .set_page_dirty         = nilfs_set_page_dirty,
        .readpages              = nilfs_readpages,
index a0babd2bff6a2e03a924e45110ff1a8698f59fd6..a649b05f7069db7b0beb31b7ebf10e5639a245bd 100644 (file)
@@ -399,7 +399,6 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
 
 static const struct address_space_operations def_mdt_aops = {
        .writepage              = nilfs_mdt_write_page,
-       .sync_page              = block_sync_page,
 };
 
 static const struct inode_operations def_mdt_iops;
@@ -438,10 +437,6 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size,
        mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
 }
 
-static const struct address_space_operations shadow_map_aops = {
-       .sync_page              = block_sync_page,
-};
-
 /**
  * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
  * @inode: inode of the metadata file
@@ -455,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
 
        INIT_LIST_HEAD(&shadow->frozen_buffers);
        address_space_init_once(&shadow->frozen_data);
-       nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
+       nilfs_mapping_init(&shadow->frozen_data, bdi);
        address_space_init_once(&shadow->frozen_btnodes);
-       nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
+       nilfs_mapping_init(&shadow->frozen_btnodes, bdi);
        mi->mi_shadow = shadow;
        return 0;
 }
index a585b35fd6bc201c9d3063005792101da769ab01..4d2a1ee0eb47f492663a4211012ec5710bcd22c4 100644 (file)
@@ -493,15 +493,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
 }
 
 void nilfs_mapping_init(struct address_space *mapping,
-                       struct backing_dev_info *bdi,
-                       const struct address_space_operations *aops)
+                       struct backing_dev_info *bdi)
 {
        mapping->host = NULL;
        mapping->flags = 0;
        mapping_set_gfp_mask(mapping, GFP_NOFS);
        mapping->assoc_mapping = NULL;
        mapping->backing_dev_info = bdi;
-       mapping->a_ops = aops;
+       mapping->a_ops = NULL;
 }
 
 /*
index 2a00953ebd5f1b58b92494dbd684eb6128d76598..f06b79ad7493160877b1b82e45bc2f2acfd9b02f 100644 (file)
@@ -62,8 +62,7 @@ int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
 void nilfs_copy_back_pages(struct address_space *, struct address_space *);
 void nilfs_clear_dirty_pages(struct address_space *);
 void nilfs_mapping_init(struct address_space *mapping,
-                       struct backing_dev_info *bdi,
-                       const struct address_space_operations *aops);
+                       struct backing_dev_info *bdi);
 unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
 unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
                                            sector_t start_blk,
index 0f83e93935b2fb02347c7c0a7c0f3e131d94af14..2853ff20f85a2b30f8f6ac9a9c36e880bebf845c 100644 (file)
@@ -509,7 +509,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
                 * Last BIO is always sent through the following
                 * submission.
                 */
-               rw |= REQ_SYNC | REQ_UNPLUG;
+               rw |= REQ_SYNC;
                res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
        }
 
index c3c2c7ac9020402d28dd0d2357ae2828c6e639a5..0b1e885b8cf8f8f72325c5a1c09d17fef64acd6f 100644 (file)
@@ -1543,8 +1543,6 @@ err_out:
  */
 const struct address_space_operations ntfs_aops = {
        .readpage       = ntfs_readpage,        /* Fill page with data. */
-       .sync_page      = block_sync_page,      /* Currently, just unplugs the
-                                                  disk request queue. */
 #ifdef NTFS_RW
        .writepage      = ntfs_writepage,       /* Write dirty page to disk. */
 #endif /* NTFS_RW */
@@ -1560,8 +1558,6 @@ const struct address_space_operations ntfs_aops = {
  */
 const struct address_space_operations ntfs_mst_aops = {
        .readpage       = ntfs_readpage,        /* Fill page with data. */
-       .sync_page      = block_sync_page,      /* Currently, just unplugs the
-                                                  disk request queue. */
 #ifdef NTFS_RW
        .writepage      = ntfs_writepage,       /* Write dirty page to disk. */
        .set_page_dirty = __set_page_dirty_nobuffers,   /* Set the page dirty
index 6551c7cbad92954202258d8715a3d44e06bc7d4b..ef9ed854255c8d2c8b15bdaf0e99545004f05aed 100644 (file)
@@ -698,8 +698,7 @@ lock_retry_remap:
                                        "uptodate! Unplugging the disk queue "
                                        "and rescheduling.");
                        get_bh(tbh);
-                       blk_run_address_space(mapping);
-                       schedule();
+                       io_schedule();
                        put_bh(tbh);
                        if (unlikely(!buffer_uptodate(tbh)))
                                goto read_err;
index 1fbb0e20131bf39e82f1822e0de172b7889497de..daea0359e9740263a525cec5c7f645e456d9a909 100644 (file)
@@ -2043,7 +2043,6 @@ const struct address_space_operations ocfs2_aops = {
        .write_begin            = ocfs2_write_begin,
        .write_end              = ocfs2_write_end,
        .bmap                   = ocfs2_bmap,
-       .sync_page              = block_sync_page,
        .direct_IO              = ocfs2_direct_IO,
        .invalidatepage         = ocfs2_invalidatepage,
        .releasepage            = ocfs2_releasepage,
index b108e863d8f65ab23ec5921fd425a9273f78365e..1adab287bd24c814ceb042be381744b3c3fc3e7c 100644 (file)
@@ -367,11 +367,7 @@ static inline void o2hb_bio_wait_dec(struct o2hb_bio_wait_ctxt *wc,
 static void o2hb_wait_on_io(struct o2hb_region *reg,
                            struct o2hb_bio_wait_ctxt *wc)
 {
-       struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
-
-       blk_run_address_space(mapping);
        o2hb_bio_wait_dec(wc, 1);
-
        wait_for_completion(&wc->wc_io_complete);
 }
 
index 8a6d34fa668a0715e349000f0706b7d6526bfe61..d738a7e493ddc07ed1b4b1fb7a30198b3d477c5f 100644 (file)
@@ -372,7 +372,6 @@ const struct address_space_operations omfs_aops = {
        .readpages = omfs_readpages,
        .writepage = omfs_writepage,
        .writepages = omfs_writepages,
-       .sync_page = block_sync_page,
        .write_begin = omfs_write_begin,
        .write_end = generic_write_end,
        .bmap = omfs_bmap,
index 9c21119512b9ace640f1d6107f73b7681c4f1417..ac546975031f7dd9a0f110f9a4479c6e34cef15c 100644 (file)
@@ -290,7 +290,8 @@ ssize_t part_inflight_show(struct device *dev,
 {
        struct hd_struct *p = dev_to_part(dev);
 
-       return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]);
+       return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
+               atomic_read(&p->in_flight[1]));
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
index e63b4171d583bca9f5f32bc106eacac52ba89fde..2b0646613f5a1f86da0637e7c131dc36c28e9005 100644 (file)
@@ -335,7 +335,6 @@ static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
 static const struct address_space_operations qnx4_aops = {
        .readpage       = qnx4_readpage,
        .writepage      = qnx4_writepage,
-       .sync_page      = block_sync_page,
        .write_begin    = qnx4_write_begin,
        .write_end      = generic_write_end,
        .bmap           = qnx4_bmap
index 1bba24bad82080382e3007a10da67b85305d88db..4fd5bb33dbb5e40f4969e38d7c4b52756d8bfab7 100644 (file)
@@ -3217,7 +3217,6 @@ const struct address_space_operations reiserfs_address_space_operations = {
        .readpages = reiserfs_readpages,
        .releasepage = reiserfs_releasepage,
        .invalidatepage = reiserfs_invalidatepage,
-       .sync_page = block_sync_page,
        .write_begin = reiserfs_write_begin,
        .write_end = reiserfs_write_end,
        .bmap = reiserfs_aop_bmap,
index e84864908264067fcde8f26736a80b31b6a2ec1b..8a06881b1920b9e164ab276ffe73180ebfbbe97a 100644 (file)
@@ -71,6 +71,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 #else
                INIT_LIST_HEAD(&s->s_files);
 #endif
+               s->s_bdi = &default_backing_dev_info;
                INIT_LIST_HEAD(&s->s_instances);
                INIT_HLIST_BL_HEAD(&s->s_anon);
                INIT_LIST_HEAD(&s->s_inodes);
@@ -936,6 +937,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
        sb = root->d_sb;
        BUG_ON(!sb);
        WARN_ON(!sb->s_bdi);
+       WARN_ON(sb->s_bdi == &default_backing_dev_info);
        sb->s_flags |= MS_BORN;
 
        error = security_sb_kern_mount(sb, flags, secdata);
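
These two super.c hunks give every superblock default_backing_dev_info at allocation time and then warn if a filesystem reaches mount_fs() without replacing it. A hedged sketch of what a non-block-backed filesystem's fill_super is expected to do; the bdi name and capability choice are illustrative, not from this patch:

#include <linux/backing-dev.h>
#include <linux/fs.h>

/* Illustrative: a filesystem not backed by a block device registers its own
 * bdi and points sb->s_bdi at it, so writeback and sync have a real bdi to
 * work against rather than default_backing_dev_info. */
static int example_setup_bdi(struct super_block *sb,
                             struct backing_dev_info *bdi)
{
        int err;

        err = bdi_setup_and_register(bdi, "examplefs", BDI_CAP_MAP_COPY);
        if (err)
                return err;
        sb->s_bdi = bdi;
        return 0;
}
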
index 92ca208777d562e64bfc769dc41a326f7c3da84e..c38ec163da6ccba00a0146c75606c1b548b31343 100644 (file)
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -34,7 +34,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
         * This should be safe, as we require bdi backing to actually
         * write out data in the first place
         */
-       if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info)
+       if (sb->s_bdi == &noop_backing_dev_info)
                return 0;
 
        if (sb->s_qcop && sb->s_qcop->quota_sync)
@@ -80,7 +80,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem);
 
 static void sync_one_sb(struct super_block *sb, void *arg)
 {
-       if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi)
+       if (!(sb->s_flags & MS_RDONLY))
                __sync_filesystem(sb, *(int *)arg);
 }
 /*
index 9ca66276315e08828b4b4708b82060013f416320..fa8d43c92bb81a7a03d3eda50f35dccec42bd44b 100644 (file)
@@ -488,7 +488,6 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations sysv_aops = {
        .readpage = sysv_readpage,
        .writepage = sysv_writepage,
-       .sync_page = block_sync_page,
        .write_begin = sysv_write_begin,
        .write_end = generic_write_end,
        .bmap = sysv_bmap
index e5dc1e120e8dfd9d4018ffc9ae961593f284a867..6ddd9973e68175da389451831852fc7bb6874029 100644 (file)
@@ -2011,7 +2011,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
         */
        c->bdi.name = "ubifs",
        c->bdi.capabilities = BDI_CAP_MAP_COPY;
-       c->bdi.unplug_io_fn = default_unplug_io_fn;
        err  = bdi_init(&c->bdi);
        if (err)
                goto out_close;
index f391a2adc69970c7a44a54883d1d01b35572498b..2a346bb1d9f5f082338555fb50e754e19eb4aa67 100644 (file)
@@ -98,7 +98,6 @@ static int udf_adinicb_write_end(struct file *file,
 const struct address_space_operations udf_adinicb_aops = {
        .readpage       = udf_adinicb_readpage,
        .writepage      = udf_adinicb_writepage,
-       .sync_page      = block_sync_page,
        .write_begin = simple_write_begin,
        .write_end = udf_adinicb_write_end,
 };
index ccc81432141411aadb24aa10b076d82e376ae5c4..1d1358ed80c13e5da17849c7773bc8a6d1861e7c 100644 (file)
@@ -140,7 +140,6 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations udf_aops = {
        .readpage       = udf_readpage,
        .writepage      = udf_writepage,
-       .sync_page      = block_sync_page,
        .write_begin            = udf_write_begin,
        .write_end              = generic_write_end,
        .bmap           = udf_bmap,
index 03c255f12df5110acdee54cbf8433ac13ac79e75..27a4babe7df0bcaf406e260631d86e44abb5ba51 100644 (file)
@@ -552,7 +552,6 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations ufs_aops = {
        .readpage = ufs_readpage,
        .writepage = ufs_writepage,
-       .sync_page = block_sync_page,
        .write_begin = ufs_write_begin,
        .write_end = generic_write_end,
        .bmap = ufs_bmap
index e56a4f567212a211c61a2f9f0f132ab3fd4f42be..11014302c9ca63959ea29310a8173c74f48bd3ea 100644 (file)
@@ -479,7 +479,7 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
                        break;
                if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
                        ufs_sync_inode (inode);
-               blk_run_address_space(inode->i_mapping);
+               blk_flush_plug(current);
                yield();
        }
 
index 8c5c8727745607800f91270ccf7184986b86b75e..52dbd14260ba691fdbf9cc46b1e46d13fc461e9e 100644 (file)
@@ -413,8 +413,7 @@ xfs_submit_ioend_bio(
        if (xfs_ioend_new_eof(ioend))
                xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
 
-       submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
-                  WRITE_SYNC_PLUG : WRITE, bio);
+       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
 }
 
 STATIC struct bio *
@@ -1495,7 +1494,6 @@ const struct address_space_operations xfs_address_space_operations = {
        .readpages              = xfs_vm_readpages,
        .writepage              = xfs_vm_writepage,
        .writepages             = xfs_vm_writepages,
-       .sync_page              = block_sync_page,
        .releasepage            = xfs_vm_releasepage,
        .invalidatepage         = xfs_vm_invalidatepage,
        .write_begin            = xfs_vm_write_begin,
index 5cb230f2cb4f613263cbb1e90c5ec06a5a1eb597..c05324d3282c0bae23ac943b8b1b47d7e9dfe416 100644 (file)
@@ -990,7 +990,7 @@ xfs_buf_lock(
        if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
                xfs_log_force(bp->b_target->bt_mount, 0);
        if (atomic_read(&bp->b_io_remaining))
-               blk_run_address_space(bp->b_target->bt_mapping);
+               blk_flush_plug(current);
        down(&bp->b_sema);
        XB_SET_OWNER(bp);
 
@@ -1034,9 +1034,7 @@ xfs_buf_wait_unpin(
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (atomic_read(&bp->b_pin_count) == 0)
                        break;
-               if (atomic_read(&bp->b_io_remaining))
-                       blk_run_address_space(bp->b_target->bt_mapping);
-               schedule();
+               io_schedule();
        }
        remove_wait_queue(&bp->b_waiters, &wait);
        set_current_state(TASK_RUNNING);
@@ -1442,7 +1440,7 @@ xfs_buf_iowait(
        trace_xfs_buf_iowait(bp, _RET_IP_);
 
        if (atomic_read(&bp->b_io_remaining))
-               blk_run_address_space(bp->b_target->bt_mapping);
+               blk_flush_plug(current);
        wait_for_completion(&bp->b_iowait);
 
        trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1666,7 +1664,6 @@ xfs_mapping_buftarg(
        struct inode            *inode;
        struct address_space    *mapping;
        static const struct address_space_operations mapping_aops = {
-               .sync_page = block_sync_page,
                .migratepage = fail_migrate_page,
        };
 
@@ -1947,7 +1944,7 @@ xfsbufd(
                        count++;
                }
                if (count)
-                       blk_run_address_space(target->bt_mapping);
+                       blk_flush_plug(current);
 
        } while (!kthread_should_stop());
 
@@ -1995,7 +1992,7 @@ xfs_flush_buftarg(
 
        if (wait) {
                /* Expedite and wait for IO to complete. */
-               blk_run_address_space(target->bt_mapping);
+               blk_flush_plug(current);
                while (!list_empty(&wait_list)) {
                        bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
 
index 9ac431396176d6c5e89d44084af7b249c41b94fc..4be33b4ca2f898a10296d3e2a6a4469942c6555a 100644 (file)
@@ -463,12 +463,15 @@ struct drm_irq_busid {
 enum drm_vblank_seq_type {
        _DRM_VBLANK_ABSOLUTE = 0x0,     /**< Wait for specific vblank sequence number */
        _DRM_VBLANK_RELATIVE = 0x1,     /**< Wait for given number of vblanks */
+       /* bits 1-6 are reserved for high crtcs */
+       _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e,
        _DRM_VBLANK_EVENT = 0x4000000,   /**< Send event instead of blocking */
        _DRM_VBLANK_FLIP = 0x8000000,   /**< Scheduled buffer swap should flip */
        _DRM_VBLANK_NEXTONMISS = 0x10000000,    /**< If missed, wait for next vblank */
        _DRM_VBLANK_SECONDARY = 0x20000000,     /**< Secondary display controller */
        _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */
 };
+#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1
 
 #define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE)
 #define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \
@@ -753,6 +756,7 @@ struct drm_event_vblank {
 };
 
 #define DRM_CAP_DUMB_BUFFER 0x1
+#define DRM_CAP_VBLANK_HIGH_CRTC 0x2
 
 /* typedef area */
 #ifndef __KERNEL__
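
The new mask and shift let userspace address CRTCs beyond the old primary/secondary pair by encoding the CRTC index into the reserved high-CRTC bits of the vblank request type. A hedged userspace sketch assuming the libdrm drmWaitVBlank() wrapper and a driver that advertises DRM_CAP_VBLANK_HIGH_CRTC; only the mask/shift usage comes from this header change:

#include <string.h>
#include <xf86drm.h>

/* Illustrative only: wait one vblank on a CRTC addressed through the new
 * high-crtc bits.  drmVBlank/drmWaitVBlank come from libdrm and are not part
 * of this patch; the mask and shift are the ones defined above. */
static int example_wait_vblank(int fd, unsigned int crtc)
{
        drmVBlank vbl;

        memset(&vbl, 0, sizeof(vbl));
        vbl.request.type = DRM_VBLANK_RELATIVE |
                ((crtc << _DRM_VBLANK_HIGH_CRTC_SHIFT) &
                 _DRM_VBLANK_HIGH_CRTC_MASK);
        vbl.request.sequence = 1;
        return drmWaitVBlank(fd, &vbl);
}
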
index 4ce34fa937d4910cf8d7546e54de1e888fb0abb2..96f4094b706d9ddd8f8e694655d36c8648326a8a 100644 (file)
@@ -66,8 +66,6 @@ struct backing_dev_info {
        unsigned int capabilities; /* Device capabilities */
        congested_fn *congested_fn; /* Function pointer if device is md/dm */
        void *congested_data;   /* Pointer to aux data for congested func */
-       void (*unplug_io_fn)(struct backing_dev_info *, struct page *);
-       void *unplug_io_data;
 
        char *name;
 
@@ -251,7 +249,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 
 extern struct backing_dev_info default_backing_dev_info;
 extern struct backing_dev_info noop_backing_dev_info;
-void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page);
 
 int writeback_in_progress(struct backing_dev_info *bdi);
 
@@ -336,17 +333,4 @@ static inline int bdi_sched_wait(void *word)
        return 0;
 }
 
-static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
-                                      struct page *page)
-{
-       if (bdi && bdi->unplug_io_fn)
-               bdi->unplug_io_fn(bdi, page);
-}
-
-static inline void blk_run_address_space(struct address_space *mapping)
-{
-       if (mapping)
-               blk_run_backing_dev(mapping->backing_dev_info, NULL);
-}
-
 #endif         /* _LINUX_BACKING_DEV_H */
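
With blk_run_backing_dev() and blk_run_address_space() gone, waiters no longer kick a plugged queue by hand: queues are never left plugged behind their backs, and a task's own plugged IO is issued when it blocks. The ntfs, ufs and xfs hunks earlier in this diff follow the pattern sketched below (the completion is illustrative):

#include <linux/blkdev.h>
#include <linux/completion.h>
#include <linux/sched.h>

/* Illustrative: wait for IO this task just submitted.  Flushing the per-task
 * plug replaces the old blk_run_address_space(mapping) kick before sleeping;
 * were the task to block without flushing, its pending plugged IO would be
 * issued on its behalf anyway. */
static void example_wait_for_io(struct completion *io_done)
{
        blk_flush_plug(current);
        wait_for_completion(io_done);
}
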
index 35dcdb3589bc9a59047a58e166efc540c22a3105..ce33e6868a2f57116ae76510f4dc5de183c2b8fd 100644 (file)
@@ -304,7 +304,6 @@ struct biovec_slab {
 };
 
 extern struct bio_set *fs_bio_set;
-extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly;
 
 /*
  * a small number of entries is fine, not going to be performance critical.
index 46ad5197537af19697396a4e43179e6322a255aa..be50d9e70a7d45577782c90050384252db735bd8 100644 (file)
@@ -128,7 +128,6 @@ enum rq_flag_bits {
        __REQ_NOIDLE,           /* don't anticipate more IO after this one */
 
        /* bio only flags */
-       __REQ_UNPLUG,           /* unplug immediately after submission */
        __REQ_RAHEAD,           /* read ahead, can fail anytime */
        __REQ_THROTTLED,        /* This bio has already been subjected to
                                 * throttling rules. Don't do it again. */
@@ -148,9 +147,11 @@ enum rq_flag_bits {
        __REQ_ALLOCED,          /* request came from our alloc pool */
        __REQ_COPY_USER,        /* contains copies of user pages */
        __REQ_FLUSH,            /* request for cache flush */
+       __REQ_FLUSH_SEQ,        /* request for flush sequence */
        __REQ_IO_STAT,          /* account I/O stat */
        __REQ_MIXED_MERGE,      /* merge of different types, fail separately */
        __REQ_SECURE,           /* secure discard (used with __REQ_DISCARD) */
+       __REQ_ON_PLUG,          /* on plug list */
        __REQ_NR_BITS,          /* stops here */
 };
 
@@ -170,7 +171,6 @@ enum rq_flag_bits {
         REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
 #define REQ_CLONE_MASK         REQ_COMMON_MASK
 
-#define REQ_UNPLUG             (1 << __REQ_UNPLUG)
 #define REQ_RAHEAD             (1 << __REQ_RAHEAD)
 #define REQ_THROTTLED          (1 << __REQ_THROTTLED)
 
@@ -188,8 +188,10 @@ enum rq_flag_bits {
 #define REQ_ALLOCED            (1 << __REQ_ALLOCED)
 #define REQ_COPY_USER          (1 << __REQ_COPY_USER)
 #define REQ_FLUSH              (1 << __REQ_FLUSH)
+#define REQ_FLUSH_SEQ          (1 << __REQ_FLUSH_SEQ)
 #define REQ_IO_STAT            (1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE                (1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE             (1 << __REQ_SECURE)
+#define REQ_ON_PLUG            (1 << __REQ_ON_PLUG)
 
 #endif /* __LINUX_BLK_TYPES_H */
index d5063e1b55559f0ecfa1e0e757d136510c99b4bf..16a902f099ac0ee7b1f64c4322c6255a82aa0e09 100644 (file)
@@ -108,11 +108,17 @@ struct request {
 
        /*
         * Three pointers are available for the IO schedulers, if they need
-        * more they have to dynamically allocate it.
+        * more they have to dynamically allocate it.  Flush requests are
+        * never put on the IO scheduler. So let the flush fields share
+        * space with the three elevator_private pointers.
         */
-       void *elevator_private;
-       void *elevator_private2;
-       void *elevator_private3;
+       union {
+               void *elevator_private[3];
+               struct {
+                       unsigned int            seq;
+                       struct list_head        list;
+               } flush;
+       };
 
        struct gendisk *rq_disk;
        struct hd_struct *part;
@@ -190,7 +196,6 @@ typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
-typedef void (unplug_fn) (struct request_queue *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -273,7 +278,6 @@ struct request_queue
        make_request_fn         *make_request_fn;
        prep_rq_fn              *prep_rq_fn;
        unprep_rq_fn            *unprep_rq_fn;
-       unplug_fn               *unplug_fn;
        merge_bvec_fn           *merge_bvec_fn;
        softirq_done_fn         *softirq_done_fn;
        rq_timed_out_fn         *rq_timed_out_fn;
@@ -287,12 +291,9 @@ struct request_queue
        struct request          *boundary_rq;
 
        /*
-        * Auto-unplugging state
+        * Delayed queue handling
         */
-       struct timer_list       unplug_timer;
-       int                     unplug_thresh;  /* After this many requests */
-       unsigned long           unplug_delay;   /* After this many jiffies */
-       struct work_struct      unplug_work;
+       struct delayed_work     delay_work;
 
        struct backing_dev_info backing_dev_info;
 
@@ -363,11 +364,12 @@ struct request_queue
         * for flush operations
         */
        unsigned int            flush_flags;
-       unsigned int            flush_seq;
-       int                     flush_err;
+       unsigned int            flush_pending_idx:1;
+       unsigned int            flush_running_idx:1;
+       unsigned long           flush_pending_since;
+       struct list_head        flush_queue[2];
+       struct list_head        flush_data_in_flight;
        struct request          flush_rq;
-       struct request          *orig_flush_rq;
-       struct list_head        pending_flushes;
 
        struct mutex            sysfs_lock;
 
@@ -387,14 +389,13 @@ struct request_queue
 #define QUEUE_FLAG_ASYNCFULL   4       /* write queue has been filled */
 #define QUEUE_FLAG_DEAD                5       /* queue being torn down */
 #define QUEUE_FLAG_REENTER     6       /* Re-entrancy avoidance */
-#define QUEUE_FLAG_PLUGGED     7       /* queue is plugged */
-#define QUEUE_FLAG_ELVSWITCH   8       /* don't use elevator, just do FIFO */
-#define QUEUE_FLAG_BIDI                9       /* queue supports bidi requests */
-#define QUEUE_FLAG_NOMERGES    10      /* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP   11      /* force complete on same CPU */
-#define QUEUE_FLAG_FAIL_IO     12      /* fake timeout */
-#define QUEUE_FLAG_STACKABLE   13      /* supports request stacking */
-#define QUEUE_FLAG_NONROT      14      /* non-rotational device (SSD) */
+#define QUEUE_FLAG_ELVSWITCH   7       /* don't use elevator, just do FIFO */
+#define QUEUE_FLAG_BIDI                8       /* queue supports bidi requests */
+#define QUEUE_FLAG_NOMERGES     9      /* disable merge attempts */
+#define QUEUE_FLAG_SAME_COMP   10      /* force complete on same CPU */
+#define QUEUE_FLAG_FAIL_IO     11      /* fake timeout */
+#define QUEUE_FLAG_STACKABLE   12      /* supports request stacking */
+#define QUEUE_FLAG_NONROT      13      /* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     15      /* do IO stats */
 #define QUEUE_FLAG_DISCARD     16      /* supports DISCARD */
@@ -472,7 +473,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
        __clear_bit(flag, &q->queue_flags);
 }
 
-#define blk_queue_plugged(q)   test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)    test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)   test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)  test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
@@ -667,9 +667,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 extern void blk_rq_unprep_clone(struct request *rq);
 extern int blk_insert_cloned_request(struct request_queue *q,
                                     struct request *rq);
-extern void blk_plug_device(struct request_queue *);
-extern void blk_plug_device_unlocked(struct request_queue *);
-extern int blk_remove_plug(struct request_queue *);
+extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
                          unsigned int, void __user *);
@@ -713,7 +711,6 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *,
                          struct request *, int);
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
                                  struct request *, int, rq_end_io_fn *);
-extern void blk_unplug(struct request_queue *q);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
@@ -850,7 +847,6 @@ extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bd
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
-extern void generic_unplug_device(struct request_queue *);
 extern long nr_blockdev_pages(void);
 
 int blk_get_queue(struct request_queue *);
@@ -858,6 +854,31 @@ struct request_queue *blk_alloc_queue(gfp_t);
 struct request_queue *blk_alloc_queue_node(gfp_t, int);
 extern void blk_put_queue(struct request_queue *);
 
+struct blk_plug {
+       unsigned long magic;
+       struct list_head list;
+       unsigned int should_sort;
+};
+
+extern void blk_start_plug(struct blk_plug *);
+extern void blk_finish_plug(struct blk_plug *);
+extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);
+
+static inline void blk_flush_plug(struct task_struct *tsk)
+{
+       struct blk_plug *plug = tsk->plug;
+
+       if (unlikely(plug))
+               __blk_flush_plug(tsk, plug);
+}
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+       struct blk_plug *plug = tsk->plug;
+
+       return plug && !list_empty(&plug->list);
+}
+
 /*
  * tag stuff
  */
@@ -1135,7 +1156,6 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
 extern int blk_throtl_init(struct request_queue *q);
 extern void blk_throtl_exit(struct request_queue *q);
 extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
-extern void throtl_shutdown_timer_wq(struct request_queue *q);
 #else /* CONFIG_BLK_DEV_THROTTLING */
 static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
 {
@@ -1144,7 +1164,6 @@ static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
 
 static inline int blk_throtl_init(struct request_queue *q) { return 0; }
 static inline int blk_throtl_exit(struct request_queue *q) { return 0; }
-static inline void throtl_shutdown_timer_wq(struct request_queue *q) {}
 #endif /* CONFIG_BLK_DEV_THROTTLING */
 
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
@@ -1278,6 +1297,26 @@ static inline long nr_blockdev_pages(void)
        return 0;
 }
 
+struct blk_plug {
+};
+
+static inline void blk_start_plug(struct blk_plug *plug)
+{
+}
+
+static inline void blk_finish_plug(struct blk_plug *plug)
+{
+}
+
+static inline void blk_flush_plug(struct task_struct *task)
+{
+}
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+       return false;
+}
+
 #endif /* CONFIG_BLOCK */
 
 #endif
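
On the driver side, blk_plug_device()/blk_remove_plug() and the per-queue unplug timer are replaced by blk_delay_queue(), which simply re-runs the queue after a delay. A hedged sketch of a request_fn backing off while the hardware is busy; the hardware-check and dispatch helpers are hypothetical:

#include <linux/blkdev.h>

extern bool example_hw_busy(void);                 /* hypothetical */
extern void example_hw_start(struct request *rq);  /* hypothetical */

static void example_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = blk_fetch_request(q)) != NULL) {
                if (example_hw_busy()) {
                        /* Instead of plugging the queue and waiting for the
                         * old unplug timer, put the request back and ask for
                         * the queue to be run again in a few milliseconds. */
                        blk_requeue_request(q, rq);
                        blk_delay_queue(q, 3);  /* delay in msecs */
                        return;
                }
                example_hw_start(rq);
        }
}
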
index 68d1fe7b877c82e2d302fb31f3702836bc6d6843..f5df23561b96d0428cfed1d26fdaa11127e46b0d 100644 (file)
@@ -219,7 +219,6 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size);
 int block_commit_write(struct page *page, unsigned from, unsigned to);
 int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
                                get_block_t get_block);
-void block_sync_page(struct page *);
 sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
 int block_truncate_page(struct address_space *, loff_t, get_block_t *);
 int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned,
index 272496d1fae41bbb7de1db89b1131ab87283972b..e2768834f39775f4baeae6e5e9316b64184a823c 100644 (file)
@@ -285,11 +285,6 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback
  */
 int dm_table_complete(struct dm_table *t);
 
-/*
- * Unplug all devices in a table.
- */
-void dm_table_unplug_all(struct dm_table *t);
-
 /*
  * Table reference counting.
  */
index 4d857973d2c94317cf11041a4a7070794fc13a99..d93efcc4457050ffa1e511a825c273f60796ed69 100644 (file)
@@ -20,7 +20,6 @@ typedef void (elevator_bio_merged_fn) (struct request_queue *,
 typedef int (elevator_dispatch_fn) (struct request_queue *, int);
 
 typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
-typedef int (elevator_queue_empty_fn) (struct request_queue *);
 typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
 typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
 typedef int (elevator_may_queue_fn) (struct request_queue *, int);
@@ -46,7 +45,6 @@ struct elevator_ops
        elevator_activate_req_fn *elevator_activate_req_fn;
        elevator_deactivate_req_fn *elevator_deactivate_req_fn;
 
-       elevator_queue_empty_fn *elevator_queue_empty_fn;
        elevator_completed_req_fn *elevator_completed_req_fn;
 
        elevator_request_list_fn *elevator_former_req_fn;
@@ -101,17 +99,17 @@ struct elevator_queue
  */
 extern void elv_dispatch_sort(struct request_queue *, struct request *);
 extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
-extern void elv_add_request(struct request_queue *, struct request *, int, int);
-extern void __elv_add_request(struct request_queue *, struct request *, int, int);
+extern void elv_add_request(struct request_queue *, struct request *, int);
+extern void __elv_add_request(struct request_queue *, struct request *, int);
 extern void elv_insert(struct request_queue *, struct request *, int);
 extern int elv_merge(struct request_queue *, struct request **, struct bio *);
+extern int elv_try_merge(struct request *, struct bio *);
 extern void elv_merge_requests(struct request_queue *, struct request *,
                               struct request *);
 extern void elv_merged_request(struct request_queue *, struct request *, int);
 extern void elv_bio_merged(struct request_queue *q, struct request *,
                                struct bio *);
 extern void elv_requeue_request(struct request_queue *, struct request *);
-extern int elv_queue_empty(struct request_queue *);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
 extern int elv_register_queue(struct request_queue *q);
@@ -167,6 +165,8 @@ extern struct request *elv_rb_find(struct rb_root *, sector_t);
 #define ELEVATOR_INSERT_BACK   2
 #define ELEVATOR_INSERT_SORT   3
 #define ELEVATOR_INSERT_REQUEUE        4
+#define ELEVATOR_INSERT_FLUSH  5
+#define ELEVATOR_INSERT_SORT_MERGE     6
 
 /*
  * return values from elevator_may_queue_fn
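
Note that elv_add_request()/__elv_add_request() above lose their trailing plug argument, and two new insertion positions (FLUSH and SORT_MERGE) are defined. A hypothetical caller under the new interface, shown only as an assumption for illustration and not quoted from this merge:

	#include <linux/elevator.h>

	/* Hypothetical helper: queue a request and let the elevator try to
	 * merge it with requests already queued (queue lock assumed held). */
	static void queue_with_merge(struct request_queue *q, struct request *rq)
	{
		__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
	}
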
index 4dda076c24a1efac5119988ba915bf0277c2ecf5..ce7e185551976d4d29f1bb811eeb09bf8e8fcea4 100644 (file)
@@ -138,16 +138,10 @@ struct inodes_stat_t {
  *                     block layer could (in theory) choose to ignore this
  *                     request if it runs into resource problems.
  * WRITE               A normal async write. Device will be plugged.
- * WRITE_SYNC_PLUG     Synchronous write. Identical to WRITE, but passes down
+ * WRITE_SYNC          Synchronous write. Identical to WRITE, but passes down
  *                     the hint that someone will be waiting on this IO
- *                     shortly. The device must still be unplugged explicitly,
- *                     WRITE_SYNC_PLUG does not do this as we could be
- *                     submitting more writes before we actually wait on any
- *                     of them.
- * WRITE_SYNC          Like WRITE_SYNC_PLUG, but also unplugs the device
- *                     immediately after submission. The write equivalent
- *                     of READ_SYNC.
- * WRITE_ODIRECT_PLUG  Special case write for O_DIRECT only.
+ *                     shortly. The write equivalent of READ_SYNC.
+ * WRITE_ODIRECT       Special case write for O_DIRECT only.
  * WRITE_FLUSH         Like WRITE_SYNC but with preceding cache flush.
  * WRITE_FUA           Like WRITE_SYNC but data is guaranteed to be on
  *                     non-volatile media on completion.
@@ -163,18 +157,14 @@ struct inodes_stat_t {
 #define WRITE                  RW_MASK
 #define READA                  RWA_MASK
 
-#define READ_SYNC              (READ | REQ_SYNC | REQ_UNPLUG)
+#define READ_SYNC              (READ | REQ_SYNC)
 #define READ_META              (READ | REQ_META)
-#define WRITE_SYNC_PLUG                (WRITE | REQ_SYNC | REQ_NOIDLE)
-#define WRITE_SYNC             (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
-#define WRITE_ODIRECT_PLUG     (WRITE | REQ_SYNC)
+#define WRITE_SYNC             (WRITE | REQ_SYNC | REQ_NOIDLE)
+#define WRITE_ODIRECT          (WRITE | REQ_SYNC)
 #define WRITE_META             (WRITE | REQ_META)
-#define WRITE_FLUSH            (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
-                                REQ_FLUSH)
-#define WRITE_FUA              (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
-                                REQ_FUA)
-#define WRITE_FLUSH_FUA                (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
-                                REQ_FLUSH | REQ_FUA)
+#define WRITE_FLUSH            (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
+#define WRITE_FUA              (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
+#define WRITE_FLUSH_FUA                (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
 
 #define SEL_IN         1
 #define SEL_OUT                2
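
With REQ_UNPLUG gone, the composite flags above are passed directly to the submission helpers. A hedged example of the intended use, assuming a journalling-style commit path (hypothetical code, not part of this merge):

	#include <linux/fs.h>
	#include <linux/buffer_head.h>

	/* Hypothetical commit-record write: synchronous, preceded by a cache
	 * flush, and forced to stable media (FUA) before completion. */
	static void write_commit_record(struct buffer_head *bh)
	{
		lock_buffer(bh);
		get_bh(bh);				/* end_buffer_write_sync() drops this ref */
		bh->b_end_io = end_buffer_write_sync;
		submit_bh(WRITE_FLUSH_FUA, bh);
		wait_on_buffer(bh);
	}
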
@@ -586,7 +576,6 @@ typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
 struct address_space_operations {
        int (*writepage)(struct page *page, struct writeback_control *wbc);
        int (*readpage)(struct file *, struct page *);
-       void (*sync_page)(struct page *);
 
        /* Write back some dirty pages from this mapping. */
        int (*writepages)(struct address_space *, struct writeback_control *);
index c0d5f6945c1ebffcccc452da109e353b646a4b0c..d764a426e9fdbf5b5086542e3eeb6c6a2e6d3c8a 100644 (file)
@@ -109,7 +109,7 @@ struct hd_struct {
        int make_it_fail;
 #endif
        unsigned long stamp;
-       int in_flight[2];
+       atomic_t in_flight[2];
 #ifdef CONFIG_SMP
        struct disk_stats __percpu *dkstats;
 #else
@@ -370,21 +370,21 @@ static inline void free_part_stats(struct hd_struct *part)
 
 static inline void part_inc_in_flight(struct hd_struct *part, int rw)
 {
-       part->in_flight[rw]++;
+       atomic_inc(&part->in_flight[rw]);
        if (part->partno)
-               part_to_disk(part)->part0.in_flight[rw]++;
+               atomic_inc(&part_to_disk(part)->part0.in_flight[rw]);
 }
 
 static inline void part_dec_in_flight(struct hd_struct *part, int rw)
 {
-       part->in_flight[rw]--;
+       atomic_dec(&part->in_flight[rw]);
        if (part->partno)
-               part_to_disk(part)->part0.in_flight[rw]--;
+               atomic_dec(&part_to_disk(part)->part0.in_flight[rw]);
 }
 
 static inline int part_in_flight(struct hd_struct *part)
 {
-       return part->in_flight[0] + part->in_flight[1];
+       return atomic_read(&part->in_flight[0]) + atomic_read(&part->in_flight[1]);
 }
 
 static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk)
index 092e4250a4583f735140221d5f4dd7a4667d3d69..10ca03d0a250e280682479c4228536120c2cee4f 100644 (file)
@@ -297,6 +297,7 @@ extern int
 kgdb_handle_exception(int ex_vector, int signo, int err_code,
                      struct pt_regs *regs);
 extern int kgdb_nmicallback(int cpu, void *regs);
+extern void gdbstub_exit(int status);
 
 extern int                     kgdb_single_step;
 extern atomic_t                        kgdb_active;
index f9535b2c9558ffe59905a4836a6a2937c159ece2..7606d7db96c90be584687bf478e416dae1d535ec 100644 (file)
@@ -861,7 +861,7 @@ extern void pagefault_out_of_memory(void);
 #define offset_in_page(p)      ((unsigned long)(p) & ~PAGE_MASK)
 
 /*
- * Flags passed to __show_mem() and __show_free_areas() to suppress output in
+ * Flags passed to show_mem() and __show_free_areas() to suppress output in
  * various contexts.
  */
 #define SHOW_MEM_FILTER_NODES  (0x0001u)       /* filter disallowed nodes */
@@ -1360,8 +1360,7 @@ extern void setup_per_zone_wmarks(void);
 extern void calculate_zone_inactive_ratio(struct zone *zone);
 extern void mem_init(void);
 extern void __init mmap_init(void);
-extern void show_mem(void);
-extern void __show_mem(unsigned int flags);
+extern void show_mem(unsigned int flags);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern int after_bootmem;
index 29ebba54c238c61fe3b1fb5767b829f8db01f53b..c119506526467d204b2c78a7d47bc0a7152129a4 100644 (file)
@@ -298,7 +298,6 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 
 extern void __lock_page(struct page *page);
 extern int __lock_page_killable(struct page *page);
-extern void __lock_page_nosync(struct page *page);
 extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                                unsigned int flags);
 extern void unlock_page(struct page *page);
@@ -341,17 +340,6 @@ static inline int lock_page_killable(struct page *page)
        return 0;
 }
 
-/*
- * lock_page_nosync should only be used if we can't pin the page's inode.
- * Doesn't play quite so well with block device plugging.
- */
-static inline void lock_page_nosync(struct page *page)
-{
-       might_sleep();
-       if (!trylock_page(page))
-               __lock_page_nosync(page);
-}
-       
 /*
  * lock_page_or_retry - Lock the page, unless this would block and the
  * caller indicated that it can handle a retry.
index 98fc7ed4b191317db5141b9264a39fc28a2d2c2e..b8369d522bf8e83df7f0e04189d232c4bd59329f 100644 (file)
@@ -99,6 +99,7 @@ struct robust_list_head;
 struct bio_list;
 struct fs_struct;
 struct perf_event_context;
+struct blk_plug;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1428,6 +1429,11 @@ struct task_struct {
 /* stacked block device info */
        struct bio_list *bio_list;
 
+#ifdef CONFIG_BLOCK
+/* stack plugging */
+       struct blk_plug *plug;
+#endif
+
 /* VM state */
        struct reclaim_state *reclaim_state;
 
index ed6ebe690f4ac9211a253d162c92c9b5fd98d71b..a5c6da5d8df8dd5ad28b6b9525bfa787caa3e74d 100644 (file)
@@ -309,8 +309,6 @@ extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
                                        struct page **pagep, swp_entry_t *ent);
 #endif
 
-extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
-
 #ifdef CONFIG_SWAP
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct page *);
index 481a7bd2dfe752267ba3273a1bb8e4a972c17fa5..a11db956dd62c4c5a5fa22699ccd3662a88f00bc 100644 (file)
@@ -1093,3 +1093,33 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd)
        put_packet(remcom_out_buffer);
        return 0;
 }
+
+/**
+ * gdbstub_exit - Send an exit message to GDB
+ * @status: The exit code to report.
+ */
+void gdbstub_exit(int status)
+{
+       unsigned char checksum, ch, buffer[3];
+       int loop;
+
+       buffer[0] = 'W';
+       buffer[1] = hex_asc_hi(status);
+       buffer[2] = hex_asc_lo(status);
+
+       dbg_io_ops->write_char('$');
+       checksum = 0;
+
+       for (loop = 0; loop < 3; loop++) {
+               ch = buffer[loop];
+               checksum += ch;
+               dbg_io_ops->write_char(ch);
+       }
+
+       dbg_io_ops->write_char('#');
+       dbg_io_ops->write_char(hex_asc_hi(checksum));
+       dbg_io_ops->write_char(hex_asc_lo(checksum));
+
+       /* make sure the output is flushed, lest the bootloader clobber it */
+       dbg_io_ops->flush();
+}
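
For reference, a worked example of the frame this emits (an illustration, not part of the patch): an exit status of 0 gives the payload "W00", whose modulo-256 checksum is 0x57 + 0x30 + 0x30 = 0xB7, so the stub writes "$W00#b7" to the debugger. The same framing in a standalone, runnable sketch:

	#include <stdio.h>

	/* Hypothetical host-side check of the packet gdbstub_exit(0) would emit. */
	int main(void)
	{
		const unsigned char payload[] = { 'W', '0', '0' };
		unsigned char csum = 0;
		unsigned int i;

		for (i = 0; i < sizeof(payload); i++)
			csum += payload[i];

		printf("$W00#%02x\n", csum);	/* prints $W00#b7 */
		return 0;
	}
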
index f9a45ebcc7b17c30ac1d39338b75600b6287b10f..6a488ad2dce5bf9a1b6ba24376e1449e603687e4 100644 (file)
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
        profile_task_exit(tsk);
 
        WARN_ON(atomic_read(&tsk->fs_excl));
+       WARN_ON(blk_needs_flush_plug(tsk));
 
        if (unlikely(in_interrupt()))
                panic("Aiee, killing interrupt handler!");
index 457fff2e17e0a95eac114878b460d8160aec6f40..e7548dee636b1f29f344d4c2a9749a963135f477 100644 (file)
@@ -1205,6 +1205,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         * Clear TID on mm_release()?
         */
        p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+#ifdef CONFIG_BLOCK
+       p->plug = NULL;
+#endif
 #ifdef CONFIG_FUTEX
        p->robust_list = NULL;
 #ifdef CONFIG_COMPAT
index 83bbc7c02df95fd3560bdac7340cc3804d167d38..d09dd10c5a5efc2c206a85bd31a431268e37cc7f 100644 (file)
@@ -28,7 +28,7 @@
 static int submit(int rw, struct block_device *bdev, sector_t sector,
                struct page *page, struct bio **bio_chain)
 {
-       const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG;
+       const int bio_rw = rw | REQ_SYNC;
        struct bio *bio;
 
        bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
index 480adeb63f8ffb7942ddad9daca114a1c6bad27a..ae659b99ce73e30e403011d662eb5337879c4bdd 100644 (file)
@@ -4115,6 +4115,16 @@ need_resched:
                switch_count = &prev->nvcsw;
        }
 
+       /*
+        * If we are going to sleep and we have plugged IO queued, make
+        * sure to submit it to avoid deadlocks.
+        */
+       if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
+               raw_spin_unlock(&rq->lock);
+               blk_flush_plug(prev);
+               raw_spin_lock(&rq->lock);
+       }
+
        pre_schedule(rq, prev);
 
        if (unlikely(!rq->nr_running))
@@ -5528,6 +5538,7 @@ void __sched io_schedule(void)
 
        delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
+       blk_flush_plug(current);
        current->in_iowait = 1;
        schedule();
        current->in_iowait = 0;
@@ -5543,6 +5554,7 @@ long __sched io_schedule_timeout(long timeout)
 
        delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
+       blk_flush_plug(current);
        current->in_iowait = 1;
        ret = schedule_timeout(timeout);
        current->in_iowait = 0;
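
The hunks above make the scheduler the safety net for the on-stack plug: if a task is about to sleep with bios still parked on its plug list, they are force-flushed first. A minimal sketch of the scenario being guarded against (an assumed illustration, not code from this merge):

	#include <linux/blkdev.h>
	#include <linux/pagemap.h>

	/* Hypothetical reader: without the flush in schedule()/io_schedule(),
	 * the bio below would sit on current->plug->list while the task
	 * sleeps waiting for that very IO to complete. */
	static void read_and_wait(struct page *page, struct bio *bio)
	{
		struct blk_plug plug;

		blk_start_plug(&plug);
		submit_bio(READ, bio);		/* parked on the per-task plug list */
		wait_on_page_locked(page);	/* sleeps; schedule() flushes the plug */
		blk_finish_plug(&plug);
	}
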
index cbafed7d4f386c77816abb4ffe9d7141af29f446..7aa40f8e182d569bcba8c0a3f33bb0f73b6f4950 100644 (file)
@@ -703,28 +703,21 @@ void blk_trace_shutdown(struct request_queue *q)
  *
  **/
 static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
-                                   u32 what)
+                            u32 what)
 {
        struct blk_trace *bt = q->blk_trace;
-       int rw = rq->cmd_flags & 0x03;
 
        if (likely(!bt))
                return;
 
-       if (rq->cmd_flags & REQ_DISCARD)
-               rw |= REQ_DISCARD;
-
-       if (rq->cmd_flags & REQ_SECURE)
-               rw |= REQ_SECURE;
-
        if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                what |= BLK_TC_ACT(BLK_TC_PC);
-               __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
+               __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags,
                                what, rq->errors, rq->cmd_len, rq->cmd);
        } else  {
                what |= BLK_TC_ACT(BLK_TC_FS);
-               __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw,
-                               what, rq->errors, 0, NULL);
+               __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+                               rq->cmd_flags, what, rq->errors, 0, NULL);
        }
 }
 
index d8d602b58c31ba84908f2be24daa0a7d2eb33764..90cbe4bb5960fc480eaf45273638159ac48d2aa7 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/nmi.h>
 #include <linux/quicklist.h>
 
-void __show_mem(unsigned int filter)
+void show_mem(unsigned int filter)
 {
        pg_data_t *pgdat;
        unsigned long total = 0, reserved = 0, shared = 0,
@@ -61,8 +61,3 @@ void __show_mem(unsigned int filter)
                quicklist_total_size());
 #endif
 }
-
-void show_mem(void)
-{
-       __show_mem(0);
-}
index 027100d30227fead0a4010d1feb0e2791a98fcaa..8fe9d340792135fbf29dd0b4fa4d98c25329220a 100644 (file)
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
-void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
-EXPORT_SYMBOL(default_unplug_io_fn);
-
 struct backing_dev_info default_backing_dev_info = {
        .name           = "default",
        .ra_pages       = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
        .state          = 0,
        .capabilities   = BDI_CAP_MAP_COPY,
-       .unplug_io_fn   = default_unplug_io_fn,
 };
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
@@ -604,7 +598,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
        spin_lock(&sb_lock);
        list_for_each_entry(sb, &super_blocks, s_list) {
                if (sb->s_bdi == bdi)
-                       sb->s_bdi = NULL;
+                       sb->s_bdi = &default_backing_dev_info;
        }
        spin_unlock(&sb_lock);
 }
index f807afda86f26014413636cae2f26544a34e5a52..04d1992fd86b66134d78f3070f6037ea51f8f4c0 100644 (file)
@@ -164,45 +164,15 @@ void delete_from_page_cache(struct page *page)
 }
 EXPORT_SYMBOL(delete_from_page_cache);
 
-static int sync_page(void *word)
+static int sleep_on_page(void *word)
 {
-       struct address_space *mapping;
-       struct page *page;
-
-       page = container_of((unsigned long *)word, struct page, flags);
-
-       /*
-        * page_mapping() is being called without PG_locked held.
-        * Some knowledge of the state and use of the page is used to
-        * reduce the requirements down to a memory barrier.
-        * The danger here is of a stale page_mapping() return value
-        * indicating a struct address_space different from the one it's
-        * associated with when it is associated with one.
-        * After smp_mb(), it's either the correct page_mapping() for
-        * the page, or an old page_mapping() and the page's own
-        * page_mapping() has gone NULL.
-        * The ->sync_page() address_space operation must tolerate
-        * page_mapping() going NULL. By an amazing coincidence,
-        * this comes about because none of the users of the page
-        * in the ->sync_page() methods make essential use of the
-        * page_mapping(), merely passing the page down to the backing
-        * device's unplug functions when it's non-NULL, which in turn
-        * ignore it for all cases but swap, where only page_private(page) is
-        * of interest. When page_mapping() does go NULL, the entire
-        * call stack gracefully ignores the page and returns.
-        * -- wli
-        */
-       smp_mb();
-       mapping = page_mapping(page);
-       if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
-               mapping->a_ops->sync_page(page);
        io_schedule();
        return 0;
 }
 
-static int sync_page_killable(void *word)
+static int sleep_on_page_killable(void *word)
 {
-       sync_page(word);
+       sleep_on_page(word);
        return fatal_signal_pending(current) ? -EINTR : 0;
 }
 
@@ -558,12 +528,6 @@ struct page *__page_cache_alloc(gfp_t gfp)
 EXPORT_SYMBOL(__page_cache_alloc);
 #endif
 
-static int __sleep_on_page_lock(void *word)
-{
-       io_schedule();
-       return 0;
-}
-
 /*
  * In order to wait for pages to become available there must be
  * waitqueues associated with pages. By using a hash table of
@@ -591,7 +555,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
        DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
 
        if (test_bit(bit_nr, &page->flags))
-               __wait_on_bit(page_waitqueue(page), &wait, sync_page,
+               __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
                                                        TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(wait_on_page_bit);
@@ -655,17 +619,12 @@ EXPORT_SYMBOL(end_page_writeback);
 /**
  * __lock_page - get a lock on the page, assuming we need to sleep to get it
  * @page: the page to lock
- *
- * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary.  If some
- * random driver's requestfn sets TASK_RUNNING, we could busywait.  However
- * chances are that on the second loop, the block layer's plug list is empty,
- * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
  */
 void __lock_page(struct page *page)
 {
        DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
-       __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
+       __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
                                                        TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_page);
@@ -675,24 +634,10 @@ int __lock_page_killable(struct page *page)
        DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
        return __wait_on_bit_lock(page_waitqueue(page), &wait,
-                                       sync_page_killable, TASK_KILLABLE);
+                                       sleep_on_page_killable, TASK_KILLABLE);
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
-/**
- * __lock_page_nosync - get a lock on the page, without calling sync_page()
- * @page: the page to lock
- *
- * Variant of lock_page that does not require the caller to hold a reference
- * on the page's mapping.
- */
-void __lock_page_nosync(struct page *page)
-{
-       DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
-       __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
-                                                       TASK_UNINTERRUPTIBLE);
-}
-
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                         unsigned int flags)
 {
@@ -1407,12 +1352,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
        unsigned long seg = 0;
        size_t count;
        loff_t *ppos = &iocb->ki_pos;
+       struct blk_plug plug;
 
        count = 0;
        retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
        if (retval)
                return retval;
 
+       blk_start_plug(&plug);
+
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
        if (filp->f_flags & O_DIRECT) {
                loff_t size;
@@ -1485,6 +1433,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                        break;
        }
 out:
+       blk_finish_plug(&plug);
        return retval;
 }
 EXPORT_SYMBOL(generic_file_aio_read);
@@ -2596,11 +2545,13 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       struct blk_plug plug;
        ssize_t ret;
 
        BUG_ON(iocb->ki_pos != pos);
 
        mutex_lock(&inode->i_mutex);
+       blk_start_plug(&plug);
        ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
        mutex_unlock(&inode->i_mutex);
 
@@ -2611,6 +2562,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                if (err < 0 && ret > 0)
                        ret = err;
        }
+       blk_finish_plug(&plug);
        return ret;
 }
 EXPORT_SYMBOL(generic_file_aio_write);
index e0af336530c6dfcb78c0fe82b531fc7152fe7eaf..37feb9fec228ae6a8f34ba42d0845ca33ed5da32 100644 (file)
@@ -945,7 +945,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
                collect_procs(ppage, &tokill);
 
        if (hpage != ppage)
-               lock_page_nosync(ppage);
+               lock_page(ppage);
 
        ret = try_to_unmap(ppage, ttu);
        if (ret != SWAP_SUCCESS)
@@ -1038,7 +1038,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
                         * Check "just unpoisoned", "filter hit", and
                         * "race with other subpage."
                         */
-                       lock_page_nosync(hpage);
+                       lock_page(hpage);
                        if (!PageHWPoison(hpage)
                            || (hwpoison_filter(p) && TestClearPageHWPoison(p))
                            || (p != hpage && TestSetPageHWPoison(hpage))) {
@@ -1088,7 +1088,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
         * It's very difficult to mess with pages currently under IO
         * and in many cases impossible, so we just avoid it here.
         */
-       lock_page_nosync(hpage);
+       lock_page(hpage);
 
        /*
         * unpoison always clear PG_hwpoison inside page lock
@@ -1231,7 +1231,7 @@ int unpoison_memory(unsigned long pfn)
                return 0;
        }
 
-       lock_page_nosync(page);
+       lock_page(page);
        /*
         * This test is racy because PG_hwpoison is set outside of page lock.
         * That's acceptable because that won't trigger kernel panic. Instead,
index e629143f944035d6085760ff20db87b1e31776f4..cb86e7d5e7f5591c8508fba1f98a150e1e968e50 100644 (file)
@@ -1842,10 +1842,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
-void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
-
 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
        unsigned long len, unsigned long pgoff, unsigned long flags)
 {
index 62a5cec08a1752acbce54ebfc69ecd51ebfb8ef0..6a819d1b2c7dd70015b9fadff8f28d70f3b6f422 100644 (file)
@@ -406,7 +406,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
        task_unlock(current);
        dump_stack();
        mem_cgroup_print_oom_info(mem, p);
-       __show_mem(SHOW_MEM_FILTER_NODES);
+       show_mem(SHOW_MEM_FILTER_NODES);
        if (sysctl_oom_dump_tasks)
                dump_tasks(mem, nodemask);
 }
index 632b46479c940a41ccf7b9eb14360c25a0dfe323..31f698862420021cc9b1e39e608077d1d468730a 100644 (file)
@@ -1040,11 +1040,17 @@ static int __writepage(struct page *page, struct writeback_control *wbc,
 int generic_writepages(struct address_space *mapping,
                       struct writeback_control *wbc)
 {
+       struct blk_plug plug;
+       int ret;
+
        /* deal with chardevs and other special file */
        if (!mapping->a_ops->writepage)
                return 0;
 
-       return write_cache_pages(mapping, wbc, __writepage, mapping);
+       blk_start_plug(&plug);
+       ret = write_cache_pages(mapping, wbc, __writepage, mapping);
+       blk_finish_plug(&plug);
+       return ret;
 }
 
 EXPORT_SYMBOL(generic_writepages);
@@ -1251,7 +1257,7 @@ int set_page_dirty_lock(struct page *page)
 {
        int ret;
 
-       lock_page_nosync(page);
+       lock_page(page);
        ret = set_page_dirty(page);
        unlock_page(page);
        return ret;
index 8e5726ab0d85789ee553241f4172787d3c88d0d6..d6e7ba7373be7caea9f12eb73bf994df5ddc7467 100644 (file)
@@ -2195,7 +2195,7 @@ nopage:
                        current->comm, order, gfp_mask);
                dump_stack();
                if (!should_suppress_show_mem())
-                       __show_mem(filter);
+                       show_mem(filter);
        }
        return page;
 got_pg:
index 2dee975bf469003dd02e2f318399c16a428e4937..dc76b4d0611ecb59fd85d89a78896c792443a62f 100644 (file)
@@ -106,7 +106,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
                goto out;
        }
        if (wbc->sync_mode == WB_SYNC_ALL)
-               rw |= REQ_SYNC | REQ_UNPLUG;
+               rw |= REQ_SYNC;
        count_vm_event(PSWPOUT);
        set_page_writeback(page);
        unlock_page(page);
index 77506a291a2d4caa90c64839896df7b9176bbaa1..2c0cc489e2880cb319a92b905a5ee7b292039b7f 100644 (file)
@@ -109,9 +109,12 @@ EXPORT_SYMBOL(read_cache_pages);
 static int read_pages(struct address_space *mapping, struct file *filp,
                struct list_head *pages, unsigned nr_pages)
 {
+       struct blk_plug plug;
        unsigned page_idx;
        int ret;
 
+       blk_start_plug(&plug);
+
        if (mapping->a_ops->readpages) {
                ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
                /* Clean up the remaining pages */
@@ -129,7 +132,10 @@ static int read_pages(struct address_space *mapping, struct file *filp,
                page_cache_release(page);
        }
        ret = 0;
+
 out:
+       blk_finish_plug(&plug);
+
        return ret;
 }
 
@@ -554,17 +560,5 @@ page_cache_async_readahead(struct address_space *mapping,
 
        /* do read-ahead */
        ondemand_readahead(mapping, ra, filp, true, offset, req_size);
-
-#ifdef CONFIG_BLOCK
-       /*
-        * Normally the current page is !uptodate and lock_page() will be
-        * immediately called to implicitly unplug the device. However this
-        * is not always true for RAID conifgurations, where data arrives
-        * not strictly in their submission order. In this case we need to
-        * explicitly kick off the IO.
-        */
-       if (PageUptodate(page))
-               blk_run_backing_dev(mapping->backing_dev_info, NULL);
-#endif
 }
 EXPORT_SYMBOL_GPL(page_cache_async_readahead);
index 91ce9a1024d7f87abab6c68b9d53e73e857f1fc0..58da7c150ba6d7b4132f423663ff920345038d7c 100644 (file)
@@ -224,7 +224,6 @@ static const struct vm_operations_struct shmem_vm_ops;
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
        .ra_pages       = 0,    /* No readahead */
        .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
-       .unplug_io_fn   = default_unplug_io_fn,
 };
 
 static LIST_HEAD(shmem_swaplist);
index 5c8cfabbc9bc3abdbf7f342656ea8c58b727aae8..46680461785bef647c3618bc7d493d1909aea80f 100644 (file)
 
 /*
  * swapper_space is a fiction, retained to simplify the path through
- * vmscan's shrink_page_list, to make sync_page look nicer, and to allow
- * future use of radix_tree tags in the swap cache.
+ * vmscan's shrink_page_list.
  */
 static const struct address_space_operations swap_aops = {
        .writepage      = swap_writepage,
-       .sync_page      = block_sync_page,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .migratepage    = migrate_page,
 };
@@ -37,7 +35,6 @@ static const struct address_space_operations swap_aops = {
 static struct backing_dev_info swap_backing_dev_info = {
        .name           = "swap",
        .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
-       .unplug_io_fn   = swap_unplug_io_fn,
 };
 
 struct address_space swapper_space = {
index 039e61677635a815e8165b365f521034dd20bf95..8c6b3ce38f09aa0e4f824ce3f70e6e9254a87724 100644 (file)
@@ -94,39 +94,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
        return ret;
 }
 
-/*
- * We need this because the bdev->unplug_fn can sleep and we cannot
- * hold swap_lock while calling the unplug_fn. And swap_lock
- * cannot be turned into a mutex.
- */
-static DECLARE_RWSEM(swap_unplug_sem);
-
-void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
-{
-       swp_entry_t entry;
-
-       down_read(&swap_unplug_sem);
-       entry.val = page_private(page);
-       if (PageSwapCache(page)) {
-               struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
-               struct backing_dev_info *bdi;
-
-               /*
-                * If the page is removed from swapcache from under us (with a
-                * racy try_to_unuse/swapoff) we need an additional reference
-                * count to avoid reading garbage from page_private(page) above.
-                * If the WARN_ON triggers during a swapoff it maybe the race
-                * condition and it's harmless. However if it triggers without
-                * swapoff it signals a problem.
-                */
-               WARN_ON(page_count(page) <= 1);
-
-               bdi = bdev->bd_inode->i_mapping->backing_dev_info;
-               blk_run_backing_dev(bdi, page);
-       }
-       up_read(&swap_unplug_sem);
-}
-
 /*
  * swapon tell device that all the old swap contents can be discarded,
  * to allow the swap device to optimize its wear-levelling.
@@ -1662,10 +1629,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                goto out_dput;
        }
 
-       /* wait for any unplug function to finish */
-       down_write(&swap_unplug_sem);
-       up_write(&swap_unplug_sem);
-
        destroy_swap_extents(p);
        if (p->flags & SWP_CONTINUED)
                free_swap_count_continuations(p);
index 060e4c1914033fe61b2f06a445d6935b559eba2f..f73b8657c2d03053f9b0340a55132ee2d3ebd2f0 100644 (file)
@@ -358,7 +358,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
 static void handle_write_error(struct address_space *mapping,
                                struct page *page, int error)
 {
-       lock_page_nosync(page);
+       lock_page(page);
        if (page_mapping(page) == mapping)
                mapping_set_error(mapping, error);
        unlock_page(page);